diff options
author | Galen Guyer <galen@galenguyer.com> | 2021-02-26 10:51:43 -0500 |
---|---|---|
committer | Galen Guyer <galen@galenguyer.com> | 2021-02-26 10:51:43 -0500 |
commit | 0d57a01ba2c4da533ad4459fcf5a45b79a17cd53 (patch) | |
tree | d61df9691f62952643b5062b32aaac4fdb09df6e | |
parent | 79f606ccb9a190d9efde490c1c4a3121815544fe (diff) |
Add background loop and dedup
-rw-r--r-- | poller/__init__.py | 91 | ||||
-rw-r--r-- | poller/models.py | 2 |
2 files changed, 92 insertions, 1 deletions
diff --git a/poller/__init__.py b/poller/__init__.py index 1e1580b..2562f2d 100644 --- a/poller/__init__.py +++ b/poller/__init__.py @@ -3,10 +3,20 @@ Startup code """ import os +import json import logging +import requests +import datetime +import threading from flask import Flask +from bs4 import BeautifulSoup from flask_sqlalchemy import SQLAlchemy + +POOL_TIME = 5 * 60 # Seconds +DASHBOARD_URL = 'https://rit.edu/ready/spring-dashboard' +DATA_THREAD = threading.Thread() + APP = Flask(__name__) # Load default configuration and any environment variable overrides @@ -33,3 +43,84 @@ from . import commands from . import routes db.create_all() + +from .models import Day + +def data_are_same(old, new): + return old.total_students == new.total_students and \ + old.total_staff == new.total_staff and \ + old.new_students == new.new_students and \ + old.new_staff == new.new_staff and \ + old.quarantine_on_campus == new.quarantine_on_campus and \ + old.quarantine_off_campus == new.quarantine_off_campus and \ + old.isolation_on_campus == new.isolation_on_campus and \ + old.isolation_off_campus == new.isolation_off_campus and \ + old.beds_available == new.beds_available and \ + old.tests_administered == new.tests_administered and \ + old.alert_level == new.alert_level + + +def get_data(): + print('fetching data') + global DATA_THREAD + DATA_THREAD = threading.Timer(POOL_TIME, get_data, ()) + DATA_THREAD.start() + page = requests.get(DASHBOARD_URL, headers={'Cache-Control': 'no-cache'}) + soup = BeautifulSoup(page.content, 'html.parser') + total_students = int(soup.find('div', attrs={'class': 'statistic-13872'}).find_all("p", attrs={'class': 'card-header'})[0].text.strip()) + total_staff = int(soup.find('div', attrs={'class': 'statistic-13875'}).find_all("p", attrs={'class': 'card-header'})[0].text.strip()) + new_students = int(soup.find('div', attrs={'class': 'statistic-14332'}).find_all("p", attrs={'class': 'card-header'})[0].text.strip()) + new_staff = int(soup.find('div', attrs={'class': 'statistic-14335'}).find_all("p", attrs={'class': 'card-header'})[0].text.strip()) + quarantine_on_campus = int(soup.find('div', attrs={'class': 'statistic-13893'}).find_all("p", attrs={'class': 'card-header'})[0].text.strip()) + quarantine_off_campus = int(soup.find('div', attrs={'class': 'statistic-13896'}).find_all("p", attrs={'class': 'card-header'})[0].text.strip()) + isolation_on_campus = int(soup.find('div', attrs={'class': 'statistic-13905'}).find_all("p", attrs={'class': 'card-header'})[0].text.strip()) + isolation_off_campus = int(soup.find('div', attrs={'class': 'statistic-13908'}).find_all("p", attrs={'class': 'card-header'})[0].text.strip()) + beds_available = int(soup.find('div', attrs={'class': 'statistic-13935'}).find_all("p", attrs={'class': 'card-header'})[0].text.strip().strip('%')) + tests_administered = int(soup.find('div', attrs={'class': 'statistic-13923'}).find_all("p", attrs={'class': 'card-header'})[0].text.strip().replace("*", " ").replace(",", "")) + container = soup.find('div', attrs={'id': 'pandemic-message-container'}) + alert_level = container.find('a').text + color = "" + if "Green" in alert_level: + color = 'green' + elif "Yellow" in alert_level: + color = 'yellow' + elif "Orange" in alert_level: + color = 'orange' + elif "Red" in alert_level: + color = 'red' + + fall_data = None + with open('history/fall-2020.json', 'r') as fd: + fall_data = json.loads(fd.read()) + current_data = Day( + last_updated=datetime.datetime.now(), + alert_level=color, + beds_available=beds_available, + isolation_off_campus=isolation_off_campus, + isolation_on_campus=isolation_on_campus, + new_staff=new_staff, + new_students=new_students, + quarantine_off_campus=quarantine_off_campus, + quarantine_on_campus=quarantine_on_campus, + tests_administered=tests_administered + fall_data['tests_administered'], + total_staff=total_staff + fall_data['total_staff'], + total_students=total_students + fall_data['total_students']) + print(current_data.serialize()) + if not data_are_same(Day.get_all()[-1], current_data): + db.session.add(current_data) + dedup() + return current_data + +def dedup(): + data = Day.get_all() + # get first date + starting_date = data[-1].serialize()['last_updated'].split(' ')[0] + for i in range(len(data)-2, 0, -1): + if data[i].serialize()['last_updated'].split(' ')[0] != starting_date: + starting_date = data[i].serialize()['last_updated'].split(' ')[0] + else: + db.session.delete(data[i]) + print('dropped ' + data[i].serialize()['last_updated']) + db.session.commit() + +get_data()
\ No newline at end of file diff --git a/poller/models.py b/poller/models.py index 15b223d..4bcca06 100644 --- a/poller/models.py +++ b/poller/models.py @@ -38,7 +38,7 @@ class Day(db.Model): used for json serialization """ return { - 'last_updated': self.last_updated, + 'last_updated': self.last_updated.strftime('%Y-%m-%d %H:%M:%S'), 'alert_level': self.alert_level, 'beds_available': self.beds_available, 'isolation_off_campus': self.isolation_off_campus, |