diff options
author | Galen Guyer <galen@galenguyer.com> | 2021-03-01 20:02:34 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-01 20:02:34 -0500 |
commit | 8139cc88f9a2ce69909b1c9ed57b70fda15dbbf8 (patch) | |
tree | d61df9691f62952643b5062b32aaac4fdb09df6e /poller | |
parent | d1bbe068e77e2504d6e300fdf586ddd8afc29a2f (diff) | |
parent | 0d57a01ba2c4da533ad4459fcf5a45b79a17cd53 (diff) |
Merge pull request #1 from galenguyer/v2
v2 - SQLAlchemy
Diffstat (limited to 'poller')
-rw-r--r-- | poller/__init__.py | 248 | ||||
-rw-r--r-- | poller/commands.py | 36 | ||||
-rw-r--r-- | poller/db.py | 67 | ||||
-rw-r--r-- | poller/dedup.py | 71 | ||||
-rw-r--r-- | poller/models.py | 53 | ||||
-rw-r--r-- | poller/routes.py | 11 |
6 files changed, 186 insertions, 300 deletions
diff --git a/poller/__init__.py b/poller/__init__.py index ebb2653..2562f2d 100644 --- a/poller/__init__.py +++ b/poller/__init__.py @@ -1,75 +1,70 @@ -""" A small flask Hello World """ +""" +Startup code +""" import os -import threading -import sqlite3 -import atexit -import datetime import json - -from flask import Flask, jsonify, request, make_response +import logging import requests +import datetime +import threading +from flask import Flask from bs4 import BeautifulSoup +from flask_sqlalchemy import SQLAlchemy -from .dedup import dedup -from .db import create_tables, get_all_from_db, get_latest_from_db POOL_TIME = 5 * 60 # Seconds DASHBOARD_URL = 'https://rit.edu/ready/spring-dashboard' -LATEST_DATA = None -data_thread = threading.Thread() -db_lock = threading.Lock() - -if not os.path.exists('./data'): - os.mkdir('./data') - -def update_db(): - print('updating db') - with db_lock: - db_conn = sqlite3.connect('data/data.sqlite3') - c = db_conn.cursor() - sql = f'INSERT INTO `alertlevel` VALUES (datetime(\'now\'), \'{LATEST_DATA["alert_level"]}\');' - c.execute(sql) - sql = f'INSERT INTO `total` VALUES (datetime(\'now\'), {LATEST_DATA["total_students"]}, {LATEST_DATA["total_staff"]});' - c.execute(sql) - sql = f'INSERT INTO `new` VALUES (datetime(\'now\'), {LATEST_DATA["new_students"]}, {LATEST_DATA["new_staff"]});' - c.execute(sql) - sql = f'INSERT INTO `quarantine` VALUES (datetime(\'now\'), {LATEST_DATA["quarantine_on_campus"]}, {LATEST_DATA["quarantine_off_campus"]});' - c.execute(sql) - sql = f'INSERT INTO `isolation` VALUES (datetime(\'now\'), {LATEST_DATA["isolation_on_campus"]}, {LATEST_DATA["isolation_off_campus"]});' - c.execute(sql) - sql = f'INSERT INTO `beds` VALUES (datetime(\'now\'), {LATEST_DATA["beds_available"]});' - c.execute(sql) - sql = f'INSERT INTO `tests` VALUES (datetime(\'now\'), {LATEST_DATA["tests_administered"]});' - c.execute(sql) - db_conn.commit() - db_conn.close() - dedup() - -def data_is_same(current_data): - global LATEST_DATA - if LATEST_DATA is None or current_data is None: - return False - for key in list(LATEST_DATA.keys()): - if key != 'last_updated' and current_data[key] != LATEST_DATA[key]: - return False - return True - -def db_is_same(current_data): - latest_data = get_latest_from_db() - if latest_data is None or current_data is None: - return False - for key in list(latest_data.keys()): - if key != 'last_updated' and current_data[key] != latest_data[key]: - return False - return True +DATA_THREAD = threading.Thread() + +APP = Flask(__name__) + +# Load default configuration and any environment variable overrides +_root_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) +APP.config.from_pyfile(os.path.join(_root_dir, 'config.env.py')) + +# Load file based configuration overrides if present +_pyfile_config = os.path.join(_root_dir, 'config.py') +if os.path.exists(_pyfile_config): + APP.config.from_pyfile(_pyfile_config) + +# Logger configuration +logging.getLogger().setLevel(APP.config['LOG_LEVEL']) +#pylint: disable=no-member +APP.logger.info('Launching rit-covid-poller v' + APP.config['VERSION']) + +db = SQLAlchemy(APP) +APP.logger.info('SQLAlchemy pointed at ' + repr(db.engine.url)) +#pylint: enable=no-member + +#pylint: disable=wrong-import-position +from . import models +from . import commands +from . import routes + +db.create_all() + +from .models import Day + +def data_are_same(old, new): + return old.total_students == new.total_students and \ + old.total_staff == new.total_staff and \ + old.new_students == new.new_students and \ + old.new_staff == new.new_staff and \ + old.quarantine_on_campus == new.quarantine_on_campus and \ + old.quarantine_off_campus == new.quarantine_off_campus and \ + old.isolation_on_campus == new.isolation_on_campus and \ + old.isolation_off_campus == new.isolation_off_campus and \ + old.beds_available == new.beds_available and \ + old.tests_administered == new.tests_administered and \ + old.alert_level == new.alert_level + def get_data(): print('fetching data') - global data_thread - data_thread = threading.Timer(POOL_TIME, get_data, ()) - data_thread.start() - create_tables() + global DATA_THREAD + DATA_THREAD = threading.Timer(POOL_TIME, get_data, ()) + DATA_THREAD.start() page = requests.get(DASHBOARD_URL, headers={'Cache-Control': 'no-cache'}) soup = BeautifulSoup(page.content, 'html.parser') total_students = int(soup.find('div', attrs={'class': 'statistic-13872'}).find_all("p", attrs={'class': 'card-header'})[0].text.strip()) @@ -83,7 +78,7 @@ def get_data(): beds_available = int(soup.find('div', attrs={'class': 'statistic-13935'}).find_all("p", attrs={'class': 'card-header'})[0].text.strip().strip('%')) tests_administered = int(soup.find('div', attrs={'class': 'statistic-13923'}).find_all("p", attrs={'class': 'card-header'})[0].text.strip().replace("*", " ").replace(",", "")) container = soup.find('div', attrs={'id': 'pandemic-message-container'}) - alert_level = container.find("a").text + alert_level = container.find('a').text color = "" if "Green" in alert_level: color = 'green' @@ -93,110 +88,39 @@ def get_data(): color = 'orange' elif "Red" in alert_level: color = 'red' - global LATEST_DATA fall_data = None with open('history/fall-2020.json', 'r') as fd: fall_data = json.loads(fd.read()) - current_data = { - 'alert_level': color, - 'total_students': total_students + fall_data['total_students'], - 'total_staff': total_staff + fall_data['total_staff'], - 'new_students': new_students, - 'new_staff': new_staff, - 'quarantine_on_campus': quarantine_on_campus, - 'quarantine_off_campus': quarantine_off_campus, - 'isolation_on_campus': isolation_on_campus, - 'isolation_off_campus': isolation_off_campus, - 'beds_available': beds_available, - 'tests_administered': tests_administered + fall_data['tests_administered'], - 'last_updated': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') - } - LATEST_DATA = current_data - if not db_is_same(current_data): - update_db() + current_data = Day( + last_updated=datetime.datetime.now(), + alert_level=color, + beds_available=beds_available, + isolation_off_campus=isolation_off_campus, + isolation_on_campus=isolation_on_campus, + new_staff=new_staff, + new_students=new_students, + quarantine_off_campus=quarantine_off_campus, + quarantine_on_campus=quarantine_on_campus, + tests_administered=tests_administered + fall_data['tests_administered'], + total_staff=total_staff + fall_data['total_staff'], + total_students=total_students + fall_data['total_students']) + print(current_data.serialize()) + if not data_are_same(Day.get_all()[-1], current_data): + db.session.add(current_data) + dedup() return current_data -get_data() - -APP = Flask(__name__) - -# Load file based configuration overrides if present -if os.path.exists(os.path.join(os.getcwd(), 'config.py')): - APP.config.from_pyfile(os.path.join(os.getcwd(), 'config.py')) -else: - APP.config.from_pyfile(os.path.join(os.getcwd(), 'config.env.py')) - -APP.secret_key = APP.config['SECRET_KEY'] - -@APP.route('/api/v0/latest') -def _api_v0_latest(): - return jsonify(LATEST_DATA) - -@APP.route('/api/v0/latestdb') -def _api_v0_latestdb(): - data = get_latest_from_db() - return jsonify(data) - -@APP.route('/api/v0/history') -def _api_v0_history(): - data = get_all_from_db() - return jsonify(data) - -@APP.route('/api/v0/difference') -def _api_v0_difference(): - data = get_all_from_db() - latest = data[-1] - prev = data[-2] - data = { - 'alert_level': f'{prev["alert_level"]} -> {latest["alert_level"]}', - 'total_students': latest["total_students"] - prev["total_students"], - 'total_staff': latest["total_staff"] - prev["total_staff"], - 'new_students': latest["new_students"] - prev["new_students"], - 'new_staff': latest["new_staff"] - prev["new_staff"], - 'quarantine_on_campus': latest["quarantine_on_campus"] - prev["quarantine_on_campus"], - 'quarantine_off_campus': latest["quarantine_off_campus"] - prev["quarantine_off_campus"], - 'isolation_on_campus': latest["isolation_on_campus"] - prev["isolation_on_campus"], - 'isolation_off_campus': latest["isolation_off_campus"] - prev["isolation_off_campus"], - 'beds_available': latest["beds_available"] - prev["beds_available"], - 'tests_administered': latest["tests_administered"] - prev["tests_administered"], - } - return jsonify(data) - -@APP.route('/api/v0/diff') -def _api_v0_diff(): - first = request.args.get('first') - last = request.args.get('last') - data = get_all_from_db() - if first is None: - first = 0 - else: - try: - first = int(first) - except: - first = 0 - if last is None: - last = len(data) - 1 - else: - try: - last = int(last) - except: - last = len(data) - 1 - latest = data[last] - prev = data[first] - data = { - 'alert_level': f'{prev["alert_level"]} -> {latest["alert_level"]}', - 'total_students': latest["total_students"] - prev["total_students"], - 'total_staff': latest["total_staff"] - prev["total_staff"], - 'new_students': latest["new_students"] - prev["new_students"], - 'new_staff': latest["new_staff"] - prev["new_staff"], - 'quarantine_on_campus': latest["quarantine_on_campus"] - prev["quarantine_on_campus"], - 'quarantine_off_campus': latest["quarantine_off_campus"] - prev["quarantine_off_campus"], - 'isolation_on_campus': latest["isolation_on_campus"] - prev["isolation_on_campus"], - 'isolation_off_campus': latest["isolation_off_campus"] - prev["isolation_off_campus"], - 'beds_available': latest["beds_available"] - prev["beds_available"], - 'tests_administered': latest["tests_administered"] - prev["tests_administered"], - 'description': f'day {first} to {last}' - } - return jsonify(data) - +def dedup(): + data = Day.get_all() + # get first date + starting_date = data[-1].serialize()['last_updated'].split(' ')[0] + for i in range(len(data)-2, 0, -1): + if data[i].serialize()['last_updated'].split(' ')[0] != starting_date: + starting_date = data[i].serialize()['last_updated'].split(' ')[0] + else: + db.session.delete(data[i]) + print('dropped ' + data[i].serialize()['last_updated']) + db.session.commit() + +get_data()
\ No newline at end of file diff --git a/poller/commands.py b/poller/commands.py new file mode 100644 index 0000000..fc45dc7 --- /dev/null +++ b/poller/commands.py @@ -0,0 +1,36 @@ +""" +CLI commands for data management +""" +import json +import click +from dateutil import parser + +from . import APP, db +from .models import Day + +@APP.cli.command('import-history') +@click.argument('history_file') +def import_history(history_file): + """ + Import history from JSON file + """ + data = '{}' + with open(history_file, 'r') as file: + data = file.read() + parsed = json.loads(data) + for item in parsed: + if not parser.parse(item['last_updated']) in [day.last_updated for day in Day.get_all()]: + db.session.add(Day( + last_updated=parser.parse(item['last_updated']), + alert_level=item['alert_level'], + beds_available=item['beds_available'], + isolation_off_campus=item['isolation_off_campus'], + isolation_on_campus=item['isolation_on_campus'], + new_staff=item['new_staff'], + new_students=item['new_students'], + quarantine_off_campus=item['quarantine_off_campus'], + quarantine_on_campus=item['quarantine_on_campus'], + tests_administered=item['tests_administered'], + total_staff=item['total_staff'], + total_students=item['total_students'])) + db.session.commit() diff --git a/poller/db.py b/poller/db.py deleted file mode 100644 index d3836a7..0000000 --- a/poller/db.py +++ /dev/null @@ -1,67 +0,0 @@ -import threading -import sqlite3 - -db_lock = threading.Lock() - -def create_tables(): - with db_lock: - db_conn = sqlite3.connect('data/data.sqlite3') - c = db_conn.cursor() - sql = f'CREATE TABLE IF NOT EXISTS `alertlevel` (time DATETIME PRIMARY KEY NOT NULL, color CHAR(50) NOT NULL);' - c.execute(sql) - sql = f'CREATE TABLE IF NOT EXISTS `total` (time DATETIME PRIMARY KEY NOT NULL, total_students INT NOT NULL, total_staff INT NOT NULL);' - c.execute(sql) - sql = f'CREATE TABLE IF NOT EXISTS `new` (time DATETIME PRIMARY KEY NOT NULL, new_students INT NOT NULL, new_staff INT NOT NULL);' - c.execute(sql) - sql = f'CREATE TABLE IF NOT EXISTS `quarantine` (time DATETIME PRIMARY KEY NOT NULL, quarantine_on_campus INT NOT NULL, quarantine_off_campus INT NOT NULL);' - c.execute(sql) - sql = f'CREATE TABLE IF NOT EXISTS `isolation` (time DATETIME PRIMARY KEY NOT NULL, isolation_on_campus INT NOT NULL, isolation_off_campus INT NOT NULL);' - c.execute(sql) - sql = f'CREATE TABLE IF NOT EXISTS `beds` (time DATETIME PRIMARY KEY NOT NULL, beds_available INT NOT NULL);' - c.execute(sql) - sql = f'CREATE TABLE IF NOT EXISTS `tests` (time DATETIME PRIMARY KEY NOT NULL, tests_administered INT NOT NULL);' - c.execute(sql) - db_conn.commit() - db_conn.close() - - -def get_latest_from_db(): - return get_all_from_db()[-1] - -def get_all_from_db(): - with db_lock: - db_conn = sqlite3.connect('data/data.sqlite3') - c = db_conn.cursor() - sql = 'SELECT alertlevel.time, alertlevel.color, total.total_students, total.total_staff, new.new_students, new.new_staff, ' + \ - 'quarantine.quarantine_on_campus, quarantine.quarantine_off_campus, isolation.isolation_on_campus, isolation.isolation_off_campus, ' + \ - 'beds.beds_available, tests.tests_administered ' + \ - 'FROM `alertlevel` ' + \ - 'INNER JOIN `total` ' + \ - 'ON alertlevel.time = total.time ' + \ - 'INNER JOIN `new` ' + \ - 'ON alertlevel.time = new.time ' + \ - 'INNER JOIN `quarantine` ' + \ - 'ON alertlevel.time = quarantine.time ' + \ - 'INNER JOIN `isolation` ' + \ - 'ON alertlevel.time = isolation.time ' + \ - 'INNER JOIN `beds` ' + \ - 'ON alertlevel.time = beds.time ' + \ - 'INNER JOIN `tests` ' + \ - 'ON alertlevel.time = tests.time' - c.execute(sql) - - data = [{ - 'alert_level': d[1], - 'total_students': d[2], - 'total_staff': d[3], - 'new_students': d[4], - 'new_staff': d[5], - 'quarantine_on_campus': d[6], - 'quarantine_off_campus': d[7], - 'isolation_on_campus': d[8], - 'isolation_off_campus': d[9], - 'beds_available': d[10], - 'tests_administered': d[11], - 'last_updated': d[0] - } for d in c.fetchall()] - return data diff --git a/poller/dedup.py b/poller/dedup.py deleted file mode 100644 index ba68be8..0000000 --- a/poller/dedup.py +++ /dev/null @@ -1,71 +0,0 @@ -import sqlite3 - -def get_all_from_db(): - db_conn = sqlite3.connect('data/data.sqlite3') - c = db_conn.cursor() - sql = 'SELECT alertlevel.time, alertlevel.color, total.total_students, total.total_staff, new.new_students, new.new_staff, ' + \ - 'quarantine.quarantine_on_campus, quarantine.quarantine_off_campus, isolation.isolation_on_campus, isolation.isolation_off_campus, ' + \ - 'beds.beds_available, tests.tests_administered ' + \ - 'FROM `alertlevel` ' + \ - 'INNER JOIN `total` ' + \ - 'ON alertlevel.time = total.time ' + \ - 'INNER JOIN `new` ' + \ - 'ON alertlevel.time = new.time ' + \ - 'INNER JOIN `quarantine` ' + \ - 'ON alertlevel.time = quarantine.time ' + \ - 'INNER JOIN `isolation` ' + \ - 'ON alertlevel.time = isolation.time ' + \ - 'INNER JOIN `beds` ' + \ - 'ON alertlevel.time = beds.time ' + \ - 'INNER JOIN `tests` ' + \ - 'ON alertlevel.time = tests.time' - c.execute(sql) - - data = [{ - 'alert_level': d[1], - 'total_students': d[2], - 'total_staff': d[3], - 'new_students': d[4], - 'new_staff': d[5], - 'quarantine_on_campus': d[6], - 'quarantine_off_campus': d[7], - 'isolation_on_campus': d[8], - 'isolation_off_campus': d[9], - 'beds_available': d[10], - 'tests_administered': d[11], - 'last_updated': d[0] - } for d in c.fetchall()] - return data - - -def drop_by_date(date): - db_conn = sqlite3.connect('data/data.sqlite3') - c = db_conn.cursor() - sql = f'DELETE FROM `alertlevel` WHERE time=\'{date}\';' - c.execute(sql) - sql = f'DELETE FROM `total` WHERE time=\'{date}\';' - c.execute(sql) - sql = f'DELETE FROM `new` WHERE time=\'{date}\';' - c.execute(sql) - sql = f'DELETE FROM `quarantine` WHERE time=\'{date}\';' - c.execute(sql) - sql = f'DELETE FROM `isolation` WHERE time=\'{date}\';' - c.execute(sql) - sql = f'DELETE FROM `beds` WHERE time=\'{date}\';' - c.execute(sql) - sql = f'DELETE FROM `tests` WHERE time=\'{date}\';' - c.execute(sql) - db_conn.commit() - db_conn.close() - - -def dedup(): - data = get_all_from_db() - # get first date - starting_date = data[-1]['last_updated'].split(' ')[0] - for i in range(len(data)-2, 0, -1): - if data[i]['last_updated'].split(' ')[0] != starting_date: - starting_date = data[i]['last_updated'].split(' ')[0] - else: - drop_by_date(data[i]['last_updated']) - print('dropped ' + data[i]['last_updated']) diff --git a/poller/models.py b/poller/models.py new file mode 100644 index 0000000..4bcca06 --- /dev/null +++ b/poller/models.py @@ -0,0 +1,53 @@ +""" +Defines models +""" +from datetime import datetime +from sqlalchemy import Column, Integer, String, DateTime + +from . import db + +#pylint: disable=too-few-public-methods +class Day(db.Model): + """ + Model for each day's data + """ + __tablename__ = 'days' + last_updated = Column(DateTime, default=datetime.now, onupdate=datetime.now, \ + primary_key=True, nullable=False) + alert_level = Column(String(16), nullable=False) + beds_available = Column(Integer, nullable=False) + isolation_off_campus = Column(Integer, nullable=False) + isolation_on_campus = Column(Integer, nullable=False) + new_staff = Column(Integer, nullable=False) + new_students = Column(Integer, nullable=False) + quarantine_off_campus = Column(Integer, nullable=False) + quarantine_on_campus = Column(Integer, nullable=False) + tests_administered = Column(Integer, nullable=False) + total_staff = Column(Integer, nullable=False) + total_students = Column(Integer, nullable=False) + + @classmethod + def get_all(cls): + """ + Helper to get all values from the database + """ + return cls.query.all() + + def serialize(self): + """ + used for json serialization + """ + return { + 'last_updated': self.last_updated.strftime('%Y-%m-%d %H:%M:%S'), + 'alert_level': self.alert_level, + 'beds_available': self.beds_available, + 'isolation_off_campus': self.isolation_off_campus, + 'isolation_on_campus': self.isolation_on_campus, + 'new_staff': self.new_staff, + 'new_students': self.new_students, + 'quarantine_off_campus': self.quarantine_off_campus, + 'quarantine_on_campus': self.quarantine_on_campus, + 'tests_administered': self.tests_administered, + 'total_staff': self.total_staff, + 'total_students': self.total_students, + } diff --git a/poller/routes.py b/poller/routes.py new file mode 100644 index 0000000..6911fdd --- /dev/null +++ b/poller/routes.py @@ -0,0 +1,11 @@ +from flask import Flask, jsonify +from . import APP +from .models import Day + +@APP.route('/api/v0/history') +def _get_api_v0_history(): + return jsonify([day.serialize() for day in Day.get_all()]) + +@APP.route('/api/v0/latest') +def _get_api_v0_latest(): + return jsonify(Day.get_all()[-1].serialize()) |