From f8628ca4473f655e05d031e312dd5e7f92ff148c Mon Sep 17 00:00:00 2001 From: Galen Guyer Date: Thu, 5 Nov 2020 15:40:21 -0500 Subject: move dedup into poller to run on insert --- dedup.py | 70 --------------------------------------------------- poller/__init__.py | 3 +++ poller/dedup.py | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 70 deletions(-) delete mode 100644 dedup.py create mode 100644 poller/dedup.py diff --git a/dedup.py b/dedup.py deleted file mode 100644 index cff72bb..0000000 --- a/dedup.py +++ /dev/null @@ -1,70 +0,0 @@ -import sqlite3 - -def get_all_from_db(): - db_conn = sqlite3.connect('data/data.sqlite3') - c = db_conn.cursor() - sql = 'SELECT alertlevel.time, alertlevel.color, total.total_students, total.total_staff, new.new_students, new.new_staff, ' + \ - 'quarantine.quarantine_on_campus, quarantine.quarantine_off_campus, isolation.isolation_on_campus, isolation.isolation_off_campus, ' + \ - 'beds.beds_available, tests.tests_administered ' + \ - 'FROM `alertlevel` ' + \ - 'INNER JOIN `total` ' + \ - 'ON alertlevel.time = total.time ' + \ - 'INNER JOIN `new` ' + \ - 'ON alertlevel.time = new.time ' + \ - 'INNER JOIN `quarantine` ' + \ - 'ON alertlevel.time = quarantine.time ' + \ - 'INNER JOIN `isolation` ' + \ - 'ON alertlevel.time = isolation.time ' + \ - 'INNER JOIN `beds` ' + \ - 'ON alertlevel.time = beds.time ' + \ - 'INNER JOIN `tests` ' + \ - 'ON alertlevel.time = tests.time' - c.execute(sql) - - data = [{ - 'alert_level': d[1], - 'total_students': d[2], - 'total_staff': d[3], - 'new_students': d[4], - 'new_staff': d[5], - 'quarantine_on_campus': d[6], - 'quarantine_off_campus': d[7], - 'isolation_on_campus': d[8], - 'isolation_off_campus': d[9], - 'beds_available': d[10], - 'tests_administered': d[11], - 'last_updated': d[0] - } for d in c.fetchall()] - return data - - -def drop_by_date(date): - db_conn = sqlite3.connect('data/data.sqlite3') - c = db_conn.cursor() - sql = f'DELETE FROM `alertlevel` WHERE time=\'{date}\';' - c.execute(sql) - sql = f'DELETE FROM `total` WHERE time=\'{date}\';' - c.execute(sql) - sql = f'DELETE FROM `new` WHERE time=\'{date}\';' - c.execute(sql) - sql = f'DELETE FROM `quarantine` WHERE time=\'{date}\';' - c.execute(sql) - sql = f'DELETE FROM `isolation` WHERE time=\'{date}\';' - c.execute(sql) - sql = f'DELETE FROM `beds` WHERE time=\'{date}\';' - c.execute(sql) - sql = f'DELETE FROM `tests` WHERE time=\'{date}\';' - c.execute(sql) - db_conn.commit() - db_conn.close() - - -data = get_all_from_db() -# get first date -starting_date = data[-1]['last_updated'].split(' ')[0] -for i in range(len(data)-2, 0, -1): - if data[i]['last_updated'].split(' ')[0] != starting_date: - starting_date = data[i]['last_updated'].split(' ')[0] - else: - drop_by_date(data[i]['last_updated']) - print('dropped ' + data[i]['last_updated']) diff --git a/poller/__init__.py b/poller/__init__.py index 20e5cc8..f9d0d62 100644 --- a/poller/__init__.py +++ b/poller/__init__.py @@ -10,6 +10,8 @@ from flask import Flask, jsonify import requests from bs4 import BeautifulSoup +import dedup from .dedup + POOL_TIME = 5 * 60 # Seconds DASHBOARD_URL = 'https://rit.edu/ready/dashboard' LATEST_DATA = None @@ -64,6 +66,7 @@ def update_db(): c.execute(sql) db_conn.commit() db_conn.close() + dedup() def get_latest_from_db(): with db_lock: diff --git a/poller/dedup.py b/poller/dedup.py new file mode 100644 index 0000000..9db5886 --- /dev/null +++ b/poller/dedup.py @@ -0,0 +1,73 @@ +import sqlite3 + +def get_all_from_db(): + db_conn = sqlite3.connect('data/data.sqlite3') + c = db_conn.cursor() + sql = 'SELECT alertlevel.time, alertlevel.color, total.total_students, total.total_staff, new.new_students, new.new_staff, ' + \ + 'quarantine.quarantine_on_campus, quarantine.quarantine_off_campus, isolation.isolation_on_campus, isolation.isolation_off_campus, ' + \ + 'beds.beds_available, tests.tests_administered ' + \ + 'FROM `alertlevel` ' + \ + 'INNER JOIN `total` ' + \ + 'ON alertlevel.time = total.time ' + \ + 'INNER JOIN `new` ' + \ + 'ON alertlevel.time = new.time ' + \ + 'INNER JOIN `quarantine` ' + \ + 'ON alertlevel.time = quarantine.time ' + \ + 'INNER JOIN `isolation` ' + \ + 'ON alertlevel.time = isolation.time ' + \ + 'INNER JOIN `beds` ' + \ + 'ON alertlevel.time = beds.time ' + \ + 'INNER JOIN `tests` ' + \ + 'ON alertlevel.time = tests.time' + c.execute(sql) + + data = [{ + 'alert_level': d[1], + 'total_students': d[2], + 'total_staff': d[3], + 'new_students': d[4], + 'new_staff': d[5], + 'quarantine_on_campus': d[6], + 'quarantine_off_campus': d[7], + 'isolation_on_campus': d[8], + 'isolation_off_campus': d[9], + 'beds_available': d[10], + 'tests_administered': d[11], + 'last_updated': d[0] + } for d in c.fetchall()] + return data + + +def drop_by_date(date): + db_conn = sqlite3.connect('data/data.sqlite3') + c = db_conn.cursor() + sql = f'DELETE FROM `alertlevel` WHERE time=\'{date}\';' + c.execute(sql) + sql = f'DELETE FROM `total` WHERE time=\'{date}\';' + c.execute(sql) + sql = f'DELETE FROM `new` WHERE time=\'{date}\';' + c.execute(sql) + sql = f'DELETE FROM `quarantine` WHERE time=\'{date}\';' + c.execute(sql) + sql = f'DELETE FROM `isolation` WHERE time=\'{date}\';' + c.execute(sql) + sql = f'DELETE FROM `beds` WHERE time=\'{date}\';' + c.execute(sql) + sql = f'DELETE FROM `tests` WHERE time=\'{date}\';' + c.execute(sql) + db_conn.commit() + db_conn.close() + + +def dedup(): + data = get_all_from_db() + # get first date + starting_date = data[-1]['last_updated'].split(' ')[0] + for i in range(len(data)-2, 0, -1): + if data[i]['last_updated'].split(' ')[0] != starting_date: + starting_date = data[i]['last_updated'].split(' ')[0] + else: + drop_by_date(data[i]['last_updated']) + print('dropped ' + data[i]['last_updated']) + +dedup() \ No newline at end of file -- cgit v1.2.3