aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Galen Guyer <galen@galenguyer.com> 2020-11-05 15:40:21 -0500
committer Galen Guyer <galen@galenguyer.com> 2020-11-05 15:40:21 -0500
commit f8628ca4473f655e05d031e312dd5e7f92ff148c (patch)
tree 9ddded40552f2a6b535af6a55c136d7cdc051ae2
parent 4bad72d853e05da08126ae24d353bfbed2c42d94 (diff)
move dedup into poller to run on insert
-rw-r--r-- poller/__init__.py 3
-rw-r--r-- poller/dedup.py (renamed from dedup.py) 21
2 files changed, 15 insertions, 9 deletions
diff --git a/poller/__init__.py b/poller/__init__.py
index 20e5cc8..f9d0d62 100644
--- a/poller/__init__.py
+++ b/poller/__init__.py
@@ -10,6 +10,8 @@ from flask import Flask, jsonify
import requests
from bs4 import BeautifulSoup
+from .dedup import dedup
+
POOL_TIME = 5 * 60 # Seconds
DASHBOARD_URL = 'https://rit.edu/ready/dashboard'
LATEST_DATA = None
@@ -64,6 +66,7 @@ def update_db():
c.execute(sql)
db_conn.commit()
db_conn.close()
+ dedup()
def get_latest_from_db():
with db_lock:
diff --git a/dedup.py b/poller/dedup.py
index cff72bb..9db5886 100644
--- a/dedup.py
+++ b/poller/dedup.py
@@ -59,12 +59,15 @@ def drop_by_date(date):
db_conn.close()
-data = get_all_from_db()
-# get first date
-starting_date = data[-1]['last_updated'].split(' ')[0]
-for i in range(len(data)-2, 0, -1):
- if data[i]['last_updated'].split(' ')[0] != starting_date:
- starting_date = data[i]['last_updated'].split(' ')[0]
- else:
- drop_by_date(data[i]['last_updated'])
- print('dropped ' + data[i]['last_updated'])
+def dedup():
+    """Collapse runs of same-date rows, keeping one row per run.
+
+    Walks the rows returned by ``get_all_from_db()`` from the last element
+    towards the front. The date of a row is the part of its
+    ``last_updated`` value before the first space. A row whose date differs
+    from the previously seen one is kept and becomes the new reference;
+    a row with the same date is removed via ``drop_by_date()`` (called with
+    the full ``last_updated`` value) and reported on stdout.
+
+    The last row is only used as the initial reference and the row at
+    index 0 is never examined, so neither of them is ever dropped.
+    Does nothing when there are no rows.
+    """
+    data = get_all_from_db()
+    if not data:
+        # Nothing stored yet; data[-1] below would raise IndexError.
+        return
+
+    # Date of the last row: the reference the walk starts from.
+    current_date = data[-1]['last_updated'].split(' ')[0]
+    # Same rows as indices len(data)-2 down to 1 (both ends excluded).
+    # NOTE(review): index 0 is skipped exactly as before -- confirm that
+    # this is intentional given the ordering get_all_from_db() returns.
+    for row in reversed(data[1:-1]):
+        row_date = row['last_updated'].split(' ')[0]
+        if row_date != current_date:
+            # First row seen for a new date: keep it.
+            current_date = row_date
+        else:
+            drop_by_date(row['last_updated'])
+            print('dropped ' + row['last_updated'])
+
+
+# NOTE(review): this call runs every time the module is imported, not only
+# when it is executed as a script -- confirm whether it should sit behind
+# an ``if __name__ == '__main__':`` guard.
+dedup()
\ No newline at end of file