diff options
author | Galen Guyer <galen@galenguyer.com> | 2022-12-22 18:10:11 -0800 |
---|---|---|
committer | Galen Guyer <galen@galenguyer.com> | 2022-12-22 18:10:11 -0800 |
commit | 9e57b57d7fefcdc599e463f6c964fe0ca87a1caf (patch) | |
tree | d8d764de2365881cf296d3948ab608f281b509b9 | |
parent | 0c15b0d4a2ee3cd52b4d9aecfcf007cd76a5b06d (diff) |
Handle weekly updates
-rw-r--r-- | Cargo.lock | 23 | ||||
-rw-r--r-- | Cargo.toml | 6 | ||||
-rw-r--r-- | migrations/01-create-db.sql | 5 | ||||
-rw-r--r-- | src/file.rs | 6 | ||||
-rw-r--r-- | src/load.rs | 17 | ||||
-rw-r--r-- | src/main.rs | 134 | ||||
-rw-r--r-- | src/meta.rs | 35 | ||||
-rw-r--r-- | src/types.rs | 10 |
8 files changed, 199 insertions, 37 deletions
@@ -134,14 +134,15 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.22" +version = "0.4.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfd4d1b31faaa3a89d7934dbded3111da0d2ef28e3ebccdb4f0179f5929d1ef1" +checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f" dependencies = [ "iana-time-zone", "js-sys", "num-integer", "num-traits", + "serde", "time", "wasm-bindgen", "winapi", @@ -965,18 +966,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.147" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d193d69bae983fc11a79df82342761dfbf28a99fc8d203dca4c3c1b590948965" +checksum = "256b9932320c590e707b94576e3cc1f7c9024d0ee6612dfbcf1cb106cbe8e055" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.147" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f1d362ca8fc9c3e3a7484440752472d68a6caa98f1ab81d99b5dfe517cec852" +checksum = "b4eae9b04cbffdfd550eb462ed33bc6a1b68c935127d008b27444d08380f94e4" dependencies = [ "proc-macro2", "quote", @@ -1153,9 +1154,9 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.103" +version = "1.0.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a864042229133ada95abf3b54fdc62ef5ccabe9515b64717bcb9a1919e59445d" +checksum = "60b9b43d45702de4c839cb9b51d9f529c5dd26a4aff255b42b1ebc03e88ee908" dependencies = [ "proc-macro2", "quote", @@ -1229,9 +1230,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "tokio" -version = "1.21.2" +version = "1.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9e03c497dc955702ba729190dc4aac6f2a0ce97f913e5b1b5912fc5039d9099" +checksum = "eab6d665857cc6ca78d6e80303a02cea7a7851e85dfbd77cbdc09bd129f1ef46" dependencies = [ "autocfg", "bytes", @@ -1244,7 +1245,7 @@ dependencies = [ "signal-hook-registry", "socket2", "tokio-macros", - "winapi", + "windows-sys", ] [[package]] @@ -6,14 +6,14 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -chrono = "0.4.22" +chrono = {version = "0.4.23", features = ["serde"]} csv = "1.1.6" filetime = "0.2.18" indicatif = "0.17.2" itertools = "0.10.5" regex = "1.7.0" -serde = { version = "1.0.147", features = ["derive"] } +serde = { version = "1.0.149", features = ["derive"] } sqlx = { version = "0.6.2", features = ["sqlite", "runtime-tokio-rustls", "chrono"] } -tokio = { version = "1.21.2", features = ["full"] } +tokio = { version = "1.23.0", features = ["full"] } ureq = "2.5.0" zip = { version = "0.6.3", default-features = false, features = ["deflate"] } diff --git a/migrations/01-create-db.sql b/migrations/01-create-db.sql index d0c0cee..247a3d8 100644 --- a/migrations/01-create-db.sql +++ b/migrations/01-create-db.sql @@ -1,9 +1,10 @@ .echo on CREATE TABLE IF NOT EXISTS updates ( - id SERIAL PRIMARY KEY, + id INTEGER PRIMARY KEY, daily BOOLEAN NOT NULL, - date datetime NOT NULL, + weekly BOOLEAN NOT NULL, + date datetime NOT NULL ); CREATE TABLE IF NOT EXISTS amateurs ( diff --git a/src/file.rs b/src/file.rs index 3dcccd5..c8d8d3d 100644 --- a/src/file.rs +++ b/src/file.rs @@ -142,12 +142,16 @@ pub fn download_file(url: &str, file_name: Option<&str>) -> Result<File, ()> { } output_file.flush().expect("Error flushing output file"); + if let Some(modified) = last_modified { + filetime::set_file_mtime(&output_file_name, FileTime::from_unix_time(modified, 0)) + .expect("Error setting file mtime"); + } progress_bar.finish(); Ok(fs::File::open(&output_file_name).expect("Error opening output file")) } -pub fn unzip_file(zip_file: File) -> Result<(), ()> { +pub fn unzip_file(zip_file: &File) -> Result<(), ()> { let mut archive = zip::ZipArchive::new(zip_file).expect("Error opening zip archive"); let progress_bar = ProgressBar::new(archive.len().try_into().unwrap()); diff --git a/src/load.rs b/src/load.rs index cde0d62..b3bb6ee 100644 --- a/src/load.rs +++ b/src/load.rs @@ -92,6 +92,7 @@ pub async fn load_amateurs(db: &SqlitePool) { .commit() .await .expect("Error committing transaction"); + std::fs::remove_file("AM.dat").expect("Error deleting AM.dat"); progress_bar.finish(); } @@ -157,7 +158,7 @@ pub async fn load_comments(db: &SqlitePool) { .commit() .await .expect("Error committing transaction"); - + std::fs::remove_file("CO.dat").expect("Error deleting CO.dat"); progress_bar.finish(); } @@ -245,6 +246,7 @@ pub async fn load_entities(db: &SqlitePool) { .commit() .await .expect("Error committing transaction"); + std::fs::remove_file("EN.dat").expect("Error deleting EN.dat"); progress_bar.finish(); } @@ -361,7 +363,7 @@ pub async fn load_headers(db: &SqlitePool) { .commit() .await .expect("Error committing transaction"); - + std::fs::remove_file("HD.dat").expect("Error deleting HD.dat"); progress_bar.finish(); } @@ -425,7 +427,7 @@ pub async fn load_history(db: &SqlitePool) { .commit() .await .expect("Error committing transaction"); - + std::fs::remove_file("HS.dat").expect("Error deleting HS.dat"); progress_bar.finish(); } @@ -493,7 +495,7 @@ pub async fn load_license_attachments(db: &SqlitePool) { .commit() .await .expect("Error committing transaction"); - + std::fs::remove_file("LA.dat").expect("Error deleting LA.dat"); progress_bar.finish(); } @@ -562,7 +564,7 @@ pub async fn load_special_conditions(db: &SqlitePool) { .commit() .await .expect("Error committing transaction"); - + std::fs::remove_file("SC.dat").expect("Error deleting SC.dat"); progress_bar.finish(); } @@ -633,7 +635,7 @@ pub async fn load_special_conditions_free_form(db: &SqlitePool) { .commit() .await .expect("Error committing transaction"); - + std::fs::remove_file("SF.dat").expect("Error deleting SF.dat"); progress_bar.finish(); } @@ -702,6 +704,7 @@ pub async fn load_special_condition_codes(db: &SqlitePool) { .commit() .await .expect("Error committing transaction"); - + std::fs::remove_file("special_condition_codes.txt") + .expect("Error deleting special_condition_codes.txt"); progress_bar.finish(); } diff --git a/src/main.rs b/src/main.rs index a240dd1..4152dbf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,17 +1,78 @@ +use chrono::{DateTime, Utc}; use regex::Regex; use sqlx::sqlite::SqlitePool; -use std::fs; +use std::{fs, os::unix::prelude::MetadataExt, time::Duration}; mod fcc_date; mod file; mod load; +mod meta; mod types; use file::{download_file, unzip_file}; +use types::Update; const WEEKLY_DUMP_URL: &str = "https://data.fcc.gov/download/pub/uls/complete/l_amat.zip"; +const SUNDAY_DUMP_URL: &str = "https://data.fcc.gov/download/pub/uls/daily/l_am_sun.zip"; +const MONDAY_DUMP_URL: &str = "https://data.fcc.gov/download/pub/uls/daily/l_am_mon.zip"; +const TUESDAY_DUMP_URL: &str = "https://data.fcc.gov/download/pub/uls/daily/l_am_tue.zip"; +const WEDNESDAY_DUMP_URL: &str = "https://data.fcc.gov/download/pub/uls/daily/l_am_wed.zip"; +const THURSDAY_DUMP_URL: &str = "https://data.fcc.gov/download/pub/uls/daily/l_am_thu.zip"; +const FRIDAY_DUMP_URL: &str = "https://data.fcc.gov/download/pub/uls/daily/l_am_fri.zip"; +const SATURDAY_DUMP_URL: &str = "https://data.fcc.gov/download/pub/uls/daily/l_am_sat.zip"; const SPECIAL_CONDITIONS_URL: &str = "https://www.fcc.gov/file/20669/download"; -async fn load_weekly(db: &SqlitePool) { +#[allow(dead_code)] +#[derive(Debug)] +struct FccUpdates { + weekly: Option<DateTime<Utc>>, + sunday: Option<DateTime<Utc>>, + monday: Option<DateTime<Utc>>, + tuesday: Option<DateTime<Utc>>, + wednesday: Option<DateTime<Utc>>, + thursday: Option<DateTime<Utc>>, + friday: Option<DateTime<Utc>>, + saturday: Option<DateTime<Utc>>, +} +impl FccUpdates { + fn new() -> Self { + Self { + weekly: get_last_updated_header(WEEKLY_DUMP_URL), + sunday: get_last_updated_header(SUNDAY_DUMP_URL), + monday: get_last_updated_header(MONDAY_DUMP_URL), + tuesday: get_last_updated_header(TUESDAY_DUMP_URL), + wednesday: get_last_updated_header(WEDNESDAY_DUMP_URL), + thursday: get_last_updated_header(THURSDAY_DUMP_URL), + friday: get_last_updated_header(FRIDAY_DUMP_URL), + saturday: get_last_updated_header(SATURDAY_DUMP_URL), + } + } +} + +fn get_last_updated_header(url: &str) -> Option<DateTime<Utc>> { + let resp = ureq::head(url).call().expect("Error downloading file"); + + assert!(resp.has("Content-Length")); + let len = resp + .header("Content-Length") + .unwrap_or("0") + .parse() + .unwrap_or(0); + + // This is the size given when there's no updates + if len <= 211 { + return None; + } + + match resp.header("Last-Modified") { + Some(last_mod) => match DateTime::parse_from_rfc2822(last_mod) { + Ok(dt) => Some(dt.into()), + Err(_) => None, + }, + None => None, + } +} + +async fn load_weekly(db: &SqlitePool) -> chrono::DateTime<Utc> { let output_file = download_file(WEEKLY_DUMP_URL, None).expect("Error downloading weekly dump file"); // Hardcoding this file name because it might change and I don't want to deal with that @@ -19,7 +80,8 @@ async fn load_weekly(db: &SqlitePool) { download_file(SPECIAL_CONDITIONS_URL, Some("special_condition_codes.txt")) .expect("Error downloading Special Conditions file"); - unzip_file(output_file).expect("Error unzipping file"); + unzip_file(&output_file).expect("Error unzipping file"); + std::fs::remove_file("counts").expect("Error deleting counts file"); // Some idiot at the FCC decided that unescaped newlines in the middle of a field were cool // Uncle Ted may have had some good ideas after all @@ -43,16 +105,23 @@ async fn load_weekly(db: &SqlitePool) { ) .expect("Error writing file"); - load::load_amateurs(&db).await; - load::load_comments(&db).await; - load::load_entities(&db).await; - load::load_headers(&db).await; - load::load_history(&db).await; - load::load_license_attachments(&db).await; - load::load_special_conditions(&db).await; - load::load_special_conditions_free_form(&db).await; + load::load_amateurs(db).await; + load::load_comments(db).await; + load::load_entities(db).await; + load::load_headers(db).await; + load::load_history(db).await; + load::load_license_attachments(db).await; + load::load_special_conditions(db).await; + load::load_special_conditions_free_form(db).await; + + load::load_special_condition_codes(db).await; - load::load_special_condition_codes(&db).await; + let meta = output_file.metadata().unwrap(); + std::fs::remove_file("l_amat.zip").expect("Error deleting l_amat.zip"); + + DateTime::<Utc>::from( + std::time::UNIX_EPOCH + Duration::from_secs(meta.mtime().try_into().unwrap()), + ) } #[tokio::main] @@ -61,5 +130,44 @@ async fn main() { .await .expect("Error connecting to database"); - load_weekly(&db).await; + let fcc_updates = dbg!(FccUpdates::new()); + + let last_weekly = meta::get_last_update(&db, meta::UpdateType::Weekly) + .await + .expect("Error getting last weekly update"); + + // if this is the first time the database is being updated + if last_weekly.is_none() { + println!("No weekly updates found, loading weekly dump"); + let update_date = load_weekly(&db).await; + meta::insert_update( + &db, + &Update { + id: 0, // placeholder + daily: false, + weekly: true, + date: update_date, + }, + ) + .await + .expect("Error inserting update"); + return; + } + let last_weekly = last_weekly.unwrap(); + + if fcc_updates.weekly.is_some() && fcc_updates.weekly.unwrap() > last_weekly.date { + println!("New weekly update found, loading weekly dump"); + let update_date = load_weekly(&db).await; + meta::insert_update( + &db, + &Update { + id: 0, // placeholder + daily: false, + weekly: true, + date: update_date, + }, + ) + .await + .expect("Error inserting update"); + } } diff --git a/src/meta.rs b/src/meta.rs new file mode 100644 index 0000000..6878afc --- /dev/null +++ b/src/meta.rs @@ -0,0 +1,35 @@ +use crate::types::Update; + +use sqlx::SqlitePool; + +#[allow(dead_code)] +pub enum UpdateType { + Daily, + Weekly, + Any, +} + +pub async fn get_last_update( + db: &SqlitePool, + update_type: UpdateType, +) -> Result<Option<Update>, sqlx::Error> { + let query_str = match update_type { + UpdateType::Daily => "SELECT * FROM updates WHERE daily = 1 ORDER BY id DESC LIMIT 1", + UpdateType::Weekly => "SELECT * FROM updates WHERE weekly = 1 ORDER BY id DESC LIMIT 1", + UpdateType::Any => "SELECT * FROM updates ORDER BY id DESC LIMIT 1", + }; + let update = sqlx::query_as::<_, Update>(query_str) + .fetch_optional(db) + .await?; + Ok(update) +} + +pub async fn insert_update(db: &SqlitePool, update: &Update) -> Result<(), sqlx::Error> { + sqlx::query("INSERT INTO updates (daily, weekly, date) VALUES (?, ?, ?)") + .bind(update.daily) + .bind(update.weekly) + .bind(update.date) + .execute(db) + .await?; + Ok(()) +} diff --git a/src/types.rs b/src/types.rs index 9d8fcf3..09e5d06 100644 --- a/src/types.rs +++ b/src/types.rs @@ -2,7 +2,17 @@ use crate::fcc_date; use chrono::NaiveDate; +use chrono::{DateTime, Utc}; use serde::Deserialize; +use sqlx::FromRow; + +#[derive(Debug, Deserialize, FromRow)] +pub struct Update { + pub id: u32, + pub daily: bool, + pub weekly: bool, + pub date: DateTime<Utc>, +} #[derive(Debug, Deserialize)] pub struct Amateur<'a> { |