diff options
author | Galen Guyer <galen@galenguyer.com> | 2022-11-09 14:24:35 -0500 |
---|---|---|
committer | Galen Guyer <galen@galenguyer.com> | 2022-11-09 14:24:35 -0500 |
commit | 2d369519777ea969262dd5b22a8042415ecfe0a8 (patch) | |
tree | 407bdf7893f69961d7a03ad05308f21f866e72d0 | |
parent | cf660aee65c50eff461eaaf13852ae2d4d907842 (diff) |
Proper date serialization/deserialization and indexes
-rw-r--r-- | .gitignore | 3 | ||||
-rw-r--r-- | Cargo.lock | 1 | ||||
-rw-r--r-- | Cargo.toml | 2 | ||||
-rw-r--r-- | migrations/01-create-db.sql | 4 | ||||
-rw-r--r-- | migrations/02-create-indexes.sql | 33 | ||||
-rw-r--r-- | src/main.rs | 90 |
6 files changed, 115 insertions, 18 deletions
@@ -3,4 +3,5 @@ *.zip *.dat counts -*.db
\ No newline at end of file +*.db +*.db.zst @@ -1067,6 +1067,7 @@ dependencies = [ "bitflags", "byteorder", "bytes", + "chrono", "crc", "crossbeam-queue", "dotenvy", @@ -12,7 +12,7 @@ filetime = "0.2.18" indicatif = "0.17.1" regex = "1.7.0" serde = { version = "1.0.147", features = ["derive"] } -sqlx = { version = "0.6.2", features = ["sqlite", "runtime-tokio-rustls"] } +sqlx = { version = "0.6.2", features = ["sqlite", "runtime-tokio-rustls", "chrono"] } tokio = { version = "1.21.2", features = ["full"] } ureq = "2.5.0" zip = { version = "0.6.3", default-features = false, features = ["deflate"] } diff --git a/migrations/01-create-db.sql b/migrations/01-create-db.sql index ce7cd30..573952b 100644 --- a/migrations/01-create-db.sql +++ b/migrations/01-create-db.sql @@ -1,3 +1,5 @@ +.echo on + CREATE TABLE migrations ( id SERIAL PRIMARY KEY, name VARCHAR(255) NOT NULL, @@ -169,4 +171,4 @@ CREATE TABLE special_conditions_free_form ( status_date datetime ); INSERT INTO migrations (name) -VALUES ('01-create-db.sql');
\ No newline at end of file +VALUES ('01-create-db.sql'); diff --git a/migrations/02-create-indexes.sql b/migrations/02-create-indexes.sql new file mode 100644 index 0000000..903bfd8 --- /dev/null +++ b/migrations/02-create-indexes.sql @@ -0,0 +1,33 @@ +.echo on + +CREATE INDEX idx_amateurs_unique_system_identifier ON amateurs (unique_system_identifier); +CREATE INDEX idx_amateurs_call_sign ON amateurs (call_sign); +CREATE INDEX idx_amateurs_operator_class ON amateurs (operator_class); + +CREATE INDEX idx_comments_unique_system_identifier ON comments (unique_system_identifier); +CREATE INDEX idx_comments_call_sign ON comments (call_sign); + +CREATE INDEX idx_entities_unique_system_identifier ON entities (unique_system_identifier); +CREATE INDEX idx_entities_call_sign ON entities (call_sign); +CREATE INDEX idx_entities_entity_name ON entities (entity_name); +CREATE INDEX idx_entities_first_name ON entities (first_name); +CREATE INDEX idx_entities_last_name ON entities (last_name); +CREATE INDEX idx_entities_phone ON entities (phone); +CREATE INDEX idx_entities_email ON entities (email); +CREATE INDEX idx_entities_street_address ON entities (street_address); +CREATE INDEX idx_entities_city ON entities (city); +CREATE INDEX idx_entities_state ON entities (state); +CREATE INDEX idx_entities_zip_code ON entities (zip_code); +CREATE INDEX idx_entities_frn ON entities (frn); + +CREATE INDEX idx_headers_unique_system_identifier ON headers (unique_system_identifier); +CREATE INDEX idx_headers_call_sign ON headers (call_sign); +CREATE INDEX idx_headers_license_status ON headers (license_status); +CREATE INDEX idx_headers_grant_date ON headers (grant_date); +CREATE INDEX idx_headers_expired_date ON headers (expired_date); +CREATE INDEX idx_headers_certifier_first_name ON headers (certifier_first_name); +CREATE INDEX idx_headers_certifier_last_name ON headers (certifier_last_name); + + +CREATE INDEX idx_history_unique_system_identifier ON history (unique_system_identifier); +CREATE INDEX idx_history_call_sign ON history (call_sign); diff --git a/src/main.rs b/src/main.rs index 9487823..637333a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,4 @@ -use chrono::DateTime; +use chrono::{DateTime, NaiveDate}; use filetime::{self, FileTime}; use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle}; use regex::Regex; @@ -19,6 +19,53 @@ const INSERT_LICENSE_ATTACHMENT_SQL: &str = r"INSERT INTO license_attachments (r const INSERT_SPECIAL_CONDITION_SQL: &str = r"INSERT INTO special_conditions (record_type, unique_system_identifier, uls_file_number, ebf_number, call_sign, special_conditions_type, special_conditions_code, status_code, status_date) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"; const INSERT_SPECIAL_CONDITION_FREE_FORM_SQL: &str = r"INSERT INTO special_conditions_free_form (record_type, unique_system_identifier, uls_file_number, ebf_number, call_sign, license_free_form_type, unique_license_free_form_identifier, sequence_number, license_free_form_condition, status_code, status_date) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"; +#[allow(dead_code)] +mod fcc_date { + use chrono::NaiveDate; + use serde::{self, Deserialize, Deserializer, Serializer}; + + const FCC_FORMAT: &str = "%m/%d/%Y"; + const SQL_FORMAT: &str = "%Y-%m-%d"; + + // The signature of a serialize_with function must follow the pattern: + // + // fn serialize<S>(&T, S) -> Result<S::Ok, S::Error> + // where + // S: Serializer + // + // although it may also be generic over the input types T. + pub fn serialize<S>(date: &Option<NaiveDate>, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + let s = match date { + Some(date) => date.format(SQL_FORMAT).to_string(), + None => "".to_string(), + }; + serializer.serialize_str(&s) + } + + // The signature of a deserialize_with function must follow the pattern: + // + // fn deserialize<'de, D>(D) -> Result<T, D::Error> + // where + // D: Deserializer<'de> + // + // although it may also be generic over the output types T. + pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<NaiveDate>, D::Error> + where + D: Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + if s == "" { + return Ok(None); + } + NaiveDate::parse_from_str(&s, FCC_FORMAT) + .map(|date| Some(date)) + .map_err(serde::de::Error::custom) + } +} + #[allow(dead_code, non_snake_case)] #[derive(Debug, Deserialize)] struct Amateur<'a> { @@ -48,10 +95,12 @@ struct Comment<'a> { pub UniqueSystemIdentifier: &'a str, pub UlsFileNumber: &'a str, pub CallSign: &'a str, - pub CommentDate: &'a str, + #[serde(with = "fcc_date")] + pub CommentDate: Option<NaiveDate>, pub Description: &'a str, pub StatusCode: &'a str, - pub StatusDate: &'a str, + #[serde(with = "fcc_date")] + pub StatusDate: Option<NaiveDate>, } #[allow(dead_code, non_snake_case)] #[derive(Deserialize, Debug)] @@ -82,7 +131,8 @@ struct Entity<'a> { pub ApplicantTypeCode: &'a str, pub ApplicantTypeCodeOther: &'a str, pub StatusCode: &'a str, - pub StatusDate: &'a str, + #[serde(with = "fcc_date")] + pub StatusDate: Option<NaiveDate>, pub ThreePointSevenGhzLicenseType: &'a str, pub LinkedUniqueSystemIdentifier: &'a str, pub LinkedCallsign: &'a str, @@ -98,9 +148,12 @@ struct Header<'a> { pub CallSign: &'a str, pub LicenseStatus: &'a str, pub RadioServiceCode: &'a str, - pub GrantDate: &'a str, - pub ExpiredDate: &'a str, - pub CancellationDate: &'a str, + #[serde(with = "fcc_date")] + pub GrantDate: Option<NaiveDate>, + #[serde(with = "fcc_date")] + pub ExpiredDate: Option<NaiveDate>, + #[serde(with = "fcc_date")] + pub CancellationDate: Option<NaiveDate>, pub EligibilityRuleNumber: &'a str, pub Reserved: &'a str, pub Alien: &'a str, @@ -133,8 +186,10 @@ struct Header<'a> { pub Asian: &'a str, pub White: &'a str, pub Hispanic: &'a str, - pub EffectiveDate: &'a str, - pub LastActionDate: &'a str, + #[serde(with = "fcc_date")] + pub EffectiveDate: Option<NaiveDate>, + #[serde(with = "fcc_date")] + pub LastActionDate: Option<NaiveDate>, pub AuctionId: Option<i32>, pub BroadcastServicesRegulatoryStatus: &'a str, pub BandManagerRegulatoryStatus: &'a str, @@ -158,7 +213,8 @@ struct History<'a> { pub UniqueSystemIdentifier: &'a str, pub UlsFileNumber: &'a str, pub CallSign: &'a str, - pub LogDate: &'a str, + #[serde(with = "fcc_date")] + pub LogDate: Option<NaiveDate>, pub Code: &'a str, } #[allow(dead_code, non_snake_case)] @@ -169,7 +225,8 @@ struct LicenseAttachment<'a> { pub CallSign: &'a str, pub AttachmentCode: &'a str, pub AttachmentDescription: &'a str, - pub AttachmentDate: &'a str, + #[serde(with = "fcc_date")] + pub AttachmentDate: Option<NaiveDate>, pub AttachmentFileName: &'a str, pub ActionPerformed: &'a str, } @@ -184,7 +241,8 @@ struct SpecialCondition<'a> { pub SpecialConditionType: &'a str, pub SpecialConditionCode: Option<i32>, pub StatusCode: &'a str, - pub StatusDate: &'a str, + #[serde(with = "fcc_date")] + pub StatusDate: Option<NaiveDate>, } #[allow(dead_code, non_snake_case)] #[derive(Deserialize, Debug)] @@ -199,7 +257,8 @@ struct SpecialConditionFreeForm<'a> { pub SequenceNumber: Option<i32>, pub LicenseFreeFormCondition: &'a str, pub StatusCode: &'a str, - pub StatusDate: &'a str, + #[serde(with = "fcc_date")] + pub StatusDate: Option<NaiveDate>, } fn download_file() -> Result<File, ()> { let resp = ureq::get(WEEKLY_DUMP_URL) @@ -694,7 +753,7 @@ async fn load_license_attachments(db: &Pool<Sqlite>) { let line = line.expect("Error reading entry"); let attachment: LicenseAttachment = line.deserialize(None).expect("Error deserializing entry"); - let statement = sqlx::query(INSERT_SPECIAL_CONDITION_SQL); + let statement = sqlx::query(INSERT_LICENSE_ATTACHMENT_SQL); statement .bind(attachment.RecordType) .bind(attachment.UniqueSystemIdentifier) @@ -828,7 +887,8 @@ async fn load_special_conditions_free_form(db: &Pool<Sqlite>) { #[tokio::main] async fn main() { - let output_file = download_file().expect("Error downloading file"); + // let output_file = download_file().expect("Error downloading file"); + let output_file = File::open("l_amat.zip").expect("Error opening file"); unzip_file(output_file).expect("Error unzipping file"); |