diff options
author | Galen Guyer <galen@galenguyer.com> | 2023-01-23 12:42:16 -0500 |
---|---|---|
committer | Galen Guyer <galen@galenguyer.com> | 2023-01-23 12:42:16 -0500 |
commit | a237eb8334dfc180fc1cecff06a849efe51f603c (patch) | |
tree | fbad762db13841d80ae170986878f082dcc74492 | |
parent | 6e97c01eec4c2ce669b7b22b8edbc8669fcbac49 (diff) |
move fixing comment file to load_comments function after file check
-rw-r--r-- | src/load.rs | 15 | ||||
-rw-r--r-- | src/main.rs | 20 |
2 files changed, 14 insertions, 21 deletions
diff --git a/src/load.rs b/src/load.rs index 7c69012..cc35c24 100644 --- a/src/load.rs +++ b/src/load.rs @@ -5,6 +5,8 @@ use itertools::Itertools; use sqlx::{QueryBuilder, Sqlite, SqlitePool}; use std::fs::File; use std::io::BufRead; +use regex::Regex; +use std::fs; const INSERT_AMATEUR_SQL: &str = include_str!("sql/insert-amateur.sql"); const INSERT_COMMENT_SQL: &str = include_str!("sql/insert-comment.sql"); @@ -109,7 +111,18 @@ pub async fn load_comments(db: &SqlitePool, clear_first: bool) { println!("CO.dat not found, skipping"); return; } - let comments_file = comments_file.unwrap(); + + // Some idiot at the FCC decided that unescaped newlines in the middle of a field were cool + // Uncle Ted may have had some good ideas after all + let comments_regex = Regex::new(r"\s*\r\r\n").unwrap(); + let comments = fs::read_to_string("CO.dat").expect("Error reading file"); + fs::write( + "CO.dat", + comments_regex.replace_all(&comments, " ").to_string(), + ) + .expect("Error writing file"); + + let comments_file = File::open("CO.dat").unwrap(); // let comments_file_meta = fs::metadata("CO.dat").expect("Error getting file metadata"); let line_count = std::io::BufReader::new(&comments_file).lines().count(); drop(comments_file); diff --git a/src/main.rs b/src/main.rs index adbf18b..4ca7130 100644 --- a/src/main.rs +++ b/src/main.rs @@ -127,16 +127,6 @@ async fn load_weekly(db: &SqlitePool) -> chrono::DateTime<Utc> { unzip_file(&output_file).expect("Error unzipping file"); std::fs::remove_file("counts").expect("Error deleting counts file"); - // Some idiot at the FCC decided that unescaped newlines in the middle of a field were cool - // Uncle Ted may have had some good ideas after all - let comments_regex = Regex::new(r"\s*\r\r\n").unwrap(); - let comments = fs::read_to_string("CO.dat").expect("Error reading file"); - fs::write( - "CO.dat", - comments_regex.replace_all(&comments, " ").to_string(), - ) - .expect("Error writing file"); - // This is somehow worse, newlines can 
either be \n (more common) OR \r\n. // The first one is easy, if there's a newline without a preceding carriage return, it's bad and should be gone // CRLF is what's normally used, however the last character of every entry is either R, P, T, or |, so if there's a CRLF @@ -189,16 +179,6 @@ async fn load_daily(url: &str, db: &SqlitePool) -> chrono::DateTime<Utc> { unzip_file(&output_file).expect("Error unzipping file"); std::fs::remove_file("counts").expect("Error deleting counts file"); - // Some idiot at the FCC decided that unescaped newlines in the middle of a field were cool - // Uncle Ted may have had some good ideas after all - let comments_regex = Regex::new(r"\s*\r\r\n").unwrap(); - let comments = fs::read_to_string("CO.dat").expect("Error reading file"); - fs::write( - "CO.dat", - comments_regex.replace_all(&comments, " ").to_string(), - ) - .expect("Error writing file"); - load::load_amateurs(db, false).await; load::load_comments(db, false).await; load::load_entities(db, false).await; |