From 7389290d80a2a3cfa860b3a29a715a50af805331 Mon Sep 17 00:00:00 2001 From: Dmitry Date: Tue, 4 Oct 2022 22:58:57 +0300 Subject: [PATCH] SourceQuestionsZipReader: Iterator --- src/main.rs | 153 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 123 insertions(+), 30 deletions(-) diff --git a/src/main.rs b/src/main.rs index 73d206d..bc1e6b4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -267,17 +267,104 @@ impl From for Vec { } } -// measure and return time elapsed in `func` in seconds -pub fn measure(func: F) -> f64 { - let start = Instant::now(); - func(); - let elapsed = start.elapsed(); - (elapsed.as_secs() as f64) + (elapsed.subsec_nanos() as f64 / 1_000_000_000.0) +struct SourceQuestionsZipReader +where + R: std::io::Read + std::io::Seek, +{ + zipfile: zip::ZipArchive, + index: Option, } -pub fn measure_and_print(func: F) { - let m = measure(func); - eprintln!("{}", m); +impl SourceQuestionsZipReader +where + R: std::io::Read + std::io::Seek, +{ + fn new(zipfile: zip::ZipArchive) -> Self { + SourceQuestionsZipReader { + zipfile, + index: None, + } + } +} + +impl Iterator for SourceQuestionsZipReader +where + R: std::io::Read + std::io::Seek, +{ + type Item = (String, Result); + + fn next(&mut self) -> Option { + if self.index.is_none() && !self.zipfile.is_empty() { + self.index = Some(0); + } + + match self.index { + Some(i) if i < self.zipfile.len() => { + self.index = Some(i + 1); + + self.nth(i) + } + _ => None, + } + } + + fn nth(&mut self, n: usize) -> Option { + if self.zipfile.len() <= n { + return None; + } + self.index = Some(n + 1); + + let file = self.zipfile.by_index(n).unwrap(); + let name = file.mangled_name(); + let name_str = name.to_str().unwrap(); + + let data: Result = serde_json::from_reader(file); + + Some((String::from(name_str), data)) + } + + fn size_hint(&self) -> (usize, Option) { + let len = self.zipfile.len(); + let index = self.index.unwrap_or(0); + let rem = if len > index + 1 { + len - (index + 1) + } else { + 0 + }; + (rem, Some(rem)) + } + + fn count(self) -> usize + where + Self: Sized, + { + self.zipfile.len() + } +} + +impl ExactSizeIterator for SourceQuestionsZipReader +where + R: std::io::Read + std::io::Seek, +{ + fn len(&self) -> usize { + self.zipfile.len() + } +} + +trait ReadSourceQuestionsBatches +where + R: std::io::Read + std::io::Seek, +{ + fn source_questions(self) -> SourceQuestionsZipReader; +} + +impl ReadSourceQuestionsBatches for zip::ZipArchive +where + R: std::io::Read + std::io::Seek, +{ + fn source_questions(self) -> SourceQuestionsZipReader { + SourceQuestionsZipReader::new(self) + } } fn write_db() { @@ -292,7 +379,7 @@ fn write_db() { let zip_file = fs::File::open(ZIP_FILENAME).unwrap(); let zip_reader = io::BufReader::new(zip_file); - let mut archive = zip::ZipArchive::new(zip_reader).unwrap(); + let archive = zip::ZipArchive::new(zip_reader).unwrap(); let options: Options = serde_json::from_value(json!({ "map_size": 900 * 1024 * 1024, // 900mb @@ -311,16 +398,8 @@ fn write_db() { let mut count: usize = 0; let count = &mut count; - (0..archive.len()) - .map(|i| { - let file = archive.by_index(i).unwrap(); - let name = file.mangled_name(); - let name_str = name.to_str().unwrap(); - - let data: Result = serde_json::from_reader(file); - - (String::from(name_str), data) - }) + archive + .source_questions() .filter(|(_, data)| data.is_ok()) .flat_map(|(filename, data)| { let mut data = data.unwrap(); @@ -354,19 +433,20 @@ where println!("{:#?}", q) } -fn read_from_zip(mut file_num: usize, mut num: usize) -> Option { +fn read_from_zip(file_num: usize, mut num: usize) -> Option { + let mut rng = rand::thread_rng(); let zip_file = fs::File::open(ZIP_FILENAME).unwrap(); let zip_reader = io::BufReader::new(zip_file); - let mut archive = zip::ZipArchive::new(zip_reader).unwrap(); + let archive = zip::ZipArchive::new(zip_reader).unwrap(); - let mut rng = rand::thread_rng(); - if file_num == 0 { - file_num = (1..=archive.len()).choose(&mut rng).unwrap(); - } - let file = archive.by_index(file_num - 1).unwrap(); - let data: Result = serde_json::from_reader(file); - let data = data.unwrap(); - let questions: Vec = data.into(); + let mut source_questions = archive.source_questions(); + let (_, batch) = if file_num == 0 { + source_questions.choose(&mut rng).unwrap() + } else { + source_questions.nth(file_num - 1).unwrap() + }; + let batch = batch.unwrap(); + let questions: Vec = batch.into(); if num == 0 { num = (1..=questions.len()).choose(&mut rng).unwrap(); @@ -413,6 +493,19 @@ fn read_from_db(mut id: u32) -> Option { collection.get::(id).unwrap() } +// measure and return time elapsed in `func` in seconds +pub fn measure(func: F) -> f64 { + let start = Instant::now(); + func(); + let elapsed = start.elapsed(); + (elapsed.as_secs() as f64) + (elapsed.subsec_nanos() as f64 / 1_000_000_000.0) +} + +pub fn measure_and_print(func: F) { + let m = measure(func); + eprintln!("{}", m); +} + fn main() { let args = Cli::parse();