diff --git a/src/main.rs b/src/main.rs index bc1e6b4..c967118 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,13 +1,8 @@ extern crate serde; -#[macro_use] extern crate serde_derive; -// This allows inserting JSON documents #[macro_use] extern crate serde_json; extern crate ledb; -// This allows define typed documents easy -#[macro_use] -extern crate ledb_derive; extern crate ledb_types; extern crate zip; @@ -19,18 +14,15 @@ use std::{fs, io}; use ledb::{Options, Storage}; +mod questions; +mod source; + +use crate::questions::{Question, QuestionsConverter}; +use crate::source::ReadSourceQuestionsBatches; + const ZIP_FILENAME: &str = "json.zip"; const DB_DIR: &str = "db"; -macro_rules! make { - ($Target:ident; by {$($field:ident),+}; from $src:expr) => {$Target {$( - $field: $src.$field - ),+}}; - ($Target:ident; with defaults and by {$($field:ident),+}; from $src:expr) => {$Target {$( - $field: $src.$field - ),+ ,..$Target::default()}} -} - #[derive(Subcommand, Debug)] enum Command { Write, @@ -57,316 +49,6 @@ struct Cli { measure: bool, } -#[derive(Debug, Default, Clone, Serialize, Deserialize)] -struct SourceQuestion { - #[serde(default)] - num: u32, - id: String, - - #[serde(alias = "Вопрос")] - description: String, - #[serde(alias = "Ответ")] - answer: String, - - #[serde(alias = "Автор")] - #[serde(default)] - author: String, - #[serde(alias = "Комментарий")] - #[serde(default)] - comment: String, - #[serde(alias = "Комментарии")] - #[serde(alias = "Инфо")] - #[serde(default)] - comment1: String, - #[serde(alias = "Тур")] - #[serde(default)] - tour: String, - #[serde(alias = "Ссылка")] - #[serde(alias = "URL")] - #[serde(default)] - url: String, - #[serde(alias = "Дата")] - #[serde(default)] - date: String, - #[serde(alias = "Обработан")] - #[serde(default)] - processed_by: String, - #[serde(alias = "Редактор")] - #[serde(default)] - redacted_by: String, - #[serde(alias = "Копирайт")] - #[serde(default)] - copyright: String, - #[serde(alias = "Тема")] - #[serde(default)] - theme: String, - #[serde(alias = "Вид")] - #[serde(alias = "Тип")] - #[serde(default)] - kind: String, - #[serde(alias = "Источник")] - #[serde(default)] - source: String, - #[serde(alias = "Рейтинг")] - #[serde(default)] - rating: String, -} - -#[derive(Debug, Default, Clone, Serialize, Deserialize)] -struct SourceQuestionsBatch { - #[serde(default)] - filename: String, - #[serde(alias = "Пакет")] - #[serde(alias = "Чемпионат")] - description: String, - #[serde(alias = "Автор")] - #[serde(default)] - author: String, - #[serde(alias = "Комментарий")] - #[serde(alias = "Комментарии")] - #[serde(alias = "Инфо")] - #[serde(default)] - comment: String, - #[serde(alias = "Ссылка")] - #[serde(alias = "URL")] - #[serde(default)] - url: String, - #[serde(alias = "Дата")] - #[serde(default)] - date: String, - #[serde(alias = "Обработан")] - #[serde(default)] - processed_by: String, - #[serde(alias = "Редактор")] - #[serde(default)] - redacted_by: String, - #[serde(alias = "Копирайт")] - #[serde(default)] - copyright: String, - #[serde(alias = "Тема")] - #[serde(default)] - theme: String, - #[serde(alias = "Вид")] - #[serde(alias = "Тип")] - #[serde(default)] - kind: String, - #[serde(alias = "Источник")] - #[serde(default)] - source: String, - #[serde(alias = "Рейтинг")] - #[serde(default)] - rating: String, - #[serde(alias = "Вопросы")] - questions: Vec, -} - -#[derive(Debug, Default, Clone, Serialize, Deserialize, Document)] -struct BatchInfo { - #[document(primary)] - #[serde(default)] - filename: String, - #[serde(default)] - description: String, - #[serde(default)] - author: String, - #[serde(default)] - comment: String, - #[serde(default)] - url: String, - #[serde(default)] - date: String, - #[serde(default)] - processed_by: String, - #[serde(default)] - redacted_by: String, - #[serde(default)] - copyright: String, - #[serde(default)] - theme: String, - #[serde(default)] - kind: String, - #[serde(default)] - source: String, - #[serde(default)] - rating: String, -} - -#[derive(Debug, Default, Clone, Serialize, Deserialize, Document)] -struct Question { - #[document(primary)] - #[serde(default)] - num: u32, - #[document(index)] - id: String, - - description: String, - answer: String, - - #[serde(default)] - author: String, - #[serde(default)] - comment: String, - #[serde(default)] - comment1: String, - #[serde(default)] - tour: String, - #[serde(default)] - url: String, - #[serde(default)] - date: String, - #[serde(default)] - processed_by: String, - #[serde(default)] - redacted_by: String, - #[serde(default)] - copyright: String, - #[serde(default)] - theme: String, - #[serde(default)] - kind: String, - #[serde(default)] - source: String, - #[serde(default)] - rating: String, - #[document(nested)] - #[serde(default)] - batch_info: BatchInfo, -} - -impl From for Question { - fn from(src: SourceQuestion) -> Self { - make! {Self; with defaults and by { - num, id, description, answer, author, comment, comment1, tour, url, - date, processed_by, redacted_by, copyright, theme, kind, source, rating - }; from src} - } -} - -impl From for BatchInfo { - fn from(src: SourceQuestionsBatch) -> Self { - make! {Self; by { - filename, description, author, comment, url, date, - processed_by, redacted_by, copyright, theme, kind, source, rating - }; from src} - } -} - -impl From for Vec { - fn from(src: SourceQuestionsBatch) -> Self { - let mut result: Vec = src - .questions - .iter() - .map(|item| item.clone().into()) - .collect(); - let batch_info = BatchInfo::from(src); - result.iter_mut().for_each(|mut question| { - question.batch_info = batch_info.clone(); - }); - - result - } -} - -struct SourceQuestionsZipReader -where - R: std::io::Read + std::io::Seek, -{ - zipfile: zip::ZipArchive, - index: Option, -} - -impl SourceQuestionsZipReader -where - R: std::io::Read + std::io::Seek, -{ - fn new(zipfile: zip::ZipArchive) -> Self { - SourceQuestionsZipReader { - zipfile, - index: None, - } - } -} - -impl Iterator for SourceQuestionsZipReader -where - R: std::io::Read + std::io::Seek, -{ - type Item = (String, Result); - - fn next(&mut self) -> Option { - if self.index.is_none() && !self.zipfile.is_empty() { - self.index = Some(0); - } - - match self.index { - Some(i) if i < self.zipfile.len() => { - self.index = Some(i + 1); - - self.nth(i) - } - _ => None, - } - } - - fn nth(&mut self, n: usize) -> Option { - if self.zipfile.len() <= n { - return None; - } - self.index = Some(n + 1); - - let file = self.zipfile.by_index(n).unwrap(); - let name = file.mangled_name(); - let name_str = name.to_str().unwrap(); - - let data: Result = serde_json::from_reader(file); - - Some((String::from(name_str), data)) - } - - fn size_hint(&self) -> (usize, Option) { - let len = self.zipfile.len(); - let index = self.index.unwrap_or(0); - let rem = if len > index + 1 { - len - (index + 1) - } else { - 0 - }; - (rem, Some(rem)) - } - - fn count(self) -> usize - where - Self: Sized, - { - self.zipfile.len() - } -} - -impl ExactSizeIterator for SourceQuestionsZipReader -where - R: std::io::Read + std::io::Seek, -{ - fn len(&self) -> usize { - self.zipfile.len() - } -} - -trait ReadSourceQuestionsBatches -where - R: std::io::Read + std::io::Seek, -{ - fn source_questions(self) -> SourceQuestionsZipReader; -} - -impl ReadSourceQuestionsBatches for zip::ZipArchive -where - R: std::io::Read + std::io::Seek, -{ - fn source_questions(self) -> SourceQuestionsZipReader { - SourceQuestionsZipReader::new(self) - } -} - fn write_db() { let out_file: PathBuf = [DB_DIR, "data.mdb"].into_iter().collect(); match fs::metadata(&out_file) { @@ -398,24 +80,15 @@ fn write_db() { let mut count: usize = 0; let count = &mut count; - archive - .source_questions() - .filter(|(_, data)| data.is_ok()) - .flat_map(|(filename, data)| { - let mut data = data.unwrap(); - data.filename = filename; - let questions: Vec = data.into(); - questions - }) - .for_each(|question| { - let result = collection.insert(&question); - if result.is_err() { - println!("-- {:#?}", question); - panic!("{:#?}", result); - } else { - *count += 1; - } - }); + archive.source_questions().convert().for_each(|question| { + let result = collection.insert(&question); + if result.is_err() { + println!("-- {:#?}", question); + panic!("{:#?}", result); + } else { + *count += 1; + } + }); println!("inserted {}\nwriting...", count); storage.sync(true).unwrap(); @@ -440,14 +113,14 @@ fn read_from_zip(file_num: usize, mut num: usize) -> Option { let archive = zip::ZipArchive::new(zip_reader).unwrap(); let mut source_questions = archive.source_questions(); - let (_, batch) = if file_num == 0 { + let (filename, batch) = if file_num == 0 { source_questions.choose(&mut rng).unwrap() } else { source_questions.nth(file_num - 1).unwrap() }; - let batch = batch.unwrap(); + let mut batch = batch.unwrap(); + batch.filename = filename; let questions: Vec = batch.into(); - if num == 0 { num = (1..=questions.len()).choose(&mut rng).unwrap(); } diff --git a/src/questions.rs b/src/questions.rs new file mode 100644 index 0000000..7571928 --- /dev/null +++ b/src/questions.rs @@ -0,0 +1,141 @@ +use ledb::Document; +use serde_derive::{Deserialize, Serialize}; + +use crate::source::{SourceQuestion, SourceQuestionsBatch}; + +macro_rules! make { + ($Target:ident; by {$($field:ident),+}; from $src:expr) => {$Target {$( + $field: $src.$field + ),+}}; + ($Target:ident; with defaults and by {$($field:ident),+}; from $src:expr) => {$Target {$( + $field: $src.$field + ),+ ,..$Target::default()}} +} + +#[derive(Debug, Default, Clone, Serialize, Deserialize, Document)] +pub struct BatchInfo { + #[document(primary)] + #[serde(default)] + pub filename: String, + #[serde(default)] + pub description: String, + #[serde(default)] + pub author: String, + #[serde(default)] + pub comment: String, + #[serde(default)] + pub url: String, + #[serde(default)] + pub date: String, + #[serde(default)] + pub processed_by: String, + #[serde(default)] + pub redacted_by: String, + #[serde(default)] + pub copyright: String, + #[serde(default)] + pub theme: String, + #[serde(default)] + pub kind: String, + #[serde(default)] + pub source: String, + #[serde(default)] + pub rating: String, +} + +#[derive(Debug, Default, Clone, Serialize, Deserialize, Document)] +pub struct Question { + #[document(primary)] + #[serde(default)] + pub num: u32, + #[document(index)] + pub id: String, + + pub description: String, + pub answer: String, + + #[serde(default)] + pub author: String, + #[serde(default)] + pub comment: String, + #[serde(default)] + pub comment1: String, + #[serde(default)] + pub tour: String, + #[serde(default)] + pub url: String, + #[serde(default)] + pub date: String, + #[serde(default)] + pub processed_by: String, + #[serde(default)] + pub redacted_by: String, + #[serde(default)] + pub copyright: String, + #[serde(default)] + pub theme: String, + #[serde(default)] + pub kind: String, + #[serde(default)] + pub source: String, + #[serde(default)] + pub rating: String, + #[document(nested)] + #[serde(default)] + pub batch_info: BatchInfo, +} + +impl From for Question { + fn from(src: SourceQuestion) -> Self { + make! {Self; with defaults and by { + num, id, description, answer, author, comment, comment1, tour, url, + date, processed_by, redacted_by, copyright, theme, kind, source, rating + }; from src} + } +} + +impl From for BatchInfo { + fn from(src: SourceQuestionsBatch) -> Self { + make! {Self; by { + filename, description, author, comment, url, date, + processed_by, redacted_by, copyright, theme, kind, source, rating + }; from src} + } +} + +impl From for Vec { + fn from(src: SourceQuestionsBatch) -> Self { + let mut result: Vec = src + .questions + .iter() + .map(|item| item.clone().into()) + .collect(); + let batch_info = BatchInfo::from(src); + result.iter_mut().for_each(|mut question| { + question.batch_info = batch_info.clone(); + }); + + result + } +} + +pub trait QuestionsConverter { + fn convert<'a>(&'a mut self) -> Box + 'a>; +} + +impl QuestionsConverter for T +where + T: Iterator)>, +{ + fn convert<'a>(&'a mut self) -> Box + 'a> { + let iter = self + .filter(|(_, data)| data.is_ok()) + .flat_map(|(filename, data)| { + let mut batch = data.unwrap(); + batch.filename = filename; + let questions: Vec = batch.into(); + questions + }); + Box::new(iter) + } +} diff --git a/src/source.rs b/src/source.rs new file mode 100644 index 0000000..7cdae96 --- /dev/null +++ b/src/source.rs @@ -0,0 +1,206 @@ +use serde_derive::{Deserialize, Serialize}; +use std::io::{Read, Seek}; +use zip::ZipArchive; + +#[derive(Debug, Default, Clone, Serialize, Deserialize)] +pub struct SourceQuestion { + #[serde(default)] + pub num: u32, + pub id: String, + + #[serde(alias = "Вопрос")] + pub description: String, + #[serde(alias = "Ответ")] + pub answer: String, + + #[serde(alias = "Автор")] + #[serde(default)] + pub author: String, + #[serde(alias = "Комментарий")] + #[serde(default)] + pub comment: String, + #[serde(alias = "Комментарии")] + #[serde(alias = "Инфо")] + #[serde(default)] + pub comment1: String, + #[serde(alias = "Тур")] + #[serde(default)] + pub tour: String, + #[serde(alias = "Ссылка")] + #[serde(alias = "URL")] + #[serde(default)] + pub url: String, + #[serde(alias = "Дата")] + #[serde(default)] + pub date: String, + #[serde(alias = "Обработан")] + #[serde(default)] + pub processed_by: String, + #[serde(alias = "Редактор")] + #[serde(default)] + pub redacted_by: String, + #[serde(alias = "Копирайт")] + #[serde(default)] + pub copyright: String, + #[serde(alias = "Тема")] + #[serde(default)] + pub theme: String, + #[serde(alias = "Вид")] + #[serde(alias = "Тип")] + #[serde(default)] + pub kind: String, + #[serde(alias = "Источник")] + #[serde(default)] + pub source: String, + #[serde(alias = "Рейтинг")] + #[serde(default)] + pub rating: String, +} + +#[derive(Debug, Default, Clone, Serialize, Deserialize)] +pub struct SourceQuestionsBatch { + #[serde(default)] + pub filename: String, + #[serde(alias = "Пакет")] + #[serde(alias = "Чемпионат")] + pub description: String, + #[serde(alias = "Автор")] + #[serde(default)] + pub author: String, + #[serde(alias = "Комментарий")] + #[serde(alias = "Комментарии")] + #[serde(alias = "Инфо")] + #[serde(default)] + pub comment: String, + #[serde(alias = "Ссылка")] + #[serde(alias = "URL")] + #[serde(default)] + pub url: String, + #[serde(alias = "Дата")] + #[serde(default)] + pub date: String, + #[serde(alias = "Обработан")] + #[serde(default)] + pub processed_by: String, + #[serde(alias = "Редактор")] + #[serde(default)] + pub redacted_by: String, + #[serde(alias = "Копирайт")] + #[serde(default)] + pub copyright: String, + #[serde(alias = "Тема")] + #[serde(default)] + pub theme: String, + #[serde(alias = "Вид")] + #[serde(alias = "Тип")] + #[serde(default)] + pub kind: String, + #[serde(alias = "Источник")] + #[serde(default)] + pub source: String, + #[serde(alias = "Рейтинг")] + #[serde(default)] + pub rating: String, + #[serde(alias = "Вопросы")] + pub questions: Vec, +} + +pub struct SourceQuestionsZipReader +where + R: Read + Seek, +{ + zipfile: ZipArchive, + index: Option, +} + +impl SourceQuestionsZipReader +where + R: Read + Seek, +{ + fn new(zipfile: ZipArchive) -> Self { + SourceQuestionsZipReader { + zipfile, + index: None, + } + } +} + +impl Iterator for SourceQuestionsZipReader +where + R: Read + Seek, +{ + type Item = (String, Result); + + fn next(&mut self) -> Option { + if self.index.is_none() && !self.zipfile.is_empty() { + self.index = Some(0); + } + + match self.index { + Some(i) if i < self.zipfile.len() => { + self.index = Some(i + 1); + + self.nth(i) + } + _ => None, + } + } + + fn nth(&mut self, n: usize) -> Option { + if self.zipfile.len() <= n { + return None; + } + self.index = Some(n + 1); + + let file = self.zipfile.by_index(n).unwrap(); + let name = file.mangled_name(); + let name_str = name.to_str().unwrap(); + + let data: Result = serde_json::from_reader(file); + + Some((String::from(name_str), data)) + } + + fn size_hint(&self) -> (usize, Option) { + let len = self.zipfile.len(); + let index = self.index.unwrap_or(0); + let rem = if len > index + 1 { + len - (index + 1) + } else { + 0 + }; + (rem, Some(rem)) + } + + fn count(self) -> usize + where + Self: Sized, + { + self.zipfile.len() + } +} + +impl ExactSizeIterator for SourceQuestionsZipReader +where + R: Read + Seek, +{ + fn len(&self) -> usize { + self.zipfile.len() + } +} + +pub trait ReadSourceQuestionsBatches +where + R: Read + Seek, +{ + fn source_questions(self) -> SourceQuestionsZipReader; +} + +impl ReadSourceQuestionsBatches for ZipArchive +where + R: Read + Seek, +{ + fn source_questions(self) -> SourceQuestionsZipReader { + SourceQuestionsZipReader::new(self) + } +}