extern crate serde; #[macro_use] extern crate serde_derive; // This allows inserting JSON documents #[macro_use] extern crate serde_json; extern crate ledb; // This allows define typed documents easy #[macro_use] extern crate ledb_derive; extern crate ledb_types; extern crate zip; use clap::{Parser, Subcommand}; use rand::seq::IteratorRandom; use std::time::Instant; use std::{fs, io}; use ledb::{Options, Storage}; #[derive(Subcommand, Debug)] enum Command { Write, Compact, Print { #[clap(value_parser, default_value = "0")] id: u32, }, ZipPrint { #[clap(value_parser, default_value = "0")] file_num: usize, #[clap(value_parser, default_value = "0")] num: usize, }, } #[derive(Parser, Debug)] #[clap(author, version, about, long_about = None)] #[clap(propagate_version = true)] struct Cli { #[clap(subcommand)] command: Command, #[clap(short, long, action)] measure: bool, } #[derive(Debug, Default, Clone, Deserialize)] struct SourceQuestion { #[serde(default)] num: u32, id: String, #[serde(alias = "Вопрос")] description: String, #[serde(alias = "Ответ")] answer: String, #[serde(alias = "Автор")] #[serde(default)] author: String, #[serde(alias = "Комментарий")] #[serde(default)] comment: String, #[serde(alias = "Комментарии")] #[serde(alias = "Инфо")] #[serde(default)] comment1: String, #[serde(alias = "Тур")] #[serde(default)] tour: String, #[serde(alias = "Ссылка")] #[serde(alias = "URL")] #[serde(default)] url: String, #[serde(alias = "Дата")] #[serde(default)] date: String, #[serde(alias = "Обработан")] #[serde(default)] processed_by: String, #[serde(alias = "Редактор")] #[serde(default)] redacted_by: String, #[serde(alias = "Копирайт")] #[serde(default)] copyright: String, #[serde(alias = "Тема")] #[serde(default)] theme: String, #[serde(alias = "Вид")] #[serde(alias = "Тип")] #[serde(default)] kind: String, #[serde(alias = "Источник")] #[serde(default)] source: String, #[serde(alias = "Рейтинг")] #[serde(default)] rating: String, } #[derive(Debug, Default, Clone, Deserialize)] struct SourceQuestionsBatch { #[serde(default)] filename: String, #[serde(alias = "Пакет")] #[serde(alias = "Чемпионат")] description: String, #[serde(alias = "Автор")] #[serde(default)] author: String, #[serde(alias = "Комментарий")] #[serde(alias = "Комментарии")] #[serde(alias = "Инфо")] #[serde(default)] comment: String, #[serde(alias = "Ссылка")] #[serde(alias = "URL")] #[serde(default)] url: String, #[serde(alias = "Дата")] #[serde(default)] date: String, #[serde(alias = "Обработан")] #[serde(default)] processed_by: String, #[serde(alias = "Редактор")] #[serde(default)] redacted_by: String, #[serde(alias = "Копирайт")] #[serde(default)] copyright: String, #[serde(alias = "Тема")] #[serde(default)] theme: String, #[serde(alias = "Вид")] #[serde(alias = "Тип")] #[serde(default)] kind: String, #[serde(alias = "Источник")] #[serde(default)] source: String, #[serde(alias = "Рейтинг")] #[serde(default)] rating: String, #[serde(alias = "Вопросы")] questions: Vec, } #[derive(Debug, Default, Clone, Serialize, Deserialize, Document)] struct BatchInfo { #[document(primary)] #[serde(default)] filename: String, #[serde(default)] description: String, #[serde(default)] author: String, #[serde(default)] comment: String, #[serde(default)] url: String, #[serde(default)] date: String, #[serde(default)] processed_by: String, #[serde(default)] redacted_by: String, #[serde(default)] copyright: String, #[serde(default)] theme: String, #[serde(default)] kind: String, #[serde(default)] source: String, #[serde(default)] rating: String, } #[derive(Debug, Default, Clone, Serialize, Deserialize, Document)] struct Question { #[document(primary)] #[serde(default)] num: u32, #[document(index)] id: String, description: String, answer: String, #[serde(default)] author: String, #[serde(default)] comment: String, #[serde(default)] comment1: String, #[serde(default)] tour: String, #[serde(default)] url: String, #[serde(default)] date: String, #[serde(default)] processed_by: String, #[serde(default)] redacted_by: String, #[serde(default)] copyright: String, #[serde(default)] theme: String, #[serde(default)] kind: String, #[serde(default)] source: String, #[serde(default)] rating: String, #[document(nested)] #[serde(default)] batch_info: BatchInfo, } impl From for Question { fn from(src: SourceQuestion) -> Self { Self { num: src.num, id: src.id, description: src.description, answer: src.answer, author: src.author, comment: src.comment, comment1: src.comment1, tour: src.tour, url: src.url, date: src.date, processed_by: src.processed_by, redacted_by: src.redacted_by, copyright: src.copyright, theme: src.theme, kind: src.kind, source: src.source, rating: src.rating, batch_info: BatchInfo::default(), } } } impl From<&SourceQuestionsBatch> for BatchInfo { fn from(src: &SourceQuestionsBatch) -> Self { Self { filename: src.filename.clone(), description: src.description.clone(), author: src.author.clone(), comment: src.comment.clone(), url: src.url.clone(), date: src.date.clone(), processed_by: src.processed_by.clone(), redacted_by: src.redacted_by.clone(), copyright: src.copyright.clone(), theme: src.theme.clone(), kind: src.kind.clone(), source: src.source.clone(), rating: src.rating.clone(), } } } impl From for Vec { fn from(src: SourceQuestionsBatch) -> Self { let mut result = Vec::::with_capacity(src.questions.len()); src.questions.iter().for_each(|item| { let mut question: Question = item.clone().into(); question.batch_info = BatchInfo::from(&src); result.push(question); }); result } } // measure and return time elapsed in `func` in seconds pub fn measure(func: F) -> f64 { let start = Instant::now(); func(); let elapsed = start.elapsed(); (elapsed.as_secs() as f64) + (elapsed.subsec_nanos() as f64 / 1_000_000_000.0) } pub fn measure_and_print(func: F) { let m = measure(func); eprintln!("{}", m); } fn writer_v4() { let zip_file = fs::File::open("test1.zip").unwrap(); let zip_reader = io::BufReader::new(zip_file); let mut archive = zip::ZipArchive::new(zip_reader).unwrap(); let options: Options = serde_json::from_value(json!({ "map_size": 900 * 1024 * 1024, // 900mb "write_map": true, "map_async": true, "no_lock": true, "no_meta_sync": true, "no_sync": true, })) .unwrap(); let storage = Storage::new("db", options).unwrap(); let collection = storage.collection("questions").unwrap(); collection.purge().unwrap(); let mut count: usize = 0; let count = &mut count; (0..archive.len()) .map(|i| { let file = archive.by_index(i).unwrap(); let name = file.mangled_name(); let name_str = name.to_str().unwrap(); let data: Result = serde_json::from_reader(file); (String::from(name_str), data) }) .filter(|(_, data)| data.is_ok()) .flat_map(|(filename, data)| { let mut data = data.unwrap(); data.filename = filename; let questions: Vec = data.into(); questions }) .for_each(|question| { let result = collection.insert(&question); if result.is_err() { println!( "Error: {:?} \n\ On: {:?}", result, question ); } else { *count += 1; } }); println!("inserted {}", count); storage.sync(true).unwrap(); print!("stats: "); let stats = storage.stat().unwrap(); println!("{:?}", stats); drop(storage); } fn print_question_from(get_q: F) where F: FnOnce() -> Option, { let q = get_q().unwrap(); println!("{:#?}", q) } fn reader_v1(mut file_num: usize, mut num: usize) -> Option { let zip_file = fs::File::open("test1.zip").unwrap(); let zip_reader = io::BufReader::new(zip_file); let mut archive = zip::ZipArchive::new(zip_reader).unwrap(); let mut rng = rand::thread_rng(); if file_num == 0 { file_num = (1..=archive.len()).choose(&mut rng).unwrap(); } let file = archive.by_index(file_num - 1).unwrap(); let data: Result = serde_json::from_reader(file); let data = data.unwrap(); let questions: Vec = data.into(); if num == 0 { num = (1..=questions.len()).choose(&mut rng).unwrap(); } Some(questions[num - 1].clone()) } fn compact_db() { let options: Options = serde_json::from_value(json!({ "write_map": true, "map_async": true, "no_lock": true, "no_meta_sync": true, "no_sync": true, "compact": true, })) .unwrap(); let storage = Storage::new("db", options).unwrap(); storage.sync(true).unwrap(); let stats = storage.stat().unwrap(); println!("{:?}", stats); drop(storage); } fn reader_v4(mut id: u32) -> Option { let options: Options = serde_json::from_value(json!({ "read_only": true, // "map_async": true, "no_lock": true, // "no_meta_sync": true, // "no_sync": true, // "no_tls": true })) .unwrap(); let storage = Storage::new("db", options).unwrap(); let collection = storage.collection("questions").unwrap(); let mut rng = rand::thread_rng(); if id == 0 { let last_id = collection.last_id().unwrap(); id = (1..=last_id).choose(&mut rng).unwrap(); //let id = ledb::KeyData::Int(id as i64); } collection.get::(id).unwrap() } fn main() { let args = Cli::parse(); let mut action: Box = match &args.command { Command::Write => Box::new(writer_v4), Command::Compact => Box::new(compact_db), Command::Print { id } => { let get_question = Box::new(|| reader_v4(*id)); Box::new(|| print_question_from(get_question)) } Command::ZipPrint { file_num, num } => { let get_question = Box::new(|| reader_v1(*file_num, *num)); Box::new(|| print_question_from(get_question)) } }; if args.measure { action = Box::new(|| measure_and_print(action)); } action(); } /* #[cfg(test)] mod tests { use super::*; use test::Bencher; #[test] fn reader_v1_works() { reader_v1().unwrap(); } fn reader_v4_works() { reader_v4().unwrap(); } #[bench] fn bench_reader_v1(b: &mut Bencher) { b.iter(|| reader_v1()); } #[bench] fn bench_reader_v4(b: &mut Bencher) { b.iter(|| reader_v4()); } } */