diff --git a/app/Cargo.toml b/app/Cargo.toml
index ef32ab3..bc9edee 100644
--- a/app/Cargo.toml
+++ b/app/Cargo.toml
@@ -13,6 +13,10 @@ description = "Утилита загружающая базу данных ЧГ
 name = "db_bench"
 harness = false
 
+[[bench]]
+name = "questions_bench"
+harness = false
+
 [dependencies]
 chgk_ledb_lib = {path = "../lib", features = ["sync", "source", "convert"]}
 serde_json="1.0"
diff --git a/app/benches/questions_bench.rs b/app/benches/questions_bench.rs
new file mode 100644
index 0000000..1b08f86
--- /dev/null
+++ b/app/benches/questions_bench.rs
@@ -0,0 +1,101 @@
+#[macro_use]
+extern crate criterion;
+extern crate bincode;
+extern crate serde;
+extern crate serde_derive;
+extern crate serde_json;
+extern crate tempfile;
+
+use chgk_ledb_lib::db::{Reader, Writer, WriterOpts};
+use chgk_ledb_lib::questions::{Question, QuestionsConverter};
+use chgk_ledb_lib::source::ReadSourceQuestionsBatches;
+
+use std::path::Path;
+use std::time::Duration;
+use std::{fs, io};
+
+use criterion::{BatchSize, Criterion};
+use tempfile::{tempdir, NamedTempFile};
+
+const ZIP_FILENAME: &str = "../json.zip";
+const NEW_DB_FILENAME: &str = "../db.dat";
+
+const N: usize = 4096;
+
+fn read_sample() -> Vec<Question> {
+    let zip_file = fs::File::open(ZIP_FILENAME).unwrap();
+    let zip_reader = io::BufReader::new(zip_file);
+    let archive = zip::ZipArchive::new(zip_reader).unwrap();
+    let mut source_questions = archive.source_questions();
+
+    source_questions
+        .convert()
+        .take(N)
+        .enumerate()
+        .map(|(num, mut question)| {
+            question.num = 1 + num as u32;
+            question
+        })
+        .collect()
+}
+
+fn prepare_db_writer<P: AsRef<Path>>(path: P) -> Writer<Question> {
+    let opts = WriterOpts {
+        compress_lvl: 1,
+        data_buf_size: 100 * 1024 * 1024,
+        out_buf_size: 100 * 1024 * 1024,
+        current_buf_size: 10240,
+    };
+
+    Writer::new(path, opts).expect("new writer")
+}
+
+fn questions_read(c: &mut Criterion) {
+    c.bench_function("questions_read", |b| {
+        b.iter_batched(
+            || {
+                let reader: Reader<Question> =
+                    Reader::new(NEW_DB_FILENAME, 4096).expect("new reader");
+                reader.into_iter().take(N)
+            },
+            |reader| {
+                for item in reader {
+                    drop(item);
+                }
+            },
+            BatchSize::SmallInput,
+        )
+    });
+}
+
+fn questions_write(c: &mut Criterion) {
+    let dir = tempdir().expect("tempdir");
+
+    c.bench_function("questions_write", |b| {
+        b.iter_batched(
+            || {
+                let tmpfile = NamedTempFile::new_in(dir.path())
+                    .expect("new tempfile")
+                    .into_temp_path();
+                let src = read_sample().into_iter();
+                let writer = prepare_db_writer(&tmpfile);
+                (src, writer)
+            },
+            |(mut src, mut writer)| {
+                writer.load(&mut src).unwrap();
+                writer.finish().unwrap();
+            },
+            BatchSize::SmallInput,
+        )
+    });
+}
+
+fn config() -> Criterion {
+    Criterion::default()
+        .sample_size(40)
+        .warm_up_time(Duration::from_secs(7))
+        .measurement_time(Duration::from_secs(20))
+}
+
+criterion_group! {name=benches; config = config(); targets = questions_read, questions_write}
+criterion_main!(benches);
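To run the new sync benchmark: Cargo executes benches with the package directory as the working directory, so the `../json.zip` and `../db.dat` constants resolve to the repository root, and `db.dat` must already have been generated there. A sketch of the invocation:

    cd app
    cargo bench --bench questions_bench
    # or a single target, via criterion's name filter:
    cargo bench --bench questions_bench -- questions_read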
diff --git a/app_async/Cargo.toml b/app_async/Cargo.toml
index e974234..e46f6e6 100644
--- a/app_async/Cargo.toml
+++ b/app_async/Cargo.toml
@@ -13,6 +13,10 @@ description = "Утилита загружающая базу данных ЧГ
 name = "async_bench"
 harness = false
 
+[[bench]]
+name = "questions_async_bench"
+harness = false
+
 [dependencies]
 chgk_ledb_lib = {path = "../lib", features = ["async", "convert_async"]}
 serde_json="1.0"
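One note on the async benchmark below: the setup helpers (`read_sample_sync`, `setup_writer`, `setup_reader`) each spawn a plain thread and `block_on` a throwaway current-thread runtime rather than reusing the shared `RUNTIME`; presumably this sidesteps tokio's "cannot start a runtime from within a runtime" panic, since criterion may call the synchronous setup closure from inside the benchmark runtime.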
diff --git a/app_async/benches/questions_async_bench.rs b/app_async/benches/questions_async_bench.rs
new file mode 100644
index 0000000..950a79a
--- /dev/null
+++ b/app_async/benches/questions_async_bench.rs
@@ -0,0 +1,152 @@
+#[macro_use]
+extern crate criterion;
+extern crate bincode;
+extern crate serde;
+extern crate serde_derive;
+extern crate tempfile;
+
+use async_compression::Level;
+use chgk_ledb_lib::async_db::{Reader, Writer, WriterOpts};
+use chgk_ledb_lib::questions::{Question, QuestionsConverterAsyncForStream};
+use chgk_ledb_lib::source::ReadSourceQuestionsBatchesAsync;
+use futures::pin_mut;
+use futures::StreamExt;
+use std::time::Duration;
+use std::{ops::Deref, path::Path};
+
+use async_zip::tokio::read::seek::ZipFileReader;
+
+use criterion::{BatchSize, Criterion};
+use lazy_static::lazy_static;
+use tempfile::{tempdir, NamedTempFile};
+use tokio::{fs, runtime};
+
+const ZIP_FILENAME: &str = "../json.zip";
+const NEW_DB_FILENAME: &str = "../db.dat";
+
+lazy_static! {
+    static ref RUNTIME: tokio::runtime::Runtime =
+        runtime::Builder::new_current_thread().build().unwrap();
+}
+
+const N: usize = 4096;
+
+async fn read_sample() -> Vec<Question> {
+    let mut file = fs::File::open(ZIP_FILENAME).await.expect("open zip");
+    let archive = ZipFileReader::with_tokio(&mut file)
+        .await
+        .expect("open zip file reader");
+    let mut source_questions = archive.source_questions();
+    let source_questions = source_questions.stream();
+    pin_mut!(source_questions);
+
+    source_questions
+        .converter()
+        .convert()
+        .take(N)
+        .enumerate()
+        .map(|(num, mut question)| {
+            question.num = 1 + num as u32;
+            question
+        })
+        .collect()
+        .await
+}
+
+fn read_sample_sync() -> Vec<Question> {
+    std::thread::spawn(|| {
+        runtime::Builder::new_current_thread()
+            .build()
+            .unwrap()
+            .block_on(read_sample())
+    })
+    .join()
+    .expect("spawn thread")
+}
+
+async fn prepare_db_writer<P: AsRef<Path>>(path: P) -> Writer<Question> {
+    let opts = WriterOpts {
+        compress_lvl: Level::Fastest,
+        data_buf_size: 100 * 1024 * 1024,
+        out_buf_size: 100 * 1024 * 1024,
+        current_buf_size: 10240,
+    };
+
+    Writer::<Question>::new(path, opts)
+        .await
+        .expect("new writer")
+}
+
+fn setup_writer<P: AsRef<Path>>(path: P) -> Writer<Question> {
+    let rp = path.as_ref().to_str().unwrap().to_string();
+
+    std::thread::spawn(|| {
+        runtime::Builder::new_current_thread()
+            .build()
+            .unwrap()
+            .block_on(prepare_db_writer(rp))
+    })
+    .join()
+    .expect("spawn thread")
+}
+
+fn setup_reader<P: AsRef<Path>>(path: P) -> Reader<Question> {
+    let rp = path.as_ref().to_str().unwrap().to_string();
+    std::thread::spawn(|| {
+        runtime::Builder::new_current_thread()
+            .build()
+            .unwrap()
+            .block_on(async move { Reader::new(rp).await.expect("new reader") })
+    })
+    .join()
+    .expect("spawn thread")
+}
+
+fn questions_async_read(c: &mut Criterion) {
+    c.bench_function("questions_async_read", |b| {
+        b.to_async(RUNTIME.deref()).iter_batched(
+            || setup_reader(NEW_DB_FILENAME),
+            |reader| async move {
+                reader
+                    .stream()
+                    .take(N)
+                    .for_each(|item| async { drop(item) })
+                    .await
+            },
+            BatchSize::SmallInput,
+        )
+    });
+}
+
+fn questions_async_write(c: &mut Criterion) {
+    let dir = tempdir().expect("tempdir");
+
+    c.bench_function("questions_async_write", |b| {
+        b.to_async(RUNTIME.deref()).iter_batched(
+            || {
+                let tmpfile = NamedTempFile::new_in(dir.path())
+                    .expect("new tempfile")
+                    .into_temp_path();
+                let src = read_sample_sync().into_iter();
+                let src = futures::stream::iter(src);
+                let writer = setup_writer(&tmpfile);
+                (src, writer)
+            },
+            |(mut src, mut writer)| async move {
+                writer.load(&mut src).await.unwrap();
+                writer.finish().await.unwrap();
+            },
+            BatchSize::SmallInput,
+        )
+    });
+}
+
+fn config() -> Criterion {
+    Criterion::default()
+        .sample_size(40)
+        .warm_up_time(Duration::from_secs(7))
+        .measurement_time(Duration::from_secs(20))
+}
+
+criterion_group! {name=benches; config = config(); targets = questions_async_read, questions_async_write}
+criterion_main!(benches);
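The async crate runs the same way (again, `questions_async_read` expects a prebuilt `db.dat` in the repository root, while the write benchmark only needs `json.zip` there):

    cd app_async
    cargo bench --bench questions_async_bench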