Compare commits
21 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
39d4d6b610
|
|||
|
27260695f7
|
|||
|
f5bd72b298
|
|||
|
dda50e7d2b
|
|||
|
693f349ae3
|
|||
|
7a2e58b1b9
|
|||
|
9570b1d6e2
|
|||
|
5355d0398d
|
|||
|
b63e9aa45c
|
|||
|
8120a996a3
|
|||
|
3a26a4aa7f
|
|||
|
103b677d21
|
|||
|
e18539a982
|
|||
|
249ac3a4ef
|
|||
|
e940f1c37c
|
|||
|
f3dabe7a06
|
|||
| e521e39f5e | |||
| cf591198a0 | |||
| be6b17a8e2 | |||
| 6a3b3647b3 | |||
| 7efd03d624 |
26
.drone.yml
26
.drone.yml
@@ -8,4 +8,30 @@ steps:
|
|||||||
- apk add --no-cache musl-dev
|
- apk add --no-cache musl-dev
|
||||||
- cargo build --verbose --all
|
- cargo build --verbose --all
|
||||||
- cargo test --verbose --all
|
- cargo test --verbose --all
|
||||||
|
environment:
|
||||||
|
CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
|
||||||
|
|
||||||
|
trigger:
|
||||||
|
event:
|
||||||
|
- push
|
||||||
|
|
||||||
|
---
|
||||||
|
kind: pipeline
|
||||||
|
name: publish
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: push
|
||||||
|
image: rust:1-alpine
|
||||||
|
commands:
|
||||||
|
- apk add --no-cache musl-dev
|
||||||
|
- cargo build -p chgk_ledb_lib
|
||||||
|
- cargo publish --registry gitea -p chgk_ledb_lib
|
||||||
|
environment:
|
||||||
|
CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
|
||||||
|
CARGO_REGISTRIES_GITEA_INDEX: https://gitea.b4tman.ru/b4tman/_cargo-index.git
|
||||||
|
CARGO_REGISTRIES_GITEA_TOKEN:
|
||||||
|
from_secret: cargo_gitea_token
|
||||||
|
|
||||||
|
trigger:
|
||||||
|
event:
|
||||||
|
- tag
|
||||||
|
|||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -6,3 +6,4 @@ json.zip
|
|||||||
/exp
|
/exp
|
||||||
/.vscode
|
/.vscode
|
||||||
test*.bin
|
test*.bin
|
||||||
|
db.dat
|
||||||
|
|||||||
872
Cargo.lock
generated
872
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
28
Cargo.toml
28
Cargo.toml
@@ -1,26 +1,8 @@
|
|||||||
[package]
|
[workspace]
|
||||||
name = "chgk_ledb"
|
members = [
|
||||||
version = "0.1.0"
|
"app",
|
||||||
authors = ["Dmitry <b4tm4n@mail.ru>"]
|
"lib"
|
||||||
edition = "2021"
|
]
|
||||||
|
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
|
||||||
|
|
||||||
[dependencies]
|
|
||||||
serde="1.0"
|
|
||||||
serde_derive="1.0"
|
|
||||||
serde_json="1.0"
|
|
||||||
ledb = {git = "https://github.com/b4tman/ledb.git", rev="a646b90e", package="ledb"}
|
|
||||||
ledb-derive = {git = "https://github.com/b4tman/ledb.git", rev="a646b90e", package="ledb-derive"}
|
|
||||||
ledb-types = {git = "https://github.com/b4tman/ledb.git", rev="a646b90e", package="ledb-types"}
|
|
||||||
zip="0.6"
|
|
||||||
rand="0.8"
|
|
||||||
clap = { version = "3.2.22", features = ["derive"] }
|
|
||||||
bincode = "^2.0.0-rc.2"
|
|
||||||
zstd = "^0.10"
|
|
||||||
|
|
||||||
[dev-dependencies]
|
|
||||||
tempfile = "3.3"
|
|
||||||
|
|
||||||
[profile.release]
|
[profile.release]
|
||||||
opt-level = 3
|
opt-level = 3
|
||||||
|
|||||||
28
app/Cargo.toml
Normal file
28
app/Cargo.toml
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
[package]
|
||||||
|
name = "chgk_ledb"
|
||||||
|
version = "1.1.0"
|
||||||
|
authors = ["Dmitry <b4tm4n@mail.ru>"]
|
||||||
|
edition = "2021"
|
||||||
|
repository = "https://gitea.b4tman.ru/b4tman/chgk_ledb"
|
||||||
|
license = "MIT"
|
||||||
|
description = "Утилита загружающая базу данных ЧГК вопросов из ZIP файла в JSON формате в базу данных."
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[[bench]]
|
||||||
|
name = "db_bench"
|
||||||
|
harness = false
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
chgk_ledb_lib = {path = "../lib"}
|
||||||
|
serde_json="1.0"
|
||||||
|
zip="0.6"
|
||||||
|
rand="0.8"
|
||||||
|
clap = { version = "3.2.22", features = ["derive"] }
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
criterion = "0.4.0"
|
||||||
|
tempfile = "3.3"
|
||||||
|
bincode = "^2.0.0-rc.2"
|
||||||
|
serde="1.0"
|
||||||
|
serde_derive="1.0"
|
||||||
112
app/benches/db_bench.rs
Normal file
112
app/benches/db_bench.rs
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
#[macro_use]
|
||||||
|
extern crate criterion;
|
||||||
|
extern crate bincode;
|
||||||
|
extern crate serde;
|
||||||
|
extern crate serde_derive;
|
||||||
|
extern crate serde_json;
|
||||||
|
extern crate tempfile;
|
||||||
|
|
||||||
|
use chgk_ledb_lib::db;
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
use db::{Reader, Writer, WriterOpts};
|
||||||
|
|
||||||
|
use criterion::{BatchSize, Criterion};
|
||||||
|
use tempfile::{tempdir, NamedTempFile};
|
||||||
|
|
||||||
|
use serde_derive::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
#[derive(
|
||||||
|
bincode::Encode,
|
||||||
|
bincode::Decode,
|
||||||
|
Clone,
|
||||||
|
Debug,
|
||||||
|
PartialEq,
|
||||||
|
Eq,
|
||||||
|
PartialOrd,
|
||||||
|
Ord,
|
||||||
|
Serialize,
|
||||||
|
Deserialize,
|
||||||
|
)]
|
||||||
|
struct TestData {
|
||||||
|
num1: u64,
|
||||||
|
num2: u64,
|
||||||
|
test: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
const N: usize = 4096;
|
||||||
|
|
||||||
|
fn gen_data(count: usize) -> impl Iterator<Item = TestData> {
|
||||||
|
(0..count).map(|i| 143 + i as u64).map(|i| TestData {
|
||||||
|
num1: i,
|
||||||
|
num2: (i * 100) ^ 0xDF0E441122334455,
|
||||||
|
test: "test ---- Test ____".repeat(123 + i as usize % 15),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn prepare_db_writer<P: AsRef<Path>>(path: P) -> Writer<TestData> {
|
||||||
|
let opts = WriterOpts {
|
||||||
|
compress_lvl: 1,
|
||||||
|
data_buf_size: 100 * 1024 * 1024,
|
||||||
|
out_buf_size: 100 * 1024 * 1024,
|
||||||
|
current_buf_size: 10240,
|
||||||
|
};
|
||||||
|
|
||||||
|
Writer::new(path, opts).expect("new writer")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn db_read(c: &mut Criterion) {
|
||||||
|
let dir = tempdir().expect("tempdir");
|
||||||
|
let tmpfile = NamedTempFile::new_in(dir.path())
|
||||||
|
.expect("new tempfile")
|
||||||
|
.into_temp_path();
|
||||||
|
let mut writer = prepare_db_writer(&tmpfile);
|
||||||
|
|
||||||
|
let mut items_iter = gen_data(N).collect::<Vec<TestData>>().into_iter();
|
||||||
|
writer.load(&mut items_iter).unwrap();
|
||||||
|
writer.finish().unwrap();
|
||||||
|
|
||||||
|
c.bench_function("read", |b| {
|
||||||
|
b.iter_batched(
|
||||||
|
|| {
|
||||||
|
let reader: Reader<TestData> = Reader::new(&tmpfile, 2048).expect("new reader");
|
||||||
|
reader
|
||||||
|
},
|
||||||
|
|reader| {
|
||||||
|
for item in reader {
|
||||||
|
drop(item);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
BatchSize::SmallInput,
|
||||||
|
)
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn db_write(c: &mut Criterion) {
|
||||||
|
let dir = tempdir().expect("tempdir");
|
||||||
|
|
||||||
|
c.bench_function("write", |b| {
|
||||||
|
b.iter_batched(
|
||||||
|
|| {
|
||||||
|
let tmpfile = NamedTempFile::new_in(dir.path())
|
||||||
|
.expect("new tempfile")
|
||||||
|
.into_temp_path();
|
||||||
|
let src = gen_data(N).collect::<Vec<TestData>>().into_iter();
|
||||||
|
let writer = prepare_db_writer(&tmpfile);
|
||||||
|
(src, writer)
|
||||||
|
},
|
||||||
|
|(mut src, mut writer)| {
|
||||||
|
writer.load(&mut src).unwrap();
|
||||||
|
writer.finish().unwrap();
|
||||||
|
},
|
||||||
|
BatchSize::SmallInput,
|
||||||
|
)
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn config() -> Criterion {
|
||||||
|
Criterion::default().sample_size(40)
|
||||||
|
}
|
||||||
|
|
||||||
|
criterion_group! {name=benches; config = config(); targets = db_read, db_write}
|
||||||
|
criterion_main!(benches);
|
||||||
@@ -1,36 +1,24 @@
|
|||||||
extern crate serde;
|
|
||||||
extern crate serde_derive;
|
|
||||||
#[macro_use]
|
|
||||||
extern crate serde_json;
|
extern crate serde_json;
|
||||||
extern crate ledb;
|
|
||||||
extern crate ledb_types;
|
|
||||||
extern crate zip;
|
|
||||||
|
|
||||||
use clap::{Parser, Subcommand};
|
use clap::{Parser, Subcommand};
|
||||||
use rand::seq::IteratorRandom;
|
use rand::seq::IteratorRandom;
|
||||||
|
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::path::PathBuf;
|
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
use std::{fs, sync::mpsc, thread};
|
use std::{fs, sync::mpsc, thread};
|
||||||
|
|
||||||
use ledb::{Options, Storage};
|
use chgk_ledb_lib::db;
|
||||||
|
use chgk_ledb_lib::questions;
|
||||||
mod db;
|
use chgk_ledb_lib::source;
|
||||||
mod questions;
|
|
||||||
mod source;
|
|
||||||
|
|
||||||
use crate::questions::{Question, QuestionsConverter};
|
use crate::questions::{Question, QuestionsConverter};
|
||||||
use crate::source::ReadSourceQuestionsBatches;
|
use crate::source::ReadSourceQuestionsBatches;
|
||||||
|
|
||||||
const ZIP_FILENAME: &str = "json.zip";
|
const ZIP_FILENAME: &str = "json.zip";
|
||||||
const NEW_DB_FILENAME: &str = "test.bin";
|
const NEW_DB_FILENAME: &str = "db.dat";
|
||||||
const DB_DIR: &str = "db";
|
|
||||||
|
|
||||||
#[derive(Subcommand, Debug)]
|
#[derive(Subcommand, Debug)]
|
||||||
enum Command {
|
enum Command {
|
||||||
Write,
|
Write,
|
||||||
Compact,
|
|
||||||
Print {
|
Print {
|
||||||
#[clap(value_parser, default_value = "0")]
|
#[clap(value_parser, default_value = "0")]
|
||||||
id: u32,
|
id: u32,
|
||||||
@@ -41,11 +29,6 @@ enum Command {
|
|||||||
#[clap(value_parser, default_value = "0")]
|
#[clap(value_parser, default_value = "0")]
|
||||||
num: usize,
|
num: usize,
|
||||||
},
|
},
|
||||||
Write2,
|
|
||||||
Print2 {
|
|
||||||
#[clap(value_parser, default_value = "0")]
|
|
||||||
id: u32,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
@@ -79,60 +62,12 @@ fn zip_reader_task(tx: mpsc::Sender<Question>) {
|
|||||||
}
|
}
|
||||||
println!("read done");
|
println!("read done");
|
||||||
}
|
}
|
||||||
fn db_writer_task(rx: mpsc::Receiver<Question>) {
|
|
||||||
let out_file: PathBuf = [DB_DIR, "data.mdb"].into_iter().collect();
|
|
||||||
match fs::metadata(&out_file) {
|
|
||||||
Ok(x) if x.is_file() => {
|
|
||||||
fs::remove_file(&out_file).unwrap();
|
|
||||||
println!(r#""{}" removed"#, out_file.to_str().unwrap());
|
|
||||||
}
|
|
||||||
_ => {}
|
|
||||||
};
|
|
||||||
|
|
||||||
let options: Options = serde_json::from_value(json!({
|
|
||||||
"map_size": 900 * 1024 * 1024, // 900mb
|
|
||||||
"write_map": true,
|
|
||||||
"map_async": true,
|
|
||||||
"no_lock": true,
|
|
||||||
"no_meta_sync": true,
|
|
||||||
"no_sync": true,
|
|
||||||
}))
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
let storage = Storage::new(DB_DIR, options).unwrap();
|
|
||||||
let collection = storage.collection("questions").unwrap();
|
|
||||||
|
|
||||||
let count = collection.load(rx).expect("load");
|
|
||||||
|
|
||||||
println!("loaded {count}");
|
|
||||||
|
|
||||||
println!("syncing to disk...");
|
|
||||||
storage.sync(true).unwrap();
|
|
||||||
|
|
||||||
print!("stats: ");
|
|
||||||
let stats = storage.stat().unwrap();
|
|
||||||
println!("{:?}", stats);
|
|
||||||
|
|
||||||
drop(storage);
|
|
||||||
println!("write done");
|
|
||||||
}
|
|
||||||
|
|
||||||
fn write_db() {
|
|
||||||
let (tx, rx) = mpsc::channel::<Question>();
|
|
||||||
[
|
|
||||||
thread::spawn(move || zip_reader_task(tx)),
|
|
||||||
thread::spawn(move || db_writer_task(rx)),
|
|
||||||
]
|
|
||||||
.into_iter()
|
|
||||||
.for_each(|handle| handle.join().expect("thread panic"));
|
|
||||||
println!("all done");
|
|
||||||
}
|
|
||||||
|
|
||||||
fn print_question_from<F>(get_q: F)
|
fn print_question_from<F>(get_q: F)
|
||||||
where
|
where
|
||||||
F: FnOnce() -> Option<Question>,
|
F: FnOnce() -> Option<Question>,
|
||||||
{
|
{
|
||||||
let q = get_q().unwrap();
|
let q = get_q().expect("question not found");
|
||||||
println!("{:#?}", q)
|
println!("{:#?}", q)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -157,45 +92,6 @@ fn read_from_zip(file_num: usize, mut num: usize) -> Option<Question> {
|
|||||||
Some(questions[num - 1].clone())
|
Some(questions[num - 1].clone())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn compact_db() {
|
|
||||||
let options: Options = serde_json::from_value(json!({
|
|
||||||
"write_map": true,
|
|
||||||
"map_async": true,
|
|
||||||
"no_lock": true,
|
|
||||||
"no_meta_sync": true,
|
|
||||||
"no_sync": true,
|
|
||||||
"compact": true,
|
|
||||||
}))
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
let storage = Storage::new(DB_DIR, options).unwrap();
|
|
||||||
|
|
||||||
storage.sync(true).unwrap();
|
|
||||||
let stats = storage.stat().unwrap();
|
|
||||||
println!("{:?}", stats);
|
|
||||||
drop(storage);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn read_from_db(mut id: u32) -> Option<Question> {
|
|
||||||
let options: Options = serde_json::from_value(json!({
|
|
||||||
"read_only": true,
|
|
||||||
"map_async": true,
|
|
||||||
"no_lock": true,
|
|
||||||
}))
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
let storage = Storage::new(DB_DIR, options).unwrap();
|
|
||||||
let collection = storage.collection("questions").unwrap();
|
|
||||||
let mut rng = rand::thread_rng();
|
|
||||||
|
|
||||||
if id == 0 {
|
|
||||||
let last_id = collection.last_id().unwrap();
|
|
||||||
id = (1..=last_id).choose(&mut rng).unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
collection.get::<Question>(id).unwrap()
|
|
||||||
}
|
|
||||||
|
|
||||||
// measure and return time elapsed in `func` in seconds
|
// measure and return time elapsed in `func` in seconds
|
||||||
pub fn measure<F: FnOnce()>(func: F) -> f64 {
|
pub fn measure<F: FnOnce()>(func: F) -> f64 {
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
@@ -214,7 +110,6 @@ fn main() {
|
|||||||
|
|
||||||
let mut action: Box<dyn FnOnce()> = match &args.command {
|
let mut action: Box<dyn FnOnce()> = match &args.command {
|
||||||
Command::Write => Box::new(write_db),
|
Command::Write => Box::new(write_db),
|
||||||
Command::Compact => Box::new(compact_db),
|
|
||||||
Command::Print { id } => {
|
Command::Print { id } => {
|
||||||
let get_question = Box::new(|| read_from_db(*id));
|
let get_question = Box::new(|| read_from_db(*id));
|
||||||
Box::new(|| print_question_from(get_question))
|
Box::new(|| print_question_from(get_question))
|
||||||
@@ -223,11 +118,6 @@ fn main() {
|
|||||||
let get_question = Box::new(|| read_from_zip(*file_num, *num));
|
let get_question = Box::new(|| read_from_zip(*file_num, *num));
|
||||||
Box::new(|| print_question_from(get_question))
|
Box::new(|| print_question_from(get_question))
|
||||||
}
|
}
|
||||||
Command::Write2 => Box::new(write_db2),
|
|
||||||
Command::Print2 { id } => {
|
|
||||||
let get_question = Box::new(|| read_from_db2(*id));
|
|
||||||
Box::new(|| print_question_from(get_question))
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
if args.measure {
|
if args.measure {
|
||||||
@@ -237,11 +127,11 @@ fn main() {
|
|||||||
action();
|
action();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn read_from_db2(id: u32) -> Option<Question> {
|
fn read_from_db(id: u32) -> Option<Question> {
|
||||||
let mut reader: db::Reader<Question> =
|
let reader: db::Reader<Question> =
|
||||||
db::Reader::new(NEW_DB_FILENAME, 2048).expect("new db reader");
|
db::Reader::new(NEW_DB_FILENAME, 2048).expect("new db reader");
|
||||||
|
|
||||||
let mut questions = reader.iter();
|
let mut questions = reader.into_iter();
|
||||||
|
|
||||||
match id {
|
match id {
|
||||||
0 => {
|
0 => {
|
||||||
@@ -251,17 +141,17 @@ fn read_from_db2(id: u32) -> Option<Question> {
|
|||||||
_ => questions.nth((id - 1) as usize),
|
_ => questions.nth((id - 1) as usize),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn write_db2() {
|
fn write_db() {
|
||||||
let (tx, rx) = mpsc::channel::<Question>();
|
let (tx, rx) = mpsc::channel::<Question>();
|
||||||
[
|
[
|
||||||
thread::spawn(move || zip_reader_task(tx)),
|
thread::spawn(move || zip_reader_task(tx)),
|
||||||
thread::spawn(move || db_writer2_task(rx)),
|
thread::spawn(move || db_writer_task(rx)),
|
||||||
]
|
]
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.for_each(|handle| handle.join().expect("thread panic"));
|
.for_each(|handle| handle.join().expect("thread panic"));
|
||||||
println!("all done");
|
println!("all done");
|
||||||
}
|
}
|
||||||
fn db_writer2_task(rx: mpsc::Receiver<Question>) {
|
fn db_writer_task(rx: mpsc::Receiver<Question>) {
|
||||||
let writer_opts = db::WriterOpts::default();
|
let writer_opts = db::WriterOpts::default();
|
||||||
let mut writer: db::Writer<Question> =
|
let mut writer: db::Writer<Question> =
|
||||||
db::Writer::new(NEW_DB_FILENAME, writer_opts).expect("new db writer");
|
db::Writer::new(NEW_DB_FILENAME, writer_opts).expect("new db writer");
|
||||||
18
bench.txt
18
bench.txt
@@ -3,21 +3,21 @@
|
|||||||
95 MB json.zip
|
95 MB json.zip
|
||||||
---
|
---
|
||||||
|
|
||||||
hyperfine -n print -n print2 -n zip-print -w 100 -m 100 ".\target\release\chgk_ledb.exe print 444" ".\target\release\chgk_ledb.exe print2 444" ".\target\release\chgk_ledb.exe zip-print 4 84"
|
hyperfine -n print -n print2 -n zip-print -w 400 -m 400 ".\target\release\chgk_ledb.exe print 444" ".\target\release\chgk_ledb.exe print2 444" ".\target\release\chgk_ledb.exe zip-print 4 84"
|
||||||
|
|
||||||
Benchmark 1: print
|
Benchmark 1: print
|
||||||
Time (mean ± σ): 20.0 ms ± 1.5 ms [User: 3.4 ms, System: 11.7 ms]
|
Time (mean ± σ): 19.0 ms ± 1.5 ms [User: 5.6 ms, System: 13.1 ms]
|
||||||
Range (min … max): 17.4 ms … 24.1 ms 100 runs
|
Range (min … max): 16.8 ms … 24.5 ms 400 runs
|
||||||
|
|
||||||
Benchmark 2: print2
|
Benchmark 2: print2
|
||||||
Time (mean ± σ): 19.4 ms ± 1.5 ms [User: 5.5 ms, System: 9.8 ms]
|
Time (mean ± σ): 18.6 ms ± 1.6 ms [User: 5.5 ms, System: 12.6 ms]
|
||||||
Range (min … max): 17.0 ms … 23.9 ms 100 runs
|
Range (min … max): 16.1 ms … 29.5 ms 400 runs
|
||||||
|
|
||||||
Benchmark 3: zip-print
|
Benchmark 3: zip-print
|
||||||
Time (mean ± σ): 40.2 ms ± 2.3 ms [User: 15.1 ms, System: 18.9 ms]
|
Time (mean ± σ): 40.8 ms ± 3.3 ms [User: 15.4 ms, System: 21.6 ms]
|
||||||
Range (min … max): 36.6 ms … 48.1 ms 100 runs
|
Range (min … max): 36.5 ms … 67.5 ms 400 runs
|
||||||
|
|
||||||
Summary
|
Summary
|
||||||
'print2' ran
|
'print2' ran
|
||||||
1.03 ± 0.11 times faster than 'print'
|
1.02 ± 0.12 times faster than 'print'
|
||||||
2.07 ± 0.20 times faster than 'zip-print'
|
2.20 ± 0.26 times faster than 'zip-print'
|
||||||
|
|||||||
23
lib/Cargo.toml
Normal file
23
lib/Cargo.toml
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
[package]
|
||||||
|
name = "chgk_ledb_lib"
|
||||||
|
version = "1.1.0"
|
||||||
|
authors = ["Dmitry <b4tm4n@mail.ru>"]
|
||||||
|
edition = "2021"
|
||||||
|
repository = "https://gitea.b4tman.ru/b4tman/chgk_ledb"
|
||||||
|
license = "MIT"
|
||||||
|
description = "Библиотека для доступа к файлу базы данных вопросов ЧГК"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
serde="1.0"
|
||||||
|
serde_derive="1.0"
|
||||||
|
serde_json="1.0"
|
||||||
|
zip="0.6"
|
||||||
|
bincode = "^2.0.0-rc.2"
|
||||||
|
zstd = "^0.10"
|
||||||
|
memmap = "0.7.0"
|
||||||
|
fs4 = { version = "0.6.3", features = ["sync"] }
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
tempfile = "3.3"
|
||||||
@@ -1,10 +1,15 @@
|
|||||||
use std::{
|
use std::{
|
||||||
fs,
|
fs,
|
||||||
io::{self, Cursor, Read, Seek, Write},
|
io::{self, Cursor, Read, Write},
|
||||||
marker::PhantomData,
|
marker::PhantomData,
|
||||||
path::Path,
|
path::Path,
|
||||||
|
sync::Arc,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use memmap::{Mmap, MmapOptions};
|
||||||
|
|
||||||
|
use fs4::FileExt;
|
||||||
|
|
||||||
type LSize = u32;
|
type LSize = u32;
|
||||||
const LEN_SIZE: usize = std::mem::size_of::<LSize>();
|
const LEN_SIZE: usize = std::mem::size_of::<LSize>();
|
||||||
const BINCODE_CFG: bincode::config::Configuration = bincode::config::standard();
|
const BINCODE_CFG: bincode::config::Configuration = bincode::config::standard();
|
||||||
@@ -60,6 +65,7 @@ where
|
|||||||
{
|
{
|
||||||
pub fn new<P: AsRef<Path>>(path: P, opts: WriterOpts) -> Result<Self, String> {
|
pub fn new<P: AsRef<Path>>(path: P, opts: WriterOpts) -> Result<Self, String> {
|
||||||
let out = fs::File::create(path).str_err()?;
|
let out = fs::File::create(path).str_err()?;
|
||||||
|
out.try_lock_exclusive().str_err()?;
|
||||||
let out = io::BufWriter::with_capacity(opts.out_buf_size, out);
|
let out = io::BufWriter::with_capacity(opts.out_buf_size, out);
|
||||||
let data_buf: Vec<u8> = Vec::with_capacity(opts.data_buf_size);
|
let data_buf: Vec<u8> = Vec::with_capacity(opts.data_buf_size);
|
||||||
let data_buf = Cursor::new(data_buf);
|
let data_buf = Cursor::new(data_buf);
|
||||||
@@ -130,6 +136,9 @@ where
|
|||||||
let pos: LSize = self.data_buf.position() as LSize;
|
let pos: LSize = self.data_buf.position() as LSize;
|
||||||
self.table.push(pos);
|
self.table.push(pos);
|
||||||
|
|
||||||
|
let output_size: u64 = (self.table.len() * LEN_SIZE) as u64 + self.data_buf.position();
|
||||||
|
self.out.get_ref().allocate(output_size).str_err()?;
|
||||||
|
|
||||||
// write tab
|
// write tab
|
||||||
let tab_size = (self.table.len() * LEN_SIZE) as LSize;
|
let tab_size = (self.table.len() * LEN_SIZE) as LSize;
|
||||||
for pos in self.table {
|
for pos in self.table {
|
||||||
@@ -144,6 +153,7 @@ where
|
|||||||
io::copy(&mut data, &mut self.out).str_err()?;
|
io::copy(&mut data, &mut self.out).str_err()?;
|
||||||
|
|
||||||
self.out.flush().str_err()?;
|
self.out.flush().str_err()?;
|
||||||
|
self.out.get_ref().unlock().str_err()?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -152,32 +162,32 @@ pub struct Reader<T>
|
|||||||
where
|
where
|
||||||
T: bincode::Decode,
|
T: bincode::Decode,
|
||||||
{
|
{
|
||||||
input: io::BufReader<fs::File>,
|
mmap: Mmap,
|
||||||
count: usize,
|
count: usize,
|
||||||
first_pos: LSize,
|
first_pos: LSize,
|
||||||
_t: PhantomData<*const T>,
|
_t: Option<Arc<T>>, // PhantomData replacement
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T> Reader<T>
|
impl<T> Reader<T>
|
||||||
where
|
where
|
||||||
T: bincode::Decode,
|
T: bincode::Decode,
|
||||||
{
|
{
|
||||||
pub fn new<P: AsRef<Path>>(path: P, buf_size: usize) -> Result<Self, String> {
|
pub fn new<P: AsRef<Path>>(path: P, _buf_size: usize) -> Result<Self, String> {
|
||||||
let input = fs::File::open(path).str_err()?;
|
let file = fs::File::open(path).str_err()?;
|
||||||
let mut input = io::BufReader::with_capacity(buf_size, input);
|
file.try_lock_shared().str_err()?;
|
||||||
|
let mmap = unsafe { MmapOptions::new().map(&file).str_err()? };
|
||||||
|
|
||||||
// read first pos and records count
|
// read first pos and records count
|
||||||
let mut first_data: [u8; LEN_SIZE] = [0; LEN_SIZE];
|
let first_data: [u8; LEN_SIZE] = mmap[0..LEN_SIZE].try_into().str_err()?;
|
||||||
input.read_exact(&mut first_data).str_err()?;
|
|
||||||
let first_pos = LSize::from_le_bytes(first_data);
|
let first_pos = LSize::from_le_bytes(first_data);
|
||||||
let tab_len = (first_pos as usize) / LEN_SIZE;
|
let tab_len = (first_pos as usize) / LEN_SIZE;
|
||||||
let count = tab_len - 1;
|
let count = tab_len - 1;
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
input,
|
mmap,
|
||||||
count,
|
count,
|
||||||
first_pos,
|
first_pos,
|
||||||
_t: PhantomData,
|
_t: None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -185,39 +195,34 @@ where
|
|||||||
self.count
|
self.count
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get(&mut self, index: usize) -> Result<T, String> {
|
pub fn is_empty(&self) -> bool {
|
||||||
|
0 == self.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get(&self, index: usize) -> Result<T, String> {
|
||||||
if index >= self.len() {
|
if index >= self.len() {
|
||||||
return Err("index out of range".into());
|
return Err("index out of range".into());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let next_pos: usize = (index + 1) * LEN_SIZE;
|
||||||
|
let next_end: usize = next_pos + LEN_SIZE;
|
||||||
|
|
||||||
// read item data pos
|
// read item data pos
|
||||||
let data_pos = if 0 == index {
|
let data_pos = if 0 == index {
|
||||||
self.first_pos
|
self.first_pos
|
||||||
} else {
|
} else {
|
||||||
let tab_pos: u64 = (index * LEN_SIZE).try_into().str_err()?;
|
let tab_pos: usize = index * LEN_SIZE;
|
||||||
let mut pos_curr_data: [u8; LEN_SIZE] = [0; LEN_SIZE];
|
let pos_curr_data: [u8; LEN_SIZE] =
|
||||||
let cur_pos = self.input.stream_position().str_err()? as i64;
|
self.mmap[tab_pos..next_pos].try_into().str_err()?;
|
||||||
self.input
|
|
||||||
.seek_relative((tab_pos as i64) - cur_pos)
|
|
||||||
.str_err()?;
|
|
||||||
|
|
||||||
self.input.read_exact(&mut pos_curr_data).str_err()?;
|
|
||||||
LSize::from_le_bytes(pos_curr_data)
|
LSize::from_le_bytes(pos_curr_data)
|
||||||
};
|
} as usize;
|
||||||
|
|
||||||
// read next item pos
|
// read next item pos
|
||||||
let mut pos_next_data: [u8; LEN_SIZE] = [0; LEN_SIZE];
|
let pos_next_data: [u8; LEN_SIZE] = self.mmap[next_pos..next_end].try_into().str_err()?;
|
||||||
self.input.read_exact(&mut pos_next_data).str_err()?;
|
let data_pos_next = LSize::from_le_bytes(pos_next_data) as usize;
|
||||||
let data_pos_next = LSize::from_le_bytes(pos_next_data);
|
|
||||||
// calc item data length
|
|
||||||
let data_len = data_pos_next - data_pos;
|
|
||||||
|
|
||||||
// read & unpack item data
|
// read & unpack item data
|
||||||
let cur_pos = self.input.stream_position().str_err()? as i64;
|
let reader = io::Cursor::new(self.mmap[data_pos..data_pos_next].as_ref());
|
||||||
self.input
|
|
||||||
.seek_relative((data_pos as i64) - cur_pos)
|
|
||||||
.str_err()?;
|
|
||||||
let reader = self.input.by_ref().take(data_len as u64);
|
|
||||||
let data = zstd::decode_all(reader).str_err()?;
|
let data = zstd::decode_all(reader).str_err()?;
|
||||||
|
|
||||||
// decode item
|
// decode item
|
||||||
@@ -226,7 +231,7 @@ where
|
|||||||
Ok(item.0)
|
Ok(item.0)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn iter(&mut self) -> ReaderIter<'_, T> {
|
pub fn iter(&self) -> ReaderIter<'_, T> {
|
||||||
ReaderIter::new(self)
|
ReaderIter::new(self)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -235,7 +240,7 @@ pub struct ReaderIter<'a, T>
|
|||||||
where
|
where
|
||||||
T: bincode::Decode,
|
T: bincode::Decode,
|
||||||
{
|
{
|
||||||
reader: &'a mut Reader<T>,
|
reader: &'a Reader<T>,
|
||||||
index: Option<usize>,
|
index: Option<usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -243,7 +248,7 @@ impl<'a, T> ReaderIter<'a, T>
|
|||||||
where
|
where
|
||||||
T: bincode::Decode,
|
T: bincode::Decode,
|
||||||
{
|
{
|
||||||
fn new(reader: &'a mut Reader<T>) -> Self {
|
fn new(reader: &'a Reader<T>) -> Self {
|
||||||
ReaderIter {
|
ReaderIter {
|
||||||
reader,
|
reader,
|
||||||
index: None,
|
index: None,
|
||||||
@@ -258,7 +263,7 @@ where
|
|||||||
type Item = T;
|
type Item = T;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
if self.index.is_none() && self.reader.len() != 0 {
|
if self.index.is_none() && !self.reader.is_empty() {
|
||||||
self.index = Some(0);
|
self.index = Some(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -313,6 +318,100 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub struct ReaderIntoIter<T>
|
||||||
|
where
|
||||||
|
T: bincode::Decode,
|
||||||
|
{
|
||||||
|
reader: Reader<T>,
|
||||||
|
index: Option<usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> ReaderIntoIter<T>
|
||||||
|
where
|
||||||
|
T: bincode::Decode,
|
||||||
|
{
|
||||||
|
fn new(reader: Reader<T>) -> Self {
|
||||||
|
Self {
|
||||||
|
reader,
|
||||||
|
index: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> Iterator for ReaderIntoIter<T>
|
||||||
|
where
|
||||||
|
T: bincode::Decode,
|
||||||
|
{
|
||||||
|
type Item = T;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
if self.index.is_none() && !self.reader.is_empty() {
|
||||||
|
self.index = Some(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
match self.index {
|
||||||
|
Some(i) if i < self.reader.len() => self.nth(i),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn nth(&mut self, n: usize) -> Option<Self::Item> {
|
||||||
|
if self.reader.len() <= n {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
self.index = Some(n + 1);
|
||||||
|
|
||||||
|
let item = self.reader.get(n);
|
||||||
|
match item {
|
||||||
|
Ok(item) => Some(item),
|
||||||
|
Err(_) => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||||
|
let len = self.reader.len();
|
||||||
|
if self.index.is_none() {
|
||||||
|
return (len, Some(len));
|
||||||
|
}
|
||||||
|
|
||||||
|
let index = self.index.unwrap();
|
||||||
|
let rem = if len > index + 1 {
|
||||||
|
len - (index + 1)
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
(rem, Some(rem))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn count(self) -> usize
|
||||||
|
where
|
||||||
|
Self: Sized,
|
||||||
|
{
|
||||||
|
self.reader.len()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> ExactSizeIterator for ReaderIntoIter<T>
|
||||||
|
where
|
||||||
|
T: bincode::Decode,
|
||||||
|
{
|
||||||
|
fn len(&self) -> usize {
|
||||||
|
self.reader.len()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> IntoIterator for Reader<T>
|
||||||
|
where
|
||||||
|
T: bincode::Decode,
|
||||||
|
{
|
||||||
|
type Item = T;
|
||||||
|
type IntoIter = ReaderIntoIter<Self::Item>;
|
||||||
|
|
||||||
|
fn into_iter(self) -> Self::IntoIter {
|
||||||
|
Self::IntoIter::new(self)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use super::*;
|
use super::*;
|
||||||
@@ -349,7 +448,7 @@ mod test {
|
|||||||
writer.load(&mut items.clone().into_iter()).expect("load");
|
writer.load(&mut items.clone().into_iter()).expect("load");
|
||||||
writer.finish().expect("finish write");
|
writer.finish().expect("finish write");
|
||||||
|
|
||||||
let mut reader: Reader<TestData> = Reader::new(&tmpfile, 2048).expect("new reader");
|
let reader: Reader<TestData> = Reader::new(&tmpfile, 2048).expect("new reader");
|
||||||
assert_eq!(items.len(), reader.len());
|
assert_eq!(items.len(), reader.len());
|
||||||
|
|
||||||
for (idx, item) in items.iter().enumerate() {
|
for (idx, item) in items.iter().enumerate() {
|
||||||
@@ -376,11 +475,76 @@ mod test {
|
|||||||
writer.load(&mut items.clone().into_iter()).expect("load");
|
writer.load(&mut items.clone().into_iter()).expect("load");
|
||||||
writer.finish().expect("finish write");
|
writer.finish().expect("finish write");
|
||||||
|
|
||||||
let mut reader: Reader<TestData> = Reader::new(&tmpfile, 2048).expect("new reader");
|
let reader: Reader<TestData> = Reader::new(&tmpfile, 2048).expect("new reader");
|
||||||
assert_eq!(items.len(), reader.len());
|
assert_eq!(items.len(), reader.len());
|
||||||
|
|
||||||
items.into_iter().zip(reader.iter()).for_each(|pair| {
|
items.into_iter().zip(reader.iter()).for_each(|pair| {
|
||||||
assert_eq!(pair.0, pair.1);
|
assert_eq!(pair.0, pair.1);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_write_read_into_iter() {
|
||||||
|
let dir = tempdir().expect("tempdir");
|
||||||
|
let tmpfile = dir.path().join("test.tmp");
|
||||||
|
let opts = WriterOpts {
|
||||||
|
compress_lvl: 1,
|
||||||
|
data_buf_size: 10 * 1024 * 1024,
|
||||||
|
out_buf_size: 10 * 1024 * 1024,
|
||||||
|
current_buf_size: 4096,
|
||||||
|
};
|
||||||
|
let mut writer: Writer<TestData> = Writer::new(&tmpfile, opts).expect("new writer");
|
||||||
|
|
||||||
|
let items_iter = gen_data(10);
|
||||||
|
let items: Vec<TestData> = items_iter.collect();
|
||||||
|
|
||||||
|
writer.load(&mut items.clone().into_iter()).expect("load");
|
||||||
|
writer.finish().expect("finish write");
|
||||||
|
|
||||||
|
let reader: Reader<TestData> = Reader::new(&tmpfile, 2048).expect("new reader");
|
||||||
|
assert_eq!(items.len(), reader.len());
|
||||||
|
|
||||||
|
items.into_iter().zip(reader).for_each(|pair| {
|
||||||
|
assert_eq!(pair.0, pair.1);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/// sharing Reader instance between threads
|
||||||
|
#[test]
|
||||||
|
fn test_share_reader() {
|
||||||
|
use std::thread;
|
||||||
|
|
||||||
|
let dir = tempdir().expect("tempdir");
|
||||||
|
let tmpfile = dir.path().join("test.tmp");
|
||||||
|
let opts = WriterOpts {
|
||||||
|
compress_lvl: 1,
|
||||||
|
data_buf_size: 10 * 1024 * 1024,
|
||||||
|
out_buf_size: 10 * 1024 * 1024,
|
||||||
|
current_buf_size: 4096,
|
||||||
|
};
|
||||||
|
let mut writer: Writer<TestData> = Writer::new(&tmpfile, opts).expect("new writer");
|
||||||
|
|
||||||
|
let items_iter = gen_data(10);
|
||||||
|
let items: Vec<TestData> = items_iter.collect();
|
||||||
|
|
||||||
|
writer.load(&mut items.clone().into_iter()).expect("load");
|
||||||
|
writer.finish().expect("finish write");
|
||||||
|
|
||||||
|
let reader: Reader<TestData> = Reader::new(&tmpfile, 2048).expect("new reader");
|
||||||
|
assert_eq!(items.len(), reader.len());
|
||||||
|
|
||||||
|
let reader = Arc::new(reader);
|
||||||
|
for _ in 0..=3 {
|
||||||
|
let cur_items = items.clone();
|
||||||
|
let cur_reader = Arc::clone(&reader);
|
||||||
|
thread::spawn(move || {
|
||||||
|
cur_items
|
||||||
|
.into_iter()
|
||||||
|
.zip(cur_reader.iter())
|
||||||
|
.for_each(|pair| {
|
||||||
|
assert_eq!(pair.0, pair.1);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
3
lib/src/lib.rs
Normal file
3
lib/src/lib.rs
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
pub mod db;
|
||||||
|
pub mod questions;
|
||||||
|
pub mod source;
|
||||||
@@ -1,4 +1,3 @@
|
|||||||
use ledb::Document;
|
|
||||||
use serde_derive::{Deserialize, Serialize};
|
use serde_derive::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::source::{SourceQuestion, SourceQuestionsBatch};
|
use crate::source::{SourceQuestion, SourceQuestionsBatch};
|
||||||
@@ -12,11 +11,8 @@ macro_rules! make {
|
|||||||
),+ ,..$Target::default()}}
|
),+ ,..$Target::default()}}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(
|
#[derive(Debug, Default, Clone, Serialize, Deserialize, bincode::Decode, bincode::Encode)]
|
||||||
Debug, Default, Clone, Serialize, Deserialize, Document, bincode::Decode, bincode::Encode,
|
|
||||||
)]
|
|
||||||
pub struct BatchInfo {
|
pub struct BatchInfo {
|
||||||
#[document(primary)]
|
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub filename: String,
|
pub filename: String,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
@@ -45,14 +41,10 @@ pub struct BatchInfo {
|
|||||||
pub rating: String,
|
pub rating: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(
|
#[derive(Debug, Default, Clone, Serialize, Deserialize, bincode::Decode, bincode::Encode)]
|
||||||
Debug, Default, Clone, Serialize, Deserialize, Document, bincode::Decode, bincode::Encode,
|
|
||||||
)]
|
|
||||||
pub struct Question {
|
pub struct Question {
|
||||||
#[document(primary)]
|
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub num: u32,
|
pub num: u32,
|
||||||
#[document(index)]
|
|
||||||
pub id: String,
|
pub id: String,
|
||||||
|
|
||||||
pub description: String,
|
pub description: String,
|
||||||
@@ -84,7 +76,6 @@ pub struct Question {
|
|||||||
pub source: String,
|
pub source: String,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub rating: String,
|
pub rating: String,
|
||||||
#[document(nested)]
|
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub batch_info: BatchInfo,
|
pub batch_info: BatchInfo,
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user