diff --git a/Cargo.lock b/Cargo.lock index 7904f03..437d20f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -174,6 +174,7 @@ dependencies = [ "ledb", "ledb-derive", "ledb-types", + "memmap", "rand", "serde", "serde_derive", @@ -638,6 +639,16 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "memmap" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "memoffset" version = "0.6.5" diff --git a/Cargo.toml b/Cargo.toml index c8f89eb..c79dbff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ rand="0.8" clap = { version = "3.2.22", features = ["derive"] } bincode = "^2.0.0-rc.2" zstd = "^0.10" +memmap = "0.7.0" [dev-dependencies] criterion = "0.4.0" diff --git a/bench.txt b/bench.txt index e0c3a24..d9a44b1 100644 --- a/bench.txt +++ b/bench.txt @@ -6,18 +6,18 @@ hyperfine -n print -n print2 -n zip-print -w 100 -m 100 ".\target\release\chgk_ledb.exe print 444" ".\target\release\chgk_ledb.exe print2 444" ".\target\release\chgk_ledb.exe zip-print 4 84" Benchmark 1: print - Time (mean ± σ): 20.0 ms ± 1.5 ms [User: 3.4 ms, System: 11.7 ms] - Range (min … max): 17.4 ms … 24.1 ms 100 runs + Time (mean ± σ): 20.0 ms ± 1.7 ms [User: 5.8 ms, System: 12.3 ms] + Range (min … max): 18.0 ms … 30.9 ms 100 runs Benchmark 2: print2 - Time (mean ± σ): 19.4 ms ± 1.5 ms [User: 5.5 ms, System: 9.8 ms] - Range (min … max): 17.0 ms … 23.9 ms 100 runs + Time (mean ± σ): 19.1 ms ± 1.2 ms [User: 5.7 ms, System: 12.3 ms] + Range (min … max): 17.2 ms … 22.1 ms 100 runs Benchmark 3: zip-print - Time (mean ± σ): 40.2 ms ± 2.3 ms [User: 15.1 ms, System: 18.9 ms] - Range (min … max): 36.6 ms … 48.1 ms 100 runs + Time (mean ± σ): 41.1 ms ± 1.4 ms [User: 16.1 ms, System: 22.2 ms] + Range (min … max): 38.9 ms … 45.4 ms 100 runs Summary 'print2' ran - 1.03 ± 0.11 times faster than 'print' - 2.07 ± 0.20 times faster than 'zip-print' + 1.05 ± 0.11 times faster than 'print' + 2.16 ± 0.15 times faster than 'zip-print' diff --git a/benches/db_bench.rs b/benches/db_bench.rs index a820a8b..1259d6c 100644 --- a/benches/db_bench.rs +++ b/benches/db_bench.rs @@ -3,7 +3,7 @@ extern crate criterion; #[path = "../src/db.rs"] mod db; -use std::{path::PathBuf}; +use std::path::PathBuf; use db::{Reader, Writer, WriterOpts}; @@ -88,6 +88,5 @@ fn config() -> Criterion { Criterion::default().sample_size(40) } - -criterion_group!{name=benches; config = config(); targets = db_read, db_write} +criterion_group! {name=benches; config = config(); targets = db_read, db_write} criterion_main!(benches); diff --git a/src/db.rs b/src/db.rs index 478fd72..a05888f 100644 --- a/src/db.rs +++ b/src/db.rs @@ -1,10 +1,12 @@ use std::{ fs, - io::{self, Cursor, Read, Seek, Write}, + io::{self, Cursor, Read, Write}, marker::PhantomData, path::Path, }; +use memmap::{Mmap, MmapOptions}; + type LSize = u32; const LEN_SIZE: usize = std::mem::size_of::(); const BINCODE_CFG: bincode::config::Configuration = bincode::config::standard(); @@ -152,7 +154,7 @@ pub struct Reader where T: bincode::Decode, { - input: io::BufReader, + mmap: Mmap, count: usize, first_pos: LSize, _t: PhantomData<*const T>, @@ -162,19 +164,18 @@ impl Reader where T: bincode::Decode, { - pub fn new>(path: P, buf_size: usize) -> Result { - let input = fs::File::open(path).str_err()?; - let mut input = io::BufReader::with_capacity(buf_size, input); + pub fn new>(path: P, _buf_size: usize) -> Result { + let file = fs::File::open(path).str_err()?; + let mmap = unsafe { MmapOptions::new().map(&file).str_err()? }; // read first pos and records count - let mut first_data: [u8; LEN_SIZE] = [0; LEN_SIZE]; - input.read_exact(&mut first_data).str_err()?; + let first_data: [u8; LEN_SIZE] = mmap[0..LEN_SIZE].try_into().str_err()?; let first_pos = LSize::from_le_bytes(first_data); let tab_len = (first_pos as usize) / LEN_SIZE; let count = tab_len - 1; Ok(Self { - input, + mmap, count, first_pos, _t: PhantomData, @@ -194,30 +195,24 @@ where let data_pos = if 0 == index { self.first_pos } else { - let tab_pos: u64 = (index * LEN_SIZE).try_into().str_err()?; - let mut pos_curr_data: [u8; LEN_SIZE] = [0; LEN_SIZE]; - let cur_pos = self.input.stream_position().str_err()? as i64; - self.input - .seek_relative((tab_pos as i64) - cur_pos) + let tab_pos: usize = index * LEN_SIZE; + let pos_curr_data: [u8; LEN_SIZE] = self.mmap[tab_pos..(tab_pos + LEN_SIZE)] + .try_into() .str_err()?; - - self.input.read_exact(&mut pos_curr_data).str_err()?; LSize::from_le_bytes(pos_curr_data) - }; + } as usize; // read next item pos - let mut pos_next_data: [u8; LEN_SIZE] = [0; LEN_SIZE]; - self.input.read_exact(&mut pos_next_data).str_err()?; - let data_pos_next = LSize::from_le_bytes(pos_next_data); + let next_pos: usize = (index + 1) * LEN_SIZE; + let pos_next_data: [u8; LEN_SIZE] = self.mmap[next_pos..(next_pos + LEN_SIZE)] + .try_into() + .str_err()?; + let data_pos_next = LSize::from_le_bytes(pos_next_data) as usize; // calc item data length let data_len = data_pos_next - data_pos; // read & unpack item data - let cur_pos = self.input.stream_position().str_err()? as i64; - self.input - .seek_relative((data_pos as i64) - cur_pos) - .str_err()?; - let reader = self.input.by_ref().take(data_len as u64); + let reader = self.mmap[data_pos..data_pos_next].take(data_len as u64); let data = zstd::decode_all(reader).str_err()?; // decode item