diff --git a/src/main.rs b/src/main.rs index 81feaf8..07e0900 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,7 +9,7 @@ extern crate zip; use clap::{Parser, Subcommand}; use rand::seq::IteratorRandom; -use std::io::{Read, Write}; +use std::io::{Read, Seek, Write}; use std::path::PathBuf; use std::time::Instant; use std::{fs, io, sync::mpsc, thread}; @@ -230,12 +230,6 @@ fn main() { action(); } -#[derive(bincode::Decode, bincode::Encode)] -struct VEntry { - pos: u32, - len: u16, -} - fn read_from_db2(id: u32) -> Option { use std::io::Cursor; const LEN_SIZE: usize = std::mem::size_of::(); @@ -253,17 +247,33 @@ fn read_from_db2(id: u32) -> Option { let mut zdata = vec![0u8; len]; input.read_exact(&mut zdata[..len]).expect("read ztab"); let tab_data = zstd::decode_all(Cursor::new(zdata)).expect("zstd decode table"); - let tab: (Vec, usize) = + let tab: (Vec, usize) = bincode::decode_from_slice(&tab_data, cfg).expect("bincode decode tab"); let tab = tab.0; - //println!("read tab done"); + //println!("read tab done, len {}", tab.len()); + let index = match id { + 0 => { + let mut rng = rand::thread_rng(); + (1..tab.len()).into_iter().choose(&mut rng).unwrap() + } + _ => (id - 1) as usize, + }; - let choosen = tab.get(id as usize).expect("get by id"); - input.seek_relative(choosen.pos as i64).expect("q seek"); - let reader = input.take(choosen.len as u64); + //println!("index {}", index); + + let pos = *tab.get(index).expect("get pos"); + let pos_next = *tab.get((index + 1) as usize).expect("get pos next"); + let len = pos_next - pos; + + //println!("pos {} | next {} | len {}", pos, pos_next, len); + + input.seek_relative(pos as i64).expect("q seek"); + let reader = input.take(len as u64); let data = zstd::decode_all(reader).expect("zstd decode data"); + //println!("zstd decoded len {}", data.len()); + let question: (Question, usize) = bincode::decode_from_slice(&data, cfg).expect("bincode decode q"); let question = question.0; @@ -284,35 +294,42 @@ fn write_db2() { } fn db_writer2_task(rx: mpsc::Receiver) { const LEN_SIZE: usize = std::mem::size_of::(); + const COMP_DATA_LEVEL: i32 = 2; + const COMP_HDR_LEVEL: i32 = 2; let cfg = bincode::config::standard().with_fixed_int_encoding(); - let mut table: Vec = vec![]; + let mut table: Vec = vec![]; let buf_data: Vec = Vec::with_capacity(500 * 1024 * 1024); use std::io::Cursor; let mut buf = Cursor::new(buf_data); let mut pos: u32 = 0; - - rx.into_iter().for_each(|q| { + let mut num = 1; + rx.into_iter().for_each(|mut q| { + q.num = num; let data = bincode::encode_to_vec(q, cfg).expect("bincode q encode"); - let data = zstd::encode_all(Cursor::new(data), 3).expect("zstd q encode"); + let data = zstd::encode_all(Cursor::new(data), COMP_DATA_LEVEL).expect("zstd q encode"); let len = buf.write(&data).expect("write question"); - table.push(VEntry { - pos, - len: len.try_into().expect("len try_into len"), - }); + table.push(pos); + pos += len as u32; + num += 1; }); + table.push(pos); + + println!( + "zbuf done, tab len {}, buf size {}", + table.len(), + buf.position() + ); buf.set_position(0); - println!("zbuf done"); - let tab_data = bincode::encode_to_vec(&table, cfg).expect("encode table"); - let zdata = zstd::encode_all(Cursor::new(tab_data), 3).expect("zstd enc table"); + let zdata = zstd::encode_all(Cursor::new(tab_data), COMP_HDR_LEVEL).expect("zstd enc table"); let zlen = zdata.len() as u64; - println!("z tab done"); + println!("z tab done, tab data_len = {}", zlen); let out = fs::File::create("test.bin").expect("out create"); let mut out = std::io::BufWriter::with_capacity(500 * 1024 * 1024, out); @@ -321,6 +338,9 @@ fn db_writer2_task(rx: mpsc::Receiver) { let ztab_writed = out.write(&zdata).expect("write tab zdata"); assert_eq!(ztab_writed, zdata.len()); drop(zdata); + + println!("header write done, pos: {}", out.stream_position().unwrap()); + std::io::copy(&mut buf, &mut out).expect("copy z buf"); drop(buf); out.flush().expect("out flush");