async db: postcard

This commit is contained in:
Dmitry Belyaev 2023-08-23 22:33:59 +03:00
parent cb781ccf76
commit fad2d1d63d

View File

@ -24,9 +24,9 @@ use fmmap::tokio::{AsyncMmapFile, AsyncMmapFileExt, AsyncOptions};
type LSize = u32; type LSize = u32;
const LEN_SIZE: usize = std::mem::size_of::<LSize>(); const LEN_SIZE: usize = std::mem::size_of::<LSize>();
const BINCODE_CFG: bincode::config::Configuration = bincode::config::standard();
use crate::util::BincodeVecWriter; use serde::{de::DeserializeOwned, Serialize};
use crate::util::ErrorToString; use crate::util::ErrorToString;
pub struct WriterOpts { pub struct WriterOpts {
@ -42,18 +42,18 @@ impl Default for WriterOpts {
compress_lvl: Level::Default, compress_lvl: Level::Default,
data_buf_size: 500 * 1024 * 1024, data_buf_size: 500 * 1024 * 1024,
out_buf_size: 200 * 1024 * 1024, out_buf_size: 200 * 1024 * 1024,
current_buf_size: 100 * 1024, current_buf_size: 1024 * 1024,
} }
} }
} }
pub struct Writer<T> pub struct Writer<T>
where where
T: bincode::Encode, T: Serialize,
{ {
out: io::BufWriter<fs::File>, out: io::BufWriter<fs::File>,
data_buf: Vec<u8>, data_buf: Vec<u8>,
cur_buf_item: BincodeVecWriter, cur_buf_item: Vec<u8>,
table: Vec<LSize>, table: Vec<LSize>,
compress_lvl: Level, compress_lvl: Level,
_t: PhantomData<Arc<T>>, _t: PhantomData<Arc<T>>,
@ -61,14 +61,13 @@ where
impl<T> Writer<T> impl<T> Writer<T>
where where
T: bincode::Encode, T: Serialize,
{ {
pub async fn new<P: AsRef<Path>>(path: P, opts: WriterOpts) -> Result<Self, String> { pub async fn new<P: AsRef<Path>>(path: P, opts: WriterOpts) -> Result<Self, String> {
let out = fs::File::create(path).await.str_err()?; let out = fs::File::create(path).await.str_err()?;
let out = io::BufWriter::with_capacity(opts.out_buf_size, out); let out = io::BufWriter::with_capacity(opts.out_buf_size, out);
let data_buf: Vec<u8> = Vec::with_capacity(opts.data_buf_size); let data_buf: Vec<u8> = Vec::with_capacity(opts.data_buf_size);
let cur_buf_item: Vec<u8> = Vec::with_capacity(opts.current_buf_size); let cur_buf_item: Vec<u8> = vec![0; opts.current_buf_size];
let cur_buf_item = BincodeVecWriter::new(cur_buf_item);
let compress_lvl = opts.compress_lvl; let compress_lvl = opts.compress_lvl;
@ -88,16 +87,16 @@ where
self.push_by_ref(&item).await self.push_by_ref(&item).await
} }
#[allow(clippy::useless_asref)]
pub async fn push_by_ref(&mut self, item: &T) -> Result<(), String> { pub async fn push_by_ref(&mut self, item: &T) -> Result<(), String> {
let pos: LSize = self.data_buf.len() as LSize; let pos: LSize = self.data_buf.len() as LSize;
let cur_item_data = postcard::to_slice(item, self.cur_buf_item.as_mut_slice()).str_err()?;
bincode::encode_into_writer(item, &mut self.cur_buf_item, BINCODE_CFG).str_err()?; let mut zencoder = ZstdEncoder::with_quality(cur_item_data.as_ref(), self.compress_lvl);
let mut zencoder = ZstdEncoder::with_quality(&self.cur_buf_item[..], self.compress_lvl);
io::copy(&mut zencoder, &mut self.data_buf) io::copy(&mut zencoder, &mut self.data_buf)
.await .await
.str_err()?; .str_err()?;
self.cur_buf_item.clear(); cur_item_data.fill(0);
self.table.push(pos); self.table.push(pos);
@ -159,7 +158,7 @@ use pin_project::pin_project;
/// only work when ..push.poll() returns Ready immediately /// only work when ..push.poll() returns Ready immediately
pub struct WriterSink<'a, T> pub struct WriterSink<'a, T>
where where
T: bincode::Encode, T: Serialize,
{ {
#[pin] #[pin]
writer: &'a mut Writer<T>, writer: &'a mut Writer<T>,
@ -168,7 +167,7 @@ where
impl<'a, T> Sink<T> for WriterSink<'a, T> impl<'a, T> Sink<T> for WriterSink<'a, T>
where where
T: bincode::Encode, T: Serialize,
{ {
type Error = String; type Error = String;
@ -213,7 +212,7 @@ where
pub struct Reader<T> pub struct Reader<T>
where where
T: bincode::Decode, T: DeserializeOwned,
{ {
mmap: AsyncMmapFile, mmap: AsyncMmapFile,
count: usize, count: usize,
@ -223,7 +222,7 @@ where
impl<T> Reader<T> impl<T> Reader<T>
where where
T: bincode::Decode, T: DeserializeOwned,
{ {
pub async fn new<P: AsRef<Path>>(path: P) -> Result<Self, String> { pub async fn new<P: AsRef<Path>>(path: P) -> Result<Self, String> {
let mmap = AsyncOptions::new() let mmap = AsyncOptions::new()
@ -292,10 +291,10 @@ where
decoder.read_to_end(data_buf).await.str_err()?; decoder.read_to_end(data_buf).await.str_err()?;
// decode item // decode item
let item: (T, usize) = bincode::decode_from_slice(data_buf, BINCODE_CFG).str_err()?; let (item, _): (T, _) = postcard::take_from_bytes(data_buf).str_err()?;
data_buf.clear(); data_buf.clear();
Ok(item.0) Ok(item)
} }
/// get item at index /// get item at index
@ -311,7 +310,7 @@ where
pub struct ReaderStream<'a, T> pub struct ReaderStream<'a, T>
where where
T: bincode::Decode, T: DeserializeOwned,
{ {
reader: &'a Reader<T>, reader: &'a Reader<T>,
index: Option<usize>, index: Option<usize>,
@ -319,7 +318,7 @@ where
impl<'a, T> ReaderStream<'a, T> impl<'a, T> ReaderStream<'a, T>
where where
T: bincode::Decode, T: DeserializeOwned,
{ {
fn new(reader: &'a Reader<T>) -> Self { fn new(reader: &'a Reader<T>) -> Self {
ReaderStream { ReaderStream {
@ -331,7 +330,7 @@ where
impl<'a, T> Stream for ReaderStream<'a, T> impl<'a, T> Stream for ReaderStream<'a, T>
where where
T: bincode::Decode, T: DeserializeOwned,
{ {
type Item = T; type Item = T;
@ -375,7 +374,7 @@ where
pub struct BufReader<T> pub struct BufReader<T>
where where
T: bincode::Decode, T: DeserializeOwned,
{ {
inner: Reader<T>, inner: Reader<T>,
buf: Vec<u8>, buf: Vec<u8>,
@ -383,7 +382,7 @@ where
impl<T> BufReader<T> impl<T> BufReader<T>
where where
T: bincode::Decode, T: DeserializeOwned,
{ {
pub async fn new<P: AsRef<Path>>(path: P, buf_size: usize) -> Result<Self, String> { pub async fn new<P: AsRef<Path>>(path: P, buf_size: usize) -> Result<Self, String> {
match Reader::<T>::new(path).await { match Reader::<T>::new(path).await {
@ -410,7 +409,7 @@ where
impl<T> From<Reader<T>> for BufReader<T> impl<T> From<Reader<T>> for BufReader<T>
where where
T: bincode::Decode, T: DeserializeOwned,
{ {
fn from(inner: Reader<T>) -> Self { fn from(inner: Reader<T>) -> Self {
Self { Self {
@ -422,7 +421,7 @@ where
impl<T> From<BufReader<T>> for Reader<T> impl<T> From<BufReader<T>> for Reader<T>
where where
T: bincode::Decode, T: DeserializeOwned,
{ {
fn from(value: BufReader<T>) -> Self { fn from(value: BufReader<T>) -> Self {
value.into_inner() value.into_inner()
@ -431,7 +430,7 @@ where
impl<T> Deref for BufReader<T> impl<T> Deref for BufReader<T>
where where
T: bincode::Decode, T: DeserializeOwned,
{ {
type Target = Reader<T>; type Target = Reader<T>;
fn deref(&self) -> &Self::Target { fn deref(&self) -> &Self::Target {
@ -441,7 +440,7 @@ where
pub struct BufReaderStream<T> pub struct BufReaderStream<T>
where where
T: bincode::Decode, T: DeserializeOwned,
{ {
reader: BufReader<T>, reader: BufReader<T>,
index: Option<usize>, index: Option<usize>,
@ -449,7 +448,7 @@ where
impl<T> BufReaderStream<T> impl<T> BufReaderStream<T>
where where
T: bincode::Decode, T: DeserializeOwned,
{ {
fn new(reader: BufReader<T>) -> Self { fn new(reader: BufReader<T>) -> Self {
BufReaderStream { BufReaderStream {
@ -473,7 +472,7 @@ where
impl<T> Stream for BufReaderStream<T> impl<T> Stream for BufReaderStream<T>
where where
T: bincode::Decode, T: DeserializeOwned,
{ {
type Item = T; type Item = T;
@ -516,9 +515,10 @@ where
mod test { mod test {
use super::*; use super::*;
use core::fmt::Debug; use core::fmt::Debug;
use serde_derive::Deserialize;
use tempfile::tempdir; use tempfile::tempdir;
#[derive(bincode::Encode, bincode::Decode, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] #[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
struct TestData { struct TestData {
num: u64, num: u64,
test: String, test: String,