add async writer
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
Dmitry Belyaev 2023-08-05 23:08:55 +03:00
parent 307790ce49
commit abcb7d6dc2
2 changed files with 191 additions and 0 deletions

188
lib/src/async_db.rs Normal file
View File

@ -0,0 +1,188 @@
use std::{marker::PhantomData, path::Path, sync::Arc};
use async_stream::stream;
use tokio::pin;
use async_compression::tokio::bufread::ZstdDecoder;
use async_compression::tokio::write::ZstdEncoder;
use async_compression::Level;
use futures::stream::{self, StreamExt};
use futures_core::stream::Stream;
use futures_util::pin_mut;
use tokio::{
fs,
io::{self, AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt},
};
use fmmap::tokio::{AsyncMmapFile, AsyncOptions};
/// Offset/length integer type used for entries in the on-disk offset table.
type LSize = u32;
/// Size in bytes of a single table entry (one `LSize`).
const LEN_SIZE: usize = std::mem::size_of::<LSize>();
/// bincode configuration shared by all encode calls in this module.
const BINCODE_CFG: bincode::config::Configuration = bincode::config::standard();
/// Convenience conversion from any `Result<T, E: Error>` into
/// `Result<T, String>`, used to keep this module's public API free of
/// crate-specific error types.
trait ErrorToString {
    type Output;
    fn str_err(self) -> std::result::Result<Self::Output, String>;
}

impl<T, E> ErrorToString for std::result::Result<T, E>
where
    E: std::error::Error,
{
    type Output = T;

    /// Pass the success value through unchanged; stringify the error via
    /// its `Display` impl.
    fn str_err(self) -> std::result::Result<Self::Output, String> {
        match self {
            Ok(value) => Ok(value),
            Err(err) => Err(err.to_string()),
        }
    }
}
/// Tuning knobs for [`Writer`].
pub struct WriterOpts {
    /// zstd compression level applied to each pushed item.
    pub compress_lvl: Level,
    /// Initial capacity (bytes) of the in-memory compressed-data buffer.
    pub data_buf_size: usize,
    /// Buffer capacity (bytes) of the buffered file writer.
    pub out_buf_size: usize,
}
impl Default for WriterOpts {
fn default() -> Self {
Self {
compress_lvl: Level::Default,
data_buf_size: 500 * 1024 * 1024,
out_buf_size: 200 * 1024 * 1024,
}
}
}
/// Buffered async writer for a sequence of bincode-encoded, individually
/// zstd-compressed items.
///
/// Items accumulate in `data_buf`; nothing reaches the file until
/// `finish` is called, which writes the offset table followed by the data.
pub struct Writer<T>
where
    T: bincode::Encode,
{
    // Destination file, wrapped in a BufWriter sized by WriterOpts.
    out: io::BufWriter<fs::File>,
    // Compressed item data, accumulated in memory until finish().
    data_buf: Vec<u8>,
    // Start offset of each item within data_buf (relative, made absolute on write).
    table: Vec<LSize>,
    // Compression level applied to every pushed item.
    compress_lvl: Level,
    // NOTE(review): *const T makes Writer !Send/!Sync; if that is not
    // intentional, PhantomData<fn() -> T> would avoid it — confirm.
    _t: PhantomData<*const T>,
}
impl<T> Writer<T>
where
T: bincode::Encode,
{
pub async fn new<P: AsRef<Path>>(path: P, opts: WriterOpts) -> Result<Self, String> {
let out = fs::File::create(path).await.str_err()?;
let out = io::BufWriter::with_capacity(opts.out_buf_size, out);
let data_buf: Vec<u8> = Vec::with_capacity(opts.data_buf_size);
let compress_lvl = opts.compress_lvl;
let table: Vec<LSize> = vec![];
Ok(Self {
out,
data_buf,
table,
compress_lvl,
_t: PhantomData,
})
}
pub async fn push(&mut self, item: T) -> Result<(), String> {
let pos: LSize = self.data_buf.len() as LSize;
let item_data = bincode::encode_to_vec(item, BINCODE_CFG).str_err()?;
let mut zencoder = ZstdEncoder::with_quality(&mut self.data_buf, self.compress_lvl);
zencoder.write_all(&item_data).await.str_err()?;
zencoder.flush().await.str_err()?;
self.table.push(pos);
Ok(())
}
pub async fn load<S>(&mut self, source: S) -> Result<(), String>
where
S: Stream<Item = T> + std::marker::Unpin,
{
let hint = source.size_hint();
let hint = std::cmp::max(hint.0, hint.1.unwrap_or(0));
if hint > 0 {
self.table.reserve(hint);
}
pin_mut!(source);
while let Some(item) = source.next().await {
self.push(item).await?;
}
Ok(())
}
pub async fn finish(mut self) -> Result<(), String> {
// finish tab
let pos: LSize = self.data_buf.len() as LSize;
self.table.push(pos);
// write tab
let tab_size = (self.table.len() * LEN_SIZE) as LSize;
for pos in self.table {
let pos_data = (pos + tab_size).to_le_bytes();
self.out.write_all(&pos_data).await.str_err()?;
}
// copy data
self.out.write_all(&self.data_buf[..]).await.str_err()?;
self.out.flush().await.str_err()?;
Ok(())
}
}
#[cfg(test)]
mod test {
    use super::*;
    use tempfile::tempdir;

    /// Sample payload covering a fixed-size field and a variable-size one.
    #[derive(bincode::Encode, bincode::Decode, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
    struct TestData {
        num: u64,
        test: String,
    }

    /// Produce `count` items with strictly growing string payloads.
    fn gen_data(count: usize) -> impl Iterator<Item = TestData> {
        // A Range is already an Iterator; `.into_iter()` was redundant.
        (0..count).map(|i| TestData {
            num: i as u64,
            test: "test".repeat(i),
        })
    }

    /// Adapt any plain iterator into an async `Stream`.
    fn stream_iter<I, T>(it: I) -> impl Stream<Item = T>
    where
        I: Iterator<Item = T>,
    {
        stream! {
            for i in it {
                yield i;
            }
        }
    }

    #[tokio::test]
    async fn test_write() {
        let dir = tempdir().expect("tempdir");
        let tmpfile = dir.path().join("test.tmp");
        let opts = WriterOpts {
            compress_lvl: Level::Default,
            data_buf_size: 10 * 1024 * 1024,
            out_buf_size: 10 * 1024 * 1024,
        };
        let mut writer: Writer<TestData> = Writer::new(&tmpfile, opts).await.expect("new writer");

        let items_iter = gen_data(5);
        let items: Vec<TestData> = items_iter.collect();
        let src = stream_iter(items.clone().into_iter());
        pin_mut!(src);
        writer.load(src).await.expect("load");
        writer.finish().await.expect("finish write");

        // The file must hold the (items+1)-entry offset table plus the
        // compressed data, so it is strictly larger than the table alone.
        let written = std::fs::metadata(&tmpfile).expect("output metadata").len();
        assert!(written > ((items.len() + 1) * LEN_SIZE) as u64);
    }
}

View File

@ -1,3 +1,6 @@
// Async (tokio-based) storage module; compiled only with the `async` feature.
#[cfg(feature = "async")]
pub mod async_db;
// Presumably the blocking counterpart of `async_db` (contents not shown
// here); compiled only with the `sync` feature.
#[cfg(feature = "sync")]
pub mod db;
pub mod questions;
pub mod source;