Compare commits

..

4 Commits
master ... simd

Author SHA1 Message Date
4a21ba6ed7 Merge branch 'master' into simd
Some checks failed
continuous-integration/drone/push Build is failing
2023-08-26 13:54:00 +03:00
e5a06083b3
Merge branch 'master' into simd
Some checks failed
continuous-integration/drone/push Build is failing
2023-08-25 15:32:11 +03:00
6ea39b3db4
Merge branch 'master' into simd
Some checks failed
continuous-integration/drone/push Build is failing
2023-08-25 14:42:29 +03:00
e1ef9cc3c3
simd_json instead of serde_json
Some checks failed
continuous-integration/drone/push Build is failing
2023-08-23 13:37:12 +03:00
10 changed files with 165 additions and 32 deletions

147
Cargo.lock generated
View File

@ -28,6 +28,17 @@ dependencies = [
"cpufeatures", "cpufeatures",
] ]
[[package]]
name = "ahash"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f"
dependencies = [
"cfg-if",
"once_cell",
"version_check",
]
[[package]] [[package]]
name = "aho-corasick" name = "aho-corasick"
version = "1.0.2" version = "1.0.2"
@ -318,7 +329,6 @@ dependencies = [
"rand", "rand",
"serde", "serde",
"serde_derive", "serde_derive",
"serde_json",
"tempfile", "tempfile",
"zip", "zip",
] ]
@ -338,7 +348,6 @@ dependencies = [
"rand", "rand",
"serde", "serde",
"serde_derive", "serde_derive",
"serde_json",
"tempfile", "tempfile",
"tokio", "tokio",
"tokio-stream", "tokio-stream",
@ -361,7 +370,7 @@ dependencies = [
"pin-project", "pin-project",
"serde", "serde",
"serde_derive", "serde_derive",
"serde_json", "simd-json",
"tempfile", "tempfile",
"tokio", "tokio",
"zip", "zip",
@ -712,6 +721,15 @@ dependencies = [
"miniz_oxide", "miniz_oxide",
] ]
[[package]]
name = "float-cmp"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4"
dependencies = [
"num-traits",
]
[[package]] [[package]]
name = "fmmap" name = "fmmap"
version = "0.3.2" version = "0.3.2"
@ -863,12 +881,31 @@ version = "1.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
[[package]]
name = "halfbrown"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5681137554ddff44396e5f149892c769d45301dd9aa19c51602a89ee214cb0ec"
dependencies = [
"hashbrown 0.13.2",
"serde",
]
[[package]] [[package]]
name = "hashbrown" name = "hashbrown"
version = "0.12.3" version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
[[package]]
name = "hashbrown"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e"
dependencies = [
"ahash",
]
[[package]] [[package]]
name = "heck" name = "heck"
version = "0.4.1" version = "0.4.1"
@ -906,7 +943,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
dependencies = [ dependencies = [
"autocfg", "autocfg",
"hashbrown", "hashbrown 0.12.3",
] ]
[[package]] [[package]]
@ -1008,6 +1045,70 @@ version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "lexical-core"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46"
dependencies = [
"lexical-parse-float",
"lexical-parse-integer",
"lexical-util",
"lexical-write-float",
"lexical-write-integer",
]
[[package]]
name = "lexical-parse-float"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f"
dependencies = [
"lexical-parse-integer",
"lexical-util",
"static_assertions",
]
[[package]]
name = "lexical-parse-integer"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9"
dependencies = [
"lexical-util",
"static_assertions",
]
[[package]]
name = "lexical-util"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc"
dependencies = [
"static_assertions",
]
[[package]]
name = "lexical-write-float"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862"
dependencies = [
"lexical-util",
"lexical-write-integer",
"static_assertions",
]
[[package]]
name = "lexical-write-integer"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446"
dependencies = [
"lexical-util",
"static_assertions",
]
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.147" version = "0.2.147"
@ -1489,6 +1590,26 @@ dependencies = [
"digest", "digest",
] ]
[[package]]
name = "simd-json"
version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de7f1293f0e4e11d52e588766fe9de8caa2857ff63809d40de83245452ca7c5c"
dependencies = [
"halfbrown",
"lexical-core",
"serde",
"serde_json",
"simdutf8",
"value-trait",
]
[[package]]
name = "simdutf8"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a"
[[package]] [[package]]
name = "similar" name = "similar"
version = "2.2.1" version = "2.2.1"
@ -1504,6 +1625,12 @@ dependencies = [
"autocfg", "autocfg",
] ]
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]] [[package]]
name = "strsim" name = "strsim"
version = "0.10.0" version = "0.10.0"
@ -1695,6 +1822,18 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "value-trait"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09a5b6c8ceb01263b969cac48d4a6705134d490ded13d889e52c0cfc80c6945e"
dependencies = [
"float-cmp",
"halfbrown",
"itoa",
"ryu",
]
[[package]] [[package]]
name = "version_check" name = "version_check"
version = "0.9.4" version = "0.9.4"

View File

@ -19,7 +19,6 @@ harness = false
[dependencies] [dependencies]
chgk_ledb_lib = {path = "../lib", features = ["sync", "source", "convert"]} chgk_ledb_lib = {path = "../lib", features = ["sync", "source", "convert"]}
serde_json="1.0"
zip="0.6" zip="0.6"
rand="0.8" rand="0.8"
clap = { version = "4.2.7", features = ["derive"] } clap = { version = "4.2.7", features = ["derive"] }

View File

@ -3,7 +3,6 @@ extern crate criterion;
extern crate bincode; extern crate bincode;
extern crate serde; extern crate serde;
extern crate serde_derive; extern crate serde_derive;
extern crate serde_json;
extern crate tempfile; extern crate tempfile;
use chgk_ledb_lib::db; use chgk_ledb_lib::db;

View File

@ -3,7 +3,6 @@ extern crate criterion;
extern crate bincode; extern crate bincode;
extern crate serde; extern crate serde;
extern crate serde_derive; extern crate serde_derive;
extern crate serde_json;
extern crate tempfile; extern crate tempfile;
use chgk_ledb_lib::db::{Reader, Writer, WriterOpts}; use chgk_ledb_lib::db::{Reader, Writer, WriterOpts};

View File

@ -1,4 +1,3 @@
extern crate serde_json;
use clap::{Parser, Subcommand}; use clap::{Parser, Subcommand};
use rand::seq::IteratorRandom; use rand::seq::IteratorRandom;

View File

@ -19,7 +19,6 @@ harness = false
[dependencies] [dependencies]
chgk_ledb_lib = {path = "../lib", features = ["async", "convert_async"]} chgk_ledb_lib = {path = "../lib", features = ["async", "convert_async"]}
serde_json="1.0"
async_zip = { version = "0.0.15", features = [ async_zip = { version = "0.0.15", features = [
"zstd", "zstd",
"tokio", "tokio",

View File

@ -1,4 +1,3 @@
extern crate serde_json;
use clap::{Parser, Subcommand}; use clap::{Parser, Subcommand};
use futures::{pin_mut, Future}; use futures::{pin_mut, Future};
use rand::distributions::Uniform; use rand::distributions::Uniform;

View File

@ -44,10 +44,9 @@ convert_async = [
[dependencies] [dependencies]
serde = "1.0" serde = "1.0"
serde_derive = "1.0" serde_derive = "1.0"
serde_json = "1.0"
bincode = "^2.0.0-rc.2" bincode = "^2.0.0-rc.2"
zip = { version = "0.6", optional = true } zip = { version = "0.6", optional = true }
async_zip = { version = "0.0.15" , features = [ async_zip = { version = "0.0.15", features = [
"zstd", "zstd",
"tokio", "tokio",
"tokio-fs", "tokio-fs",
@ -70,6 +69,7 @@ async-stream = { version = "0.3", optional = true }
zstd = { version = "^0.12", default-features = false, optional = true } zstd = { version = "^0.12", default-features = false, optional = true }
memmap = { version = "0.7.0", optional = true } memmap = { version = "0.7.0", optional = true }
pin-project = { version = "1.1.3", optional = true } pin-project = { version = "1.1.3", optional = true }
simd-json = "0.10.6"
[dev-dependencies] [dev-dependencies]
insta = { version = "1.31.0", features = ["yaml"] } insta = { version = "1.31.0", features = ["yaml"] }

View File

@ -142,7 +142,7 @@ pub mod convert {
impl<T> QuestionsConverter for T impl<T> QuestionsConverter for T
where where
T: Iterator<Item = (String, Result<SourceQuestionsBatch, serde_json::Error>)>, T: Iterator<Item = (String, Result<SourceQuestionsBatch, simd_json::Error>)>,
{ {
fn convert<'a>(&'a mut self) -> Box<dyn Iterator<Item = Question> + 'a> { fn convert<'a>(&'a mut self) -> Box<dyn Iterator<Item = Question> + 'a> {
let iter = self let iter = self
@ -169,7 +169,7 @@ pub mod convert {
fn test_convert() { fn test_convert() {
let mut source = iter::once(( let mut source = iter::once((
String::from("test.json"), String::from("test.json"),
Ok::<SourceQuestionsBatch, serde_json::Error>(sample_batch()), Ok::<SourceQuestionsBatch, simd_json::Error>(sample_batch()),
)); ));
let converted: Vec<_> = source.convert().collect(); let converted: Vec<_> = source.convert().collect();
assert_yaml_snapshot!(converted, @r#" assert_yaml_snapshot!(converted, @r#"
@ -207,7 +207,7 @@ pub mod convert_async {
pub struct QuestionsConverterAsync<T> pub struct QuestionsConverterAsync<T>
where where
T: Stream<Item = (String, Result<SourceQuestionsBatch, serde_json::Error>)> T: Stream<Item = (String, Result<SourceQuestionsBatch, simd_json::Error>)>
+ std::marker::Unpin, + std::marker::Unpin,
{ {
inner: T, inner: T,
@ -215,7 +215,7 @@ pub mod convert_async {
impl<T> From<T> for QuestionsConverterAsync<T> impl<T> From<T> for QuestionsConverterAsync<T>
where where
T: Stream<Item = (String, Result<SourceQuestionsBatch, serde_json::Error>)> T: Stream<Item = (String, Result<SourceQuestionsBatch, simd_json::Error>)>
+ std::marker::Unpin, + std::marker::Unpin,
{ {
fn from(inner: T) -> Self { fn from(inner: T) -> Self {
@ -225,7 +225,7 @@ pub mod convert_async {
pub trait QuestionsConverterAsyncForStream<T> pub trait QuestionsConverterAsyncForStream<T>
where where
T: Stream<Item = (String, Result<SourceQuestionsBatch, serde_json::Error>)> T: Stream<Item = (String, Result<SourceQuestionsBatch, simd_json::Error>)>
+ std::marker::Unpin, + std::marker::Unpin,
{ {
fn converter(&mut self) -> QuestionsConverterAsync<&mut T>; fn converter(&mut self) -> QuestionsConverterAsync<&mut T>;
@ -233,7 +233,7 @@ pub mod convert_async {
impl<T> QuestionsConverterAsyncForStream<T> for T impl<T> QuestionsConverterAsyncForStream<T> for T
where where
T: Stream<Item = (String, Result<SourceQuestionsBatch, serde_json::Error>)> T: Stream<Item = (String, Result<SourceQuestionsBatch, simd_json::Error>)>
+ std::marker::Unpin, + std::marker::Unpin,
{ {
fn converter(&mut self) -> QuestionsConverterAsync<&mut T> { fn converter(&mut self) -> QuestionsConverterAsync<&mut T> {
@ -243,7 +243,7 @@ pub mod convert_async {
impl<T> QuestionsConverterAsync<T> impl<T> QuestionsConverterAsync<T>
where where
T: Stream<Item = (String, Result<SourceQuestionsBatch, serde_json::Error>)> T: Stream<Item = (String, Result<SourceQuestionsBatch, simd_json::Error>)>
+ std::marker::Unpin, + std::marker::Unpin,
{ {
pub fn convert(self) -> impl Stream<Item = Question> { pub fn convert(self) -> impl Stream<Item = Question> {
@ -279,7 +279,7 @@ pub mod convert_async {
let source = futures::stream::once(async { let source = futures::stream::once(async {
( (
String::from("test.json"), String::from("test.json"),
Ok::<SourceQuestionsBatch, serde_json::Error>(sample_batch()), Ok::<SourceQuestionsBatch, simd_json::Error>(sample_batch()),
) )
}); });
@ -315,7 +315,7 @@ pub use convert_async::{QuestionsConverterAsync, QuestionsConverterAsyncForStrea
mod test { mod test {
use super::*; use super::*;
use insta::assert_yaml_snapshot; use insta::assert_yaml_snapshot;
use serde_json::json; use simd_json::json;
#[cfg(any(feature = "convert", feature = "convert_async"))] #[cfg(any(feature = "convert", feature = "convert_async"))]
pub mod convert_common { pub mod convert_common {
@ -373,7 +373,7 @@ mod test {
} }
#[test] #[test]
fn test_question_de() { fn test_question_de() {
let question_from_json: Result<Question, _> = serde_json::from_value(json!({ let question_from_json: Result<Question, _> = simd_json::serde::from_owned_value(json!({
"id": "Вопрос 1", "id": "Вопрос 1",
"description": "Сколько будет (2 * 2 * 2 + 2) * 2 * 2 + 2", "description": "Сколько будет (2 * 2 * 2 + 2) * 2 * 2 + 2",
"answer": "42", "answer": "42",

View File

@ -142,7 +142,7 @@ pub mod reader_sync {
where where
R: Read + Seek, R: Read + Seek,
{ {
type Item = (String, Result<SourceQuestionsBatch, serde_json::Error>); type Item = (String, Result<SourceQuestionsBatch, simd_json::Error>);
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
if self.index.is_none() && !self.zipfile.is_empty() { if self.index.is_none() && !self.zipfile.is_empty() {
@ -169,7 +169,7 @@ pub mod reader_sync {
let name = file.mangled_name(); let name = file.mangled_name();
let name_str = name.to_str().unwrap(); let name_str = name.to_str().unwrap();
let data: Result<SourceQuestionsBatch, _> = serde_json::from_reader(file); let data: Result<SourceQuestionsBatch, _> = simd_json::from_reader(file);
Some((String::from(name_str), data)) Some((String::from(name_str), data))
} }
@ -237,7 +237,7 @@ pub mod reader_sync {
zip_file zip_file
.start_file("test.json", options) .start_file("test.json", options)
.expect("zip start file"); .expect("zip start file");
let data = serde_json::to_vec(&batch).unwrap(); let data = simd_json::to_vec(&batch).unwrap();
let amount = zip_file.write(data.as_slice()).expect("write entry"); let amount = zip_file.write(data.as_slice()).expect("write entry");
assert_eq!(amount, data.len()); assert_eq!(amount, data.len());
zip_file.finish().expect("finish zip file"); zip_file.finish().expect("finish zip file");
@ -331,7 +331,7 @@ pub mod reader_async {
pub async fn get( pub async fn get(
&mut self, &mut self,
index: usize, index: usize,
) -> Result<(String, Result<SourceQuestionsBatch, serde_json::Error>), String> ) -> Result<(String, Result<SourceQuestionsBatch, simd_json::Error>), String>
where where
R: AsyncRead + AsyncSeek + Unpin, R: AsyncRead + AsyncSeek + Unpin,
{ {
@ -352,12 +352,12 @@ pub mod reader_async {
if let Err(error) = readed { if let Err(error) = readed {
return Err(format!("read_to_end: {error:?}")); return Err(format!("read_to_end: {error:?}"));
} }
let parsed: Result<SourceQuestionsBatch, _> = serde_json::from_slice(&data); let parsed: Result<SourceQuestionsBatch, _> = simd_json::from_slice(data.as_mut_slice());
Ok((filename, parsed)) Ok((filename, parsed))
} }
pub async fn get_next( pub async fn get_next(
&mut self, &mut self,
) -> Option<Result<(String, Result<SourceQuestionsBatch, serde_json::Error>), String>> ) -> Option<Result<(String, Result<SourceQuestionsBatch, simd_json::Error>), String>>
where where
R: AsyncRead + AsyncSeek + Unpin, R: AsyncRead + AsyncSeek + Unpin,
{ {
@ -376,7 +376,7 @@ pub mod reader_async {
} }
pub fn stream( pub fn stream(
&mut self, &mut self,
) -> impl Stream<Item = (String, Result<SourceQuestionsBatch, serde_json::Error>)> + '_ ) -> impl Stream<Item = (String, Result<SourceQuestionsBatch, simd_json::Error>)> + '_
{ {
stream! { stream! {
while let Some(Ok(item)) = self.get_next().await { while let Some(Ok(item)) = self.get_next().await {
@ -425,7 +425,7 @@ pub mod reader_async {
let entry = let entry =
ZipEntryBuilder::new("test.json".into(), async_zip::Compression::Zstd).build(); ZipEntryBuilder::new("test.json".into(), async_zip::Compression::Zstd).build();
zip_file zip_file
.write_entry_whole(entry, serde_json::to_vec(&batch).unwrap().as_slice()) .write_entry_whole(entry, simd_json::to_vec(&batch).unwrap().as_slice())
.await .await
.expect("write entry"); .expect("write entry");
zip_file.close().await.expect("close zip"); zip_file.close().await.expect("close zip");
@ -500,7 +500,7 @@ pub use reader_async::{ReadSourceQuestionsBatchesAsync, SourceQuestionsZipReader
mod test { mod test {
use super::*; use super::*;
use insta::assert_yaml_snapshot; use insta::assert_yaml_snapshot;
use serde_json::json; use simd_json::json;
pub fn sample_batch() -> SourceQuestionsBatch { pub fn sample_batch() -> SourceQuestionsBatch {
SourceQuestionsBatch { SourceQuestionsBatch {
@ -544,7 +544,7 @@ mod test {
} }
#[test] #[test]
fn test_batch_de() { fn test_batch_de() {
let batch_from_json: Result<SourceQuestionsBatch, _> = serde_json::from_value(json!({ let batch_from_json: Result<SourceQuestionsBatch, _> = simd_json::serde::from_owned_value(json!({
"Чемпионат": "Тестовый", "Чемпионат": "Тестовый",
"Дата": "00-000-2000", "Дата": "00-000-2000",
"Вопросы": [ "Вопросы": [