Compare commits

..

No commits in common. "simd" and "master" have entirely different histories.
simd ... master

10 changed files with 32 additions and 165 deletions

147
Cargo.lock generated
View File

@ -28,17 +28,6 @@ dependencies = [
"cpufeatures", "cpufeatures",
] ]
[[package]]
name = "ahash"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f"
dependencies = [
"cfg-if",
"once_cell",
"version_check",
]
[[package]] [[package]]
name = "aho-corasick" name = "aho-corasick"
version = "1.0.2" version = "1.0.2"
@ -329,6 +318,7 @@ dependencies = [
"rand", "rand",
"serde", "serde",
"serde_derive", "serde_derive",
"serde_json",
"tempfile", "tempfile",
"zip", "zip",
] ]
@ -348,6 +338,7 @@ dependencies = [
"rand", "rand",
"serde", "serde",
"serde_derive", "serde_derive",
"serde_json",
"tempfile", "tempfile",
"tokio", "tokio",
"tokio-stream", "tokio-stream",
@ -370,7 +361,7 @@ dependencies = [
"pin-project", "pin-project",
"serde", "serde",
"serde_derive", "serde_derive",
"simd-json", "serde_json",
"tempfile", "tempfile",
"tokio", "tokio",
"zip", "zip",
@ -721,15 +712,6 @@ dependencies = [
"miniz_oxide", "miniz_oxide",
] ]
[[package]]
name = "float-cmp"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4"
dependencies = [
"num-traits",
]
[[package]] [[package]]
name = "fmmap" name = "fmmap"
version = "0.3.2" version = "0.3.2"
@ -881,31 +863,12 @@ version = "1.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
[[package]]
name = "halfbrown"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5681137554ddff44396e5f149892c769d45301dd9aa19c51602a89ee214cb0ec"
dependencies = [
"hashbrown 0.13.2",
"serde",
]
[[package]] [[package]]
name = "hashbrown" name = "hashbrown"
version = "0.12.3" version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
[[package]]
name = "hashbrown"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e"
dependencies = [
"ahash",
]
[[package]] [[package]]
name = "heck" name = "heck"
version = "0.4.1" version = "0.4.1"
@ -943,7 +906,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
dependencies = [ dependencies = [
"autocfg", "autocfg",
"hashbrown 0.12.3", "hashbrown",
] ]
[[package]] [[package]]
@ -1045,70 +1008,6 @@ version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "lexical-core"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46"
dependencies = [
"lexical-parse-float",
"lexical-parse-integer",
"lexical-util",
"lexical-write-float",
"lexical-write-integer",
]
[[package]]
name = "lexical-parse-float"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f"
dependencies = [
"lexical-parse-integer",
"lexical-util",
"static_assertions",
]
[[package]]
name = "lexical-parse-integer"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9"
dependencies = [
"lexical-util",
"static_assertions",
]
[[package]]
name = "lexical-util"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc"
dependencies = [
"static_assertions",
]
[[package]]
name = "lexical-write-float"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862"
dependencies = [
"lexical-util",
"lexical-write-integer",
"static_assertions",
]
[[package]]
name = "lexical-write-integer"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446"
dependencies = [
"lexical-util",
"static_assertions",
]
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.147" version = "0.2.147"
@ -1590,26 +1489,6 @@ dependencies = [
"digest", "digest",
] ]
[[package]]
name = "simd-json"
version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de7f1293f0e4e11d52e588766fe9de8caa2857ff63809d40de83245452ca7c5c"
dependencies = [
"halfbrown",
"lexical-core",
"serde",
"serde_json",
"simdutf8",
"value-trait",
]
[[package]]
name = "simdutf8"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a"
[[package]] [[package]]
name = "similar" name = "similar"
version = "2.2.1" version = "2.2.1"
@ -1625,12 +1504,6 @@ dependencies = [
"autocfg", "autocfg",
] ]
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]] [[package]]
name = "strsim" name = "strsim"
version = "0.10.0" version = "0.10.0"
@ -1822,18 +1695,6 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "value-trait"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09a5b6c8ceb01263b969cac48d4a6705134d490ded13d889e52c0cfc80c6945e"
dependencies = [
"float-cmp",
"halfbrown",
"itoa",
"ryu",
]
[[package]] [[package]]
name = "version_check" name = "version_check"
version = "0.9.4" version = "0.9.4"

View File

@ -19,6 +19,7 @@ harness = false
[dependencies] [dependencies]
chgk_ledb_lib = {path = "../lib", features = ["sync", "source", "convert"]} chgk_ledb_lib = {path = "../lib", features = ["sync", "source", "convert"]}
serde_json="1.0"
zip="0.6" zip="0.6"
rand="0.8" rand="0.8"
clap = { version = "4.2.7", features = ["derive"] } clap = { version = "4.2.7", features = ["derive"] }

View File

@ -3,6 +3,7 @@ extern crate criterion;
extern crate bincode; extern crate bincode;
extern crate serde; extern crate serde;
extern crate serde_derive; extern crate serde_derive;
extern crate serde_json;
extern crate tempfile; extern crate tempfile;
use chgk_ledb_lib::db; use chgk_ledb_lib::db;

View File

@ -3,6 +3,7 @@ extern crate criterion;
extern crate bincode; extern crate bincode;
extern crate serde; extern crate serde;
extern crate serde_derive; extern crate serde_derive;
extern crate serde_json;
extern crate tempfile; extern crate tempfile;
use chgk_ledb_lib::db::{Reader, Writer, WriterOpts}; use chgk_ledb_lib::db::{Reader, Writer, WriterOpts};

View File

@ -1,3 +1,4 @@
extern crate serde_json;
use clap::{Parser, Subcommand}; use clap::{Parser, Subcommand};
use rand::seq::IteratorRandom; use rand::seq::IteratorRandom;

View File

@ -19,6 +19,7 @@ harness = false
[dependencies] [dependencies]
chgk_ledb_lib = {path = "../lib", features = ["async", "convert_async"]} chgk_ledb_lib = {path = "../lib", features = ["async", "convert_async"]}
serde_json="1.0"
async_zip = { version = "0.0.15", features = [ async_zip = { version = "0.0.15", features = [
"zstd", "zstd",
"tokio", "tokio",

View File

@ -1,3 +1,4 @@
extern crate serde_json;
use clap::{Parser, Subcommand}; use clap::{Parser, Subcommand};
use futures::{pin_mut, Future}; use futures::{pin_mut, Future};
use rand::distributions::Uniform; use rand::distributions::Uniform;

View File

@ -44,6 +44,7 @@ convert_async = [
[dependencies] [dependencies]
serde = "1.0" serde = "1.0"
serde_derive = "1.0" serde_derive = "1.0"
serde_json = "1.0"
bincode = "^2.0.0-rc.2" bincode = "^2.0.0-rc.2"
zip = { version = "0.6", optional = true } zip = { version = "0.6", optional = true }
async_zip = { version = "0.0.15" , features = [ async_zip = { version = "0.0.15" , features = [
@ -69,7 +70,6 @@ async-stream = { version = "0.3", optional = true }
zstd = { version = "^0.12", default-features = false, optional = true } zstd = { version = "^0.12", default-features = false, optional = true }
memmap = { version = "0.7.0", optional = true } memmap = { version = "0.7.0", optional = true }
pin-project = { version = "1.1.3", optional = true } pin-project = { version = "1.1.3", optional = true }
simd-json = "0.10.6"
[dev-dependencies] [dev-dependencies]
insta = { version = "1.31.0", features = ["yaml"] } insta = { version = "1.31.0", features = ["yaml"] }

View File

@ -142,7 +142,7 @@ pub mod convert {
impl<T> QuestionsConverter for T impl<T> QuestionsConverter for T
where where
T: Iterator<Item = (String, Result<SourceQuestionsBatch, simd_json::Error>)>, T: Iterator<Item = (String, Result<SourceQuestionsBatch, serde_json::Error>)>,
{ {
fn convert<'a>(&'a mut self) -> Box<dyn Iterator<Item = Question> + 'a> { fn convert<'a>(&'a mut self) -> Box<dyn Iterator<Item = Question> + 'a> {
let iter = self let iter = self
@ -169,7 +169,7 @@ pub mod convert {
fn test_convert() { fn test_convert() {
let mut source = iter::once(( let mut source = iter::once((
String::from("test.json"), String::from("test.json"),
Ok::<SourceQuestionsBatch, simd_json::Error>(sample_batch()), Ok::<SourceQuestionsBatch, serde_json::Error>(sample_batch()),
)); ));
let converted: Vec<_> = source.convert().collect(); let converted: Vec<_> = source.convert().collect();
assert_yaml_snapshot!(converted, @r#" assert_yaml_snapshot!(converted, @r#"
@ -207,7 +207,7 @@ pub mod convert_async {
pub struct QuestionsConverterAsync<T> pub struct QuestionsConverterAsync<T>
where where
T: Stream<Item = (String, Result<SourceQuestionsBatch, simd_json::Error>)> T: Stream<Item = (String, Result<SourceQuestionsBatch, serde_json::Error>)>
+ std::marker::Unpin, + std::marker::Unpin,
{ {
inner: T, inner: T,
@ -215,7 +215,7 @@ pub mod convert_async {
impl<T> From<T> for QuestionsConverterAsync<T> impl<T> From<T> for QuestionsConverterAsync<T>
where where
T: Stream<Item = (String, Result<SourceQuestionsBatch, simd_json::Error>)> T: Stream<Item = (String, Result<SourceQuestionsBatch, serde_json::Error>)>
+ std::marker::Unpin, + std::marker::Unpin,
{ {
fn from(inner: T) -> Self { fn from(inner: T) -> Self {
@ -225,7 +225,7 @@ pub mod convert_async {
pub trait QuestionsConverterAsyncForStream<T> pub trait QuestionsConverterAsyncForStream<T>
where where
T: Stream<Item = (String, Result<SourceQuestionsBatch, simd_json::Error>)> T: Stream<Item = (String, Result<SourceQuestionsBatch, serde_json::Error>)>
+ std::marker::Unpin, + std::marker::Unpin,
{ {
fn converter(&mut self) -> QuestionsConverterAsync<&mut T>; fn converter(&mut self) -> QuestionsConverterAsync<&mut T>;
@ -233,7 +233,7 @@ pub mod convert_async {
impl<T> QuestionsConverterAsyncForStream<T> for T impl<T> QuestionsConverterAsyncForStream<T> for T
where where
T: Stream<Item = (String, Result<SourceQuestionsBatch, simd_json::Error>)> T: Stream<Item = (String, Result<SourceQuestionsBatch, serde_json::Error>)>
+ std::marker::Unpin, + std::marker::Unpin,
{ {
fn converter(&mut self) -> QuestionsConverterAsync<&mut T> { fn converter(&mut self) -> QuestionsConverterAsync<&mut T> {
@ -243,7 +243,7 @@ pub mod convert_async {
impl<T> QuestionsConverterAsync<T> impl<T> QuestionsConverterAsync<T>
where where
T: Stream<Item = (String, Result<SourceQuestionsBatch, simd_json::Error>)> T: Stream<Item = (String, Result<SourceQuestionsBatch, serde_json::Error>)>
+ std::marker::Unpin, + std::marker::Unpin,
{ {
pub fn convert(self) -> impl Stream<Item = Question> { pub fn convert(self) -> impl Stream<Item = Question> {
@ -279,7 +279,7 @@ pub mod convert_async {
let source = futures::stream::once(async { let source = futures::stream::once(async {
( (
String::from("test.json"), String::from("test.json"),
Ok::<SourceQuestionsBatch, simd_json::Error>(sample_batch()), Ok::<SourceQuestionsBatch, serde_json::Error>(sample_batch()),
) )
}); });
@ -315,7 +315,7 @@ pub use convert_async::{QuestionsConverterAsync, QuestionsConverterAsyncForStrea
mod test { mod test {
use super::*; use super::*;
use insta::assert_yaml_snapshot; use insta::assert_yaml_snapshot;
use simd_json::json; use serde_json::json;
#[cfg(any(feature = "convert", feature = "convert_async"))] #[cfg(any(feature = "convert", feature = "convert_async"))]
pub mod convert_common { pub mod convert_common {
@ -373,7 +373,7 @@ mod test {
} }
#[test] #[test]
fn test_question_de() { fn test_question_de() {
let question_from_json: Result<Question, _> = simd_json::serde::from_owned_value(json!({ let question_from_json: Result<Question, _> = serde_json::from_value(json!({
"id": "Вопрос 1", "id": "Вопрос 1",
"description": "Сколько будет (2 * 2 * 2 + 2) * 2 * 2 + 2", "description": "Сколько будет (2 * 2 * 2 + 2) * 2 * 2 + 2",
"answer": "42", "answer": "42",

View File

@ -142,7 +142,7 @@ pub mod reader_sync {
where where
R: Read + Seek, R: Read + Seek,
{ {
type Item = (String, Result<SourceQuestionsBatch, simd_json::Error>); type Item = (String, Result<SourceQuestionsBatch, serde_json::Error>);
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
if self.index.is_none() && !self.zipfile.is_empty() { if self.index.is_none() && !self.zipfile.is_empty() {
@ -169,7 +169,7 @@ pub mod reader_sync {
let name = file.mangled_name(); let name = file.mangled_name();
let name_str = name.to_str().unwrap(); let name_str = name.to_str().unwrap();
let data: Result<SourceQuestionsBatch, _> = simd_json::from_reader(file); let data: Result<SourceQuestionsBatch, _> = serde_json::from_reader(file);
Some((String::from(name_str), data)) Some((String::from(name_str), data))
} }
@ -237,7 +237,7 @@ pub mod reader_sync {
zip_file zip_file
.start_file("test.json", options) .start_file("test.json", options)
.expect("zip start file"); .expect("zip start file");
let data = simd_json::to_vec(&batch).unwrap(); let data = serde_json::to_vec(&batch).unwrap();
let amount = zip_file.write(data.as_slice()).expect("write entry"); let amount = zip_file.write(data.as_slice()).expect("write entry");
assert_eq!(amount, data.len()); assert_eq!(amount, data.len());
zip_file.finish().expect("finish zip file"); zip_file.finish().expect("finish zip file");
@ -331,7 +331,7 @@ pub mod reader_async {
pub async fn get( pub async fn get(
&mut self, &mut self,
index: usize, index: usize,
) -> Result<(String, Result<SourceQuestionsBatch, simd_json::Error>), String> ) -> Result<(String, Result<SourceQuestionsBatch, serde_json::Error>), String>
where where
R: AsyncRead + AsyncSeek + Unpin, R: AsyncRead + AsyncSeek + Unpin,
{ {
@ -352,12 +352,12 @@ pub mod reader_async {
if let Err(error) = readed { if let Err(error) = readed {
return Err(format!("read_to_end: {error:?}")); return Err(format!("read_to_end: {error:?}"));
} }
let parsed: Result<SourceQuestionsBatch, _> = simd_json::from_slice(data.as_mut_slice()); let parsed: Result<SourceQuestionsBatch, _> = serde_json::from_slice(&data);
Ok((filename, parsed)) Ok((filename, parsed))
} }
pub async fn get_next( pub async fn get_next(
&mut self, &mut self,
) -> Option<Result<(String, Result<SourceQuestionsBatch, simd_json::Error>), String>> ) -> Option<Result<(String, Result<SourceQuestionsBatch, serde_json::Error>), String>>
where where
R: AsyncRead + AsyncSeek + Unpin, R: AsyncRead + AsyncSeek + Unpin,
{ {
@ -376,7 +376,7 @@ pub mod reader_async {
} }
pub fn stream( pub fn stream(
&mut self, &mut self,
) -> impl Stream<Item = (String, Result<SourceQuestionsBatch, simd_json::Error>)> + '_ ) -> impl Stream<Item = (String, Result<SourceQuestionsBatch, serde_json::Error>)> + '_
{ {
stream! { stream! {
while let Some(Ok(item)) = self.get_next().await { while let Some(Ok(item)) = self.get_next().await {
@ -425,7 +425,7 @@ pub mod reader_async {
let entry = let entry =
ZipEntryBuilder::new("test.json".into(), async_zip::Compression::Zstd).build(); ZipEntryBuilder::new("test.json".into(), async_zip::Compression::Zstd).build();
zip_file zip_file
.write_entry_whole(entry, simd_json::to_vec(&batch).unwrap().as_slice()) .write_entry_whole(entry, serde_json::to_vec(&batch).unwrap().as_slice())
.await .await
.expect("write entry"); .expect("write entry");
zip_file.close().await.expect("close zip"); zip_file.close().await.expect("close zip");
@ -500,7 +500,7 @@ pub use reader_async::{ReadSourceQuestionsBatchesAsync, SourceQuestionsZipReader
mod test { mod test {
use super::*; use super::*;
use insta::assert_yaml_snapshot; use insta::assert_yaml_snapshot;
use simd_json::json; use serde_json::json;
pub fn sample_batch() -> SourceQuestionsBatch { pub fn sample_batch() -> SourceQuestionsBatch {
SourceQuestionsBatch { SourceQuestionsBatch {
@ -544,7 +544,7 @@ mod test {
} }
#[test] #[test]
fn test_batch_de() { fn test_batch_de() {
let batch_from_json: Result<SourceQuestionsBatch, _> = simd_json::serde::from_owned_value(json!({ let batch_from_json: Result<SourceQuestionsBatch, _> = serde_json::from_value(json!({
"Чемпионат": "Тестовый", "Чемпионат": "Тестовый",
"Дата": "00-000-2000", "Дата": "00-000-2000",
"Вопросы": [ "Вопросы": [