From e1ef9cc3c3897af37a811efbda159e8bc2a75ed0 Mon Sep 17 00:00:00 2001 From: Dmitry Date: Wed, 23 Aug 2023 11:34:33 +0300 Subject: [PATCH] simd_json instead of serde_json --- Cargo.lock | 147 ++++++++++++++++++++++++++++++++++++++-- app/Cargo.toml | 1 - app/benches/db_bench.rs | 1 - app/src/main.rs | 1 - app_async/Cargo.toml | 1 - app_async/src/main.rs | 1 - lib/Cargo.toml | 4 +- lib/src/questions.rs | 20 +++--- lib/src/source.rs | 20 +++--- 9 files changed, 165 insertions(+), 31 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ec01a84..b2d90ce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,6 +28,17 @@ dependencies = [ "cpufeatures", ] +[[package]] +name = "ahash" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" version = "1.0.2" @@ -318,7 +329,6 @@ dependencies = [ "rand", "serde", "serde_derive", - "serde_json", "tempfile", "zip", ] @@ -338,7 +348,6 @@ dependencies = [ "rand", "serde", "serde_derive", - "serde_json", "tempfile", "tokio", "tokio-stream", @@ -361,7 +370,7 @@ dependencies = [ "pin-project", "serde", "serde_derive", - "serde_json", + "simd-json", "tempfile", "tokio", "zip", @@ -712,6 +721,15 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "float-cmp" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" +dependencies = [ + "num-traits", +] + [[package]] name = "fmmap" version = "0.3.2" @@ -863,12 +881,31 @@ version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" +[[package]] +name = "halfbrown" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5681137554ddff44396e5f149892c769d45301dd9aa19c51602a89ee214cb0ec" +dependencies = [ + "hashbrown 0.13.2", + "serde", +] + [[package]] name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +[[package]] +name = "hashbrown" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +dependencies = [ + "ahash", +] + [[package]] name = "heck" version = "0.4.1" @@ -906,7 +943,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", - "hashbrown", + "hashbrown 0.12.3", ] [[package]] @@ -1008,6 +1045,70 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lexical-core" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" +dependencies = [ + "lexical-util", + "static_assertions", +] + [[package]] name = "libc" version = "0.2.147" @@ -1489,6 +1590,26 @@ dependencies = [ "digest", ] +[[package]] +name = "simd-json" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de7f1293f0e4e11d52e588766fe9de8caa2857ff63809d40de83245452ca7c5c" +dependencies = [ + "halfbrown", + "lexical-core", + "serde", + "serde_json", + "simdutf8", + "value-trait", +] + +[[package]] +name = "simdutf8" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" + [[package]] name = "similar" version = "2.2.1" @@ -1504,6 +1625,12 @@ dependencies = [ "autocfg", ] +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.10.0" @@ -1695,6 +1822,18 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "value-trait" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09a5b6c8ceb01263b969cac48d4a6705134d490ded13d889e52c0cfc80c6945e" +dependencies = [ + "float-cmp", + "halfbrown", + "itoa", + "ryu", +] + [[package]] name = "version_check" version = "0.9.4" diff --git a/app/Cargo.toml b/app/Cargo.toml index ef32ab3..4f7dcf2 100644 --- a/app/Cargo.toml +++ b/app/Cargo.toml @@ -15,7 +15,6 @@ harness = false [dependencies] chgk_ledb_lib = {path = "../lib", features = ["sync", "source", "convert"]} -serde_json="1.0" zip="0.6" rand="0.8" clap = { version = "4.2.7", features = ["derive"] } diff --git a/app/benches/db_bench.rs b/app/benches/db_bench.rs index 8c49a84..bf4ac9f 100644 --- a/app/benches/db_bench.rs +++ b/app/benches/db_bench.rs @@ -3,7 +3,6 @@ extern crate criterion; extern crate bincode; extern crate serde; extern crate serde_derive; -extern crate serde_json; extern crate tempfile; use chgk_ledb_lib::db; diff --git a/app/src/main.rs b/app/src/main.rs index fbcb2e7..ca1f735 100644 --- a/app/src/main.rs +++ b/app/src/main.rs @@ -1,4 +1,3 @@ -extern crate serde_json; use clap::{Parser, Subcommand}; use rand::seq::IteratorRandom; diff --git a/app_async/Cargo.toml b/app_async/Cargo.toml index e974234..8f4bcc4 100644 --- a/app_async/Cargo.toml +++ b/app_async/Cargo.toml @@ -15,7 +15,6 @@ harness = false [dependencies] chgk_ledb_lib = {path = "../lib", features = ["async", "convert_async"]} -serde_json="1.0" async_zip = { version = "0.0.15", features = [ "zstd", "tokio", diff --git a/app_async/src/main.rs b/app_async/src/main.rs index 88e1256..c127da8 100644 --- a/app_async/src/main.rs +++ b/app_async/src/main.rs @@ -1,4 +1,3 @@ -extern crate serde_json; use clap::{Parser, Subcommand}; use futures::{pin_mut, Future}; use rand::distributions::Uniform; diff --git a/lib/Cargo.toml b/lib/Cargo.toml index a409f7c..8549879 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -44,10 +44,9 @@ convert_async = [ [dependencies] serde = "1.0" serde_derive = "1.0" -serde_json = "1.0" bincode = "^2.0.0-rc.2" zip = { version = "0.6", optional = true } -async_zip = { version = "0.0.15" , features = [ +async_zip = { version = "0.0.15", features = [ "zstd", "tokio", "tokio-fs", @@ -70,6 +69,7 @@ async-stream = { version = "0.3", optional = true } zstd = { version = "^0.12", default-features = false, optional = true } memmap = { version = "0.7.0", optional = true } pin-project = { version = "1.1.3", optional = true } +simd-json = "0.10.6" [dev-dependencies] insta = { version = "1.31.0", features = ["yaml"] } diff --git a/lib/src/questions.rs b/lib/src/questions.rs index 4bfc7b9..34668ae 100644 --- a/lib/src/questions.rs +++ b/lib/src/questions.rs @@ -142,7 +142,7 @@ pub mod convert { impl QuestionsConverter for T where - T: Iterator)>, + T: Iterator)>, { fn convert<'a>(&'a mut self) -> Box + 'a> { let iter = self @@ -169,7 +169,7 @@ pub mod convert { fn test_convert() { let mut source = iter::once(( String::from("test.json"), - Ok::(sample_batch()), + Ok::(sample_batch()), )); let converted: Vec<_> = source.convert().collect(); assert_yaml_snapshot!(converted, @r#" @@ -207,7 +207,7 @@ pub mod convert_async { pub struct QuestionsConverterAsync where - T: Stream)> + T: Stream)> + std::marker::Unpin, { inner: T, @@ -215,7 +215,7 @@ pub mod convert_async { impl From for QuestionsConverterAsync where - T: Stream)> + T: Stream)> + std::marker::Unpin, { fn from(inner: T) -> Self { @@ -225,7 +225,7 @@ pub mod convert_async { pub trait QuestionsConverterAsyncForStream where - T: Stream)> + T: Stream)> + std::marker::Unpin, { fn converter(&mut self) -> QuestionsConverterAsync<&mut T>; @@ -233,7 +233,7 @@ pub mod convert_async { impl QuestionsConverterAsyncForStream for T where - T: Stream)> + T: Stream)> + std::marker::Unpin, { fn converter(&mut self) -> QuestionsConverterAsync<&mut T> { @@ -243,7 +243,7 @@ pub mod convert_async { impl QuestionsConverterAsync where - T: Stream)> + T: Stream)> + std::marker::Unpin, { pub fn convert(self) -> impl Stream { @@ -279,7 +279,7 @@ pub mod convert_async { let source = futures::stream::once(async { ( String::from("test.json"), - Ok::(sample_batch()), + Ok::(sample_batch()), ) }); @@ -315,7 +315,7 @@ pub use convert_async::{QuestionsConverterAsync, QuestionsConverterAsyncForStrea mod test { use super::*; use insta::assert_yaml_snapshot; - use serde_json::json; + use simd_json::json; #[cfg(any(feature = "convert", feature = "convert_async"))] pub mod convert_common { @@ -373,7 +373,7 @@ mod test { } #[test] fn test_question_de() { - let question_from_json: Result = serde_json::from_value(json!({ + let question_from_json: Result = simd_json::serde::from_owned_value(json!({ "id": "Вопрос 1", "description": "Сколько будет (2 * 2 * 2 + 2) * 2 * 2 + 2", "answer": "42", diff --git a/lib/src/source.rs b/lib/src/source.rs index bb65bc1..d88cc91 100644 --- a/lib/src/source.rs +++ b/lib/src/source.rs @@ -142,7 +142,7 @@ pub mod reader_sync { where R: Read + Seek, { - type Item = (String, Result); + type Item = (String, Result); fn next(&mut self) -> Option { if self.index.is_none() && !self.zipfile.is_empty() { @@ -169,7 +169,7 @@ pub mod reader_sync { let name = file.mangled_name(); let name_str = name.to_str().unwrap(); - let data: Result = serde_json::from_reader(file); + let data: Result = simd_json::from_reader(file); Some((String::from(name_str), data)) } @@ -237,7 +237,7 @@ pub mod reader_sync { zip_file .start_file("test.json", options) .expect("zip start file"); - let data = serde_json::to_vec(&batch).unwrap(); + let data = simd_json::to_vec(&batch).unwrap(); let amount = zip_file.write(data.as_slice()).expect("write entry"); assert_eq!(amount, data.len()); zip_file.finish().expect("finish zip file"); @@ -331,7 +331,7 @@ pub mod reader_async { pub async fn get( &mut self, index: usize, - ) -> Result<(String, Result), String> + ) -> Result<(String, Result), String> where R: AsyncRead + AsyncSeek + Unpin, { @@ -352,12 +352,12 @@ pub mod reader_async { if let Err(error) = readed { return Err(format!("read_to_end: {error:?}")); } - let parsed: Result = serde_json::from_slice(&data); + let parsed: Result = simd_json::from_slice(data.as_mut_slice()); Ok((filename, parsed)) } pub async fn get_next( &mut self, - ) -> Option), String>> + ) -> Option), String>> where R: AsyncRead + AsyncSeek + Unpin, { @@ -376,7 +376,7 @@ pub mod reader_async { } pub fn stream( &mut self, - ) -> impl Stream)> + '_ + ) -> impl Stream)> + '_ { stream! { while let Some(Ok(item)) = self.get_next().await { @@ -425,7 +425,7 @@ pub mod reader_async { let entry = ZipEntryBuilder::new("test.json".into(), async_zip::Compression::Zstd).build(); zip_file - .write_entry_whole(entry, serde_json::to_vec(&batch).unwrap().as_slice()) + .write_entry_whole(entry, simd_json::to_vec(&batch).unwrap().as_slice()) .await .expect("write entry"); zip_file.close().await.expect("close zip"); @@ -500,7 +500,7 @@ pub use reader_async::{ReadSourceQuestionsBatchesAsync, SourceQuestionsZipReader mod test { use super::*; use insta::assert_yaml_snapshot; - use serde_json::json; + use simd_json::json; pub fn sample_batch() -> SourceQuestionsBatch { SourceQuestionsBatch { @@ -544,7 +544,7 @@ mod test { } #[test] fn test_batch_de() { - let batch_from_json: Result = serde_json::from_value(json!({ + let batch_from_json: Result = simd_json::serde::from_owned_value(json!({ "Чемпионат": "Тестовый", "Дата": "00-000-2000", "Вопросы": [