This commit is contained in:
Dmitry Belyaev 2022-09-12 15:59:35 +03:00
parent 9eca2a68df
commit 9cb95c435a
Signed by: b4tman
GPG Key ID: 41A00BF15EA7E5F3
5 changed files with 671 additions and 334 deletions

2
.gitignore vendored
View File

@ -1,4 +1,4 @@
/target /target
**/*.rs.bk **/*.rs.bk
baza.zip baza*.zip
/json/ /json/

712
Cargo.lock generated
View File

@ -9,15 +9,50 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]] [[package]]
name = "aes" name = "android_system_properties"
version = "0.7.5" version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e8b47f52ea9bae42228d07ec09eb676433d7c4ed1ebdf0f1d1c29ed446f1ab8" checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [ dependencies = [
"cfg-if", "libc",
"cipher", ]
"cpufeatures",
"opaque-debug", [[package]]
name = "async-compression"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "345fd392ab01f746c717b1357165b76f0b67a60192007b234058c9045fdcf695"
dependencies = [
"bzip2",
"flate2",
"futures-core",
"memchr 2.5.0",
"pin-project-lite",
"tokio",
"xz2",
"zstd",
"zstd-safe",
]
[[package]]
name = "async_io_utilities"
version = "0.1.3"
source = "git+https://github.com/Majored/rs-async-io-utilities#6661a0fb914ccc1f97a6acd446bc03e9547d64fc"
dependencies = [
"tokio",
]
[[package]]
name = "async_zip"
version = "0.0.8"
source = "git+https://github.com/Majored/rs-async-zip#0f4bf7f09d2fef1236924a6af45a5641392faf6b"
dependencies = [
"async-compression",
"async_io_utilities",
"chrono",
"crc32fast",
"thiserror",
"tokio",
] ]
[[package]] [[package]]
@ -27,25 +62,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]] [[package]]
name = "base64ct" name = "bitflags"
version = "1.0.1" version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a32fd6af2b5827bce66c29053ba0e7c42b9dcab01835835058558c10851a46b" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]] [[package]]
name = "block-buffer" name = "bumpalo"
version = "0.10.2" version = "3.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bf7fe51849ea569fd452f37822f606a5cabb684dc918707a0193fd4664ff324" checksum = "c1ad822118d20d2c234f427000d5acc36eabe1e29a348c89b63dd60b13f28e5d"
dependencies = [
"generic-array",
]
[[package]] [[package]]
name = "byteorder" name = "bytes"
version = "1.4.3" version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db"
[[package]] [[package]]
name = "bzip2" name = "bzip2"
@ -87,36 +119,36 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
name = "chgk_txt2json" name = "chgk_txt2json"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"async_zip",
"encoding", "encoding",
"futures",
"json", "json",
"rayon", "rayon",
"textstream", "textstream",
"zip", "tokio",
"tokio-stream",
] ]
[[package]] [[package]]
name = "cipher" name = "chrono"
version = "0.3.0" version = "0.4.22"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ee52072ec15386f770805afd189a01c8841be8696bed250fa2f13c4c0d6dfb7" checksum = "bfd4d1b31faaa3a89d7934dbded3111da0d2ef28e3ebccdb4f0179f5929d1ef1"
dependencies = [ dependencies = [
"generic-array", "iana-time-zone",
"js-sys",
"num-integer",
"num-traits",
"time",
"wasm-bindgen",
"winapi",
] ]
[[package]] [[package]]
name = "constant_time_eq" name = "core-foundation-sys"
version = "0.1.5" version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc"
[[package]]
name = "cpufeatures"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59a6001667ab124aebae2a495118e11d30984c3a653e99d86d58971708cf5e4b"
dependencies = [
"libc",
]
[[package]] [[package]]
name = "crc32fast" name = "crc32fast"
@ -172,32 +204,11 @@ dependencies = [
"once_cell", "once_cell",
] ]
[[package]]
name = "crypto-common"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
dependencies = [
"generic-array",
"typenum",
]
[[package]]
name = "digest"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506"
dependencies = [
"block-buffer",
"crypto-common",
"subtle",
]
[[package]] [[package]]
name = "either" name = "either"
version = "1.7.0" version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f107b87b6afc2a64fd13cac55fe06d6c8859f12d4b14cbcdd2c67d0976781be" checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797"
[[package]] [[package]]
name = "encoding" name = "encoding"
@ -274,13 +285,92 @@ dependencies = [
] ]
[[package]] [[package]]
name = "generic-array" name = "futures"
version = "0.14.6" version = "0.3.24"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" checksum = "7f21eda599937fba36daeb58a22e8f5cee2d14c4a17b5b7739c7c8e5e3b8230c"
dependencies = [ dependencies = [
"typenum", "futures-channel",
"version_check", "futures-core",
"futures-executor",
"futures-io",
"futures-sink",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-channel"
version = "0.3.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30bdd20c28fadd505d0fd6712cdfcb0d4b5648baf45faef7f852afb2399bb050"
dependencies = [
"futures-core",
"futures-sink",
]
[[package]]
name = "futures-core"
version = "0.3.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e5aa3de05362c3fb88de6531e6296e85cde7739cccad4b9dfeeb7f6ebce56bf"
[[package]]
name = "futures-executor"
version = "0.3.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ff63c23854bee61b6e9cd331d523909f238fc7636290b96826e9cfa5faa00ab"
dependencies = [
"futures-core",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-io"
version = "0.3.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbf4d2a7a308fd4578637c0b17c7e1c7ba127b8f6ba00b29f717e9655d85eb68"
[[package]]
name = "futures-macro"
version = "0.3.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42cd15d1c7456c04dbdf7e88bcd69760d74f3a798d6444e16974b505b0e62f17"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "futures-sink"
version = "0.3.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21b20ba5a92e727ba30e72834706623d94ac93a725410b6a6b6fbc1b07f7ba56"
[[package]]
name = "futures-task"
version = "0.3.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6508c467c73851293f390476d4491cf4d227dbabcd4170f3bb6044959b294f1"
[[package]]
name = "futures-util"
version = "0.3.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44fb6cb1be61cc1d2e43b262516aafcf63b241cffdb1d3fa115f91d9c7b09c90"
dependencies = [
"futures-channel",
"futures-core",
"futures-io",
"futures-macro",
"futures-sink",
"futures-task",
"memchr 2.5.0",
"pin-project-lite",
"pin-utils",
"slab",
] ]
[[package]] [[package]]
@ -293,20 +383,19 @@ dependencies = [
] ]
[[package]] [[package]]
name = "hmac" name = "iana-time-zone"
version = "0.12.1" version = "0.1.47"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" checksum = "4c495f162af0bf17656d0014a0eded5f3cd2f365fdd204548c2869db89359dc7"
dependencies = [ dependencies = [
"digest", "android_system_properties",
"core-foundation-sys",
"js-sys",
"once_cell",
"wasm-bindgen",
"winapi",
] ]
[[package]]
name = "itoa"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754"
[[package]] [[package]]
name = "jobserver" name = "jobserver"
version = "0.1.24" version = "0.1.24"
@ -316,6 +405,15 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "js-sys"
version = "0.3.59"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "258451ab10b34f8af53416d1fdab72c22e805f0c92a1136d59470ec0b11138b2"
dependencies = [
"wasm-bindgen",
]
[[package]] [[package]]
name = "json" name = "json"
version = "0.12.4" version = "0.12.4"
@ -324,9 +422,39 @@ checksum = "078e285eafdfb6c4b434e0d31e8cfcb5115b651496faca5749b88fafd4f23bfd"
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.131" version = "0.2.132"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04c3b4822ccebfa39c02fc03d1534441b22ead323fa0f48bb7ddd8e6ba076a40" checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5"
[[package]]
name = "lock_api"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f80bf5aacaf25cbfc8210d1cfb718f2bf3b11c4c54e5afe36c236853a8ec390"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
dependencies = [
"cfg-if",
]
[[package]]
name = "lzma-sys"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e06754c4acf47d49c727d5665ca9fb828851cda315ed3bd51edd148ef78a8772"
dependencies = [
"cc",
"libc",
"pkg-config",
]
[[package]] [[package]]
name = "memchr" name = "memchr"
@ -337,6 +465,12 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]] [[package]]
name = "memoffset" name = "memoffset"
version = "0.6.5" version = "0.6.5"
@ -348,13 +482,44 @@ dependencies = [
[[package]] [[package]]
name = "miniz_oxide" name = "miniz_oxide"
version = "0.5.3" version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f5c75688da582b8ffc1f1799e9db273f32133c49e048f614d22ec3256773ccc" checksum = "96590ba8f175222643a85693f33d26e9c8a015f599c216509b1a6894af675d34"
dependencies = [ dependencies = [
"adler", "adler",
] ]
[[package]]
name = "mio"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57ee1c23c7c63b0c9250c339ffdc69255f110b298b901b9f6c82547b7b87caaf"
dependencies = [
"libc",
"log",
"wasi 0.11.0+wasi-snapshot-preview1",
"windows-sys",
]
[[package]]
name = "num-integer"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9"
dependencies = [
"autocfg",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
dependencies = [
"autocfg",
]
[[package]] [[package]]
name = "num_cpus" name = "num_cpus"
version = "1.13.1" version = "1.13.1"
@ -365,50 +530,47 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "num_threads"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44"
dependencies = [
"libc",
]
[[package]] [[package]]
name = "once_cell" name = "once_cell"
version = "1.13.0" version = "1.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1" checksum = "2f7254b99e31cad77da24b08ebf628882739a608578bb1bcdfc1f9c21260d7c0"
[[package]] [[package]]
name = "opaque-debug" name = "parking_lot"
version = "0.3.0" version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
[[package]]
name = "password-hash"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d791538a6dcc1e7cb7fe6f6b58aca40e7f79403c45b2bc274008b5e647af1d8"
dependencies = [ dependencies = [
"base64ct", "lock_api",
"rand_core", "parking_lot_core",
"subtle",
] ]
[[package]] [[package]]
name = "pbkdf2" name = "parking_lot_core"
version = "0.10.1" version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "271779f35b581956db91a3e55737327a03aa051e90b1c47aeb189508533adfd7" checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929"
dependencies = [ dependencies = [
"digest", "cfg-if",
"hmac", "libc",
"password-hash", "redox_syscall",
"sha2", "smallvec",
"windows-sys",
] ]
[[package]]
name = "pin-project-lite"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116"
[[package]]
name = "pin-utils"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]] [[package]]
name = "pkg-config" name = "pkg-config"
version = "0.3.25" version = "0.3.25"
@ -416,10 +578,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae" checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae"
[[package]] [[package]]
name = "rand_core" name = "proc-macro2"
version = "0.6.3" version = "1.0.43"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
dependencies = [
"proc-macro2",
]
[[package]] [[package]]
name = "rayon" name = "rayon"
@ -445,6 +619,15 @@ dependencies = [
"num_cpus", "num_cpus",
] ]
[[package]]
name = "redox_syscall"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
dependencies = [
"bitflags",
]
[[package]] [[package]]
name = "scopeguard" name = "scopeguard"
version = "1.1.0" version = "1.1.0"
@ -452,32 +635,49 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]] [[package]]
name = "sha1" name = "signal-hook-registry"
version = "0.10.1" version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c77f4e7f65455545c2153c1253d25056825e77ee2533f0e41deb65a93a34852f" checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0"
dependencies = [ dependencies = [
"cfg-if", "libc",
"cpufeatures",
"digest",
] ]
[[package]] [[package]]
name = "sha2" name = "slab"
version = "0.10.2" version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55deaec60f81eefe3cce0dc50bda92d6d8e88f2a27df7c5033b42afeb1ed2676" checksum = "4614a76b2a8be0058caa9dbbaf66d988527d86d003c11a94fbd335d7661edcef"
dependencies = [ dependencies = [
"cfg-if", "autocfg",
"cpufeatures",
"digest",
] ]
[[package]] [[package]]
name = "subtle" name = "smallvec"
version = "2.4.1" version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1"
[[package]]
name = "socket2"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "syn"
version = "1.0.99"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58dbef6ec655055e20b86b15a8cc6d439cca19b667537ac6a1369572d151ab13"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]] [[package]]
name = "textstream" name = "textstream"
@ -486,73 +686,243 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7ed81b342f6566026755e7f4b7798810b1c159722e427d212ce72c2c58ffdaa" checksum = "e7ed81b342f6566026755e7f4b7798810b1c159722e427d212ce72c2c58ffdaa"
dependencies = [ dependencies = [
"encoding", "encoding",
"memchr", "memchr 1.0.2",
]
[[package]]
name = "thiserror"
version = "1.0.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c1b05ca9d106ba7d2e31a9dab4a64e7be2cce415321966ea3132c49a656e252"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8f2591983642de85c921015f3f070c665a197ed69e417af436115e3a1407487"
dependencies = [
"proc-macro2",
"quote",
"syn",
] ]
[[package]] [[package]]
name = "time" name = "time"
version = "0.3.13" version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db76ff9fa4b1458b3c7f077f3ff9887394058460d21e634355b273aaf11eea45" checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
dependencies = [ dependencies = [
"itoa",
"libc", "libc",
"num_threads", "wasi 0.10.0+wasi-snapshot-preview1",
"time-macros", "winapi",
] ]
[[package]] [[package]]
name = "time-macros" name = "tokio"
version = "0.2.4" version = "1.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42657b1a6f4d817cda8e7a0ace261fe0cc946cf3a80314390b22cc61ae080792" checksum = "89797afd69d206ccd11fb0ea560a44bbb87731d020670e79416d442919257d42"
[[package]]
name = "typenum"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "zip"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf225bcf73bb52cbb496e70475c7bd7a3f769df699c0020f6c7bd9a96dcf0b8d"
dependencies = [ dependencies = [
"aes", "autocfg",
"byteorder", "bytes",
"bzip2", "libc",
"constant_time_eq", "memchr 2.5.0",
"crc32fast", "mio",
"crossbeam-utils", "num_cpus",
"flate2", "once_cell",
"hmac", "parking_lot",
"pbkdf2", "pin-project-lite",
"sha1", "signal-hook-registry",
"time", "socket2",
"zstd", "tokio-macros",
"winapi",
]
[[package]]
name = "tokio-macros"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9724f9a975fb987ef7a3cd9be0350edcbe130698af5b8f7a631e23d42d052484"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tokio-stream"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df54d54117d6fdc4e4fea40fe1e4e566b3505700e148a6827e59b34b0d2600d9"
dependencies = [
"futures-core",
"pin-project-lite",
"tokio",
]
[[package]]
name = "unicode-ident"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf"
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasm-bindgen"
version = "0.2.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc7652e3f6c4706c8d9cd54832c4a4ccb9b5336e2c3bd154d5cccfbf1c1f5f7d"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "662cd44805586bd52971b9586b1df85cdbbd9112e4ef4d8f41559c334dc6ac3f"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b260f13d3012071dfb1512849c033b1925038373aea48ced3012c09df952c602"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5be8e654bdd9b79216c2929ab90721aa82faf65c48cdf08bdc4e7f51357b80da"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6598dd0bd3c7d51095ff6531a5b23e02acdc81804e30d8f07afb77b7215a140a"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-sys"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2"
dependencies = [
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47"
[[package]]
name = "windows_i686_gnu"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6"
[[package]]
name = "windows_i686_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024"
[[package]]
name = "windows_x86_64_gnu"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1"
[[package]]
name = "windows_x86_64_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"
[[package]]
name = "xz2"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2"
dependencies = [
"lzma-sys",
] ]
[[package]] [[package]]
name = "zstd" name = "zstd"
version = "0.10.2+zstd.1.5.2" version = "0.11.2+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f4a6bd64f22b5e3e94b4e238669ff9f10815c27a5180108b849d24174a83847" checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4"
dependencies = [ dependencies = [
"zstd-safe", "zstd-safe",
] ]
[[package]] [[package]]
name = "zstd-safe" name = "zstd-safe"
version = "4.1.6+zstd.1.5.2" version = "5.0.2+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94b61c51bb270702d6167b8ce67340d2754b088d0c091b06e593aa772c3ee9bb" checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db"
dependencies = [ dependencies = [
"libc", "libc",
"zstd-sys", "zstd-sys",
@ -560,9 +930,9 @@ dependencies = [
[[package]] [[package]]
name = "zstd-sys" name = "zstd-sys"
version = "1.6.3+zstd.1.5.2" version = "2.0.1+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc49afa5c8d634e75761feda8c592051e7eeb4683ba827211eb0d731d3402ea8" checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b"
dependencies = [ dependencies = [
"cc", "cc",
"libc", "libc",

View File

@ -7,11 +7,14 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
zip = "0.6"
encoding = "0.2" encoding = "0.2"
textstream = "0.1" textstream = "0.1"
json="0.12" json="0.12"
rayon="1.5" rayon="1.5"
tokio = { version = "1.21.0", features = ["full"] }
async_zip = { git = "https://github.com/Majored/rs-async-zip" }
tokio-stream = { version = "0.1.9", features = ["fs"] }
futures = "0.3.24"
[profile.release] [profile.release]
opt-level = 3 opt-level = 3

7
recode.ps1 Normal file
View File

@ -0,0 +1,7 @@
mkdir .\baza_utf8
Get-ChildItem baza | ForEach-Object -Process {
$n = $_.Name
get-content $_ -Encoding KOI8-R | Out-File ".\baza_utf8\$n" -Encoding utf8
}
# $c=0; $t=$(ls .\baza).Count; while ($t -gt $c) { $c = $(ls .\baza_utf8).Count ; $r = ($t-$c) ; echo $r; sleep 2 }

View File

@ -1,18 +1,16 @@
extern crate encoding; extern crate async_zip;
extern crate futures;
extern crate json; extern crate json;
extern crate rayon; extern crate tokio;
extern crate textstream;
extern crate zip;
use encoding::all::KOI8_R; use async_zip::read::fs::ZipFileReader;
use encoding::DecoderTrap; use futures::future;
use rayon::prelude::*;
use std::path::PathBuf; use std::path::PathBuf;
use std::str::FromStr; use std::str::FromStr;
use std::{fs, io}; use tokio::fs;
use textstream::TextReader; use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader};
const BASE_FILENAME: &str = "baza.zip"; const BASE_FILENAME: &str = "baza_utf8.zip";
const OUTPUT_PATH: &str = "json"; const OUTPUT_PATH: &str = "json";
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
@ -109,9 +107,11 @@ impl FromStr for KeywordType {
} }
} }
fn parse_file(file: impl io::Read) -> Result<json::JsonValue, Box<dyn std::error::Error>> { async fn parse_file(
let buf = io::BufReader::new(file); entry_reader: impl AsyncReadExt + Unpin,
let reader = TextReader::new(buf, KOI8_R, DecoderTrap::Ignore); ) -> Result<json::JsonValue, Box<dyn std::error::Error>> {
let buf_reader = BufReader::new(entry_reader);
let mut lines = buf_reader.lines();
let patterns = vec![ let patterns = vec![
"Чемпионат:", "Чемпионат:",
@ -141,11 +141,13 @@ fn parse_file(file: impl io::Read) -> Result<json::JsonValue, Box<dyn std::error
let mut context = Context::new(); let mut context = Context::new();
let mut ctx = &mut context; let mut ctx = &mut context;
reader while let Some(line_r) = lines.next_line().await? {
.lines() let line = line_r.trim();
.map(|line| String::from(line.unwrap().trim())) if line.is_empty() {
.filter(|line| !line.is_empty()) // ignore empty lines continue;
.for_each(|line| { }
let line = line.to_string();
match patterns match patterns
.iter() // find keyword .iter() // find keyword
.find(|&&pattern| line.starts_with(pattern) && line.ends_with(':')) .find(|&&pattern| line.starts_with(pattern) && line.ends_with(':'))
@ -206,7 +208,7 @@ fn parse_file(file: impl io::Read) -> Result<json::JsonValue, Box<dyn std::error
ctx.cur_content.push(line); ctx.cur_content.push(line);
} }
} }
}); }
// finish reading last question // finish reading last question
if ctx.have_new_question && !ctx.cur_content.is_empty() { if ctx.have_new_question && !ctx.cur_content.is_empty() {
@ -219,52 +221,9 @@ fn parse_file(file: impl io::Read) -> Result<json::JsonValue, Box<dyn std::error
Ok(ctx.data.clone()) Ok(ctx.data.clone())
} }
// split vector to a vector of [num] slices async fn process_file(index: usize, name: String) -> Result<(), Box<dyn std::error::Error>> {
trait SplitTo<T> { let archive = ZipFileReader::new(String::from(BASE_FILENAME)).await?;
fn split_to(&self, num: usize) -> Vec<&[T]>; let entry_reader = archive.entry_reader(index).await?;
}
impl<T> SplitTo<T> for Vec<T> {
fn split_to(&self, num: usize) -> Vec<&[T]> {
let part_len = self.len() / num;
let add_len = self.len() % num;
let mut result = Vec::<&[T]>::with_capacity(num);
if 0 == part_len {
result.push(self);
return result;
}
for i in 0..num {
let size = if (num - 1) == i {
part_len + add_len
} else {
part_len
};
let start = part_len * i;
result.push(&self[start..(start + size)]);
}
result
}
}
fn process_files(files: &&[PathBuf]) {
if files.is_empty() {
return;
}
let start_file = files[0].to_str().unwrap();
println!("-> start from \"{}\" ({} files)", start_file, files.len());
let zip_file = fs::File::open(BASE_FILENAME).unwrap();
let zip_reader = io::BufReader::new(zip_file);
let mut archive = zip::ZipArchive::new(zip_reader).unwrap();
files.iter().for_each(|name| {
let name_str = name.to_str().unwrap();
// parse txt file
let file = archive.by_name(name_str).unwrap();
let data = parse_file(file).unwrap();
// make output filename // make output filename
let mut outfilename = PathBuf::from(OUTPUT_PATH); let mut outfilename = PathBuf::from(OUTPUT_PATH);
@ -272,53 +231,51 @@ fn process_files(files: &&[PathBuf]) {
outfilename.set_extension("json"); outfilename.set_extension("json");
// save json to file // save json to file
let mut outfile = fs::File::create(outfilename).unwrap(); let new_data = parse_file(entry_reader).await?;
data.write_pretty(&mut outfile, 1).unwrap(); let data_str = new_data.pretty(2);
});
println!("<- done {} files (from \"{}\")", files.len(), start_file); let mut outfile = fs::File::create(outfilename).await?;
outfile.write_all(data_str.as_bytes()).await?;
Ok(())
} }
fn main() -> Result<(), Box<dyn std::error::Error>> { #[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
// open archive just to list files // open archive just to list files
let zip_file = fs::File::open(BASE_FILENAME)?; let archive = ZipFileReader::new(String::from(BASE_FILENAME)).await?;
let zip_reader = io::BufReader::new(zip_file);
let mut archive = zip::ZipArchive::new(zip_reader)?;
let source_files: Vec<PathBuf> = (0..archive.len()) let source_files: Vec<(usize, String)> = archive
.map(|i| archive.by_index(i).unwrap().mangled_name()) .entries()
.filter(|name| { .iter()
.enumerate()
.filter(|item| !item.1.dir())
.filter(|item| {
// skip files without "txt" extension // skip files without "txt" extension
match name.extension() { item.1.name().ends_with(".txt")
Some(ext) => match ext.to_str() {
Some(ext_str) => ext_str.eq_ignore_ascii_case("txt"),
_ => false, // extension is not valid unicode or not txt
},
_ => false, // no extension in filename
}
}) })
.map(|item| (item.0, item.1.name().to_string()))
.collect(); .collect();
drop(archive); drop(archive);
// check output directory // check output directory
let out_dir: PathBuf = OUTPUT_PATH.into(); match fs::metadata(OUTPUT_PATH).await {
if out_dir.is_file() { Err(_) => fs::create_dir_all(OUTPUT_PATH).await?,
return Err("output directory is file!".into()); Ok(x) if x.is_file() => return Err("output directory is file!".into()),
} else if !out_dir.exists() { _ => (),
fs::create_dir_all(out_dir)?;
}; };
println!( println!("processing {} files ...", source_files.len());
"processing {} files with {} threads...",
source_files.len(),
rayon::current_num_threads()
);
// split vector and process its parts in parallel let mut handles = vec![];
source_files for index in source_files {
.split_to(rayon::current_num_threads()) let i = index.clone();
.par_iter() let handle = tokio::spawn(async move {
.for_each(process_files); process_file(i.0, i.1).await.unwrap();
});
handles.push(handle);
}
future::join_all(handles).await;
println!("done"); println!("done");
Ok(()) Ok(())