async v1
This commit is contained in:
parent
9eca2a68df
commit
9cb95c435a
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,4 +1,4 @@
|
||||
/target
|
||||
**/*.rs.bk
|
||||
baza.zip
|
||||
baza*.zip
|
||||
/json/
|
||||
|
712
Cargo.lock
generated
712
Cargo.lock
generated
@ -9,15 +9,50 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
|
||||
|
||||
[[package]]
|
||||
name = "aes"
|
||||
version = "0.7.5"
|
||||
name = "android_system_properties"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9e8b47f52ea9bae42228d07ec09eb676433d7c4ed1ebdf0f1d1c29ed446f1ab8"
|
||||
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"cipher",
|
||||
"cpufeatures",
|
||||
"opaque-debug",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-compression"
|
||||
version = "0.3.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "345fd392ab01f746c717b1357165b76f0b67a60192007b234058c9045fdcf695"
|
||||
dependencies = [
|
||||
"bzip2",
|
||||
"flate2",
|
||||
"futures-core",
|
||||
"memchr 2.5.0",
|
||||
"pin-project-lite",
|
||||
"tokio",
|
||||
"xz2",
|
||||
"zstd",
|
||||
"zstd-safe",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async_io_utilities"
|
||||
version = "0.1.3"
|
||||
source = "git+https://github.com/Majored/rs-async-io-utilities#6661a0fb914ccc1f97a6acd446bc03e9547d64fc"
|
||||
dependencies = [
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async_zip"
|
||||
version = "0.0.8"
|
||||
source = "git+https://github.com/Majored/rs-async-zip#0f4bf7f09d2fef1236924a6af45a5641392faf6b"
|
||||
dependencies = [
|
||||
"async-compression",
|
||||
"async_io_utilities",
|
||||
"chrono",
|
||||
"crc32fast",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -27,25 +62,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "base64ct"
|
||||
version = "1.0.1"
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a32fd6af2b5827bce66c29053ba0e7c42b9dcab01835835058558c10851a46b"
|
||||
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
||||
|
||||
[[package]]
|
||||
name = "block-buffer"
|
||||
version = "0.10.2"
|
||||
name = "bumpalo"
|
||||
version = "3.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0bf7fe51849ea569fd452f37822f606a5cabb684dc918707a0193fd4664ff324"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
]
|
||||
checksum = "c1ad822118d20d2c234f427000d5acc36eabe1e29a348c89b63dd60b13f28e5d"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.4.3"
|
||||
name = "bytes"
|
||||
version = "1.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
|
||||
checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db"
|
||||
|
||||
[[package]]
|
||||
name = "bzip2"
|
||||
@ -87,36 +119,36 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
name = "chgk_txt2json"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"async_zip",
|
||||
"encoding",
|
||||
"futures",
|
||||
"json",
|
||||
"rayon",
|
||||
"textstream",
|
||||
"zip",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cipher"
|
||||
version = "0.3.0"
|
||||
name = "chrono"
|
||||
version = "0.4.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7ee52072ec15386f770805afd189a01c8841be8696bed250fa2f13c4c0d6dfb7"
|
||||
checksum = "bfd4d1b31faaa3a89d7934dbded3111da0d2ef28e3ebccdb4f0179f5929d1ef1"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
"iana-time-zone",
|
||||
"js-sys",
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
"time",
|
||||
"wasm-bindgen",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "constant_time_eq"
|
||||
version = "0.1.5"
|
||||
name = "core-foundation-sys"
|
||||
version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
|
||||
|
||||
[[package]]
|
||||
name = "cpufeatures"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "59a6001667ab124aebae2a495118e11d30984c3a653e99d86d58971708cf5e4b"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc"
|
||||
|
||||
[[package]]
|
||||
name = "crc32fast"
|
||||
@ -172,32 +204,11 @@ dependencies = [
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crypto-common"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
"typenum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.10.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506"
|
||||
dependencies = [
|
||||
"block-buffer",
|
||||
"crypto-common",
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.7.0"
|
||||
version = "1.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f107b87b6afc2a64fd13cac55fe06d6c8859f12d4b14cbcdd2c67d0976781be"
|
||||
checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797"
|
||||
|
||||
[[package]]
|
||||
name = "encoding"
|
||||
@ -274,13 +285,92 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "generic-array"
|
||||
version = "0.14.6"
|
||||
name = "futures"
|
||||
version = "0.3.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9"
|
||||
checksum = "7f21eda599937fba36daeb58a22e8f5cee2d14c4a17b5b7739c7c8e5e3b8230c"
|
||||
dependencies = [
|
||||
"typenum",
|
||||
"version_check",
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"futures-executor",
|
||||
"futures-io",
|
||||
"futures-sink",
|
||||
"futures-task",
|
||||
"futures-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures-channel"
|
||||
version = "0.3.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "30bdd20c28fadd505d0fd6712cdfcb0d4b5648baf45faef7f852afb2399bb050"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-sink",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures-core"
|
||||
version = "0.3.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4e5aa3de05362c3fb88de6531e6296e85cde7739cccad4b9dfeeb7f6ebce56bf"
|
||||
|
||||
[[package]]
|
||||
name = "futures-executor"
|
||||
version = "0.3.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9ff63c23854bee61b6e9cd331d523909f238fc7636290b96826e9cfa5faa00ab"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-task",
|
||||
"futures-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures-io"
|
||||
version = "0.3.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbf4d2a7a308fd4578637c0b17c7e1c7ba127b8f6ba00b29f717e9655d85eb68"
|
||||
|
||||
[[package]]
|
||||
name = "futures-macro"
|
||||
version = "0.3.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42cd15d1c7456c04dbdf7e88bcd69760d74f3a798d6444e16974b505b0e62f17"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures-sink"
|
||||
version = "0.3.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "21b20ba5a92e727ba30e72834706623d94ac93a725410b6a6b6fbc1b07f7ba56"
|
||||
|
||||
[[package]]
|
||||
name = "futures-task"
|
||||
version = "0.3.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a6508c467c73851293f390476d4491cf4d227dbabcd4170f3bb6044959b294f1"
|
||||
|
||||
[[package]]
|
||||
name = "futures-util"
|
||||
version = "0.3.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "44fb6cb1be61cc1d2e43b262516aafcf63b241cffdb1d3fa115f91d9c7b09c90"
|
||||
dependencies = [
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"futures-io",
|
||||
"futures-macro",
|
||||
"futures-sink",
|
||||
"futures-task",
|
||||
"memchr 2.5.0",
|
||||
"pin-project-lite",
|
||||
"pin-utils",
|
||||
"slab",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -293,20 +383,19 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hmac"
|
||||
version = "0.12.1"
|
||||
name = "iana-time-zone"
|
||||
version = "0.1.47"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
|
||||
checksum = "4c495f162af0bf17656d0014a0eded5f3cd2f365fdd204548c2869db89359dc7"
|
||||
dependencies = [
|
||||
"digest",
|
||||
"android_system_properties",
|
||||
"core-foundation-sys",
|
||||
"js-sys",
|
||||
"once_cell",
|
||||
"wasm-bindgen",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754"
|
||||
|
||||
[[package]]
|
||||
name = "jobserver"
|
||||
version = "0.1.24"
|
||||
@ -316,6 +405,15 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "js-sys"
|
||||
version = "0.3.59"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "258451ab10b34f8af53416d1fdab72c22e805f0c92a1136d59470ec0b11138b2"
|
||||
dependencies = [
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "json"
|
||||
version = "0.12.4"
|
||||
@ -324,9 +422,39 @@ checksum = "078e285eafdfb6c4b434e0d31e8cfcb5115b651496faca5749b88fafd4f23bfd"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.131"
|
||||
version = "0.2.132"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "04c3b4822ccebfa39c02fc03d1534441b22ead323fa0f48bb7ddd8e6ba076a40"
|
||||
checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5"
|
||||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
version = "0.4.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9f80bf5aacaf25cbfc8210d1cfb718f2bf3b11c4c54e5afe36c236853a8ec390"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"scopeguard",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lzma-sys"
|
||||
version = "0.1.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e06754c4acf47d49c727d5665ca9fb828851cda315ed3bd51edd148ef78a8772"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"pkg-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
@ -337,6 +465,12 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
|
||||
|
||||
[[package]]
|
||||
name = "memoffset"
|
||||
version = "0.6.5"
|
||||
@ -348,13 +482,44 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.5.3"
|
||||
version = "0.5.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6f5c75688da582b8ffc1f1799e9db273f32133c49e048f614d22ec3256773ccc"
|
||||
checksum = "96590ba8f175222643a85693f33d26e9c8a015f599c216509b1a6894af675d34"
|
||||
dependencies = [
|
||||
"adler",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mio"
|
||||
version = "0.8.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "57ee1c23c7c63b0c9250c339ffdc69255f110b298b901b9f6c82547b7b87caaf"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"log",
|
||||
"wasi 0.11.0+wasi-snapshot-preview1",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-integer"
|
||||
version = "0.1.45"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num_cpus"
|
||||
version = "1.13.1"
|
||||
@ -365,50 +530,47 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num_threads"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.13.0"
|
||||
version = "1.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1"
|
||||
checksum = "2f7254b99e31cad77da24b08ebf628882739a608578bb1bcdfc1f9c21260d7c0"
|
||||
|
||||
[[package]]
|
||||
name = "opaque-debug"
|
||||
version = "0.3.0"
|
||||
name = "parking_lot"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5"
|
||||
|
||||
[[package]]
|
||||
name = "password-hash"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1d791538a6dcc1e7cb7fe6f6b58aca40e7f79403c45b2bc274008b5e647af1d8"
|
||||
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
|
||||
dependencies = [
|
||||
"base64ct",
|
||||
"rand_core",
|
||||
"subtle",
|
||||
"lock_api",
|
||||
"parking_lot_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pbkdf2"
|
||||
version = "0.10.1"
|
||||
name = "parking_lot_core"
|
||||
version = "0.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "271779f35b581956db91a3e55737327a03aa051e90b1c47aeb189508533adfd7"
|
||||
checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929"
|
||||
dependencies = [
|
||||
"digest",
|
||||
"hmac",
|
||||
"password-hash",
|
||||
"sha2",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"redox_syscall",
|
||||
"smallvec",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project-lite"
|
||||
version = "0.2.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116"
|
||||
|
||||
[[package]]
|
||||
name = "pin-utils"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.25"
|
||||
@ -416,10 +578,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae"
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.6.3"
|
||||
name = "proc-macro2"
|
||||
version = "1.0.43"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
|
||||
checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon"
|
||||
@ -445,6 +619,15 @@ dependencies = [
|
||||
"num_cpus",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "1.1.0"
|
||||
@ -452,32 +635,49 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
|
||||
|
||||
[[package]]
|
||||
name = "sha1"
|
||||
version = "0.10.1"
|
||||
name = "signal-hook-registry"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c77f4e7f65455545c2153c1253d25056825e77ee2533f0e41deb65a93a34852f"
|
||||
checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
"digest",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha2"
|
||||
version = "0.10.2"
|
||||
name = "slab"
|
||||
version = "0.4.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "55deaec60f81eefe3cce0dc50bda92d6d8e88f2a27df7c5033b42afeb1ed2676"
|
||||
checksum = "4614a76b2a8be0058caa9dbbaf66d988527d86d003c11a94fbd335d7661edcef"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
"digest",
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "subtle"
|
||||
version = "2.4.1"
|
||||
name = "smallvec"
|
||||
version = "1.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601"
|
||||
checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1"
|
||||
|
||||
[[package]]
|
||||
name = "socket2"
|
||||
version = "0.4.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.99"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "58dbef6ec655055e20b86b15a8cc6d439cca19b667537ac6a1369572d151ab13"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "textstream"
|
||||
@ -486,73 +686,243 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e7ed81b342f6566026755e7f4b7798810b1c159722e427d212ce72c2c58ffdaa"
|
||||
dependencies = [
|
||||
"encoding",
|
||||
"memchr",
|
||||
"memchr 1.0.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.34"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8c1b05ca9d106ba7d2e31a9dab4a64e7be2cce415321966ea3132c49a656e252"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.34"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e8f2591983642de85c921015f3f070c665a197ed69e417af436115e3a1407487"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.3.13"
|
||||
version = "0.1.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "db76ff9fa4b1458b3c7f077f3ff9887394058460d21e634355b273aaf11eea45"
|
||||
checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"libc",
|
||||
"num_threads",
|
||||
"time-macros",
|
||||
"wasi 0.10.0+wasi-snapshot-preview1",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time-macros"
|
||||
version = "0.2.4"
|
||||
name = "tokio"
|
||||
version = "1.21.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42657b1a6f4d817cda8e7a0ace261fe0cc946cf3a80314390b22cc61ae080792"
|
||||
|
||||
[[package]]
|
||||
name = "typenum"
|
||||
version = "1.15.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||
|
||||
[[package]]
|
||||
name = "zip"
|
||||
version = "0.6.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bf225bcf73bb52cbb496e70475c7bd7a3f769df699c0020f6c7bd9a96dcf0b8d"
|
||||
checksum = "89797afd69d206ccd11fb0ea560a44bbb87731d020670e79416d442919257d42"
|
||||
dependencies = [
|
||||
"aes",
|
||||
"byteorder",
|
||||
"bzip2",
|
||||
"constant_time_eq",
|
||||
"crc32fast",
|
||||
"crossbeam-utils",
|
||||
"flate2",
|
||||
"hmac",
|
||||
"pbkdf2",
|
||||
"sha1",
|
||||
"time",
|
||||
"zstd",
|
||||
"autocfg",
|
||||
"bytes",
|
||||
"libc",
|
||||
"memchr 2.5.0",
|
||||
"mio",
|
||||
"num_cpus",
|
||||
"once_cell",
|
||||
"parking_lot",
|
||||
"pin-project-lite",
|
||||
"signal-hook-registry",
|
||||
"socket2",
|
||||
"tokio-macros",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-macros"
|
||||
version = "1.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9724f9a975fb987ef7a3cd9be0350edcbe130698af5b8f7a631e23d42d052484"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-stream"
|
||||
version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df54d54117d6fdc4e4fea40fe1e4e566b3505700e148a6827e59b34b0d2600d9"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"pin-project-lite",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.10.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.11.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen"
|
||||
version = "0.2.82"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fc7652e3f6c4706c8d9cd54832c4a4ccb9b5336e2c3bd154d5cccfbf1c1f5f7d"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"wasm-bindgen-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-backend"
|
||||
version = "0.2.82"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "662cd44805586bd52971b9586b1df85cdbbd9112e4ef4d8f41559c334dc6ac3f"
|
||||
dependencies = [
|
||||
"bumpalo",
|
||||
"log",
|
||||
"once_cell",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-macro"
|
||||
version = "0.2.82"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b260f13d3012071dfb1512849c033b1925038373aea48ced3012c09df952c602"
|
||||
dependencies = [
|
||||
"quote",
|
||||
"wasm-bindgen-macro-support",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-macro-support"
|
||||
version = "0.2.82"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5be8e654bdd9b79216c2929ab90721aa82faf65c48cdf08bdc4e7f51357b80da"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"wasm-bindgen-backend",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-shared"
|
||||
version = "0.2.82"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6598dd0bd3c7d51095ff6531a5b23e02acdc81804e30d8f07afb77b7215a140a"
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu",
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.36.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2"
|
||||
dependencies = [
|
||||
"windows_aarch64_msvc",
|
||||
"windows_i686_gnu",
|
||||
"windows_i686_msvc",
|
||||
"windows_x86_64_gnu",
|
||||
"windows_x86_64_msvc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.36.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.36.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.36.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.36.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.36.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"
|
||||
|
||||
[[package]]
|
||||
name = "xz2"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2"
|
||||
dependencies = [
|
||||
"lzma-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd"
|
||||
version = "0.10.2+zstd.1.5.2"
|
||||
version = "0.11.2+zstd.1.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5f4a6bd64f22b5e3e94b4e238669ff9f10815c27a5180108b849d24174a83847"
|
||||
checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4"
|
||||
dependencies = [
|
||||
"zstd-safe",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd-safe"
|
||||
version = "4.1.6+zstd.1.5.2"
|
||||
version = "5.0.2+zstd.1.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94b61c51bb270702d6167b8ce67340d2754b088d0c091b06e593aa772c3ee9bb"
|
||||
checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"zstd-sys",
|
||||
@ -560,9 +930,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "zstd-sys"
|
||||
version = "1.6.3+zstd.1.5.2"
|
||||
version = "2.0.1+zstd.1.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fc49afa5c8d634e75761feda8c592051e7eeb4683ba827211eb0d731d3402ea8"
|
||||
checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
|
@ -7,11 +7,14 @@ edition = "2021"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
zip = "0.6"
|
||||
encoding = "0.2"
|
||||
textstream = "0.1"
|
||||
json="0.12"
|
||||
rayon="1.5"
|
||||
tokio = { version = "1.21.0", features = ["full"] }
|
||||
async_zip = { git = "https://github.com/Majored/rs-async-zip" }
|
||||
tokio-stream = { version = "0.1.9", features = ["fs"] }
|
||||
futures = "0.3.24"
|
||||
|
||||
[profile.release]
|
||||
opt-level = 3
|
||||
|
7
recode.ps1
Normal file
7
recode.ps1
Normal file
@ -0,0 +1,7 @@
|
||||
mkdir .\baza_utf8
|
||||
Get-ChildItem baza | ForEach-Object -Process {
|
||||
$n = $_.Name
|
||||
get-content $_ -Encoding KOI8-R | Out-File ".\baza_utf8\$n" -Encoding utf8
|
||||
}
|
||||
|
||||
# $c=0; $t=$(ls .\baza).Count; while ($t -gt $c) { $c = $(ls .\baza_utf8).Count ; $r = ($t-$c) ; echo $r; sleep 2 }
|
279
src/main.rs
279
src/main.rs
@ -1,18 +1,16 @@
|
||||
extern crate encoding;
|
||||
extern crate async_zip;
|
||||
extern crate futures;
|
||||
extern crate json;
|
||||
extern crate rayon;
|
||||
extern crate textstream;
|
||||
extern crate zip;
|
||||
extern crate tokio;
|
||||
|
||||
use encoding::all::KOI8_R;
|
||||
use encoding::DecoderTrap;
|
||||
use rayon::prelude::*;
|
||||
use async_zip::read::fs::ZipFileReader;
|
||||
use futures::future;
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
use std::{fs, io};
|
||||
use textstream::TextReader;
|
||||
use tokio::fs;
|
||||
use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader};
|
||||
|
||||
const BASE_FILENAME: &str = "baza.zip";
|
||||
const BASE_FILENAME: &str = "baza_utf8.zip";
|
||||
const OUTPUT_PATH: &str = "json";
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
@ -109,9 +107,11 @@ impl FromStr for KeywordType {
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_file(file: impl io::Read) -> Result<json::JsonValue, Box<dyn std::error::Error>> {
|
||||
let buf = io::BufReader::new(file);
|
||||
let reader = TextReader::new(buf, KOI8_R, DecoderTrap::Ignore);
|
||||
async fn parse_file(
|
||||
entry_reader: impl AsyncReadExt + Unpin,
|
||||
) -> Result<json::JsonValue, Box<dyn std::error::Error>> {
|
||||
let buf_reader = BufReader::new(entry_reader);
|
||||
let mut lines = buf_reader.lines();
|
||||
|
||||
let patterns = vec![
|
||||
"Чемпионат:",
|
||||
@ -141,72 +141,74 @@ fn parse_file(file: impl io::Read) -> Result<json::JsonValue, Box<dyn std::error
|
||||
let mut context = Context::new();
|
||||
let mut ctx = &mut context;
|
||||
|
||||
reader
|
||||
.lines()
|
||||
.map(|line| String::from(line.unwrap().trim()))
|
||||
.filter(|line| !line.is_empty()) // ignore empty lines
|
||||
.for_each(|line| {
|
||||
match patterns
|
||||
.iter() // find keyword
|
||||
.find(|&&pattern| line.starts_with(pattern) && line.ends_with(':'))
|
||||
{
|
||||
Some(pattern) => {
|
||||
use KeywordType::*;
|
||||
while let Some(line_r) = lines.next_line().await? {
|
||||
let line = line_r.trim();
|
||||
if line.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let line = line.to_string();
|
||||
|
||||
ctx.last_keyword_type = ctx.cur_keyword_type;
|
||||
ctx.last_tag = ctx.cur_tag.clone();
|
||||
ctx.cur_keyword_type = Some(pattern.parse().unwrap());
|
||||
ctx.cur_tag = pattern.replace(' ', "").replace(':', "");
|
||||
match patterns
|
||||
.iter() // find keyword
|
||||
.find(|&&pattern| line.starts_with(pattern) && line.ends_with(':'))
|
||||
{
|
||||
Some(pattern) => {
|
||||
use KeywordType::*;
|
||||
|
||||
// remember question id
|
||||
if let Some(QuestionStart) = ctx.cur_keyword_type {
|
||||
ctx.cur_question_pre["id"] = line.replace(':', "").as_str().into();
|
||||
};
|
||||
ctx.last_keyword_type = ctx.cur_keyword_type;
|
||||
ctx.last_tag = ctx.cur_tag.clone();
|
||||
ctx.cur_keyword_type = Some(pattern.parse().unwrap());
|
||||
ctx.cur_tag = pattern.replace(' ', "").replace(':', "");
|
||||
|
||||
// apply accumulated content when new keyword found
|
||||
match ctx.last_keyword_type {
|
||||
Some(Global) => {
|
||||
ctx.cur_scope = DataScope::Global;
|
||||
ctx.data[&ctx.last_tag] = ctx.cur_content.join("\n").into()
|
||||
// remember question id
|
||||
if let Some(QuestionStart) = ctx.cur_keyword_type {
|
||||
ctx.cur_question_pre["id"] = line.replace(':', "").as_str().into();
|
||||
};
|
||||
|
||||
// apply accumulated content when new keyword found
|
||||
match ctx.last_keyword_type {
|
||||
Some(Global) => {
|
||||
ctx.cur_scope = DataScope::Global;
|
||||
ctx.data[&ctx.last_tag] = ctx.cur_content.join("\n").into()
|
||||
}
|
||||
Some(QuestionPre) => {
|
||||
ctx.cur_scope = DataScope::QuestionPre;
|
||||
ctx.cur_question_pre[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||
}
|
||||
Some(QuestionStart) => {
|
||||
ctx.cur_scope = DataScope::QuestionContent;
|
||||
// store prev question before reading new
|
||||
if ctx.have_new_question {
|
||||
ctx.questions.push_if_valid(ctx.cur_question.clone());
|
||||
}
|
||||
Some(QuestionPre) => {
|
||||
ctx.cur_scope = DataScope::QuestionPre;
|
||||
ctx.cur_question_pre[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||
}
|
||||
Some(QuestionStart) => {
|
||||
ctx.cur_scope = DataScope::QuestionContent;
|
||||
// store prev question before reading new
|
||||
if ctx.have_new_question {
|
||||
ctx.questions.push_if_valid(ctx.cur_question.clone());
|
||||
}
|
||||
// prepare to read new question data with cur_question_pre values
|
||||
ctx.cur_question = ctx.cur_question_pre.clone();
|
||||
ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||
ctx.have_new_question = true;
|
||||
}
|
||||
Some(QuestionContent) => {
|
||||
ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||
}
|
||||
Some(CurrentScope) => {
|
||||
// match value to store data
|
||||
let scope_data = match ctx.cur_scope {
|
||||
DataScope::Global => &mut ctx.data,
|
||||
DataScope::QuestionPre => &mut ctx.cur_question_pre,
|
||||
DataScope::QuestionContent => &mut ctx.cur_question,
|
||||
};
|
||||
scope_data[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||
}
|
||||
_ => (), //None or Ignore
|
||||
};
|
||||
// clear content
|
||||
ctx.cur_content.clear();
|
||||
}
|
||||
None => {
|
||||
// accumulate content if line is not a keyword
|
||||
ctx.cur_content.push(line);
|
||||
}
|
||||
// prepare to read new question data with cur_question_pre values
|
||||
ctx.cur_question = ctx.cur_question_pre.clone();
|
||||
ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||
ctx.have_new_question = true;
|
||||
}
|
||||
Some(QuestionContent) => {
|
||||
ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||
}
|
||||
Some(CurrentScope) => {
|
||||
// match value to store data
|
||||
let scope_data = match ctx.cur_scope {
|
||||
DataScope::Global => &mut ctx.data,
|
||||
DataScope::QuestionPre => &mut ctx.cur_question_pre,
|
||||
DataScope::QuestionContent => &mut ctx.cur_question,
|
||||
};
|
||||
scope_data[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||
}
|
||||
_ => (), //None or Ignore
|
||||
};
|
||||
// clear content
|
||||
ctx.cur_content.clear();
|
||||
}
|
||||
});
|
||||
None => {
|
||||
// accumulate content if line is not a keyword
|
||||
ctx.cur_content.push(line);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// finish reading last question
|
||||
if ctx.have_new_question && !ctx.cur_content.is_empty() {
|
||||
@ -219,106 +221,61 @@ fn parse_file(file: impl io::Read) -> Result<json::JsonValue, Box<dyn std::error
|
||||
Ok(ctx.data.clone())
|
||||
}
|
||||
|
||||
// split vector to a vector of [num] slices
|
||||
trait SplitTo<T> {
|
||||
fn split_to(&self, num: usize) -> Vec<&[T]>;
|
||||
async fn process_file(index: usize, name: String) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let archive = ZipFileReader::new(String::from(BASE_FILENAME)).await?;
|
||||
let entry_reader = archive.entry_reader(index).await?;
|
||||
|
||||
// make output filename
|
||||
let mut outfilename = PathBuf::from(OUTPUT_PATH);
|
||||
outfilename.push(name);
|
||||
outfilename.set_extension("json");
|
||||
|
||||
// save json to file
|
||||
let new_data = parse_file(entry_reader).await?;
|
||||
let data_str = new_data.pretty(2);
|
||||
|
||||
let mut outfile = fs::File::create(outfilename).await?;
|
||||
outfile.write_all(data_str.as_bytes()).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
impl<T> SplitTo<T> for Vec<T> {
|
||||
fn split_to(&self, num: usize) -> Vec<&[T]> {
|
||||
let part_len = self.len() / num;
|
||||
let add_len = self.len() % num;
|
||||
let mut result = Vec::<&[T]>::with_capacity(num);
|
||||
|
||||
if 0 == part_len {
|
||||
result.push(self);
|
||||
return result;
|
||||
}
|
||||
for i in 0..num {
|
||||
let size = if (num - 1) == i {
|
||||
part_len + add_len
|
||||
} else {
|
||||
part_len
|
||||
};
|
||||
let start = part_len * i;
|
||||
result.push(&self[start..(start + size)]);
|
||||
}
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
fn process_files(files: &&[PathBuf]) {
|
||||
if files.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
let start_file = files[0].to_str().unwrap();
|
||||
println!("-> start from \"{}\" ({} files)", start_file, files.len());
|
||||
|
||||
let zip_file = fs::File::open(BASE_FILENAME).unwrap();
|
||||
let zip_reader = io::BufReader::new(zip_file);
|
||||
let mut archive = zip::ZipArchive::new(zip_reader).unwrap();
|
||||
|
||||
files.iter().for_each(|name| {
|
||||
let name_str = name.to_str().unwrap();
|
||||
|
||||
// parse txt file
|
||||
let file = archive.by_name(name_str).unwrap();
|
||||
let data = parse_file(file).unwrap();
|
||||
|
||||
// make output filename
|
||||
let mut outfilename = PathBuf::from(OUTPUT_PATH);
|
||||
outfilename.push(name);
|
||||
outfilename.set_extension("json");
|
||||
|
||||
// save json to file
|
||||
let mut outfile = fs::File::create(outfilename).unwrap();
|
||||
data.write_pretty(&mut outfile, 1).unwrap();
|
||||
});
|
||||
|
||||
println!("<- done {} files (from \"{}\")", files.len(), start_file);
|
||||
}
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// open archive just to list files
|
||||
let zip_file = fs::File::open(BASE_FILENAME)?;
|
||||
let zip_reader = io::BufReader::new(zip_file);
|
||||
let mut archive = zip::ZipArchive::new(zip_reader)?;
|
||||
let archive = ZipFileReader::new(String::from(BASE_FILENAME)).await?;
|
||||
|
||||
let source_files: Vec<PathBuf> = (0..archive.len())
|
||||
.map(|i| archive.by_index(i).unwrap().mangled_name())
|
||||
.filter(|name| {
|
||||
let source_files: Vec<(usize, String)> = archive
|
||||
.entries()
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|item| !item.1.dir())
|
||||
.filter(|item| {
|
||||
// skip files without "txt" extension
|
||||
match name.extension() {
|
||||
Some(ext) => match ext.to_str() {
|
||||
Some(ext_str) => ext_str.eq_ignore_ascii_case("txt"),
|
||||
_ => false, // extension is not valid unicode or not txt
|
||||
},
|
||||
_ => false, // no extension in filename
|
||||
}
|
||||
item.1.name().ends_with(".txt")
|
||||
})
|
||||
.map(|item| (item.0, item.1.name().to_string()))
|
||||
.collect();
|
||||
drop(archive);
|
||||
|
||||
// check output directory
|
||||
let out_dir: PathBuf = OUTPUT_PATH.into();
|
||||
if out_dir.is_file() {
|
||||
return Err("output directory is file!".into());
|
||||
} else if !out_dir.exists() {
|
||||
fs::create_dir_all(out_dir)?;
|
||||
match fs::metadata(OUTPUT_PATH).await {
|
||||
Err(_) => fs::create_dir_all(OUTPUT_PATH).await?,
|
||||
Ok(x) if x.is_file() => return Err("output directory is file!".into()),
|
||||
_ => (),
|
||||
};
|
||||
|
||||
println!(
|
||||
"processing {} files with {} threads...",
|
||||
source_files.len(),
|
||||
rayon::current_num_threads()
|
||||
);
|
||||
println!("processing {} files ...", source_files.len());
|
||||
|
||||
// split vector and process its parts in parallel
|
||||
source_files
|
||||
.split_to(rayon::current_num_threads())
|
||||
.par_iter()
|
||||
.for_each(process_files);
|
||||
let mut handles = vec![];
|
||||
for index in source_files {
|
||||
let i = index.clone();
|
||||
let handle = tokio::spawn(async move {
|
||||
process_file(i.0, i.1).await.unwrap();
|
||||
});
|
||||
handles.push(handle);
|
||||
}
|
||||
|
||||
future::join_all(handles).await;
|
||||
|
||||
println!("done");
|
||||
Ok(())
|
||||
|
Loading…
Reference in New Issue
Block a user