Compare commits
No commits in common. "master" and "parallel" have entirely different histories.
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,5 +1,4 @@
|
||||
/target
|
||||
**/*.rs.bk
|
||||
baza*.zip
|
||||
baza.zip
|
||||
/json/
|
||||
json.zip
|
||||
|
738
Cargo.lock
generated
738
Cargo.lock
generated
@ -3,31 +3,21 @@
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "async-compression"
|
||||
version = "0.4.1"
|
||||
name = "adler"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62b74f44609f0f91493e3082d3734d98497e094777144380ea4db9f9905dd5b6"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-io",
|
||||
"memchr",
|
||||
"pin-project-lite",
|
||||
"zstd",
|
||||
"zstd-safe",
|
||||
]
|
||||
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
|
||||
|
||||
[[package]]
|
||||
name = "async_zip"
|
||||
version = "0.0.15"
|
||||
source = "git+https://github.com/Majored/rs-async-zip?rev=ff0d985#ff0d985ef54cf00d73c497dbca0beea7541e37dc"
|
||||
name = "aes"
|
||||
version = "0.7.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9e8b47f52ea9bae42228d07ec09eb676433d7c4ed1ebdf0f1d1c29ed446f1ab8"
|
||||
dependencies = [
|
||||
"async-compression",
|
||||
"crc32fast",
|
||||
"futures-util",
|
||||
"pin-project",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"cfg-if",
|
||||
"cipher",
|
||||
"cpufeatures",
|
||||
"opaque-debug",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -37,10 +27,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
name = "base64ct"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
||||
checksum = "8a32fd6af2b5827bce66c29053ba0e7c42b9dcab01835835058558c10851a46b"
|
||||
|
||||
[[package]]
|
||||
name = "block-buffer"
|
||||
version = "0.10.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0bf7fe51849ea569fd452f37822f606a5cabb684dc918707a0193fd4664ff324"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
@ -49,26 +48,31 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
|
||||
|
||||
[[package]]
|
||||
name = "bytes"
|
||||
version = "0.4.12"
|
||||
name = "bzip2"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "206fdffcfa2df7cbe15601ef46c813fce0965eb3286db6b56c583b814b51c81c"
|
||||
checksum = "6afcd980b5f3a45017c57e57a2fcccbb351cc43a356ce117ef760ef8052b89b0"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"iovec",
|
||||
"bzip2-sys",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bytes"
|
||||
version = "1.2.1"
|
||||
name = "bzip2-sys"
|
||||
version = "0.1.11+1.0.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db"
|
||||
checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"pkg-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.79"
|
||||
version = "1.0.73"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
|
||||
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
|
||||
dependencies = [
|
||||
"jobserver",
|
||||
]
|
||||
@ -83,10 +87,35 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
name = "chgk_txt2json"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"async_zip",
|
||||
"futures-util",
|
||||
"encoding",
|
||||
"json",
|
||||
"tokio",
|
||||
"rayon",
|
||||
"textstream",
|
||||
"zip",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cipher"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7ee52072ec15386f770805afd189a01c8841be8696bed250fa2f13c4c0d6dfb7"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "constant_time_eq"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
|
||||
|
||||
[[package]]
|
||||
name = "cpufeatures"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "59a6001667ab124aebae2a495118e11d30984c3a653e99d86d58971708cf5e4b"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -99,61 +128,159 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures"
|
||||
version = "0.1.31"
|
||||
name = "crossbeam-channel"
|
||||
version = "0.5.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a471a38ef8ed83cd6e40aa59c1ffe17db6855c18e3604d9c4ed8c08ebc28678"
|
||||
|
||||
[[package]]
|
||||
name = "futures-core"
|
||||
version = "0.3.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c"
|
||||
|
||||
[[package]]
|
||||
name = "futures-io"
|
||||
version = "0.3.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964"
|
||||
|
||||
[[package]]
|
||||
name = "futures-macro"
|
||||
version = "0.3.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
|
||||
checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.28",
|
||||
"cfg-if",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures-sink"
|
||||
version = "0.3.28"
|
||||
name = "crossbeam-deque"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e"
|
||||
|
||||
[[package]]
|
||||
name = "futures-task"
|
||||
version = "0.3.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65"
|
||||
|
||||
[[package]]
|
||||
name = "futures-util"
|
||||
version = "0.3.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533"
|
||||
checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-io",
|
||||
"futures-macro",
|
||||
"futures-task",
|
||||
"memchr",
|
||||
"pin-project-lite",
|
||||
"pin-utils",
|
||||
"slab",
|
||||
"tokio-io",
|
||||
"cfg-if",
|
||||
"crossbeam-epoch",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-epoch"
|
||||
version = "0.9.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "045ebe27666471bb549370b4b0b3e51b07f56325befa4284db65fc89c02511b1"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"cfg-if",
|
||||
"crossbeam-utils",
|
||||
"memoffset",
|
||||
"once_cell",
|
||||
"scopeguard",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.8.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crypto-common"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
"typenum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.10.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506"
|
||||
dependencies = [
|
||||
"block-buffer",
|
||||
"crypto-common",
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f107b87b6afc2a64fd13cac55fe06d6c8859f12d4b14cbcdd2c67d0976781be"
|
||||
|
||||
[[package]]
|
||||
name = "encoding"
|
||||
version = "0.2.33"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec"
|
||||
dependencies = [
|
||||
"encoding-index-japanese",
|
||||
"encoding-index-korean",
|
||||
"encoding-index-simpchinese",
|
||||
"encoding-index-singlebyte",
|
||||
"encoding-index-tradchinese",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-japanese"
|
||||
version = "1.20141219.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91"
|
||||
dependencies = [
|
||||
"encoding_index_tests",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-korean"
|
||||
version = "1.20141219.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81"
|
||||
dependencies = [
|
||||
"encoding_index_tests",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-simpchinese"
|
||||
version = "1.20141219.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7"
|
||||
dependencies = [
|
||||
"encoding_index_tests",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-singlebyte"
|
||||
version = "1.20141219.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a"
|
||||
dependencies = [
|
||||
"encoding_index_tests",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-tradchinese"
|
||||
version = "1.20141219.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18"
|
||||
dependencies = [
|
||||
"encoding_index_tests",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_index_tests"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569"
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.0.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f82b0f4c27ad9f8bfd1f3208d882da2b09c301bc1c828fd3a00d0216d2fbbff6"
|
||||
dependencies = [
|
||||
"crc32fast",
|
||||
"miniz_oxide",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "generic-array"
|
||||
version = "0.14.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9"
|
||||
dependencies = [
|
||||
"typenum",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -166,19 +293,25 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iovec"
|
||||
version = "0.1.4"
|
||||
name = "hmac"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e"
|
||||
checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jobserver"
|
||||
version = "0.1.26"
|
||||
name = "itoa"
|
||||
version = "1.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2"
|
||||
checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754"
|
||||
|
||||
[[package]]
|
||||
name = "jobserver"
|
||||
version = "0.1.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
@ -191,45 +324,35 @@ checksum = "078e285eafdfb6c4b434e0d31e8cfcb5115b651496faca5749b88fafd4f23bfd"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.132"
|
||||
version = "0.2.131"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5"
|
||||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
version = "0.4.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9f80bf5aacaf25cbfc8210d1cfb718f2bf3b11c4c54e5afe36c236853a8ec390"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"scopeguard",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
checksum = "04c3b4822ccebfa39c02fc03d1534441b22ead323fa0f48bb7ddd8e6ba076a40"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.5.0"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
|
||||
|
||||
[[package]]
|
||||
name = "mio"
|
||||
version = "0.8.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "57ee1c23c7c63b0c9250c339ffdc69255f110b298b901b9f6c82547b7b87caaf"
|
||||
checksum = "148fab2e51b4f1cfc66da2a7c32981d1d3c083a803978268bb11fe4b86925e7a"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"log",
|
||||
"wasi",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memoffset"
|
||||
version = "0.6.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6f5c75688da582b8ffc1f1799e9db273f32133c49e048f614d22ec3256773ccc"
|
||||
dependencies = [
|
||||
"adler",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -243,97 +366,83 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.14.0"
|
||||
name = "num_threads"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2f7254b99e31cad77da24b08ebf628882739a608578bb1bcdfc1f9c21260d7c0"
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
|
||||
checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44"
|
||||
dependencies = [
|
||||
"lock_api",
|
||||
"parking_lot_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot_core"
|
||||
version = "0.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"redox_syscall",
|
||||
"smallvec",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project"
|
||||
version = "1.1.2"
|
||||
name = "once_cell"
|
||||
version = "1.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "030ad2bc4db10a8944cb0d837f158bdfec4d4a4873ab701a95046770d11f8842"
|
||||
checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1"
|
||||
|
||||
[[package]]
|
||||
name = "opaque-debug"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5"
|
||||
|
||||
[[package]]
|
||||
name = "password-hash"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1d791538a6dcc1e7cb7fe6f6b58aca40e7f79403c45b2bc274008b5e647af1d8"
|
||||
dependencies = [
|
||||
"pin-project-internal",
|
||||
"base64ct",
|
||||
"rand_core",
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project-internal"
|
||||
version = "1.1.2"
|
||||
name = "pbkdf2"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec2e072ecce94ec471b13398d5402c188e76ac03cf74dd1a975161b23a3f6d9c"
|
||||
checksum = "271779f35b581956db91a3e55737327a03aa051e90b1c47aeb189508533adfd7"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.28",
|
||||
"digest",
|
||||
"hmac",
|
||||
"password-hash",
|
||||
"sha2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project-lite"
|
||||
version = "0.2.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116"
|
||||
|
||||
[[package]]
|
||||
name = "pin-utils"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.27"
|
||||
version = "0.3.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"
|
||||
checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.66"
|
||||
name = "rand_core"
|
||||
version = "0.6.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
|
||||
checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
|
||||
|
||||
[[package]]
|
||||
name = "rayon"
|
||||
version = "1.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
"autocfg",
|
||||
"crossbeam-deque",
|
||||
"either",
|
||||
"rayon-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.32"
|
||||
name = "rayon-core"
|
||||
version = "1.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965"
|
||||
checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"crossbeam-channel",
|
||||
"crossbeam-deque",
|
||||
"crossbeam-utils",
|
||||
"num_cpus",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -343,229 +452,107 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook-registry"
|
||||
version = "1.4.0"
|
||||
name = "sha1"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0"
|
||||
checksum = "c77f4e7f65455545c2153c1253d25056825e77ee2533f0e41deb65a93a34852f"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "slab"
|
||||
version = "0.4.8"
|
||||
name = "sha2"
|
||||
version = "0.10.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d"
|
||||
checksum = "55deaec60f81eefe3cce0dc50bda92d6d8e88f2a27df7c5033b42afeb1ed2676"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "1.9.0"
|
||||
name = "subtle"
|
||||
version = "2.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1"
|
||||
checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601"
|
||||
|
||||
[[package]]
|
||||
name = "socket2"
|
||||
version = "0.4.7"
|
||||
name = "textstream"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd"
|
||||
checksum = "e7ed81b342f6566026755e7f4b7798810b1c159722e427d212ce72c2c58ffdaa"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.99"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "58dbef6ec655055e20b86b15a8cc6d439cca19b667537ac6a1369572d151ab13"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.34"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8c1b05ca9d106ba7d2e31a9dab4a64e7be2cce415321966ea3132c49a656e252"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.34"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e8f2591983642de85c921015f3f070c665a197ed69e417af436115e3a1407487"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.99",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio"
|
||||
version = "1.21.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "89797afd69d206ccd11fb0ea560a44bbb87731d020670e79416d442919257d42"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"bytes 1.2.1",
|
||||
"libc",
|
||||
"encoding",
|
||||
"memchr",
|
||||
"mio",
|
||||
"num_cpus",
|
||||
"once_cell",
|
||||
"parking_lot",
|
||||
"pin-project-lite",
|
||||
"signal-hook-registry",
|
||||
"socket2",
|
||||
"tokio-macros",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-io"
|
||||
version = "0.1.13"
|
||||
name = "time"
|
||||
version = "0.3.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "57fc868aae093479e3131e3d165c93b1c7474109d13c90ec0dda2a1bbfff0674"
|
||||
checksum = "db76ff9fa4b1458b3c7f077f3ff9887394058460d21e634355b273aaf11eea45"
|
||||
dependencies = [
|
||||
"bytes 0.4.12",
|
||||
"futures",
|
||||
"log",
|
||||
"itoa",
|
||||
"libc",
|
||||
"num_threads",
|
||||
"time-macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-macros"
|
||||
version = "1.8.0"
|
||||
name = "time-macros"
|
||||
version = "0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9724f9a975fb987ef7a3cd9be0350edcbe130698af5b8f7a631e23d42d052484"
|
||||
checksum = "42657b1a6f4d817cda8e7a0ace261fe0cc946cf3a80314390b22cc61ae080792"
|
||||
|
||||
[[package]]
|
||||
name = "typenum"
|
||||
version = "1.15.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||
|
||||
[[package]]
|
||||
name = "zip"
|
||||
version = "0.6.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bf225bcf73bb52cbb496e70475c7bd7a3f769df699c0020f6c7bd9a96dcf0b8d"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.99",
|
||||
"aes",
|
||||
"byteorder",
|
||||
"bzip2",
|
||||
"constant_time_eq",
|
||||
"crc32fast",
|
||||
"crossbeam-utils",
|
||||
"flate2",
|
||||
"hmac",
|
||||
"pbkdf2",
|
||||
"sha1",
|
||||
"time",
|
||||
"zstd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-util"
|
||||
version = "0.7.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7e267c18a719545b481171952a79f8c25c80361463ba44bc7fa9eba7c742ef4f"
|
||||
dependencies = [
|
||||
"bytes 1.2.1",
|
||||
"futures-core",
|
||||
"futures-io",
|
||||
"futures-sink",
|
||||
"pin-project-lite",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.11.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu",
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.36.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2"
|
||||
dependencies = [
|
||||
"windows_aarch64_msvc",
|
||||
"windows_i686_gnu",
|
||||
"windows_i686_msvc",
|
||||
"windows_x86_64_gnu",
|
||||
"windows_x86_64_msvc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.36.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.36.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.36.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.36.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.36.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"
|
||||
|
||||
[[package]]
|
||||
name = "zstd"
|
||||
version = "0.12.4"
|
||||
version = "0.10.2+zstd.1.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c"
|
||||
checksum = "5f4a6bd64f22b5e3e94b4e238669ff9f10815c27a5180108b849d24174a83847"
|
||||
dependencies = [
|
||||
"zstd-safe",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd-safe"
|
||||
version = "6.0.6"
|
||||
version = "4.1.6+zstd.1.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581"
|
||||
checksum = "94b61c51bb270702d6167b8ce67340d2754b088d0c091b06e593aa772c3ee9bb"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"zstd-sys",
|
||||
@ -573,11 +560,10 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "zstd-sys"
|
||||
version = "2.0.8+zstd.1.5.5"
|
||||
version = "1.6.3+zstd.1.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c"
|
||||
checksum = "fc49afa5c8d634e75761feda8c592051e7eeb4683ba827211eb0d731d3402ea8"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"pkg-config",
|
||||
]
|
||||
|
13
Cargo.toml
13
Cargo.toml
@ -7,14 +7,11 @@ edition = "2021"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
json = "0.12"
|
||||
tokio = { version = "1.21.0", features = ["full"] }
|
||||
async_zip = { git = "https://github.com/Majored/rs-async-zip", rev = "ff0d985", features = [
|
||||
"zstd",
|
||||
"tokio",
|
||||
"tokio-fs",
|
||||
] }
|
||||
futures-util = { version = "0.3.28", features = ["io", "tokio-io"] }
|
||||
zip = "0.6"
|
||||
encoding = "0.2"
|
||||
textstream = "0.1"
|
||||
json="0.12"
|
||||
rayon="1.5"
|
||||
|
||||
[profile.release]
|
||||
opt-level = 3
|
||||
|
21
LICENSE
21
LICENSE
@ -1,21 +0,0 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2022 Dmitry Belyaev
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
26
README.md
26
README.md
@ -1,26 +0,0 @@
|
||||
# chgk_txt2json
|
||||
|
||||
Конвертер **текстовых** файлов с вопросами в **JSON** формат.
|
||||
Исходные файлы вопросов должны находиться в ZIP файле `baza_utf8.zip`, результат будет записан в файл `json.zip`.
|
||||
|
||||
## Особенности
|
||||
|
||||
### Кодировка исходных файлов
|
||||
|
||||
В оригинальных файлах базы используется кодировка `KOI8-R`.
|
||||
Но из-за того, что [tokio::AsyncBufReadExt::lines](https://docs.rs/tokio/latest/tokio/io/trait.AsyncBufReadExt.html#method.lines) работает только с `UTF-8`, эта утилита работает с файлами, которые уже в кодировке `UTF-8`.
|
||||
|
||||
### Алгоритм сжатия выходного архива
|
||||
|
||||
Для сжатия файлов в выходном архиве используется метод `Zstandard`, т.к. он достаточно быстр и по степени сжатия сопоставим с обычным `Deflate`.
|
||||
|
||||
Но для просмотра содержимого таких ZIP-файлов нужно использовать ПО с поддержкой `Zstandard`, например:
|
||||
|
||||
- [Modern7z](https://www.tc4shell.com/ru/7zip/modern7z)
|
||||
- [7-Zip-zstd](https://github.com/mcmilk/7-Zip-zstd)
|
||||
|
||||
## Ссылки
|
||||
|
||||
- Исходная база вопросов: http://db.chgk.info
|
||||
- Копия файлов базы вопросов: https://gitlab.com/b4tman/db_chgk
|
||||
- Утилита конвертации кодировки текстовых файлов в ZIP архиве: https://gitea.b4tman.ru/temp/ztb_recode
|
536
src/main.rs
536
src/main.rs
@ -1,20 +1,19 @@
|
||||
extern crate async_zip;
|
||||
extern crate encoding;
|
||||
extern crate json;
|
||||
extern crate tokio;
|
||||
extern crate rayon;
|
||||
extern crate textstream;
|
||||
extern crate zip;
|
||||
|
||||
use async_zip::tokio::read::seek::ZipFileReader;
|
||||
use async_zip::tokio::write::ZipFileWriter;
|
||||
use async_zip::{Compression, ZipEntryBuilder};
|
||||
use futures_util::io::{AsyncBufReadExt, BufReader};
|
||||
use futures_util::stream::StreamExt;
|
||||
use encoding::all::KOI8_R;
|
||||
use encoding::DecoderTrap;
|
||||
use rayon::prelude::*;
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
use tokio::fs;
|
||||
use tokio::sync::mpsc::{self, UnboundedReceiver, UnboundedSender};
|
||||
use std::{fs, io};
|
||||
use textstream::TextReader;
|
||||
|
||||
const INPUT_FILENAME: &str = "baza_utf8.zip";
|
||||
const OUTPUT_FILENAME: &str = "json.zip";
|
||||
const OUTPUT_COMPRESSION: Compression = Compression::Zstd;
|
||||
const BASE_FILENAME: &str = "baza.zip";
|
||||
const OUTPUT_PATH: &str = "json";
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
enum KeywordType {
|
||||
@ -26,24 +25,6 @@ enum KeywordType {
|
||||
CurrentScope,
|
||||
}
|
||||
|
||||
impl FromStr for KeywordType {
|
||||
type Err = ();
|
||||
|
||||
fn from_str(pattern: &str) -> Result<Self, Self::Err> {
|
||||
use KeywordType::*;
|
||||
Ok(match pattern {
|
||||
"Мета:" => Ignore,
|
||||
"Чемпионат:" | "Пакет:" => Global,
|
||||
"Тур:" => QuestionPre,
|
||||
"Вопрос " | "Вопрос:" => QuestionStart,
|
||||
"Ответ:" | "Зачет:" => QuestionContent,
|
||||
_ => CurrentScope,
|
||||
// "URL:" | "Ссылка:" | "Дата:" | "Обработан:" | "Автор:" | "Редактор:" | "Копирайт:" | "Инфо:" |
|
||||
// "Тема:" | "Вид:" | "Тип:" | "Источник:" | "Рейтинг:" | "Комментарий:" | "Комментарии:"
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
enum DataScope {
|
||||
Global,
|
||||
@ -51,7 +32,7 @@ enum DataScope {
|
||||
QuestionContent,
|
||||
}
|
||||
|
||||
struct QuestionsParser {
|
||||
struct Context {
|
||||
// global output value
|
||||
data: json::JsonValue,
|
||||
// temp questions array
|
||||
@ -75,9 +56,64 @@ struct QuestionsParser {
|
||||
last_tag: String,
|
||||
}
|
||||
|
||||
/// Text questions parser
|
||||
impl QuestionsParser {
|
||||
const PATTERNS: &'static [&'static str] = &[
|
||||
// check questions before push
|
||||
trait PushIfValid {
|
||||
fn is_valid(&self) -> bool;
|
||||
fn push_if_valid(&mut self, value: json::JsonValue);
|
||||
}
|
||||
|
||||
impl PushIfValid for json::JsonValue {
|
||||
fn is_valid(&self) -> bool {
|
||||
self.has_key("Вопрос") && self.has_key("Ответ")
|
||||
}
|
||||
fn push_if_valid(&mut self, value: json::JsonValue) {
|
||||
if value.is_valid() {
|
||||
self.push(value).unwrap_or(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Context {
|
||||
fn new() -> Context {
|
||||
Context {
|
||||
data: json::JsonValue::new_object(),
|
||||
questions: json::JsonValue::new_array(),
|
||||
cur_keyword_type: None,
|
||||
cur_question: json::JsonValue::new_object(),
|
||||
cur_question_pre: json::JsonValue::new_object(),
|
||||
cur_tag: String::new(),
|
||||
cur_content: Vec::<String>::new(),
|
||||
cur_scope: DataScope::Global,
|
||||
have_new_question: false,
|
||||
last_keyword_type: None,
|
||||
last_tag: String::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for KeywordType {
|
||||
type Err = ();
|
||||
|
||||
fn from_str(pattern: &str) -> Result<Self, Self::Err> {
|
||||
use KeywordType::*;
|
||||
Ok(match pattern {
|
||||
"Мета:" => Ignore,
|
||||
"Чемпионат:" | "Пакет:" => Global,
|
||||
"Тур:" => QuestionPre,
|
||||
"Вопрос " | "Вопрос:" => QuestionStart,
|
||||
"Ответ:" | "Зачет:" => QuestionContent,
|
||||
_ => CurrentScope,
|
||||
// "URL:" | "Ссылка:" | "Дата:" | "Обработан:" | "Автор:" | "Редактор:" | "Копирайт:" | "Инфо:" |
|
||||
// "Тема:" | "Вид:" | "Тип:" | "Источник:" | "Рейтинг:" | "Комментарий:" | "Комментарии:"
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_file(file: impl io::Read) -> Result<json::JsonValue, Box<dyn std::error::Error>> {
|
||||
let buf = io::BufReader::new(file);
|
||||
let reader = TextReader::new(buf, KOI8_R, DecoderTrap::Ignore);
|
||||
|
||||
let patterns = vec![
|
||||
"Чемпионат:",
|
||||
"Пакет:",
|
||||
"URL:",
|
||||
@ -102,276 +138,188 @@ impl QuestionsParser {
|
||||
"Комментарий:",
|
||||
"Комментарии:",
|
||||
];
|
||||
let mut context = Context::new();
|
||||
let mut ctx = &mut context;
|
||||
|
||||
/// create new parser
|
||||
pub fn new() -> QuestionsParser {
|
||||
QuestionsParser {
|
||||
data: json::JsonValue::new_object(),
|
||||
questions: json::JsonValue::new_array(),
|
||||
cur_keyword_type: None,
|
||||
cur_question: json::JsonValue::new_object(),
|
||||
cur_question_pre: json::JsonValue::new_object(),
|
||||
cur_tag: String::new(),
|
||||
cur_content: Vec::<String>::new(),
|
||||
cur_scope: DataScope::Global,
|
||||
have_new_question: false,
|
||||
last_keyword_type: None,
|
||||
last_tag: String::new(),
|
||||
}
|
||||
}
|
||||
/// join current content lines
|
||||
fn get_current_content(&self) -> String {
|
||||
self.cur_content.join("\n")
|
||||
}
|
||||
/// clear current content
|
||||
fn clear_current_content(&mut self) {
|
||||
self.cur_content.clear()
|
||||
}
|
||||
/// add new line to current content
|
||||
fn append_to_current_content(&mut self, line: String) {
|
||||
self.cur_content.push(line);
|
||||
}
|
||||
/// check current question have required fields
|
||||
fn is_current_question_valid(&self) -> bool {
|
||||
self.cur_question.has_key("Вопрос") && self.cur_question.has_key("Ответ")
|
||||
}
|
||||
/// add current question to parsed array
|
||||
fn add_cur_question(&mut self) {
|
||||
if self.is_current_question_valid() {
|
||||
let current = std::mem::replace(&mut self.cur_question, self.cur_question_pre.clone());
|
||||
self.questions.push(current).unwrap()
|
||||
}
|
||||
}
|
||||
/// set current content to last tag(keyword) to data scope
|
||||
fn apply_content_to(&mut self, scope: DataScope) {
|
||||
let content = self.get_current_content();
|
||||
// match value to store data
|
||||
let scope_data = match scope {
|
||||
DataScope::Global => &mut self.data,
|
||||
DataScope::QuestionPre => &mut self.cur_question_pre,
|
||||
DataScope::QuestionContent => &mut self.cur_question,
|
||||
};
|
||||
scope_data[&self.last_tag] = content.into();
|
||||
self.clear_current_content();
|
||||
}
|
||||
/// set current content to last tag(keyword) to current scope
|
||||
fn apply_content_to_cur_scope(&mut self) {
|
||||
self.apply_content_to(self.cur_scope);
|
||||
}
|
||||
/// set current scope
|
||||
fn set_scope(&mut self, scope: DataScope) {
|
||||
self.cur_scope = scope;
|
||||
}
|
||||
/// set current scope and set current content to last tag(keyword) to data scope
|
||||
fn set_scope_and_apply(&mut self, scope: DataScope) {
|
||||
self.set_scope(scope);
|
||||
self.apply_content_to_cur_scope();
|
||||
}
|
||||
/// add last question (if have) and start collecting new one
|
||||
fn start_new_question(&mut self) {
|
||||
// store prev question before reading new
|
||||
if self.have_new_question {
|
||||
self.add_cur_question();
|
||||
}
|
||||
self.have_new_question = true;
|
||||
}
|
||||
|
||||
/// check last tag(keyword) and set current content to corresponding data scope
|
||||
fn apply_content_for_last_keyword(&mut self) {
|
||||
// apply accumulated content when new keyword found
|
||||
match self.last_keyword_type {
|
||||
Some(KeywordType::Global) => {
|
||||
self.set_scope_and_apply(DataScope::Global);
|
||||
}
|
||||
Some(KeywordType::QuestionPre) => {
|
||||
self.set_scope_and_apply(DataScope::QuestionPre);
|
||||
}
|
||||
Some(KeywordType::QuestionStart) => {
|
||||
self.start_new_question();
|
||||
self.set_scope_and_apply(DataScope::QuestionContent);
|
||||
}
|
||||
Some(KeywordType::QuestionContent) => {
|
||||
self.apply_content_to(DataScope::QuestionContent);
|
||||
}
|
||||
Some(KeywordType::CurrentScope) => {
|
||||
self.apply_content_to_cur_scope();
|
||||
}
|
||||
_ => (), //None or Ignore
|
||||
};
|
||||
}
|
||||
/// set current keyword(tag) and type as last, and set new as current
|
||||
fn set_new_keyword(&mut self, keyword: &str) {
|
||||
self.last_keyword_type =
|
||||
std::mem::replace(&mut self.cur_keyword_type, Some(keyword.parse().unwrap()));
|
||||
self.last_tag = std::mem::replace(
|
||||
&mut self.cur_tag,
|
||||
keyword.trim_end().trim_end_matches(':').to_string(),
|
||||
);
|
||||
}
|
||||
/// if line matched keyword
|
||||
fn on_keyword_match(&mut self, line: &str, keyword: &str) {
|
||||
self.set_new_keyword(keyword);
|
||||
|
||||
// remember question id
|
||||
if let Some(KeywordType::QuestionStart) = self.cur_keyword_type {
|
||||
self.cur_question_pre["id"] = line.trim_end().trim_end_matches(':').into();
|
||||
};
|
||||
|
||||
self.apply_content_for_last_keyword();
|
||||
}
|
||||
|
||||
/// parse next line
|
||||
pub fn parse_line(&mut self, line: &str) {
|
||||
match QuestionsParser::PATTERNS
|
||||
reader
|
||||
.lines()
|
||||
.map(|line| String::from(line.unwrap().trim()))
|
||||
.filter(|line| !line.is_empty()) // ignore empty lines
|
||||
.for_each(|line| {
|
||||
match patterns
|
||||
.iter() // find keyword
|
||||
.find(|&&pattern| line.starts_with(pattern) && line.ends_with(':'))
|
||||
{
|
||||
Some(pattern) => {
|
||||
self.on_keyword_match(line, pattern);
|
||||
}
|
||||
None => {
|
||||
self.append_to_current_content(line.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
use KeywordType::*;
|
||||
|
||||
/// finish parsing
|
||||
pub fn finish(&mut self) {
|
||||
if self.have_new_question && !self.cur_content.is_empty() {
|
||||
self.cur_question[&self.cur_tag] = self.get_current_content().into();
|
||||
self.add_cur_question();
|
||||
self.clear_current_content();
|
||||
self.have_new_question = false;
|
||||
}
|
||||
self.data["Вопросы"] = std::mem::replace(&mut self.questions, json::JsonValue::new_array());
|
||||
}
|
||||
ctx.last_keyword_type = ctx.cur_keyword_type;
|
||||
ctx.last_tag = ctx.cur_tag.clone();
|
||||
ctx.cur_keyword_type = Some(pattern.parse().unwrap());
|
||||
ctx.cur_tag = pattern.replace(' ', "").replace(':', "");
|
||||
|
||||
/// get parsed data
|
||||
pub fn get_parsed(self) -> json::JsonValue {
|
||||
self.data
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct FileText {
|
||||
name: String,
|
||||
text: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum TextReaderMessage {
|
||||
NextLine(String),
|
||||
EndOfFile(String),
|
||||
}
|
||||
|
||||
/// read txt files from zip and convert to json
|
||||
async fn zip_text_reader(tx: UnboundedSender<TextReaderMessage>) {
|
||||
// open archive just to list files
|
||||
let mut file = fs::File::open(INPUT_FILENAME).await.expect("open zip");
|
||||
let archive = ZipFileReader::with_tokio(&mut file)
|
||||
.await
|
||||
.expect("open zip file reader");
|
||||
|
||||
let source_files: Vec<(usize, String)> = archive
|
||||
.file()
|
||||
.entries()
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, entry)| !entry.dir().unwrap_or(true))
|
||||
.map(|(index, entry)| (index, entry.filename().as_str().unwrap().to_string()))
|
||||
.filter(|(_, filename)| filename.ends_with(".txt"))
|
||||
.collect();
|
||||
//
|
||||
drop(archive);
|
||||
let mut file = fs::File::open(INPUT_FILENAME).await.expect("open zip 2");
|
||||
let mut archive = ZipFileReader::with_tokio(&mut file)
|
||||
.await
|
||||
.expect("open zip file reader 2");
|
||||
for (index, name) in source_files {
|
||||
let entry_reader = archive.reader_with_entry(index).await.expect("read entry");
|
||||
let buf_reader = BufReader::new(entry_reader);
|
||||
let lines = buf_reader.lines();
|
||||
tokio::pin!(lines);
|
||||
while let Some(Ok(line)) = lines.next().await {
|
||||
tx.send(TextReaderMessage::NextLine(line))
|
||||
.expect("send line");
|
||||
}
|
||||
tx.send(TextReaderMessage::EndOfFile(name))
|
||||
.expect("send end");
|
||||
}
|
||||
|
||||
println!("read done ✅");
|
||||
}
|
||||
|
||||
/// convert text questions to json format
|
||||
async fn questions_converter(
|
||||
mut rx: UnboundedReceiver<TextReaderMessage>,
|
||||
tx: UnboundedSender<FileText>,
|
||||
) {
|
||||
let mut parser = QuestionsParser::new();
|
||||
while let Some(msg) = rx.recv().await {
|
||||
match msg {
|
||||
TextReaderMessage::NextLine(line) => {
|
||||
let line = line.trim();
|
||||
if line.is_empty() {
|
||||
continue;
|
||||
}
|
||||
parser.parse_line(line);
|
||||
}
|
||||
TextReaderMessage::EndOfFile(name) => {
|
||||
parser.finish();
|
||||
let data_json = parser.get_parsed();
|
||||
let text = data_json.pretty(2);
|
||||
tx.send(FileText { name, text }).expect("send json");
|
||||
parser = QuestionsParser::new();
|
||||
}
|
||||
}
|
||||
}
|
||||
println!("convert done ✅");
|
||||
}
|
||||
|
||||
/// write json data to zip files
|
||||
async fn zip_json_writer(mut rx: UnboundedReceiver<FileText>) {
|
||||
let mut file = fs::File::create(OUTPUT_FILENAME)
|
||||
.await
|
||||
.expect("create file");
|
||||
let mut writer = ZipFileWriter::with_tokio(&mut file);
|
||||
|
||||
while let Some(FileText { name, text: data }) = rx.recv().await {
|
||||
// make output filename
|
||||
let mut outfilename = PathBuf::from(name);
|
||||
outfilename.set_extension("json");
|
||||
let outfilename = outfilename.to_str().unwrap().to_string();
|
||||
let opts = ZipEntryBuilder::new(outfilename.into(), OUTPUT_COMPRESSION);
|
||||
|
||||
// write new zip entry
|
||||
writer
|
||||
.write_entry_whole(opts, data.as_bytes())
|
||||
.await
|
||||
.expect("write entry");
|
||||
}
|
||||
writer.close().await.expect("close writer");
|
||||
|
||||
println!("write done ✅");
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// check output filename
|
||||
match fs::metadata(OUTPUT_FILENAME).await {
|
||||
Ok(x) if x.is_dir() => return Err("output file is a directory!".into()),
|
||||
_ => (),
|
||||
// remember question id
|
||||
if let Some(QuestionStart) = ctx.cur_keyword_type {
|
||||
ctx.cur_question_pre["id"] = line.replace(':', "").as_str().into();
|
||||
};
|
||||
|
||||
let (reader_tx, reader_rx) = mpsc::unbounded_channel::<TextReaderMessage>();
|
||||
let (json_tx, json_rx) = mpsc::unbounded_channel::<FileText>();
|
||||
// apply accumulated content when new keyword found
|
||||
match ctx.last_keyword_type {
|
||||
Some(Global) => {
|
||||
ctx.cur_scope = DataScope::Global;
|
||||
ctx.data[&ctx.last_tag] = ctx.cur_content.join("\n").into()
|
||||
}
|
||||
Some(QuestionPre) => {
|
||||
ctx.cur_scope = DataScope::QuestionPre;
|
||||
ctx.cur_question_pre[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||
}
|
||||
Some(QuestionStart) => {
|
||||
ctx.cur_scope = DataScope::QuestionContent;
|
||||
// store prev question before reading new
|
||||
if ctx.have_new_question {
|
||||
ctx.questions.push_if_valid(ctx.cur_question.clone());
|
||||
}
|
||||
// prepare to read new question data with cur_question_pre values
|
||||
ctx.cur_question = ctx.cur_question_pre.clone();
|
||||
ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||
ctx.have_new_question = true;
|
||||
}
|
||||
Some(QuestionContent) => {
|
||||
ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||
}
|
||||
Some(CurrentScope) => {
|
||||
// match value to store data
|
||||
let scope_data = match ctx.cur_scope {
|
||||
DataScope::Global => &mut ctx.data,
|
||||
DataScope::QuestionPre => &mut ctx.cur_question_pre,
|
||||
DataScope::QuestionContent => &mut ctx.cur_question,
|
||||
};
|
||||
scope_data[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||
}
|
||||
_ => (), //None or Ignore
|
||||
};
|
||||
// clear content
|
||||
ctx.cur_content.clear();
|
||||
}
|
||||
None => {
|
||||
// accumulate content if line is not a keyword
|
||||
ctx.cur_content.push(line);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
tokio::try_join!(
|
||||
tokio::spawn(zip_text_reader(reader_tx)),
|
||||
tokio::spawn(questions_converter(reader_rx, json_tx)),
|
||||
tokio::spawn(zip_json_writer(json_rx))
|
||||
)?;
|
||||
// finish reading last question
|
||||
if ctx.have_new_question && !ctx.cur_content.is_empty() {
|
||||
ctx.cur_question[&ctx.cur_tag] = ctx.cur_content.join("\n").into();
|
||||
ctx.questions.push_if_valid(ctx.cur_question.clone());
|
||||
ctx.have_new_question = false;
|
||||
}
|
||||
|
||||
println!("all done ✅");
|
||||
ctx.data["Вопросы"] = ctx.questions.clone();
|
||||
Ok(ctx.data.clone())
|
||||
}
|
||||
|
||||
/// Splits a vector into a vector of `num` contiguous slices.
trait SplitTo<T> {
    /// Returns `num` contiguous slices that together cover the whole vector.
    ///
    /// All slices have `len / num` elements except the last one, which also
    /// receives the `len % num` leftover elements. When the vector has fewer
    /// than `num` elements, or `num` is zero, a single slice holding the
    /// whole vector is returned.
    fn split_to(&self, num: usize) -> Vec<&[T]>;
}

impl<T> SplitTo<T> for Vec<T> {
    fn split_to(&self, num: usize) -> Vec<&[T]> {
        // `num == 0` would divide by zero; treat it like "too few elements".
        let part_len = if num == 0 { 0 } else { self.len() / num };
        if part_len == 0 {
            return vec![self.as_slice()];
        }

        // The first `num - 1` parts are exactly `part_len` long; the last
        // part takes everything that remains, including the remainder.
        let last_start = part_len * (num - 1);
        let mut result = Vec::with_capacity(num);
        result.extend(self[..last_start].chunks_exact(part_len));
        result.push(&self[last_start..]);
        result
    }
}
|
||||
|
||||
fn process_files(files: &&[PathBuf]) {
|
||||
if files.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
let start_file = files[0].to_str().unwrap();
|
||||
println!("-> start from \"{}\" ({} files)", start_file, files.len());
|
||||
|
||||
let zip_file = fs::File::open(BASE_FILENAME).unwrap();
|
||||
let zip_reader = io::BufReader::new(zip_file);
|
||||
let mut archive = zip::ZipArchive::new(zip_reader).unwrap();
|
||||
|
||||
files.iter().for_each(|name| {
|
||||
let name_str = name.to_str().unwrap();
|
||||
|
||||
// parse txt file
|
||||
let file = archive.by_name(name_str).unwrap();
|
||||
let data = parse_file(file).unwrap();
|
||||
|
||||
// make output filename
|
||||
let mut outfilename = PathBuf::from(OUTPUT_PATH);
|
||||
outfilename.push(name);
|
||||
outfilename.set_extension("json");
|
||||
|
||||
// save json to file
|
||||
let mut outfile = fs::File::create(outfilename).unwrap();
|
||||
data.write_pretty(&mut outfile, 1).unwrap();
|
||||
});
|
||||
|
||||
println!("<- done {} files (from \"{}\")", files.len(), start_file);
|
||||
}
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// open archive just to list files
|
||||
let zip_file = fs::File::open(BASE_FILENAME)?;
|
||||
let zip_reader = io::BufReader::new(zip_file);
|
||||
let mut archive = zip::ZipArchive::new(zip_reader)?;
|
||||
|
||||
let source_files: Vec<PathBuf> = (0..archive.len())
|
||||
.map(|i| archive.by_index(i).unwrap().mangled_name())
|
||||
.filter(|name| {
|
||||
// skip files without "txt" extension
|
||||
match name.extension() {
|
||||
Some(ext) => match ext.to_str() {
|
||||
Some(ext_str) => ext_str.eq_ignore_ascii_case("txt"),
|
||||
_ => false, // extension is not valid unicode or not txt
|
||||
},
|
||||
_ => false, // no extension in filename
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
drop(archive);
|
||||
|
||||
// check output directory
|
||||
let out_dir: PathBuf = OUTPUT_PATH.into();
|
||||
if out_dir.is_file() {
|
||||
return Err("output directory is file!".into());
|
||||
} else if !out_dir.exists() {
|
||||
fs::create_dir_all(out_dir)?;
|
||||
};
|
||||
|
||||
println!(
|
||||
"processing {} files with {} threads...",
|
||||
source_files.len(),
|
||||
rayon::current_num_threads()
|
||||
);
|
||||
|
||||
// split vector and process its parts in parallel
|
||||
source_files
|
||||
.split_to(rayon::current_num_threads())
|
||||
.par_iter()
|
||||
.for_each(process_files);
|
||||
|
||||
println!("done");
|
||||
Ok(())
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user