initial commit

This commit is contained in:
Dmitry Belyaev 2022-09-12 20:50:31 +03:00
commit d44ccdffa3
Signed by: b4tman
GPG Key ID: 41A00BF15EA7E5F3
4 changed files with 604 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
/target
baza*.zip

456
Cargo.lock generated Normal file
View File

@ -0,0 +1,456 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "aes"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e8b47f52ea9bae42228d07ec09eb676433d7c4ed1ebdf0f1d1c29ed446f1ab8"
dependencies = [
"cfg-if",
"cipher",
"cpufeatures",
"opaque-debug",
]
[[package]]
name = "base64ct"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a32fd6af2b5827bce66c29053ba0e7c42b9dcab01835835058558c10851a46b"
[[package]]
name = "block-buffer"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69cce20737498f97b993470a6e536b8523f0af7892a4f928cceb1ac5e52ebe7e"
dependencies = [
"generic-array",
]
[[package]]
name = "byteorder"
version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
[[package]]
name = "bzip2"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6afcd980b5f3a45017c57e57a2fcccbb351cc43a356ce117ef760ef8052b89b0"
dependencies = [
"bzip2-sys",
"libc",
]
[[package]]
name = "bzip2-sys"
version = "0.1.11+1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc"
dependencies = [
"cc",
"libc",
"pkg-config",
]
[[package]]
name = "cc"
version = "1.0.73"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
dependencies = [
"jobserver",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "cipher"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ee52072ec15386f770805afd189a01c8841be8696bed250fa2f13c4c0d6dfb7"
dependencies = [
"generic-array",
]
[[package]]
name = "constant_time_eq"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
[[package]]
name = "cpufeatures"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28d997bd5e24a5928dd43e46dc529867e207907fe0b239c3477d924f7f2ca320"
dependencies = [
"libc",
]
[[package]]
name = "crc32fast"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc"
dependencies = [
"cfg-if",
"once_cell",
]
[[package]]
name = "crypto-common"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
dependencies = [
"generic-array",
"typenum",
]
[[package]]
name = "digest"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506"
dependencies = [
"block-buffer",
"crypto-common",
"subtle",
]
[[package]]
name = "encoding"
version = "0.2.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec"
dependencies = [
"encoding-index-japanese",
"encoding-index-korean",
"encoding-index-simpchinese",
"encoding-index-singlebyte",
"encoding-index-tradchinese",
]
[[package]]
name = "encoding-index-japanese"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91"
dependencies = [
"encoding_index_tests",
]
[[package]]
name = "encoding-index-korean"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81"
dependencies = [
"encoding_index_tests",
]
[[package]]
name = "encoding-index-simpchinese"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7"
dependencies = [
"encoding_index_tests",
]
[[package]]
name = "encoding-index-singlebyte"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a"
dependencies = [
"encoding_index_tests",
]
[[package]]
name = "encoding-index-tradchinese"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18"
dependencies = [
"encoding_index_tests",
]
[[package]]
name = "encoding_index_tests"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569"
[[package]]
name = "flate2"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f82b0f4c27ad9f8bfd1f3208d882da2b09c301bc1c828fd3a00d0216d2fbbff6"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "generic-array"
version = "0.14.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "hmac"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
dependencies = [
"digest",
]
[[package]]
name = "itoa"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754"
[[package]]
name = "jobserver"
version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa"
dependencies = [
"libc",
]
[[package]]
name = "libc"
version = "0.2.132"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5"
[[package]]
name = "memchr"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "148fab2e51b4f1cfc66da2a7c32981d1d3c083a803978268bb11fe4b86925e7a"
dependencies = [
"libc",
]
[[package]]
name = "miniz_oxide"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96590ba8f175222643a85693f33d26e9c8a015f599c216509b1a6894af675d34"
dependencies = [
"adler",
]
[[package]]
name = "num_threads"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44"
dependencies = [
"libc",
]
[[package]]
name = "once_cell"
version = "1.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f7254b99e31cad77da24b08ebf628882739a608578bb1bcdfc1f9c21260d7c0"
[[package]]
name = "opaque-debug"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5"
[[package]]
name = "password-hash"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d791538a6dcc1e7cb7fe6f6b58aca40e7f79403c45b2bc274008b5e647af1d8"
dependencies = [
"base64ct",
"rand_core",
"subtle",
]
[[package]]
name = "pbkdf2"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "271779f35b581956db91a3e55737327a03aa051e90b1c47aeb189508533adfd7"
dependencies = [
"digest",
"hmac",
"password-hash",
"sha2",
]
[[package]]
name = "pkg-config"
version = "0.3.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae"
[[package]]
name = "rand_core"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
[[package]]
name = "sha1"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "006769ba83e921b3085caa8334186b00cf92b4cb1a6cf4632fbccc8eff5c7549"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]]
name = "sha2"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf9db03534dff993187064c4e0c05a5708d2a9728ace9a8959b77bedf415dac5"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]]
name = "subtle"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601"
[[package]]
name = "textstream"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7ed81b342f6566026755e7f4b7798810b1c159722e427d212ce72c2c58ffdaa"
dependencies = [
"encoding",
"memchr",
]
[[package]]
name = "time"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c3f9a28b618c3a6b9251b6908e9c99e04b9e5c02e6581ccbb67d59c34ef7f9b"
dependencies = [
"itoa",
"libc",
"num_threads",
"time-macros",
]
[[package]]
name = "time-macros"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42657b1a6f4d817cda8e7a0ace261fe0cc946cf3a80314390b22cc61ae080792"
[[package]]
name = "typenum"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "zip"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf225bcf73bb52cbb496e70475c7bd7a3f769df699c0020f6c7bd9a96dcf0b8d"
dependencies = [
"aes",
"byteorder",
"bzip2",
"constant_time_eq",
"crc32fast",
"crossbeam-utils",
"flate2",
"hmac",
"pbkdf2",
"sha1",
"time",
"zstd",
]
[[package]]
name = "zstd"
version = "0.10.2+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f4a6bd64f22b5e3e94b4e238669ff9f10815c27a5180108b849d24174a83847"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
version = "4.1.6+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94b61c51bb270702d6167b8ce67340d2754b088d0c091b06e593aa772c3ee9bb"
dependencies = [
"libc",
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "1.6.3+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc49afa5c8d634e75761feda8c592051e7eeb4683ba827211eb0d731d3402ea8"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "ztb_recode"
version = "0.1.0"
dependencies = [
"encoding",
"textstream",
"zip",
]

16
Cargo.toml Normal file
View File

@ -0,0 +1,16 @@
[package]
name = "ztb_recode"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
encoding = "0.2.33"
textstream = "0.1.1"
zip = "0.6.2"
[profile.release]
opt-level = 3
debug = false
lto = true

129
src/main.rs Normal file
View File

@ -0,0 +1,129 @@
extern crate encoding;
extern crate textstream;
extern crate zip;
use encoding::all::{KOI8_R, UTF_8};
use encoding::Encoding;
use encoding::{DecoderTrap, EncoderTrap};
use std::io::{Read, Seek, Write};
use std::path::PathBuf;
use std::{fs, io};
use textstream::TextReader;
use zip::write::FileOptions;
use zip::ZipWriter;
const INPUT_FILENAME: &str = "baza.zip";
const OUTPUT_FILENAME: &str = "baza_utf8.zip";
struct OutFileDescr<'a> {
name: String,
options: &'a FileOptions,
encoding: &'a dyn Encoding,
trap: EncoderTrap,
}
impl<'a> OutFileDescr<'a> {
fn new(
name: String,
options: &'a FileOptions,
enc: &'a dyn Encoding,
trap: EncoderTrap,
) -> OutFileDescr<'a> {
OutFileDescr {
name,
options,
encoding: enc,
trap,
}
}
}
fn read_file(file: impl Read, enc: &dyn Encoding) -> Result<String, Box<dyn std::error::Error>> {
let buf = io::BufReader::new(file);
let mut reader = TextReader::new(buf, enc, DecoderTrap::Ignore);
let mut result = String::new();
reader.read_to_end(&mut result).or(Err("decode error"))?;
Ok(result)
}
fn write_file<T: Seek + Write>(
arc: &mut ZipWriter<T>,
f: OutFileDescr,
data: String,
) -> Result<(), Box<dyn std::error::Error>> {
arc.start_file(f.name, *f.options)?;
let data = f.encoding.encode(data.as_str(), f.trap)?;
arc.write_all(&data)?;
Ok(())
}
fn process_files(files: &[PathBuf]) {
if files.is_empty() {
return;
}
let enc_input = KOI8_R;
let enc_output = UTF_8;
let options = zip::write::FileOptions::default()
.compression_method(zip::CompressionMethod::Deflated)
.compression_level(Some(9));
let zip_file = fs::File::open(INPUT_FILENAME).unwrap();
let zip_reader = io::BufReader::new(zip_file);
let mut archive = zip::ZipArchive::new(zip_reader).unwrap();
let mut outfile = fs::File::create(OUTPUT_FILENAME).unwrap();
let mut zip_writer = ZipWriter::new(&mut outfile);
files.iter().for_each(|name| {
let name_str = name.to_str().unwrap();
// read string from file in input zip
let file = archive.by_name(name_str).unwrap();
let data = read_file(file, enc_input).unwrap();
// write string to file in output zip
let out_file = OutFileDescr::new(
name_str.to_string(),
&options,
enc_output,
EncoderTrap::Ignore,
);
write_file(&mut zip_writer, out_file, data).unwrap();
});
zip_writer.finish().unwrap();
}
fn main() -> Result<(), Box<dyn std::error::Error>> {
// open archive just to list files
let zip_file = fs::File::open(INPUT_FILENAME)?;
let zip_reader = io::BufReader::new(zip_file);
let mut archive = zip::ZipArchive::new(zip_reader)?;
let mut source_files: Vec<PathBuf> = (0..archive.len())
.map(|i| archive.by_index(i).unwrap().mangled_name())
.filter(|name| {
// skip files without "txt" extension
match name.extension() {
Some(ext) => match ext.to_str() {
Some(ext_str) => ext_str.eq_ignore_ascii_case("txt"),
_ => false, // extension is not valid unicode or not txt
},
_ => false, // no extension in filename
}
})
.collect();
drop(archive);
println!("processing {} files...", source_files.len());
source_files.sort();
let source_files = source_files;
process_files(&source_files);
println!("done");
Ok(())
}