diff --git a/src/main.rs b/src/main.rs index 830104e..03e08b7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,9 +2,11 @@ extern crate encoding; extern crate textstream; extern crate zip; -use encoding::all::{KOI8_R, UTF_8}; +use clap::{Parser, ValueEnum}; +use encoding::label::encoding_from_whatwg_label; use encoding::Encoding; use encoding::{DecoderTrap, EncoderTrap}; +use regex::Regex; use std::io::{Read, Seek, Write}; use std::path::PathBuf; use std::{fs, io}; @@ -12,8 +14,59 @@ use textstream::TextReader; use zip::write::FileOptions; use zip::ZipWriter; -const INPUT_FILENAME: &str = "baza.zip"; -const OUTPUT_FILENAME: &str = "baza_utf8.zip"; +/// transcode txt files in zip archive +#[derive(Parser, Debug)] +#[clap(author, version, about, long_about = None)] +#[clap(propagate_version = true)] +struct Cli { + /// input encoding + #[clap(short, long, default_value = "koi8-r")] + from: String, + + /// output encoding + #[clap(short, long, default_value = "utf8")] + to: String, + + /// output compression method + #[clap(arg_enum, short, long, default_value = "zstd")] + compression: OutputFileCompression, + + /// filename filter (regex) + #[clap(short, long, default_value = r#".*\.txt$"#)] + regex: String, + + /// input zip filename + #[clap(value_parser, default_value = "baza.zip")] + src: String, + + /// output zip filename + #[clap(value_parser, default_value = "baza_utf8.zip")] + dst: String, +} + +/// output file compression method +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum, Debug)] +enum OutputFileCompression { + /// Store the file as is + Store, + /// Compress the file using Deflate + Deflate, + /// Compress the file using BZIP2 + Bzip2, + /// Compress the file using ZStandard + Zstd, +} + +impl From<OutputFileCompression> for zip::CompressionMethod { + fn from(compression: OutputFileCompression) -> Self { + match compression { + OutputFileCompression::Store => Self::Stored, + OutputFileCompression::Deflate => Self::Deflated, + OutputFileCompression::Bzip2 => 
Self::Bzip2, + OutputFileCompression::Zstd => Self::Zstd, + } + } +} struct OutFileDescr<'a> { name: String, @@ -44,16 +97,18 @@ fn write_file( fn process_files( archive: &mut zip::ZipArchive, + output_filename: String, + enc_input: &(dyn Encoding + Send + Sync), + enc_output: &(dyn Encoding + Send + Sync), + compression: zip::CompressionMethod, files: &[PathBuf], ) -> Result<(), Box> { if files.is_empty() { return Ok(()); } - let enc_input = KOI8_R; - let enc_output = UTF_8; let options = zip::write::FileOptions::default() - .compression_method(zip::CompressionMethod::Deflated) + .compression_method(compression) .compression_level(Some(9)); let file_def = OutFileDescr { name: String::new(), @@ -62,7 +117,7 @@ fn process_files( trap: EncoderTrap::Ignore, }; - let mut outfile = fs::File::create(OUTPUT_FILENAME)?; + let mut outfile = fs::File::create(output_filename)?; let mut zip_writer = ZipWriter::new(&mut outfile); for name in files { @@ -86,23 +141,21 @@ fn process_files( } fn main() -> Result<(), Box> { + let args = Cli::parse(); + + let regex = Regex::new(&args.regex).unwrap(); + let encoding_input = encoding_from_whatwg_label(&args.from).expect("input encoding"); + let encoding_output = encoding_from_whatwg_label(&args.to).expect("output encoding"); + let compression: zip::CompressionMethod = args.compression.into(); + // open archive just to list files - let zip_file = fs::File::open(INPUT_FILENAME)?; + let zip_file = fs::File::open(args.src)?; let zip_reader = io::BufReader::new(zip_file); let mut archive = zip::ZipArchive::new(zip_reader)?; let mut source_files: Vec = (0..archive.len()) .map(|i| archive.by_index(i).unwrap().mangled_name()) - .filter(|name| { - // skip files without "txt" extension - match name.extension() { - Some(ext) => match ext.to_str() { - Some(ext_str) => ext_str.eq_ignore_ascii_case("txt"), - _ => false, // extension is not valid unicode or not txt - }, - _ => false, // no extension in filename - } - }) + .filter(|name| 
regex.is_match(name.to_str().unwrap())) .collect(); println!("processing {} files...", source_files.len()); @@ -110,7 +163,14 @@ fn main() -> Result<(), Box> { source_files.sort(); let source_files = source_files; - process_files(&mut archive, &source_files)?; + process_files( + &mut archive, + args.dst, + encoding_input, + encoding_output, + compression, + &source_files, + )?; println!("done"); Ok(())