add cli parser
This commit is contained in:
parent
f81e225427
commit
602916196b
98
src/main.rs
98
src/main.rs
@ -2,9 +2,11 @@ extern crate encoding;
|
|||||||
extern crate textstream;
|
extern crate textstream;
|
||||||
extern crate zip;
|
extern crate zip;
|
||||||
|
|
||||||
use encoding::all::{KOI8_R, UTF_8};
|
use clap::{Parser, ValueEnum};
|
||||||
|
use encoding::label::encoding_from_whatwg_label;
|
||||||
use encoding::Encoding;
|
use encoding::Encoding;
|
||||||
use encoding::{DecoderTrap, EncoderTrap};
|
use encoding::{DecoderTrap, EncoderTrap};
|
||||||
|
use regex::Regex;
|
||||||
use std::io::{Read, Seek, Write};
|
use std::io::{Read, Seek, Write};
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::{fs, io};
|
use std::{fs, io};
|
||||||
@ -12,8 +14,59 @@ use textstream::TextReader;
|
|||||||
use zip::write::FileOptions;
|
use zip::write::FileOptions;
|
||||||
use zip::ZipWriter;
|
use zip::ZipWriter;
|
||||||
|
|
||||||
const INPUT_FILENAME: &str = "baza.zip";
|
/// transcode txt files in zip archive
|
||||||
const OUTPUT_FILENAME: &str = "baza_utf8.zip";
|
#[derive(Parser, Debug)]
|
||||||
|
#[clap(author, version, about, long_about = None)]
|
||||||
|
#[clap(propagate_version = true)]
|
||||||
|
struct Cli {
|
||||||
|
/// input encoding
|
||||||
|
#[clap(short, long, default_value = "koi8-r")]
|
||||||
|
from: String,
|
||||||
|
|
||||||
|
/// output encoding
|
||||||
|
#[clap(short, long, default_value = "utf8")]
|
||||||
|
to: String,
|
||||||
|
|
||||||
|
/// output compression method
|
||||||
|
#[clap(arg_enum, short, long, default_value = "zstd")]
|
||||||
|
compression: OutputFileCompression,
|
||||||
|
|
||||||
|
/// filename filter (regex)
|
||||||
|
#[clap(short, long, default_value = r#".*\.txt$"#)]
|
||||||
|
regex: String,
|
||||||
|
|
||||||
|
/// input zip filename
|
||||||
|
#[clap(value_parser, default_value = "baza.zip")]
|
||||||
|
src: String,
|
||||||
|
|
||||||
|
/// output zip filename
|
||||||
|
#[clap(value_parser, default_value = "baza_utf8.zip")]
|
||||||
|
dst: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// output file compression method
|
||||||
|
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum, Debug)]
|
||||||
|
enum OutputFileCompression {
|
||||||
|
/// Store the file as is
|
||||||
|
Store,
|
||||||
|
/// Compress the file using Deflate
|
||||||
|
Deflate,
|
||||||
|
/// Compress the file using BZIP2
|
||||||
|
Bzip2,
|
||||||
|
/// Compress the file using ZStandard
|
||||||
|
Zstd,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<OutputFileCompression> for zip::CompressionMethod {
|
||||||
|
fn from(compression: OutputFileCompression) -> Self {
|
||||||
|
match compression {
|
||||||
|
OutputFileCompression::Store => Self::Stored,
|
||||||
|
OutputFileCompression::Deflate => Self::Deflated,
|
||||||
|
OutputFileCompression::Bzip2 => Self::Bzip2,
|
||||||
|
OutputFileCompression::Zstd => Self::Zstd,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
struct OutFileDescr<'a> {
|
struct OutFileDescr<'a> {
|
||||||
name: String,
|
name: String,
|
||||||
@ -44,16 +97,18 @@ fn write_file<T: Seek + Write>(
|
|||||||
|
|
||||||
fn process_files<R: Read + Seek>(
|
fn process_files<R: Read + Seek>(
|
||||||
archive: &mut zip::ZipArchive<R>,
|
archive: &mut zip::ZipArchive<R>,
|
||||||
|
output_filename: String,
|
||||||
|
enc_input: &(dyn Encoding + Send + Sync),
|
||||||
|
enc_output: &(dyn Encoding + Send + Sync),
|
||||||
|
compression: zip::CompressionMethod,
|
||||||
files: &[PathBuf],
|
files: &[PathBuf],
|
||||||
) -> Result<(), Box<dyn std::error::Error>> {
|
) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
if files.is_empty() {
|
if files.is_empty() {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
let enc_input = KOI8_R;
|
|
||||||
let enc_output = UTF_8;
|
|
||||||
let options = zip::write::FileOptions::default()
|
let options = zip::write::FileOptions::default()
|
||||||
.compression_method(zip::CompressionMethod::Deflated)
|
.compression_method(compression)
|
||||||
.compression_level(Some(9));
|
.compression_level(Some(9));
|
||||||
let file_def = OutFileDescr {
|
let file_def = OutFileDescr {
|
||||||
name: String::new(),
|
name: String::new(),
|
||||||
@ -62,7 +117,7 @@ fn process_files<R: Read + Seek>(
|
|||||||
trap: EncoderTrap::Ignore,
|
trap: EncoderTrap::Ignore,
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut outfile = fs::File::create(OUTPUT_FILENAME)?;
|
let mut outfile = fs::File::create(output_filename)?;
|
||||||
let mut zip_writer = ZipWriter::new(&mut outfile);
|
let mut zip_writer = ZipWriter::new(&mut outfile);
|
||||||
|
|
||||||
for name in files {
|
for name in files {
|
||||||
@ -86,23 +141,21 @@ fn process_files<R: Read + Seek>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
let args = Cli::parse();
|
||||||
|
|
||||||
|
let regex = Regex::new(&args.regex).unwrap();
|
||||||
|
let encoding_input = encoding_from_whatwg_label(&args.from).expect("input encoding");
|
||||||
|
let encoding_output = encoding_from_whatwg_label(&args.to).expect("output encoding");
|
||||||
|
let compression: zip::CompressionMethod = args.compression.into();
|
||||||
|
|
||||||
// open archive just to list files
|
// open archive just to list files
|
||||||
let zip_file = fs::File::open(INPUT_FILENAME)?;
|
let zip_file = fs::File::open(args.src)?;
|
||||||
let zip_reader = io::BufReader::new(zip_file);
|
let zip_reader = io::BufReader::new(zip_file);
|
||||||
let mut archive = zip::ZipArchive::new(zip_reader)?;
|
let mut archive = zip::ZipArchive::new(zip_reader)?;
|
||||||
|
|
||||||
let mut source_files: Vec<PathBuf> = (0..archive.len())
|
let mut source_files: Vec<PathBuf> = (0..archive.len())
|
||||||
.map(|i| archive.by_index(i).unwrap().mangled_name())
|
.map(|i| archive.by_index(i).unwrap().mangled_name())
|
||||||
.filter(|name| {
|
.filter(|name| regex.is_match(name.to_str().unwrap()))
|
||||||
// skip files without "txt" extension
|
|
||||||
match name.extension() {
|
|
||||||
Some(ext) => match ext.to_str() {
|
|
||||||
Some(ext_str) => ext_str.eq_ignore_ascii_case("txt"),
|
|
||||||
_ => false, // extension is not valid unicode or not txt
|
|
||||||
},
|
|
||||||
_ => false, // no extension in filename
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
println!("processing {} files...", source_files.len());
|
println!("processing {} files...", source_files.len());
|
||||||
@ -110,7 +163,14 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||||||
source_files.sort();
|
source_files.sort();
|
||||||
let source_files = source_files;
|
let source_files = source_files;
|
||||||
|
|
||||||
process_files(&mut archive, &source_files)?;
|
process_files(
|
||||||
|
&mut archive,
|
||||||
|
args.dst,
|
||||||
|
encoding_input,
|
||||||
|
encoding_output,
|
||||||
|
compression,
|
||||||
|
&source_files,
|
||||||
|
)?;
|
||||||
|
|
||||||
println!("done");
|
println!("done");
|
||||||
Ok(())
|
Ok(())
|
||||||
|
Loading…
Reference in New Issue
Block a user