130 lines
3.6 KiB
Rust
130 lines
3.6 KiB
Rust
extern crate encoding;
|
|
extern crate textstream;
|
|
extern crate zip;
|
|
|
|
use encoding::all::{KOI8_R, UTF_8};
|
|
use encoding::Encoding;
|
|
use encoding::{DecoderTrap, EncoderTrap};
|
|
use std::io::{Read, Seek, Write};
|
|
use std::path::PathBuf;
|
|
use std::{fs, io};
|
|
use textstream::TextReader;
|
|
use zip::write::FileOptions;
|
|
use zip::ZipWriter;
|
|
|
|
/// Path of the zip archive that is opened for reading (by `main` to list
/// entries and by `process_files` to read them).
const INPUT_FILENAME: &str = "baza.zip";
|
|
/// Path of the zip archive that `process_files` creates for the re-encoded entries.
const OUTPUT_FILENAME: &str = "baza_utf8.zip";
|
|
|
|
struct OutFileDescr<'a> {
|
|
name: String,
|
|
options: &'a FileOptions,
|
|
encoding: &'a dyn Encoding,
|
|
trap: EncoderTrap,
|
|
}
|
|
|
|
impl<'a> OutFileDescr<'a> {
|
|
fn new(
|
|
name: String,
|
|
options: &'a FileOptions,
|
|
enc: &'a dyn Encoding,
|
|
trap: EncoderTrap,
|
|
) -> OutFileDescr<'a> {
|
|
OutFileDescr {
|
|
name,
|
|
options,
|
|
encoding: enc,
|
|
trap,
|
|
}
|
|
}
|
|
}
|
|
|
|
fn read_file(file: impl Read, enc: &dyn Encoding) -> Result<String, Box<dyn std::error::Error>> {
|
|
let buf = io::BufReader::new(file);
|
|
let mut reader = TextReader::new(buf, enc, DecoderTrap::Ignore);
|
|
let mut result = String::new();
|
|
reader.read_to_end(&mut result).or(Err("decode error"))?;
|
|
|
|
Ok(result)
|
|
}
|
|
|
|
fn write_file<T: Seek + Write>(
|
|
arc: &mut ZipWriter<T>,
|
|
f: OutFileDescr,
|
|
data: String,
|
|
) -> Result<(), Box<dyn std::error::Error>> {
|
|
arc.start_file(f.name, *f.options)?;
|
|
let data = f.encoding.encode(data.as_str(), f.trap)?;
|
|
arc.write_all(&data)?;
|
|
Ok(())
|
|
}
|
|
|
|
fn process_files(files: &[PathBuf]) {
|
|
if files.is_empty() {
|
|
return;
|
|
}
|
|
|
|
let enc_input = KOI8_R;
|
|
let enc_output = UTF_8;
|
|
let options = zip::write::FileOptions::default()
|
|
.compression_method(zip::CompressionMethod::Deflated)
|
|
.compression_level(Some(9));
|
|
|
|
let zip_file = fs::File::open(INPUT_FILENAME).unwrap();
|
|
let zip_reader = io::BufReader::new(zip_file);
|
|
let mut archive = zip::ZipArchive::new(zip_reader).unwrap();
|
|
|
|
let mut outfile = fs::File::create(OUTPUT_FILENAME).unwrap();
|
|
let mut zip_writer = ZipWriter::new(&mut outfile);
|
|
|
|
files.iter().for_each(|name| {
|
|
let name_str = name.to_str().unwrap();
|
|
|
|
// read string from file in input zip
|
|
let file = archive.by_name(name_str).unwrap();
|
|
let data = read_file(file, enc_input).unwrap();
|
|
|
|
// write string to file in output zip
|
|
let out_file = OutFileDescr::new(
|
|
name_str.to_string(),
|
|
&options,
|
|
enc_output,
|
|
EncoderTrap::Ignore,
|
|
);
|
|
write_file(&mut zip_writer, out_file, data).unwrap();
|
|
});
|
|
|
|
zip_writer.finish().unwrap();
|
|
}
|
|
|
|
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
// open archive just to list files
|
|
let zip_file = fs::File::open(INPUT_FILENAME)?;
|
|
let zip_reader = io::BufReader::new(zip_file);
|
|
let mut archive = zip::ZipArchive::new(zip_reader)?;
|
|
|
|
let mut source_files: Vec<PathBuf> = (0..archive.len())
|
|
.map(|i| archive.by_index(i).unwrap().mangled_name())
|
|
.filter(|name| {
|
|
// skip files without "txt" extension
|
|
match name.extension() {
|
|
Some(ext) => match ext.to_str() {
|
|
Some(ext_str) => ext_str.eq_ignore_ascii_case("txt"),
|
|
_ => false, // extension is not valid unicode or not txt
|
|
},
|
|
_ => false, // no extension in filename
|
|
}
|
|
})
|
|
.collect();
|
|
drop(archive);
|
|
|
|
println!("processing {} files...", source_files.len());
|
|
|
|
source_files.sort();
|
|
let source_files = source_files;
|
|
|
|
process_files(&source_files);
|
|
|
|
println!("done");
|
|
Ok(())
|
|
}
|