zip to zip

This commit is contained in:
Dmitry Belyaev 2022-09-12 22:34:17 +03:00
parent bb126256c7
commit 56a20dc6b1
Signed by: b4tman
GPG Key ID: 41A00BF15EA7E5F3
2 changed files with 41 additions and 28 deletions

1
.gitignore vendored
View File

@@ -2,3 +2,4 @@
**/*.rs.bk **/*.rs.bk
baza*.zip baza*.zip
/json/ /json/
json.zip

View File

@@ -3,13 +3,15 @@ extern crate json;
extern crate tokio; extern crate tokio;
use async_zip::read::fs::ZipFileReader; use async_zip::read::fs::ZipFileReader;
use async_zip::write::{EntryOptions, ZipFileWriter};
use async_zip::Compression;
use std::path::PathBuf; use std::path::PathBuf;
use std::str::FromStr; use std::str::FromStr;
use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWrite, BufReader};
use tokio::{fs, task}; use tokio::{fs, task};
use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader};
const BASE_FILENAME: &str = "baza_utf8.zip"; const INPUT_FILENAME: &str = "baza_utf8.zip";
const OUTPUT_PATH: &str = "json"; const OUTPUT_FILENAME: &str = "json.zip";
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
enum KeywordType { enum KeywordType {
@@ -219,59 +221,69 @@ async fn parse_file(
Ok(ctx.data.clone()) Ok(ctx.data.clone())
} }
async fn process_file( async fn process_file<T: AsyncWrite + Unpin>(
archive: &ZipFileReader, archive: &ZipFileReader,
writer: &mut ZipFileWriter<T>,
index: usize, index: usize,
name: String, name: String,
) -> Result<(), Box<dyn std::error::Error>> { ) -> Result<(), Box<dyn std::error::Error>> {
let entry_reader = archive.entry_reader(index).await?; let entry_reader = archive.entry_reader(index).await?;
// make output filename // parse file to json
let mut outfilename = PathBuf::from(OUTPUT_PATH);
outfilename.push(name);
outfilename.set_extension("json");
// save json to file
let new_data = parse_file(entry_reader).await?; let new_data = parse_file(entry_reader).await?;
let data_str = task::spawn_blocking(move || {
new_data.pretty(2)
}).await?;
let mut outfile = fs::File::create(outfilename).await?; // dump json to str
outfile.write_all(data_str.as_bytes()).await?; let data_str = task::spawn_blocking(move || new_data.pretty(2)).await?;
// make output filename
let mut outfilename = PathBuf::from(name);
outfilename.set_extension("json");
let outfilename = outfilename.to_str().unwrap().to_string();
let opts = EntryOptions::new(outfilename, Compression::Deflate);
// write new zip entry
writer.write_entry_whole(opts, data_str.as_bytes()).await?;
Ok(()) Ok(())
} }
#[tokio::main] #[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> { async fn main() -> Result<(), Box<dyn std::error::Error>> {
// open archive just to list files // open archive just to list files
let archive = ZipFileReader::new(String::from(BASE_FILENAME)).await?; let archive = ZipFileReader::new(String::from(INPUT_FILENAME)).await?;
let source_files: Vec<(usize, String)> = archive let source_files: Vec<(usize, String)> = archive
.entries() .entries()
.iter() .iter()
.enumerate() .enumerate()
.filter(|item| !item.1.dir()) .filter(|(_, entry)| !entry.dir())
.filter(|item| { .filter(|(_, entry)| {
// skip files without "txt" extension // skip files without "txt" extension
item.1.name().ends_with(".txt") entry.name().ends_with(".txt")
}) })
.map(|item| (item.0, item.1.name().to_string())) .map(|(index, entry)| (index, entry.name().to_string()))
.collect(); .collect();
// check output directory // check output filename
match fs::metadata(OUTPUT_PATH).await { match fs::metadata(OUTPUT_FILENAME).await {
Err(_) => fs::create_dir_all(OUTPUT_PATH).await?, Ok(x) if x.is_dir() => return Err("output file is a directory!".into()),
Ok(x) if x.is_file() => return Err("output directory is file!".into()),
_ => (), _ => (),
}; };
println!("processing {} files ...", source_files.len()); println!(
r#" 【 "{}" ➡ "{}" 】 processing {} files 💾⏳ ..."#,
INPUT_FILENAME,
OUTPUT_FILENAME,
source_files.len()
);
for i in source_files { let mut file = fs::File::create(OUTPUT_FILENAME).await?;
process_file(&archive, i.0, i.1).await?; let mut writer = ZipFileWriter::new(&mut file);
for (index, name) in source_files {
process_file(&archive, &mut writer, index, name).await?;
} }
writer.close().await?;
println!("done"); println!("done");
Ok(()) Ok(())
} }