split reader/converter
This commit is contained in:
parent
24f675ca16
commit
2a06eabd15
102
src/main.rs
102
src/main.rs
@ -8,7 +8,7 @@ use async_zip::Compression;
|
|||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
use tokio::fs;
|
use tokio::fs;
|
||||||
use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader, BufWriter};
|
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader, BufWriter};
|
||||||
use tokio::sync::mpsc::{self, UnboundedReceiver, UnboundedSender};
|
use tokio::sync::mpsc::{self, UnboundedReceiver, UnboundedSender};
|
||||||
|
|
||||||
const INPUT_FILENAME: &str = "baza_utf8.zip";
|
const INPUT_FILENAME: &str = "baza_utf8.zip";
|
||||||
@ -251,39 +251,22 @@ impl QuestionsParser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn parse_file(
|
#[derive(Debug)]
|
||||||
entry_reader: impl AsyncReadExt + Unpin,
|
struct FileText {
|
||||||
) -> Result<json::JsonValue, Box<dyn std::error::Error>> {
|
name: String,
|
||||||
let buf_reader = BufReader::new(entry_reader);
|
text: String,
|
||||||
let mut lines = buf_reader.lines();
|
|
||||||
|
|
||||||
let mut parser = QuestionsParser::new();
|
|
||||||
|
|
||||||
while let Some(line_r) = lines.next_line().await? {
|
|
||||||
let line = line_r.trim();
|
|
||||||
if line.is_empty() {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
parser.parse_line(line);
|
|
||||||
}
|
|
||||||
|
|
||||||
parser.finish();
|
|
||||||
Ok(parser.get_parsed())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
struct ConvertedFile {
|
enum TextReaderMessage {
|
||||||
name: String,
|
NextLine(String),
|
||||||
data: String,
|
EndOfFile(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// read txt files from zip and convert to json
|
/// read txt files from zip and convert to json
|
||||||
async fn reader_converter(tx: UnboundedSender<ConvertedFile>) {
|
async fn zip_text_reader(tx: UnboundedSender<TextReaderMessage>) {
|
||||||
// open archive just to list files
|
// open archive just to list files
|
||||||
let archive = ZipFileReader::new(String::from(INPUT_FILENAME))
|
let archive = ZipFileReader::new(INPUT_FILENAME).await.expect("open zip");
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
let source_files = archive
|
let source_files = archive
|
||||||
.entries()
|
.entries()
|
||||||
@ -297,25 +280,56 @@ async fn reader_converter(tx: UnboundedSender<ConvertedFile>) {
|
|||||||
.map(|(index, entry)| (index, entry.name().to_string()));
|
.map(|(index, entry)| (index, entry.name().to_string()));
|
||||||
//
|
//
|
||||||
for (index, name) in source_files {
|
for (index, name) in source_files {
|
||||||
let entry_reader = archive.entry_reader(index).await.unwrap();
|
let entry_reader = archive.entry_reader(index).await.expect("read entry");
|
||||||
// parse file to json
|
let buf_reader = BufReader::new(entry_reader);
|
||||||
let new_data = parse_file(entry_reader).await.unwrap();
|
let mut lines = buf_reader.lines();
|
||||||
// dump json to str
|
while let Some(line) = lines.next_line().await.expect("next line") {
|
||||||
let data = new_data.pretty(2);
|
tx.send(TextReaderMessage::NextLine(line))
|
||||||
|
.expect("send line");
|
||||||
tx.send(ConvertedFile { name, data }).unwrap();
|
}
|
||||||
|
tx.send(TextReaderMessage::EndOfFile(name))
|
||||||
|
.expect("send end");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
println!("read done ✅");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// convert text questions to json format
|
||||||
|
async fn questions_converter(
|
||||||
|
mut rx: UnboundedReceiver<TextReaderMessage>,
|
||||||
|
tx: UnboundedSender<FileText>,
|
||||||
|
) {
|
||||||
|
let mut parser = QuestionsParser::new();
|
||||||
|
while let Some(msg) = rx.recv().await {
|
||||||
|
match msg {
|
||||||
|
TextReaderMessage::NextLine(line) => {
|
||||||
|
let line = line.trim();
|
||||||
|
if line.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
parser.parse_line(line);
|
||||||
|
}
|
||||||
|
TextReaderMessage::EndOfFile(name) => {
|
||||||
|
parser.finish();
|
||||||
|
let data_json = parser.get_parsed();
|
||||||
|
let text = data_json.pretty(2);
|
||||||
|
tx.send(FileText { name, text }).expect("send json");
|
||||||
|
parser = QuestionsParser::new();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
println!("convert done ✅");
|
println!("convert done ✅");
|
||||||
}
|
}
|
||||||
|
|
||||||
/// write json data to zip files
|
/// write json data to zip files
|
||||||
async fn zip_writer(mut rx: UnboundedReceiver<ConvertedFile>) {
|
async fn zip_json_writer(mut rx: UnboundedReceiver<FileText>) {
|
||||||
let file = fs::File::create(OUTPUT_FILENAME).await.unwrap();
|
let file = fs::File::create(OUTPUT_FILENAME)
|
||||||
|
.await
|
||||||
|
.expect("create file");
|
||||||
let mut buf = BufWriter::with_capacity(100 * 1024 * 1024, file);
|
let mut buf = BufWriter::with_capacity(100 * 1024 * 1024, file);
|
||||||
let mut writer = ZipFileWriter::new(&mut buf);
|
let mut writer = ZipFileWriter::new(&mut buf);
|
||||||
|
|
||||||
while let Some(ConvertedFile { name, data }) = rx.recv().await {
|
while let Some(FileText { name, text: data }) = rx.recv().await {
|
||||||
// make output filename
|
// make output filename
|
||||||
let mut outfilename = PathBuf::from(name);
|
let mut outfilename = PathBuf::from(name);
|
||||||
outfilename.set_extension("json");
|
outfilename.set_extension("json");
|
||||||
@ -326,10 +340,10 @@ async fn zip_writer(mut rx: UnboundedReceiver<ConvertedFile>) {
|
|||||||
writer
|
writer
|
||||||
.write_entry_whole(opts, data.as_bytes())
|
.write_entry_whole(opts, data.as_bytes())
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.expect("write entry");
|
||||||
}
|
}
|
||||||
writer.close().await.unwrap();
|
writer.close().await.expect("close writer");
|
||||||
buf.flush().await.unwrap();
|
buf.flush().await.expect("flush buffer");
|
||||||
|
|
||||||
println!("write done ✅");
|
println!("write done ✅");
|
||||||
}
|
}
|
||||||
@ -342,11 +356,13 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||||||
_ => (),
|
_ => (),
|
||||||
};
|
};
|
||||||
|
|
||||||
let (tx, rx) = mpsc::unbounded_channel::<ConvertedFile>();
|
let (reader_tx, reader_rx) = mpsc::unbounded_channel::<TextReaderMessage>();
|
||||||
|
let (json_tx, json_rx) = mpsc::unbounded_channel::<FileText>();
|
||||||
|
|
||||||
tokio::try_join!(
|
tokio::try_join!(
|
||||||
tokio::spawn(reader_converter(tx)),
|
tokio::spawn(zip_text_reader(reader_tx)),
|
||||||
tokio::spawn(zip_writer(rx))
|
tokio::spawn(questions_converter(reader_rx, json_tx)),
|
||||||
|
tokio::spawn(zip_json_writer(json_rx))
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
println!("all done ✅");
|
println!("all done ✅");
|
||||||
|
Loading…
Reference in New Issue
Block a user