refactor

commit 44d2567419
parent befca99019

src/main.rs | 62
@@ -7,12 +7,12 @@ extern crate zip;
 use encoding::all::KOI8_R;
 use encoding::DecoderTrap;
 use rayon::prelude::*;
-use std::fs;
-use std::io;
 use std::path::PathBuf;
+use std::{fs, io};
 use textstream::TextReader;
 
-const BASE_FILENAME: &str = "./baza.zip";
+const BASE_FILENAME: &str = "baza.zip";
+const OUTPUT_PATH: &str = "json";
 
 #[derive(Debug, Clone, Copy)]
 enum KeywordType {
@@ -31,21 +31,6 @@ enum DataScope {
     QuestionContent,
 }
 
-fn keyword_type(pattern: &str) -> KeywordType {
-    use KeywordType::*;
-    match pattern {
-        "Мета:" => Ignore,
-        "Чемпионат:" | "Пакет:" => Global,
-        "Тур:" => QuestionPre,
-        "Вопрос " => QuestionStart,
-        "Вопрос:" => QuestionStart,
-        "Ответ:" | "Зачет:" => QuestionContent,
-        _ => CurrentScope,
-        // "URL:" | "Ссылка:" | "Дата:" | "Обработан:" | "Автор:" | "Редактор:" | "Копирайт:" | "Инфо:" |
-        // "Тема:" | "Вид:" | "Тип:" | "Источник:" | "Рейтинг:" | "Комментарий:" | "Комментарии:"
-    }
-}
-
 struct Context {
     // global output value
     data: json::JsonValue,
@@ -88,6 +73,23 @@ impl Context {
     }
 }
 
+impl KeywordType {
+    fn from(pattern: &str) -> KeywordType {
+        use KeywordType::*;
+        match pattern {
+            "Мета:" => Ignore,
+            "Чемпионат:" | "Пакет:" => Global,
+            "Тур:" => QuestionPre,
+            "Вопрос " => QuestionStart,
+            "Вопрос:" => QuestionStart,
+            "Ответ:" | "Зачет:" => QuestionContent,
+            _ => CurrentScope,
+            // "URL:" | "Ссылка:" | "Дата:" | "Обработан:" | "Автор:" | "Редактор:" | "Копирайт:" | "Инфо:" |
+            // "Тема:" | "Вид:" | "Тип:" | "Источник:" | "Рейтинг:" | "Комментарий:" | "Комментарии:"
+        }
+    }
+}
+
 fn parse_file(file: impl io::Read) -> Result<json::JsonValue, Box<std::error::Error>> {
     let buf = io::BufReader::new(file);
     let reader = TextReader::new(buf, KOI8_R, DecoderTrap::Ignore);
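
Note: moving keyword_type() into the inherent KeywordType::from() above keeps the mapping next to the type, but the name mirrors the standard From trait without implementing it. A minimal sketch of the trait-based alternative (same variants as in this diff; not part of this commit):

    impl From<&str> for KeywordType {
        fn from(pattern: &str) -> Self {
            use KeywordType::*;
            // same keyword-to-variant mapping as above
            match pattern {
                "Мета:" => Ignore,
                "Чемпионат:" | "Пакет:" => Global,
                "Тур:" => QuestionPre,
                "Вопрос " | "Вопрос:" => QuestionStart,
                "Ответ:" | "Зачет:" => QuestionContent,
                _ => CurrentScope,
            }
        }
    }

Call sites such as Some(KeywordType::from(&pattern)) compile unchanged either way; the trait form additionally allows .into() where the target type is known.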
@@ -117,7 +119,6 @@ fn parse_file(file: impl io::Read) -> Result<json::JsonValue, Box<std::error::Er
         "Комментарий:",
         "Комментарии:",
     ];
 
     let mut context = Context::new();
     let mut ctx = &mut context;
 
@@ -135,7 +136,7 @@ fn parse_file(file: impl io::Read) -> Result<json::JsonValue, Box<std::error::Er
 
             ctx.last_keyword_type = ctx.cur_keyword_type;
             ctx.last_tag = ctx.cur_tag.clone();
-            ctx.cur_keyword_type = Some(keyword_type(&pattern));
+            ctx.cur_keyword_type = Some(KeywordType::from(&pattern));
             ctx.cur_tag = pattern.replace(' ', "").replace(':', "");
 
             // remember question id
@@ -159,9 +160,8 @@ fn parse_file(file: impl io::Read) -> Result<json::JsonValue, Box<std::error::Er
                 if ctx.have_new_question {
                     ctx.questions.push(ctx.cur_question.clone()).unwrap();
                 }
-                // prepare for read new question data with cur_question_pre values
+                // prepare to read new question data with cur_question_pre values
                 ctx.cur_question = ctx.cur_question_pre.clone();
-                // ctx.cur_question_pre = json::JsonValue::new_object(); // uncomment => forget pre at new question
                 ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into();
                 ctx.have_new_question = true;
             }
@@ -170,11 +170,12 @@ fn parse_file(file: impl io::Read) -> Result<json::JsonValue, Box<std::error::Er
             }
             Some(CurrentScope) => {
                 // match value to store data
-                (match ctx.cur_scope {
+                let scope_data = match ctx.cur_scope {
                     DataScope::Global => &mut ctx.data,
                     DataScope::QuestionPre => &mut ctx.cur_question_pre,
                     DataScope::QuestionContent => &mut ctx.cur_question,
-                })[&ctx.last_tag] = ctx.cur_content.join("\n").into();
+                };
+                scope_data[&ctx.last_tag] = ctx.cur_content.join("\n").into();
             }
             _ => (), //None or Ignore
         };
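
Note: the rewrite above only names the borrow before using it; the match still evaluates to a &mut json::JsonValue, it is just bound to scope_data instead of being indexed as a parenthesized expression. A standalone sketch of the pattern (hypothetical values, using the same json crate as main.rs):

    fn demo() {
        let mut global = json::JsonValue::new_object();
        let mut question = json::JsonValue::new_object();
        let use_global = true; // stand-in for ctx.cur_scope
        // pick the destination object first, then write through the named borrow
        let target = if use_global { &mut global } else { &mut question };
        target["Тур"] = "1".into();
    }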
@@ -190,7 +191,7 @@ fn parse_file(file: impl io::Read) -> Result<json::JsonValue, Box<std::error::Er
 
     // finish reading last question
     if ctx.have_new_question && !ctx.cur_content.is_empty() {
-        ctx.cur_question[&ctx.cur_tag] = ctx.cur_content.join("\n").as_str().into();
+        ctx.cur_question[&ctx.cur_tag] = ctx.cur_content.join("\n").into();
         ctx.questions.push(ctx.cur_question.clone()).unwrap();
         ctx.have_new_question = false;
     }
@@ -201,10 +202,9 @@ fn parse_file(file: impl io::Read) -> Result<json::JsonValue, Box<std::error::Er
 
 // split slice to a vector of slices
 fn split_vec<'a, T>(src: &'a [T], num: usize) -> Vec<&'a [T]> {
-    let all_len = src.len();
-    let part_len = all_len / num;
-    let add_len = all_len % num;
-    let mut result = Vec::<&'a [T]>::new();
+    let part_len = src.len() / num;
+    let add_len = src.len() % num;
+    let mut result = Vec::<&'a [T]>::with_capacity(num);
 
     if 0 == part_len {
         result.push(src);
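
Note: in split_vec above, part_len and add_len are now computed directly from src.len() and the result vector is preallocated with Vec::with_capacity(num), so pushing the parts never reallocates; the returned slices are the same. A hypothetical usage sketch (not part of the commit):

    fn demo_split() {
        let items: Vec<u32> = (0..10).collect(); // stand-in for the real Vec<PathBuf>
        for (i, part) in split_vec(&items, 3).iter().enumerate() {
            println!("chunk {}: {:?}", i, part);
        }
    }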
@@ -236,7 +236,7 @@ fn process_files(files: &&[PathBuf]) {
         let data = parse_file(file).unwrap();
 
         // make output filename
-        let mut outfilename = PathBuf::from("./json");
+        let mut outfilename = PathBuf::from(OUTPUT_PATH);
         outfilename.push(name);
         outfilename.set_extension("json");
 
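
Note: with the output directory lifted into the OUTPUT_PATH constant, the filename construction above behaves as before. A small sketch of the result for a hypothetical input name (not from the commit):

    let name = PathBuf::from("some_tour.txt");        // hypothetical entry name
    let mut outfilename = PathBuf::from(OUTPUT_PATH); // "json"
    outfilename.push(&name);
    outfilename.set_extension("json");
    assert_eq!(outfilename, PathBuf::from("json/some_tour.json"));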
@@ -258,7 +258,7 @@ fn main() -> Result<(), Box<std::error::Error>> {
         // skip files without "txt" extension
         match name.extension() {
             Some(ext) => match ext.to_str() {
-                Some(ext_str) if ext_str.eq_ignore_ascii_case("txt") => true,
+                Some(ext_str) => ext_str.eq_ignore_ascii_case("txt"),
                 _ => false, // extension is not valid unicode or not txt
             },
             _ => false, // no extension in filename
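
Note: collapsing the guard arm into Some(ext_str) => ext_str.eq_ignore_ascii_case("txt") lets the comparison itself produce the boolean, while _ => false still covers a non-unicode extension. The same filter could be written with Option combinators; a sketch assuming name is the path being tested, as in the surrounding match:

    name.extension()
        .and_then(|ext| ext.to_str())
        .map(|ext| ext.eq_ignore_ascii_case("txt"))
        .unwrap_or(false)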