From b4de3877984c3df3356138ac1fd518812544e237 Mon Sep 17 00:00:00 2001 From: Dmitry Date: Fri, 26 Jul 2019 12:25:45 +0300 Subject: [PATCH] more keywords + autoscope --- src/main.rs | 96 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 69 insertions(+), 27 deletions(-) diff --git a/src/main.rs b/src/main.rs index b4339fb..7c990d1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,27 +6,39 @@ extern crate zip; use encoding::all::KOI8_R; use encoding::DecoderTrap; use std::fs; -use std::io::BufReader; -use std::io::Read; +use std::io; use std::path::PathBuf; use textstream::TextReader; #[derive(Debug, Clone, Copy)] enum KeywordType { + Ignore, Global, QuestionPre, QuestionStart, QuestionContent, + CurrentScope, +} + +#[derive(Debug, Clone, Copy)] +enum DataScope { + Global, + QuestionPre, + QuestionContent, } fn keyword_type(pattern: &str) -> KeywordType { + use KeywordType::*; match pattern { - "Чемпионат:" | "URL:" | "Дата:" | "Редактор:" | "Вид:" => { - KeywordType::Global - } - "Тур:" => KeywordType::QuestionPre, - "Вопрос " => KeywordType::QuestionStart, - _ => KeywordType::QuestionContent, + "Мета:" => Ignore, + "Чемпионат:" | "Пакет:" => Global, + "Тур:" => QuestionPre, + "Вопрос " => QuestionStart, + "Вопрос:" => QuestionStart, + "Ответ:" | "Зачет:" => QuestionContent, + _ => CurrentScope, + // "URL:" | "Ссылка:" | "Дата:" | "Обработан:" | "Автор:" | "Редактор:" | "Копирайт:" | "Инфо:" | + // "Тема:" | "Вид:" | "Тип:" | "Источник:" | "Рейтинг:" | "Комментарий:" | "Комментарии:" } } @@ -40,6 +52,8 @@ struct Context { cur_question: json::JsonValue, // temp value for pre'question fields cur_question_pre: json::JsonValue, + // scope for data fields + cur_scope: DataScope, // curent json key cur_tag: String, // current json value @@ -52,23 +66,34 @@ struct Context { last_tag: String, } -fn parse_file(file: R) -> Result> { - let buf = BufReader::new(file); +fn parse_file(file: impl io::Read) -> Result> { + let buf = io::BufReader::new(file); let reader = TextReader::new(buf, KOI8_R, DecoderTrap::Ignore); let patterns = vec![ "Чемпионат:", + "Пакет:", "URL:", + "Ссылка:", "Дата:", "Редактор:", + "Обработан:", + "Копирайт:", + "Инфо:", + "Тема:", "Вид:", + "Тип:", "Тур:", + "Мета:", "Вопрос ", + "Вопрос:", "Ответ:", "Зачет:", "Источник:", + "Рейтинг:", "Автор:", "Комментарий:", + "Комментарии:", ]; // init context let mut context = Context { @@ -79,6 +104,7 @@ fn parse_file(file: R) -> Result::new(), + cur_scope: DataScope::Global, have_new_question: false, last_keyword_type: None, last_tag: String::new(), @@ -98,6 +124,8 @@ fn parse_file(file: R) -> Result { + use KeywordType::*; + ctx.last_keyword_type = ctx.cur_keyword_type; ctx.last_tag = ctx.cur_tag.clone(); ctx.cur_keyword_type = Some(keyword_type(&pattern)); @@ -105,35 +133,46 @@ fn parse_file(file: R) -> Result { - ctx.cur_question_pre["id"] = line_s.replace(":", "").as_str().into() + Some(QuestionStart) => { + ctx.cur_question_pre["id"] = line_s.replace(":", "").as_str().into(); } _ => (), }; // apply accumulated content when new keyword found match ctx.last_keyword_type { - Some(KeywordType::Global) => { + Some(Global) => { + ctx.cur_scope = DataScope::Global; ctx.data[&ctx.last_tag] = ctx.cur_content.join("\n").into() } - Some(KeywordType::QuestionPre) => { + Some(QuestionPre) => { + ctx.cur_scope = DataScope::QuestionPre; ctx.cur_question_pre[&ctx.last_tag] = ctx.cur_content.join("\n").into(); } - Some(KeywordType::QuestionContent) => { - ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into(); - } - Some(KeywordType::QuestionStart) => { + Some(QuestionStart) => { + ctx.cur_scope = DataScope::QuestionContent; // store prev question before reading new if ctx.have_new_question { ctx.questions.push(ctx.cur_question.clone()).unwrap(); } // prepare for read new question data with cur_question_pre values ctx.cur_question = ctx.cur_question_pre.clone(); - ctx.cur_question_pre = json::JsonValue::new_object(); + // ctx.cur_question_pre = json::JsonValue::new_object(); // uncomment => forget pre at new question ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into(); ctx.have_new_question = true; } - None => (), + Some(QuestionContent) => { + ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into(); + } + Some(CurrentScope) => { + // match value to store data + (match ctx.cur_scope { + DataScope::Global => &mut ctx.data, + DataScope::QuestionPre => &mut ctx.cur_question_pre, + DataScope::QuestionContent => &mut ctx.cur_question, + })[&ctx.last_tag] = ctx.cur_content.join("\n").into(); + } + _ => (), //None or Ignore }; // clear content ctx.cur_content.clear(); @@ -159,18 +198,21 @@ fn parse_file(file: R) -> Result Result<(), Box> { let fname = std::path::Path::new("./baza.zip"); let zip_file = fs::File::open(&fname)?; - let zip_reader = BufReader::new(zip_file); + let zip_reader = io::BufReader::new(zip_file); let mut archive = zip::ZipArchive::new(zip_reader)?; for i in 0..archive.len() { let file = archive.by_index(i)?; - // FIXME - //if ! file.is_file() { - // continue; - //} - let name = file.sanitized_name(); + // skip files without "txt" extension + match name.extension() { + Some(ext) => match ext.to_str() { + Some(ext_str) if ext_str.eq_ignore_ascii_case("txt") => (), + _ => continue, // extension is not valid unicode or not txt + }, + _ => continue, // no extension in filename + } println!("{}", name.as_path().display()); let data: json::JsonValue = parse_file(file)?; let mut outfilename = PathBuf::from("./json"); @@ -182,7 +224,7 @@ fn main() -> Result<(), Box> { data.write_pretty(&mut outfile, 1)?; //data.write(&mut outfile)?; //debug - //break; + break; } Ok(()) }