From d77e164d07bda87a84d84eb33b28ed608df9c7ba Mon Sep 17 00:00:00 2001 From: Dmitry Date: Tue, 20 Sep 2022 14:31:37 +0300 Subject: [PATCH] parser refactor --- src/main.rs | 316 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 182 insertions(+), 134 deletions(-) diff --git a/src/main.rs b/src/main.rs index 1cf8ba4..0306a43 100644 --- a/src/main.rs +++ b/src/main.rs @@ -26,6 +26,24 @@ enum KeywordType { CurrentScope, } +impl FromStr for KeywordType { + type Err = (); + + fn from_str(pattern: &str) -> Result { + use KeywordType::*; + Ok(match pattern { + "Мета:" => Ignore, + "Чемпионат:" | "Пакет:" => Global, + "Тур:" => QuestionPre, + "Вопрос " | "Вопрос:" => QuestionStart, + "Ответ:" | "Зачет:" => QuestionContent, + _ => CurrentScope, + // "URL:" | "Ссылка:" | "Дата:" | "Обработан:" | "Автор:" | "Редактор:" | "Копирайт:" | "Инфо:" | + // "Тема:" | "Вид:" | "Тип:" | "Источник:" | "Рейтинг:" | "Комментарий:" | "Комментарии:" + }) + } +} + #[derive(Debug, Clone, Copy)] enum DataScope { Global, @@ -33,7 +51,7 @@ enum DataScope { QuestionContent, } -struct Context { +struct QuestionsParser { // global output value data: json::JsonValue, // temp questions array @@ -57,66 +75,9 @@ struct Context { last_tag: String, } -// check questions before push -trait PushIfValid { - fn is_valid(&self) -> bool; - fn push_if_valid(&mut self, value: json::JsonValue); -} - -impl PushIfValid for json::JsonValue { - fn is_valid(&self) -> bool { - self.has_key("Вопрос") && self.has_key("Ответ") - } - fn push_if_valid(&mut self, value: json::JsonValue) { - if value.is_valid() { - self.push(value).unwrap_or(()) - } - } -} - -impl Context { - fn new() -> Context { - Context { - data: json::JsonValue::new_object(), - questions: json::JsonValue::new_array(), - cur_keyword_type: None, - cur_question: json::JsonValue::new_object(), - cur_question_pre: json::JsonValue::new_object(), - cur_tag: String::new(), - cur_content: Vec::::new(), - cur_scope: DataScope::Global, - have_new_question: false, - last_keyword_type: None, - last_tag: String::new(), - } - } -} - -impl FromStr for KeywordType { - type Err = (); - - fn from_str(pattern: &str) -> Result { - use KeywordType::*; - Ok(match pattern { - "Мета:" => Ignore, - "Чемпионат:" | "Пакет:" => Global, - "Тур:" => QuestionPre, - "Вопрос " | "Вопрос:" => QuestionStart, - "Ответ:" | "Зачет:" => QuestionContent, - _ => CurrentScope, - // "URL:" | "Ссылка:" | "Дата:" | "Обработан:" | "Автор:" | "Редактор:" | "Копирайт:" | "Инфо:" | - // "Тема:" | "Вид:" | "Тип:" | "Источник:" | "Рейтинг:" | "Комментарий:" | "Комментарии:" - }) - } -} - -async fn parse_file( - entry_reader: impl AsyncReadExt + Unpin, -) -> Result> { - let buf_reader = BufReader::new(entry_reader); - let mut lines = buf_reader.lines(); - - let patterns = vec![ +/// Text questions parser +impl QuestionsParser { + const PATTERNS: &'static [&'static str] = &[ "Чемпионат:", "Пакет:", "URL:", @@ -141,87 +102,174 @@ async fn parse_file( "Комментарий:", "Комментарии:", ]; - let mut context = Context::new(); - let mut ctx = &mut context; + + /// create new parser + pub fn new() -> QuestionsParser { + QuestionsParser { + data: json::JsonValue::new_object(), + questions: json::JsonValue::new_array(), + cur_keyword_type: None, + cur_question: json::JsonValue::new_object(), + cur_question_pre: json::JsonValue::new_object(), + cur_tag: String::new(), + cur_content: Vec::::new(), + cur_scope: DataScope::Global, + have_new_question: false, + last_keyword_type: None, + last_tag: String::new(), + } + } + /// join current content lines + fn get_current_content(&self) -> String { + self.cur_content.join("\n") + } + /// clear current content + fn clear_current_content(&mut self) { + self.cur_content.clear() + } + /// add new line to current content + fn append_to_current_content(&mut self, line: String) { + self.cur_content.push(line); + } + /// check current question have required fields + fn is_current_question_valid(&self) -> bool { + self.cur_question.has_key("Вопрос") && self.cur_question.has_key("Ответ") + } + /// add current question to parsed array + fn add_cur_question(&mut self) { + if self.is_current_question_valid() { + self.questions.push(self.cur_question.clone()).unwrap_or(()) + } + } + /// set current content to last tag(keyword) to data scope + fn apply_content_to(&mut self, scope: DataScope) { + let content = self.get_current_content(); + // match value to store data + let scope_data = match scope { + DataScope::Global => &mut self.data, + DataScope::QuestionPre => &mut self.cur_question_pre, + DataScope::QuestionContent => &mut self.cur_question, + }; + scope_data[&self.last_tag] = content.into(); + self.clear_current_content(); + } + /// set current content to last tag(keyword) to current scope + fn apply_content_to_cur_scope(&mut self) { + self.apply_content_to(self.cur_scope); + } + /// set current scope + fn set_scope(&mut self, scope: DataScope) { + self.cur_scope = scope; + } + /// set current scope and set current content to last tag(keyword) to data scope + fn set_scope_and_apply(&mut self, scope: DataScope) { + self.set_scope(scope); + self.apply_content_to_cur_scope(); + } + /// add last question (if have) and start collecting new one + fn start_new_question(&mut self) { + // store prev question before reading new + if self.have_new_question { + self.add_cur_question(); + } + // prepare to read new question data with cur_question_pre values + self.cur_question = self.cur_question_pre.clone(); + self.have_new_question = true; + } + + /// check last tag(keyword) and set current content to corresponding data scope + fn apply_content_for_last_keyword(&mut self) { + // apply accumulated content when new keyword found + match self.last_keyword_type { + Some(KeywordType::Global) => { + self.set_scope_and_apply(DataScope::Global); + } + Some(KeywordType::QuestionPre) => { + self.set_scope_and_apply(DataScope::QuestionPre); + } + Some(KeywordType::QuestionStart) => { + self.start_new_question(); + self.set_scope_and_apply(DataScope::QuestionContent); + } + Some(KeywordType::QuestionContent) => { + self.apply_content_to(DataScope::QuestionContent); + } + Some(KeywordType::CurrentScope) => { + self.apply_content_to_cur_scope(); + } + _ => (), //None or Ignore + }; + } + /// set current keyword(tag) and type as last, and set new as current + fn set_new_keyword(&mut self, keyword: &str) { + self.last_keyword_type = self.cur_keyword_type; + self.last_tag = self.cur_tag.clone(); + self.cur_keyword_type = Some(keyword.parse().unwrap()); + self.cur_tag = keyword.replace(' ', "").replace(':', ""); + } + /// if line matched keyword + fn on_keyword_match(&mut self, line: &str, keyword: &str) { + self.set_new_keyword(keyword); + + // remember question id + if let Some(KeywordType::QuestionStart) = self.cur_keyword_type { + self.cur_question_pre["id"] = line.replace(':', "").into(); + }; + + self.apply_content_for_last_keyword(); + } + + /// parse next line + pub fn parse_line(&mut self, line: &str) { + match QuestionsParser::PATTERNS + .iter() // find keyword + .find(|&&pattern| line.starts_with(pattern) && line.ends_with(':')) + { + Some(pattern) => { + self.on_keyword_match(line, pattern); + } + None => { + self.append_to_current_content(line.to_string()); + } + } + } + + /// finish parsing + pub fn finish(&mut self) { + if self.have_new_question && !self.cur_content.is_empty() { + self.cur_question[&self.cur_tag] = self.get_current_content().into(); + self.add_cur_question(); + self.clear_current_content(); + self.have_new_question = false; + } + self.data["Вопросы"] = self.questions.clone(); + } + + /// get parsed data + pub fn get_parsed(self) -> json::JsonValue { + self.data + } +} + +async fn parse_file( + entry_reader: impl AsyncReadExt + Unpin, +) -> Result> { + let buf_reader = BufReader::new(entry_reader); + let mut lines = buf_reader.lines(); + + let mut parser = QuestionsParser::new(); while let Some(line_r) = lines.next_line().await? { let line = line_r.trim(); if line.is_empty() { continue; } - let line = line.to_string(); - match patterns - .iter() // find keyword - .find(|&&pattern| line.starts_with(pattern) && line.ends_with(':')) - { - Some(pattern) => { - use KeywordType::*; - - ctx.last_keyword_type = ctx.cur_keyword_type; - ctx.last_tag = ctx.cur_tag.clone(); - ctx.cur_keyword_type = Some(pattern.parse().unwrap()); - ctx.cur_tag = pattern.replace(' ', "").replace(':', ""); - - // remember question id - if let Some(QuestionStart) = ctx.cur_keyword_type { - ctx.cur_question_pre["id"] = line.replace(':', "").as_str().into(); - }; - - // apply accumulated content when new keyword found - match ctx.last_keyword_type { - Some(Global) => { - ctx.cur_scope = DataScope::Global; - ctx.data[&ctx.last_tag] = ctx.cur_content.join("\n").into() - } - Some(QuestionPre) => { - ctx.cur_scope = DataScope::QuestionPre; - ctx.cur_question_pre[&ctx.last_tag] = ctx.cur_content.join("\n").into(); - } - Some(QuestionStart) => { - ctx.cur_scope = DataScope::QuestionContent; - // store prev question before reading new - if ctx.have_new_question { - ctx.questions.push_if_valid(ctx.cur_question.clone()); - } - // prepare to read new question data with cur_question_pre values - ctx.cur_question = ctx.cur_question_pre.clone(); - ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into(); - ctx.have_new_question = true; - } - Some(QuestionContent) => { - ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into(); - } - Some(CurrentScope) => { - // match value to store data - let scope_data = match ctx.cur_scope { - DataScope::Global => &mut ctx.data, - DataScope::QuestionPre => &mut ctx.cur_question_pre, - DataScope::QuestionContent => &mut ctx.cur_question, - }; - scope_data[&ctx.last_tag] = ctx.cur_content.join("\n").into(); - } - _ => (), //None or Ignore - }; - // clear content - ctx.cur_content.clear(); - } - None => { - // accumulate content if line is not a keyword - ctx.cur_content.push(line); - } - } + parser.parse_line(line); } - // finish reading last question - if ctx.have_new_question && !ctx.cur_content.is_empty() { - ctx.cur_question[&ctx.cur_tag] = ctx.cur_content.join("\n").into(); - ctx.questions.push_if_valid(ctx.cur_question.clone()); - ctx.have_new_question = false; - } - - ctx.data["Вопросы"] = ctx.questions.clone(); - Ok(ctx.data.clone()) + parser.finish(); + Ok(parser.get_parsed()) } struct WriteQueueItem { @@ -305,7 +353,7 @@ async fn data_writer(queue: WriteQueue) { } } writer.close().await.unwrap(); - + println!("write done ✅"); }