From d77e164d07bda87a84d84eb33b28ed608df9c7ba Mon Sep 17 00:00:00 2001
From: Dmitry <b4tm4n@mail.ru>
Date: Tue, 20 Sep 2022 14:31:37 +0300
Subject: [PATCH] parser refactor

---
 src/main.rs | 316 ++++++++++++++++++++++++++++++----------------------
 1 file changed, 182 insertions(+), 134 deletions(-)

diff --git a/src/main.rs b/src/main.rs
index 1cf8ba4..0306a43 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -26,6 +26,24 @@ enum KeywordType {
     CurrentScope,
 }
 
+impl FromStr for KeywordType {
+    type Err = ();
+
+    fn from_str(pattern: &str) -> Result<Self, Self::Err> {
+        use KeywordType::*;
+        Ok(match pattern {
+            "Мета:" => Ignore,
+            "Чемпионат:" | "Пакет:" => Global,
+            "Тур:" => QuestionPre,
+            "Вопрос " | "Вопрос:" => QuestionStart,
+            "Ответ:" | "Зачет:" => QuestionContent,
+            _ => CurrentScope,
+            // catch-all above, e.g. "URL:" | "Ссылка:" | "Дата:" | "Обработан:" | "Автор:" | "Редактор:" | "Копирайт:" | "Инфо:" |
+            // "Тема:" | "Вид:" | "Тип:" | "Источник:" | "Рейтинг:" | "Комментарий:" | "Комментарии:"
+        })
+    }
+}
+
 #[derive(Debug, Clone, Copy)]
 enum DataScope {
     Global,
@@ -33,7 +51,7 @@ enum DataScope {
     QuestionContent,
 }
 
-struct Context {
+struct QuestionsParser {
     // global output value
     data: json::JsonValue,
     // temp questions array
@@ -57,66 +75,9 @@ struct Context {
     last_tag: String,
 }
 
-// check questions before push
-trait PushIfValid {
-    fn is_valid(&self) -> bool;
-    fn push_if_valid(&mut self, value: json::JsonValue);
-}
-
-impl PushIfValid for json::JsonValue {
-    fn is_valid(&self) -> bool {
-        self.has_key("Вопрос") && self.has_key("Ответ")
-    }
-    fn push_if_valid(&mut self, value: json::JsonValue) {
-        if value.is_valid() {
-            self.push(value).unwrap_or(())
-        }
-    }
-}
-
-impl Context {
-    fn new() -> Context {
-        Context {
-            data: json::JsonValue::new_object(),
-            questions: json::JsonValue::new_array(),
-            cur_keyword_type: None,
-            cur_question: json::JsonValue::new_object(),
-            cur_question_pre: json::JsonValue::new_object(),
-            cur_tag: String::new(),
-            cur_content: Vec::<String>::new(),
-            cur_scope: DataScope::Global,
-            have_new_question: false,
-            last_keyword_type: None,
-            last_tag: String::new(),
-        }
-    }
-}
-
-impl FromStr for KeywordType {
-    type Err = ();
-
-    fn from_str(pattern: &str) -> Result<Self, Self::Err> {
-        use KeywordType::*;
-        Ok(match pattern {
-            "Мета:" => Ignore,
-            "Чемпионат:" | "Пакет:" => Global,
-            "Тур:" => QuestionPre,
-            "Вопрос " | "Вопрос:" => QuestionStart,
-            "Ответ:" | "Зачет:" => QuestionContent,
-            _ => CurrentScope,
-            // "URL:" | "Ссылка:" | "Дата:" | "Обработан:" | "Автор:" | "Редактор:" | "Копирайт:" | "Инфо:" |
-            // "Тема:" | "Вид:" | "Тип:" | "Источник:" | "Рейтинг:" | "Комментарий:" | "Комментарии:"
-        })
-    }
-}
-
-async fn parse_file(
-    entry_reader: impl AsyncReadExt + Unpin,
-) -> Result<json::JsonValue, Box<dyn std::error::Error>> {
-    let buf_reader = BufReader::new(entry_reader);
-    let mut lines = buf_reader.lines();
-
-    let patterns = vec![
+/// Text questions parser
+impl QuestionsParser {
+    const PATTERNS: &'static [&'static str] = &[
         "Чемпионат:",
         "Пакет:",
         "URL:",
@@ -141,87 +102,174 @@ async fn parse_file(
         "Комментарий:",
         "Комментарии:",
     ];
-    let mut context = Context::new();
-    let mut ctx = &mut context;
+
+    /// Creates a parser with empty output, no keyword seen yet and the global scope selected.
+    pub fn new() -> QuestionsParser {
+        QuestionsParser {
+            data: json::JsonValue::new_object(),
+            questions: json::JsonValue::new_array(),
+            cur_keyword_type: None,
+            cur_question: json::JsonValue::new_object(),
+            cur_question_pre: json::JsonValue::new_object(),
+            cur_tag: String::new(),
+            cur_content: Vec::<String>::new(),
+            cur_scope: DataScope::Global,
+            have_new_question: false,
+            last_keyword_type: None,
+            last_tag: String::new(),
+        }
+    }
+    /// Returns the accumulated content lines joined with `\n`.
+    fn get_current_content(&self) -> String {
+        self.cur_content.join("\n")
+    }
+    /// Drops all accumulated content lines.
+    fn clear_current_content(&mut self) {
+        self.cur_content.clear()
+    }
+    /// Appends one line to the accumulated content.
+    fn append_to_current_content(&mut self, line: String) {
+        self.cur_content.push(line);
+    }
+    /// Returns `true` when the current question has both the "Вопрос" and "Ответ" keys.
+    fn is_current_question_valid(&self) -> bool {
+        self.cur_question.has_key("Вопрос") && self.cur_question.has_key("Ответ")
+    }
+    /// Pushes a copy of the current question onto `questions` if it is valid; a failed push is ignored.
+    fn add_cur_question(&mut self) {
+        if self.is_current_question_valid() {
+            self.questions.push(self.cur_question.clone()).unwrap_or(())
+        }
+    }
+    /// Stores the accumulated content under `last_tag` in the given scope, then clears the content.
+    fn apply_content_to(&mut self, scope: DataScope) {
+        let content = self.get_current_content();
+        // pick the JSON value that backs the requested scope
+        let scope_data = match scope {
+            DataScope::Global => &mut self.data,
+            DataScope::QuestionPre => &mut self.cur_question_pre,
+            DataScope::QuestionContent => &mut self.cur_question,
+        };
+        scope_data[&self.last_tag] = content.into();
+        self.clear_current_content();
+    }
+    /// Stores the accumulated content under `last_tag` in the currently active scope.
+    fn apply_content_to_cur_scope(&mut self) {
+        self.apply_content_to(self.cur_scope);
+    }
+    /// Makes `scope` the active scope.
+    fn set_scope(&mut self, scope: DataScope) {
+        self.cur_scope = scope;
+    }
+    /// Makes `scope` the active scope and stores the accumulated content under `last_tag` in it.
+    fn set_scope_and_apply(&mut self, scope: DataScope) {
+        self.set_scope(scope);
+        self.apply_content_to_cur_scope();
+    }
+    /// Adds the previous question, if one was started, and begins collecting a new one.
+    fn start_new_question(&mut self) {
+        // store the previous question before reading a new one
+        if self.have_new_question {
+            self.add_cur_question();
+        }
+        // seed the new question with a copy of the `cur_question_pre` values
+        self.cur_question = self.cur_question_pre.clone();
+        self.have_new_question = true;
+    }
+
+    /// Flushes the content gathered under the previous keyword (`last_tag`) into its data scope.
+    fn apply_content_for_last_keyword(&mut self) {
+        // called once per new keyword; every arm has to leave `cur_content` empty for the next tag
+        match self.last_keyword_type {
+            Some(KeywordType::Global) => {
+                self.set_scope_and_apply(DataScope::Global);
+            }
+            Some(KeywordType::QuestionPre) => {
+                self.set_scope_and_apply(DataScope::QuestionPre);
+            }
+            Some(KeywordType::QuestionStart) => {
+                self.start_new_question();
+                self.set_scope_and_apply(DataScope::QuestionContent);
+            }
+            Some(KeywordType::QuestionContent) => {
+                self.apply_content_to(DataScope::QuestionContent);
+            }
+            Some(KeywordType::CurrentScope) => {
+                self.apply_content_to_cur_scope();
+            }
+            _ => self.clear_current_content(), // None or Ignore: store nothing, but drop the lines
+        };
+    }
+    /// Moves the current keyword into `last_*` and records `keyword`; its tag drops ' ' and ':'.
+    fn set_new_keyword(&mut self, keyword: &str) {
+        self.last_keyword_type = self.cur_keyword_type;
+        self.last_tag = self.cur_tag.clone();
+        self.cur_keyword_type = Some(keyword.parse().unwrap());
+        self.cur_tag = keyword.replace(' ', "").replace(':', "");
+    }
+    /// Handles a line that matched `keyword`: updates keyword state and flushes the previous content.
+    fn on_keyword_match(&mut self, line: &str, keyword: &str) {
+        self.set_new_keyword(keyword);
+
+        // remember the question id: the whole matched line with ':' removed
+        if let Some(KeywordType::QuestionStart) = self.cur_keyword_type {
+            self.cur_question_pre["id"] = line.replace(':', "").into();
+        };
+
+        self.apply_content_for_last_keyword();
+    }
+
+    /// Parses one line: a keyword line switches the current tag, any other line is kept as content.
+    pub fn parse_line(&mut self, line: &str) {
+        match QuestionsParser::PATTERNS
+            .iter() // a keyword line starts with a known pattern and ends with ':'
+            .find(|&&pattern| line.starts_with(pattern) && line.ends_with(':'))
+        {
+            Some(pattern) => {
+                self.on_keyword_match(line, pattern);
+            }
+            None => {
+                self.append_to_current_content(line.to_string());
+            }
+        }
+    }
+
+    /// Flushes a pending question that still has unapplied content, then writes `questions` to "Вопросы".
+    pub fn finish(&mut self) {
+        if self.have_new_question && !self.cur_content.is_empty() {
+            self.cur_question[&self.cur_tag] = self.get_current_content().into();
+            self.add_cur_question();
+            self.clear_current_content();
+            self.have_new_question = false;
+        }
+        self.data["Вопросы"] = self.questions.clone();
+    }
+
+    /// Consumes the parser and returns the collected data.
+    pub fn get_parsed(self) -> json::JsonValue {
+        self.data
+    }
+}
+
+async fn parse_file(
+    entry_reader: impl AsyncReadExt + Unpin,
+) -> Result<json::JsonValue, Box<dyn std::error::Error>> {
+    let buf_reader = BufReader::new(entry_reader);
+    let mut lines = buf_reader.lines();
+
+    let mut parser = QuestionsParser::new();
 
     while let Some(line_r) = lines.next_line().await? {
         let line = line_r.trim();
         if line.is_empty() {
             continue;
         }
-        let line = line.to_string();
 
-        match patterns
-            .iter() // find keyword
-            .find(|&&pattern| line.starts_with(pattern) && line.ends_with(':'))
-        {
-            Some(pattern) => {
-                use KeywordType::*;
-
-                ctx.last_keyword_type = ctx.cur_keyword_type;
-                ctx.last_tag = ctx.cur_tag.clone();
-                ctx.cur_keyword_type = Some(pattern.parse().unwrap());
-                ctx.cur_tag = pattern.replace(' ', "").replace(':', "");
-
-                // remember question id
-                if let Some(QuestionStart) = ctx.cur_keyword_type {
-                    ctx.cur_question_pre["id"] = line.replace(':', "").as_str().into();
-                };
-
-                // apply accumulated content when new keyword found
-                match ctx.last_keyword_type {
-                    Some(Global) => {
-                        ctx.cur_scope = DataScope::Global;
-                        ctx.data[&ctx.last_tag] = ctx.cur_content.join("\n").into()
-                    }
-                    Some(QuestionPre) => {
-                        ctx.cur_scope = DataScope::QuestionPre;
-                        ctx.cur_question_pre[&ctx.last_tag] = ctx.cur_content.join("\n").into();
-                    }
-                    Some(QuestionStart) => {
-                        ctx.cur_scope = DataScope::QuestionContent;
-                        // store prev question before reading new
-                        if ctx.have_new_question {
-                            ctx.questions.push_if_valid(ctx.cur_question.clone());
-                        }
-                        // prepare to read new question data with cur_question_pre values
-                        ctx.cur_question = ctx.cur_question_pre.clone();
-                        ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into();
-                        ctx.have_new_question = true;
-                    }
-                    Some(QuestionContent) => {
-                        ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into();
-                    }
-                    Some(CurrentScope) => {
-                        // match value to store data
-                        let scope_data = match ctx.cur_scope {
-                            DataScope::Global => &mut ctx.data,
-                            DataScope::QuestionPre => &mut ctx.cur_question_pre,
-                            DataScope::QuestionContent => &mut ctx.cur_question,
-                        };
-                        scope_data[&ctx.last_tag] = ctx.cur_content.join("\n").into();
-                    }
-                    _ => (), //None or Ignore
-                };
-                // clear content
-                ctx.cur_content.clear();
-            }
-            None => {
-                // accumulate content if line is not a keyword
-                ctx.cur_content.push(line);
-            }
-        }
+        parser.parse_line(line);
     }
 
-    // finish reading last question
-    if ctx.have_new_question && !ctx.cur_content.is_empty() {
-        ctx.cur_question[&ctx.cur_tag] = ctx.cur_content.join("\n").into();
-        ctx.questions.push_if_valid(ctx.cur_question.clone());
-        ctx.have_new_question = false;
-    }
-
-    ctx.data["Вопросы"] = ctx.questions.clone();
-    Ok(ctx.data.clone())
+    parser.finish();
+    Ok(parser.get_parsed())
 }
 
 struct WriteQueueItem {
@@ -305,7 +353,7 @@ async fn data_writer(queue: WriteQueue) {
         }
     }
     writer.close().await.unwrap();
-    
+
     println!("write done ✅");
 }