parser refactor
This commit is contained in:
parent
7eaa394d7d
commit
d77e164d07
314
src/main.rs
314
src/main.rs
@ -26,6 +26,24 @@ enum KeywordType {
|
||||
CurrentScope,
|
||||
}
|
||||
|
||||
impl FromStr for KeywordType {
|
||||
type Err = ();
|
||||
|
||||
fn from_str(pattern: &str) -> Result<Self, Self::Err> {
|
||||
use KeywordType::*;
|
||||
Ok(match pattern {
|
||||
"Мета:" => Ignore,
|
||||
"Чемпионат:" | "Пакет:" => Global,
|
||||
"Тур:" => QuestionPre,
|
||||
"Вопрос " | "Вопрос:" => QuestionStart,
|
||||
"Ответ:" | "Зачет:" => QuestionContent,
|
||||
_ => CurrentScope,
|
||||
// "URL:" | "Ссылка:" | "Дата:" | "Обработан:" | "Автор:" | "Редактор:" | "Копирайт:" | "Инфо:" |
|
||||
// "Тема:" | "Вид:" | "Тип:" | "Источник:" | "Рейтинг:" | "Комментарий:" | "Комментарии:"
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
enum DataScope {
|
||||
Global,
|
||||
@ -33,7 +51,7 @@ enum DataScope {
|
||||
QuestionContent,
|
||||
}
|
||||
|
||||
struct Context {
|
||||
struct QuestionsParser {
|
||||
// global output value
|
||||
data: json::JsonValue,
|
||||
// temp questions array
|
||||
@ -57,66 +75,9 @@ struct Context {
|
||||
last_tag: String,
|
||||
}
|
||||
|
||||
// check questions before push
|
||||
trait PushIfValid {
|
||||
fn is_valid(&self) -> bool;
|
||||
fn push_if_valid(&mut self, value: json::JsonValue);
|
||||
}
|
||||
|
||||
impl PushIfValid for json::JsonValue {
    /// A value is valid when it has both the "Вопрос" and the "Ответ" key.
    fn is_valid(&self) -> bool {
        ["Вопрос", "Ответ"].iter().all(|key| self.has_key(key))
    }

    /// Appends `value` to `self` when it is valid; an error from the
    /// underlying push is deliberately ignored.
    fn push_if_valid(&mut self, value: json::JsonValue) {
        if !value.is_valid() {
            return;
        }
        let _ = self.push(value);
    }
}
|
||||
|
||||
impl Context {
|
||||
fn new() -> Context {
|
||||
Context {
|
||||
data: json::JsonValue::new_object(),
|
||||
questions: json::JsonValue::new_array(),
|
||||
cur_keyword_type: None,
|
||||
cur_question: json::JsonValue::new_object(),
|
||||
cur_question_pre: json::JsonValue::new_object(),
|
||||
cur_tag: String::new(),
|
||||
cur_content: Vec::<String>::new(),
|
||||
cur_scope: DataScope::Global,
|
||||
have_new_question: false,
|
||||
last_keyword_type: None,
|
||||
last_tag: String::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for KeywordType {
|
||||
type Err = ();
|
||||
|
||||
fn from_str(pattern: &str) -> Result<Self, Self::Err> {
|
||||
use KeywordType::*;
|
||||
Ok(match pattern {
|
||||
"Мета:" => Ignore,
|
||||
"Чемпионат:" | "Пакет:" => Global,
|
||||
"Тур:" => QuestionPre,
|
||||
"Вопрос " | "Вопрос:" => QuestionStart,
|
||||
"Ответ:" | "Зачет:" => QuestionContent,
|
||||
_ => CurrentScope,
|
||||
// "URL:" | "Ссылка:" | "Дата:" | "Обработан:" | "Автор:" | "Редактор:" | "Копирайт:" | "Инфо:" |
|
||||
// "Тема:" | "Вид:" | "Тип:" | "Источник:" | "Рейтинг:" | "Комментарий:" | "Комментарии:"
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
async fn parse_file(
|
||||
entry_reader: impl AsyncReadExt + Unpin,
|
||||
) -> Result<json::JsonValue, Box<dyn std::error::Error>> {
|
||||
let buf_reader = BufReader::new(entry_reader);
|
||||
let mut lines = buf_reader.lines();
|
||||
|
||||
let patterns = vec![
|
||||
/// Text questions parser
|
||||
impl QuestionsParser {
|
||||
const PATTERNS: &'static [&'static str] = &[
|
||||
"Чемпионат:",
|
||||
"Пакет:",
|
||||
"URL:",
|
||||
@ -141,87 +102,174 @@ async fn parse_file(
|
||||
"Комментарий:",
|
||||
"Комментарии:",
|
||||
];
|
||||
let mut context = Context::new();
|
||||
let mut ctx = &mut context;
|
||||
|
||||
/// create new parser
|
||||
pub fn new() -> QuestionsParser {
|
||||
QuestionsParser {
|
||||
data: json::JsonValue::new_object(),
|
||||
questions: json::JsonValue::new_array(),
|
||||
cur_keyword_type: None,
|
||||
cur_question: json::JsonValue::new_object(),
|
||||
cur_question_pre: json::JsonValue::new_object(),
|
||||
cur_tag: String::new(),
|
||||
cur_content: Vec::<String>::new(),
|
||||
cur_scope: DataScope::Global,
|
||||
have_new_question: false,
|
||||
last_keyword_type: None,
|
||||
last_tag: String::new(),
|
||||
}
|
||||
}
|
||||
/// join current content lines
|
||||
fn get_current_content(&self) -> String {
|
||||
self.cur_content.join("\n")
|
||||
}
|
||||
/// clear current content
|
||||
fn clear_current_content(&mut self) {
|
||||
self.cur_content.clear()
|
||||
}
|
||||
/// add new line to current content
|
||||
fn append_to_current_content(&mut self, line: String) {
|
||||
self.cur_content.push(line);
|
||||
}
|
||||
/// check current question have required fields
|
||||
fn is_current_question_valid(&self) -> bool {
|
||||
self.cur_question.has_key("Вопрос") && self.cur_question.has_key("Ответ")
|
||||
}
|
||||
/// add current question to parsed array
|
||||
fn add_cur_question(&mut self) {
|
||||
if self.is_current_question_valid() {
|
||||
self.questions.push(self.cur_question.clone()).unwrap_or(())
|
||||
}
|
||||
}
|
||||
/// set current content to last tag(keyword) to data scope
|
||||
fn apply_content_to(&mut self, scope: DataScope) {
|
||||
let content = self.get_current_content();
|
||||
// match value to store data
|
||||
let scope_data = match scope {
|
||||
DataScope::Global => &mut self.data,
|
||||
DataScope::QuestionPre => &mut self.cur_question_pre,
|
||||
DataScope::QuestionContent => &mut self.cur_question,
|
||||
};
|
||||
scope_data[&self.last_tag] = content.into();
|
||||
self.clear_current_content();
|
||||
}
|
||||
/// set current content to last tag(keyword) to current scope
|
||||
fn apply_content_to_cur_scope(&mut self) {
|
||||
self.apply_content_to(self.cur_scope);
|
||||
}
|
||||
/// set current scope
|
||||
fn set_scope(&mut self, scope: DataScope) {
|
||||
self.cur_scope = scope;
|
||||
}
|
||||
/// set current scope and set current content to last tag(keyword) to data scope
|
||||
fn set_scope_and_apply(&mut self, scope: DataScope) {
|
||||
self.set_scope(scope);
|
||||
self.apply_content_to_cur_scope();
|
||||
}
|
||||
/// add last question (if have) and start collecting new one
|
||||
fn start_new_question(&mut self) {
|
||||
// store prev question before reading new
|
||||
if self.have_new_question {
|
||||
self.add_cur_question();
|
||||
}
|
||||
// prepare to read new question data with cur_question_pre values
|
||||
self.cur_question = self.cur_question_pre.clone();
|
||||
self.have_new_question = true;
|
||||
}
|
||||
|
||||
/// check last tag(keyword) and set current content to corresponding data scope
|
||||
fn apply_content_for_last_keyword(&mut self) {
|
||||
// apply accumulated content when new keyword found
|
||||
match self.last_keyword_type {
|
||||
Some(KeywordType::Global) => {
|
||||
self.set_scope_and_apply(DataScope::Global);
|
||||
}
|
||||
Some(KeywordType::QuestionPre) => {
|
||||
self.set_scope_and_apply(DataScope::QuestionPre);
|
||||
}
|
||||
Some(KeywordType::QuestionStart) => {
|
||||
self.start_new_question();
|
||||
self.set_scope_and_apply(DataScope::QuestionContent);
|
||||
}
|
||||
Some(KeywordType::QuestionContent) => {
|
||||
self.apply_content_to(DataScope::QuestionContent);
|
||||
}
|
||||
Some(KeywordType::CurrentScope) => {
|
||||
self.apply_content_to_cur_scope();
|
||||
}
|
||||
_ => (), //None or Ignore
|
||||
};
|
||||
}
|
||||
/// set current keyword(tag) and type as last, and set new as current
|
||||
fn set_new_keyword(&mut self, keyword: &str) {
|
||||
self.last_keyword_type = self.cur_keyword_type;
|
||||
self.last_tag = self.cur_tag.clone();
|
||||
self.cur_keyword_type = Some(keyword.parse().unwrap());
|
||||
self.cur_tag = keyword.replace(' ', "").replace(':', "");
|
||||
}
|
||||
/// if line matched keyword
|
||||
fn on_keyword_match(&mut self, line: &str, keyword: &str) {
|
||||
self.set_new_keyword(keyword);
|
||||
|
||||
// remember question id
|
||||
if let Some(KeywordType::QuestionStart) = self.cur_keyword_type {
|
||||
self.cur_question_pre["id"] = line.replace(':', "").into();
|
||||
};
|
||||
|
||||
self.apply_content_for_last_keyword();
|
||||
}
|
||||
|
||||
/// parse next line
|
||||
pub fn parse_line(&mut self, line: &str) {
|
||||
match QuestionsParser::PATTERNS
|
||||
.iter() // find keyword
|
||||
.find(|&&pattern| line.starts_with(pattern) && line.ends_with(':'))
|
||||
{
|
||||
Some(pattern) => {
|
||||
self.on_keyword_match(line, pattern);
|
||||
}
|
||||
None => {
|
||||
self.append_to_current_content(line.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// finish parsing
|
||||
pub fn finish(&mut self) {
|
||||
if self.have_new_question && !self.cur_content.is_empty() {
|
||||
self.cur_question[&self.cur_tag] = self.get_current_content().into();
|
||||
self.add_cur_question();
|
||||
self.clear_current_content();
|
||||
self.have_new_question = false;
|
||||
}
|
||||
self.data["Вопросы"] = self.questions.clone();
|
||||
}
|
||||
|
||||
/// get parsed data
|
||||
pub fn get_parsed(self) -> json::JsonValue {
|
||||
self.data
|
||||
}
|
||||
}
|
||||
|
||||
async fn parse_file(
|
||||
entry_reader: impl AsyncReadExt + Unpin,
|
||||
) -> Result<json::JsonValue, Box<dyn std::error::Error>> {
|
||||
let buf_reader = BufReader::new(entry_reader);
|
||||
let mut lines = buf_reader.lines();
|
||||
|
||||
let mut parser = QuestionsParser::new();
|
||||
|
||||
while let Some(line_r) = lines.next_line().await? {
|
||||
let line = line_r.trim();
|
||||
if line.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let line = line.to_string();
|
||||
|
||||
match patterns
|
||||
.iter() // find keyword
|
||||
.find(|&&pattern| line.starts_with(pattern) && line.ends_with(':'))
|
||||
{
|
||||
Some(pattern) => {
|
||||
use KeywordType::*;
|
||||
|
||||
ctx.last_keyword_type = ctx.cur_keyword_type;
|
||||
ctx.last_tag = ctx.cur_tag.clone();
|
||||
ctx.cur_keyword_type = Some(pattern.parse().unwrap());
|
||||
ctx.cur_tag = pattern.replace(' ', "").replace(':', "");
|
||||
|
||||
// remember question id
|
||||
if let Some(QuestionStart) = ctx.cur_keyword_type {
|
||||
ctx.cur_question_pre["id"] = line.replace(':', "").as_str().into();
|
||||
};
|
||||
|
||||
// apply accumulated content when new keyword found
|
||||
match ctx.last_keyword_type {
|
||||
Some(Global) => {
|
||||
ctx.cur_scope = DataScope::Global;
|
||||
ctx.data[&ctx.last_tag] = ctx.cur_content.join("\n").into()
|
||||
}
|
||||
Some(QuestionPre) => {
|
||||
ctx.cur_scope = DataScope::QuestionPre;
|
||||
ctx.cur_question_pre[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||
}
|
||||
Some(QuestionStart) => {
|
||||
ctx.cur_scope = DataScope::QuestionContent;
|
||||
// store prev question before reading new
|
||||
if ctx.have_new_question {
|
||||
ctx.questions.push_if_valid(ctx.cur_question.clone());
|
||||
}
|
||||
// prepare to read new question data with cur_question_pre values
|
||||
ctx.cur_question = ctx.cur_question_pre.clone();
|
||||
ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||
ctx.have_new_question = true;
|
||||
}
|
||||
Some(QuestionContent) => {
|
||||
ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||
}
|
||||
Some(CurrentScope) => {
|
||||
// match value to store data
|
||||
let scope_data = match ctx.cur_scope {
|
||||
DataScope::Global => &mut ctx.data,
|
||||
DataScope::QuestionPre => &mut ctx.cur_question_pre,
|
||||
DataScope::QuestionContent => &mut ctx.cur_question,
|
||||
};
|
||||
scope_data[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||
}
|
||||
_ => (), //None or Ignore
|
||||
};
|
||||
// clear content
|
||||
ctx.cur_content.clear();
|
||||
}
|
||||
None => {
|
||||
// accumulate content if line is not a keyword
|
||||
ctx.cur_content.push(line);
|
||||
}
|
||||
}
|
||||
parser.parse_line(line);
|
||||
}
|
||||
|
||||
// finish reading last question
|
||||
if ctx.have_new_question && !ctx.cur_content.is_empty() {
|
||||
ctx.cur_question[&ctx.cur_tag] = ctx.cur_content.join("\n").into();
|
||||
ctx.questions.push_if_valid(ctx.cur_question.clone());
|
||||
ctx.have_new_question = false;
|
||||
}
|
||||
|
||||
ctx.data["Вопросы"] = ctx.questions.clone();
|
||||
Ok(ctx.data.clone())
|
||||
parser.finish();
|
||||
Ok(parser.get_parsed())
|
||||
}
|
||||
|
||||
struct WriteQueueItem {
|
||||
|
Loading…
Reference in New Issue
Block a user