more keywords + autoscope
This commit is contained in:
parent
cbea45413c
commit
b4de387798
96
src/main.rs
96
src/main.rs
@ -6,27 +6,39 @@ extern crate zip;
|
|||||||
use encoding::all::KOI8_R;
|
use encoding::all::KOI8_R;
|
||||||
use encoding::DecoderTrap;
|
use encoding::DecoderTrap;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::io::BufReader;
|
use std::io;
|
||||||
use std::io::Read;
|
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use textstream::TextReader;
|
use textstream::TextReader;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
enum KeywordType {
|
enum KeywordType {
|
||||||
|
Ignore,
|
||||||
Global,
|
Global,
|
||||||
QuestionPre,
|
QuestionPre,
|
||||||
QuestionStart,
|
QuestionStart,
|
||||||
QuestionContent,
|
QuestionContent,
|
||||||
|
CurrentScope,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
enum DataScope {
|
||||||
|
Global,
|
||||||
|
QuestionPre,
|
||||||
|
QuestionContent,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn keyword_type(pattern: &str) -> KeywordType {
|
fn keyword_type(pattern: &str) -> KeywordType {
|
||||||
|
use KeywordType::*;
|
||||||
match pattern {
|
match pattern {
|
||||||
"Чемпионат:" | "URL:" | "Дата:" | "Редактор:" | "Вид:" => {
|
"Мета:" => Ignore,
|
||||||
KeywordType::Global
|
"Чемпионат:" | "Пакет:" => Global,
|
||||||
}
|
"Тур:" => QuestionPre,
|
||||||
"Тур:" => KeywordType::QuestionPre,
|
"Вопрос " => QuestionStart,
|
||||||
"Вопрос " => KeywordType::QuestionStart,
|
"Вопрос:" => QuestionStart,
|
||||||
_ => KeywordType::QuestionContent,
|
"Ответ:" | "Зачет:" => QuestionContent,
|
||||||
|
_ => CurrentScope,
|
||||||
|
// "URL:" | "Ссылка:" | "Дата:" | "Обработан:" | "Автор:" | "Редактор:" | "Копирайт:" | "Инфо:" |
|
||||||
|
// "Тема:" | "Вид:" | "Тип:" | "Источник:" | "Рейтинг:" | "Комментарий:" | "Комментарии:"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -40,6 +52,8 @@ struct Context {
|
|||||||
cur_question: json::JsonValue,
|
cur_question: json::JsonValue,
|
||||||
// temp value for pre-question fields
|
// temp value for pre-question fields
|
||||||
cur_question_pre: json::JsonValue,
|
cur_question_pre: json::JsonValue,
|
||||||
|
// scope for data fields
|
||||||
|
cur_scope: DataScope,
|
||||||
// current json key
|
// current json key
|
||||||
cur_tag: String,
|
cur_tag: String,
|
||||||
// current json value
|
// current json value
|
||||||
@ -52,23 +66,34 @@ struct Context {
|
|||||||
last_tag: String,
|
last_tag: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_file<R: Read>(file: R) -> Result<json::JsonValue, Box<std::error::Error>> {
|
fn parse_file(file: impl io::Read) -> Result<json::JsonValue, Box<std::error::Error>> {
|
||||||
let buf = BufReader::new(file);
|
let buf = io::BufReader::new(file);
|
||||||
let reader = TextReader::new(buf, KOI8_R, DecoderTrap::Ignore);
|
let reader = TextReader::new(buf, KOI8_R, DecoderTrap::Ignore);
|
||||||
|
|
||||||
let patterns = vec![
|
let patterns = vec![
|
||||||
"Чемпионат:",
|
"Чемпионат:",
|
||||||
|
"Пакет:",
|
||||||
"URL:",
|
"URL:",
|
||||||
|
"Ссылка:",
|
||||||
"Дата:",
|
"Дата:",
|
||||||
"Редактор:",
|
"Редактор:",
|
||||||
|
"Обработан:",
|
||||||
|
"Копирайт:",
|
||||||
|
"Инфо:",
|
||||||
|
"Тема:",
|
||||||
"Вид:",
|
"Вид:",
|
||||||
|
"Тип:",
|
||||||
"Тур:",
|
"Тур:",
|
||||||
|
"Мета:",
|
||||||
"Вопрос ",
|
"Вопрос ",
|
||||||
|
"Вопрос:",
|
||||||
"Ответ:",
|
"Ответ:",
|
||||||
"Зачет:",
|
"Зачет:",
|
||||||
"Источник:",
|
"Источник:",
|
||||||
|
"Рейтинг:",
|
||||||
"Автор:",
|
"Автор:",
|
||||||
"Комментарий:",
|
"Комментарий:",
|
||||||
|
"Комментарии:",
|
||||||
];
|
];
|
||||||
// init context
|
// init context
|
||||||
let mut context = Context {
|
let mut context = Context {
|
||||||
@ -79,6 +104,7 @@ fn parse_file<R: Read>(file: R) -> Result<json::JsonValue, Box<std::error::Error
|
|||||||
cur_question_pre: json::JsonValue::new_object(),
|
cur_question_pre: json::JsonValue::new_object(),
|
||||||
cur_tag: String::new(),
|
cur_tag: String::new(),
|
||||||
cur_content: Vec::<String>::new(),
|
cur_content: Vec::<String>::new(),
|
||||||
|
cur_scope: DataScope::Global,
|
||||||
have_new_question: false,
|
have_new_question: false,
|
||||||
last_keyword_type: None,
|
last_keyword_type: None,
|
||||||
last_tag: String::new(),
|
last_tag: String::new(),
|
||||||
@ -98,6 +124,8 @@ fn parse_file<R: Read>(file: R) -> Result<json::JsonValue, Box<std::error::Error
|
|||||||
.find(|&&pattern| line_s.starts_with(pattern) && line_s.ends_with(":"))
|
.find(|&&pattern| line_s.starts_with(pattern) && line_s.ends_with(":"))
|
||||||
{
|
{
|
||||||
Some(pattern) => {
|
Some(pattern) => {
|
||||||
|
use KeywordType::*;
|
||||||
|
|
||||||
ctx.last_keyword_type = ctx.cur_keyword_type;
|
ctx.last_keyword_type = ctx.cur_keyword_type;
|
||||||
ctx.last_tag = ctx.cur_tag.clone();
|
ctx.last_tag = ctx.cur_tag.clone();
|
||||||
ctx.cur_keyword_type = Some(keyword_type(&pattern));
|
ctx.cur_keyword_type = Some(keyword_type(&pattern));
|
||||||
@ -105,35 +133,46 @@ fn parse_file<R: Read>(file: R) -> Result<json::JsonValue, Box<std::error::Error
|
|||||||
|
|
||||||
// remember question id
|
// remember question id
|
||||||
match ctx.cur_keyword_type {
|
match ctx.cur_keyword_type {
|
||||||
Some(KeywordType::QuestionStart) => {
|
Some(QuestionStart) => {
|
||||||
ctx.cur_question_pre["id"] = line_s.replace(":", "").as_str().into()
|
ctx.cur_question_pre["id"] = line_s.replace(":", "").as_str().into();
|
||||||
}
|
}
|
||||||
_ => (),
|
_ => (),
|
||||||
};
|
};
|
||||||
|
|
||||||
// apply accumulated content when new keyword found
|
// apply accumulated content when new keyword found
|
||||||
match ctx.last_keyword_type {
|
match ctx.last_keyword_type {
|
||||||
Some(KeywordType::Global) => {
|
Some(Global) => {
|
||||||
|
ctx.cur_scope = DataScope::Global;
|
||||||
ctx.data[&ctx.last_tag] = ctx.cur_content.join("\n").into()
|
ctx.data[&ctx.last_tag] = ctx.cur_content.join("\n").into()
|
||||||
}
|
}
|
||||||
Some(KeywordType::QuestionPre) => {
|
Some(QuestionPre) => {
|
||||||
|
ctx.cur_scope = DataScope::QuestionPre;
|
||||||
ctx.cur_question_pre[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
ctx.cur_question_pre[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||||
}
|
}
|
||||||
Some(KeywordType::QuestionContent) => {
|
Some(QuestionStart) => {
|
||||||
ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
ctx.cur_scope = DataScope::QuestionContent;
|
||||||
}
|
|
||||||
Some(KeywordType::QuestionStart) => {
|
|
||||||
// store prev question before reading new
|
// store prev question before reading new
|
||||||
if ctx.have_new_question {
|
if ctx.have_new_question {
|
||||||
ctx.questions.push(ctx.cur_question.clone()).unwrap();
|
ctx.questions.push(ctx.cur_question.clone()).unwrap();
|
||||||
}
|
}
|
||||||
// prepare to read new question data using cur_question_pre values
|
// prepare to read new question data using cur_question_pre values
|
||||||
ctx.cur_question = ctx.cur_question_pre.clone();
|
ctx.cur_question = ctx.cur_question_pre.clone();
|
||||||
ctx.cur_question_pre = json::JsonValue::new_object();
|
// ctx.cur_question_pre = json::JsonValue::new_object(); // uncomment => forget pre at new question
|
||||||
ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||||
ctx.have_new_question = true;
|
ctx.have_new_question = true;
|
||||||
}
|
}
|
||||||
None => (),
|
Some(QuestionContent) => {
|
||||||
|
ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||||
|
}
|
||||||
|
Some(CurrentScope) => {
|
||||||
|
// match value to store data
|
||||||
|
(match ctx.cur_scope {
|
||||||
|
DataScope::Global => &mut ctx.data,
|
||||||
|
DataScope::QuestionPre => &mut ctx.cur_question_pre,
|
||||||
|
DataScope::QuestionContent => &mut ctx.cur_question,
|
||||||
|
})[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||||
|
}
|
||||||
|
_ => (), //None or Ignore
|
||||||
};
|
};
|
||||||
// clear content
|
// clear content
|
||||||
ctx.cur_content.clear();
|
ctx.cur_content.clear();
|
||||||
@ -159,18 +198,21 @@ fn parse_file<R: Read>(file: R) -> Result<json::JsonValue, Box<std::error::Error
|
|||||||
fn main() -> Result<(), Box<std::error::Error>> {
|
fn main() -> Result<(), Box<std::error::Error>> {
|
||||||
let fname = std::path::Path::new("./baza.zip");
|
let fname = std::path::Path::new("./baza.zip");
|
||||||
let zip_file = fs::File::open(&fname)?;
|
let zip_file = fs::File::open(&fname)?;
|
||||||
let zip_reader = BufReader::new(zip_file);
|
let zip_reader = io::BufReader::new(zip_file);
|
||||||
|
|
||||||
let mut archive = zip::ZipArchive::new(zip_reader)?;
|
let mut archive = zip::ZipArchive::new(zip_reader)?;
|
||||||
|
|
||||||
for i in 0..archive.len() {
|
for i in 0..archive.len() {
|
||||||
let file = archive.by_index(i)?;
|
let file = archive.by_index(i)?;
|
||||||
// FIXME
|
|
||||||
//if ! file.is_file() {
|
|
||||||
// continue;
|
|
||||||
//}
|
|
||||||
|
|
||||||
let name = file.sanitized_name();
|
let name = file.sanitized_name();
|
||||||
|
// skip files without "txt" extension
|
||||||
|
match name.extension() {
|
||||||
|
Some(ext) => match ext.to_str() {
|
||||||
|
Some(ext_str) if ext_str.eq_ignore_ascii_case("txt") => (),
|
||||||
|
_ => continue, // extension is not valid unicode or not txt
|
||||||
|
},
|
||||||
|
_ => continue, // no extension in filename
|
||||||
|
}
|
||||||
println!("{}", name.as_path().display());
|
println!("{}", name.as_path().display());
|
||||||
let data: json::JsonValue = parse_file(file)?;
|
let data: json::JsonValue = parse_file(file)?;
|
||||||
let mut outfilename = PathBuf::from("./json");
|
let mut outfilename = PathBuf::from("./json");
|
||||||
@ -182,7 +224,7 @@ fn main() -> Result<(), Box<std::error::Error>> {
|
|||||||
data.write_pretty(&mut outfile, 1)?;
|
data.write_pretty(&mut outfile, 1)?;
|
||||||
//data.write(&mut outfile)?;
|
//data.write(&mut outfile)?;
|
||||||
//debug
|
//debug
|
||||||
//break;
|
break;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user