first
This commit is contained in:
189
src/main.rs
189
src/main.rs
@@ -1,3 +1,188 @@
|
||||
fn main() {
|
||||
println!("Hello, world!");
|
||||
extern crate encoding;
|
||||
extern crate json;
|
||||
extern crate textstream;
|
||||
extern crate zip;
|
||||
|
||||
use encoding::all::KOI8_R;
|
||||
use encoding::DecoderTrap;
|
||||
use std::fs;
|
||||
use std::io::BufReader;
|
||||
use std::io::Read;
|
||||
use std::path::PathBuf;
|
||||
use textstream::TextReader;
|
||||
|
||||
/// Role a recognized keyword plays in the source file layout.
#[derive(Debug, Clone, Copy)]
enum KeywordType {
    /// Tournament-wide metadata ("Чемпионат:", "URL:", "Дата:", ...).
    Global,
    /// Fields that precede a question and carry over into it ("Тур:").
    QuestionPre,
    /// The "Вопрос N:" line that opens a new question.
    QuestionStart,
    /// Any other keyword: a field belonging to the current question.
    QuestionContent,
}
|
||||
|
||||
fn keyword_type(pattern: &str) -> KeywordType {
|
||||
match pattern {
|
||||
"Чемпионат:" | "URL:" | "Дата:" | "Редактор:" | "Вид:" => {
|
||||
KeywordType::Global
|
||||
}
|
||||
"Тур:" => KeywordType::QuestionPre,
|
||||
"Вопрос " => KeywordType::QuestionStart,
|
||||
_ => KeywordType::QuestionContent,
|
||||
}
|
||||
}
|
||||
|
||||
struct Context {
|
||||
// global output value
|
||||
data: json::JsonValue,
|
||||
// temp questions array
|
||||
questions: json::JsonValue,
|
||||
cur_keyword_type: Option<KeywordType>,
|
||||
// temp question value
|
||||
cur_question: json::JsonValue,
|
||||
// temp value for pre'question fields
|
||||
cur_question_pre: json::JsonValue,
|
||||
// curent json key
|
||||
cur_tag: String,
|
||||
// current json value
|
||||
cur_content: Vec<String>,
|
||||
// need to push temp question value if true
|
||||
have_new_question: bool,
|
||||
// prev. keyword type
|
||||
last_keyword_type: Option<KeywordType>,
|
||||
// prev. json key (used for store acummulated content when new keyword readed)
|
||||
last_tag: String,
|
||||
}
|
||||
|
||||
fn parse_file<R: Read>(file: R) -> Result<json::JsonValue, Box<std::error::Error>> {
|
||||
let buf = BufReader::new(file);
|
||||
let reader = TextReader::new(buf, KOI8_R, DecoderTrap::Ignore);
|
||||
|
||||
let patterns = vec![
|
||||
"Чемпионат:",
|
||||
"URL:",
|
||||
"Дата:",
|
||||
"Редактор:",
|
||||
"Вид:",
|
||||
"Тур:",
|
||||
"Вопрос ",
|
||||
"Ответ:",
|
||||
"Зачет:",
|
||||
"Источник:",
|
||||
"Автор:",
|
||||
"Комментарий:",
|
||||
];
|
||||
// init context
|
||||
let mut context = Context {
|
||||
data: json::JsonValue::new_object(),
|
||||
questions: json::JsonValue::new_array(),
|
||||
cur_keyword_type: None,
|
||||
cur_question: json::JsonValue::new_object(),
|
||||
cur_question_pre: json::JsonValue::new_object(),
|
||||
cur_tag: String::new(),
|
||||
cur_content: Vec::<String>::new(),
|
||||
have_new_question: false,
|
||||
last_keyword_type: None,
|
||||
last_tag: String::new(),
|
||||
};
|
||||
let mut ctx = &mut context;
|
||||
|
||||
for line in reader.lines() {
|
||||
// ignore empty lines
|
||||
let line_str = String::from(line.unwrap().trim());
|
||||
let line_s = &line_str;
|
||||
if 0 == line_s.len() {
|
||||
continue;
|
||||
}
|
||||
// find keywords
|
||||
match patterns
|
||||
.iter()
|
||||
.find(|&&pattern| line_s.starts_with(pattern) && line_s.ends_with(":"))
|
||||
{
|
||||
Some(pattern) => {
|
||||
ctx.last_keyword_type = ctx.cur_keyword_type;
|
||||
ctx.last_tag = ctx.cur_tag.clone();
|
||||
ctx.cur_keyword_type = Some(keyword_type(&pattern));
|
||||
ctx.cur_tag = pattern.replace(" ", "").replace(":", "");
|
||||
|
||||
// remember question id
|
||||
match ctx.cur_keyword_type {
|
||||
Some(KeywordType::QuestionStart) => {
|
||||
ctx.cur_question_pre["id"] = line_s.replace(":", "").as_str().into()
|
||||
}
|
||||
_ => (),
|
||||
};
|
||||
|
||||
// apply accumulated content when new keyword found
|
||||
match ctx.last_keyword_type {
|
||||
Some(KeywordType::Global) => {
|
||||
ctx.data[&ctx.last_tag] = ctx.cur_content.join("\n").into()
|
||||
}
|
||||
Some(KeywordType::QuestionPre) => {
|
||||
ctx.cur_question_pre[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||
}
|
||||
Some(KeywordType::QuestionContent) => {
|
||||
ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||
}
|
||||
Some(KeywordType::QuestionStart) => {
|
||||
// store prev question before reading new
|
||||
if ctx.have_new_question {
|
||||
ctx.questions.push(ctx.cur_question.clone()).unwrap();
|
||||
}
|
||||
// prepare for read new question data with cur_question_pre values
|
||||
ctx.cur_question = ctx.cur_question_pre.clone();
|
||||
ctx.cur_question_pre = json::JsonValue::new_object();
|
||||
ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into();
|
||||
ctx.have_new_question = true;
|
||||
}
|
||||
None => (),
|
||||
};
|
||||
// clear content
|
||||
ctx.cur_content.clear();
|
||||
}
|
||||
None => {
|
||||
// accumulate content if line is not a keyword
|
||||
ctx.cur_content.push(String::from(line_s));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// finish reading last question
|
||||
if ctx.have_new_question && !ctx.cur_content.is_empty() {
|
||||
ctx.cur_question[&ctx.cur_tag] = ctx.cur_content.join("\n").as_str().into();
|
||||
ctx.questions.push(ctx.cur_question.clone()).unwrap();
|
||||
ctx.have_new_question = false;
|
||||
}
|
||||
|
||||
ctx.data["Вопросы"] = ctx.questions.clone();
|
||||
Ok(ctx.data.clone())
|
||||
}
|
||||
|
||||
fn main() -> Result<(), Box<std::error::Error>> {
|
||||
let fname = std::path::Path::new("./baza.zip");
|
||||
let zip_file = fs::File::open(&fname)?;
|
||||
let zip_reader = BufReader::new(zip_file);
|
||||
|
||||
let mut archive = zip::ZipArchive::new(zip_reader)?;
|
||||
|
||||
for i in 0..archive.len() {
|
||||
let file = archive.by_index(i)?;
|
||||
// FIXME
|
||||
//if ! file.is_file() {
|
||||
// continue;
|
||||
//}
|
||||
|
||||
let name = file.sanitized_name();
|
||||
println!("{}", name.as_path().display());
|
||||
let data: json::JsonValue = parse_file(file)?;
|
||||
let mut outfilename = PathBuf::from("./json");
|
||||
outfilename.push(name);
|
||||
outfilename.set_extension("json");
|
||||
|
||||
let mut outfile = fs::File::create(outfilename)?;
|
||||
|
||||
data.write_pretty(&mut outfile, 1)?;
|
||||
//data.write(&mut outfile)?;
|
||||
//debug
|
||||
//break;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
Reference in New Issue
Block a user