refactor

2019-07-27 20:24:49 +03:00
parent befca99019
commit 44d2567419
1 changed files with 222 additions and 222 deletions
--- a/src/main.rs
+++ b/src/main.rs
@@ -7,269 +7,269 @@ extern crate zip;
 use encoding::all::KOI8_R;
 use encoding::DecoderTrap;
 use rayon::prelude::*;
-use std::fs;
-use std::io;
 use std::path::PathBuf;
+use std::{fs, io};
 use textstream::TextReader;

-const BASE_FILENAME: &str = "./baza.zip";
+const BASE_FILENAME: &str = "baza.zip";
+const OUTPUT_PATH: &str = "json";

 #[derive(Debug, Clone, Copy)]
 enum KeywordType {
-	Ignore,
-	Global,
-	QuestionPre,
-	QuestionStart,
-	QuestionContent,
-	CurrentScope,
+    Ignore,
+    Global,
+    QuestionPre,
+    QuestionStart,
+    QuestionContent,
+    CurrentScope,
 }

 #[derive(Debug, Clone, Copy)]
 enum DataScope {
-	Global,
-	QuestionPre,
-	QuestionContent,
-}
-
-fn keyword_type(pattern: &str) -> KeywordType {
-	use KeywordType::*;
-	match pattern {
-		"Мета:" => Ignore,
-		"Чемпионат:" | "Пакет:" => Global,
-		"Тур:" => QuestionPre,
-		"Вопрос " => QuestionStart,
-		"Вопрос:" => QuestionStart,
-		"Ответ:" | "Зачет:" => QuestionContent,
-		_ => CurrentScope,
-		// "URL:" | "Ссылка:" | "Дата:" | "Обработан:" | "Автор:" | "Редактор:" | "Копирайт:" | "Инфо:" |
-		// "Тема:" | "Вид:" | "Тип:" | "Источник:" | "Рейтинг:" | "Комментарий:" | "Комментарии:"
-	}
+    Global,
+    QuestionPre,
+    QuestionContent,
 }

 struct Context {
-	// global output value
-	data: json::JsonValue,
-	// temp questions array
-	questions: json::JsonValue,
-	cur_keyword_type: Option<KeywordType>,
-	// temp question value
-	cur_question: json::JsonValue,
-	// temp value for pre'question fields
-	cur_question_pre: json::JsonValue,
-	// scope for data fields
-	cur_scope: DataScope,
-	// curent json key
-	cur_tag: String,
-	// current json value
-	cur_content: Vec<String>,
-	// need to push temp question value if true
-	have_new_question: bool,
-	// prev. keyword type
-	last_keyword_type: Option<KeywordType>,
-	// prev. json key (used for store acummulated content when new keyword readed)
-	last_tag: String,
+    // global output value
+    data: json::JsonValue,
+    // temp questions array
+    questions: json::JsonValue,
+    cur_keyword_type: Option<KeywordType>,
+    // temp question value
+    cur_question: json::JsonValue,
+    // temp value for pre-question fields
+    cur_question_pre: json::JsonValue,
+    // scope for data fields
+    cur_scope: DataScope,
+    // current json key
+    cur_tag: String,
+    // current json value
+    cur_content: Vec<String>,
+    // need to push temp question value if true
+    have_new_question: bool,
+    // prev. keyword type
+    last_keyword_type: Option<KeywordType>,
+    // prev. json key (used to store accumulated content when a new keyword is read)
+    last_tag: String,
 }

 impl Context {
-	fn new() -> Context {
-		Context {
-			data: json::JsonValue::new_object(),
-			questions: json::JsonValue::new_array(),
-			cur_keyword_type: None,
-			cur_question: json::JsonValue::new_object(),
-			cur_question_pre: json::JsonValue::new_object(),
-			cur_tag: String::new(),
-			cur_content: Vec::<String>::new(),
-			cur_scope: DataScope::Global,
-			have_new_question: false,
-			last_keyword_type: None,
-			last_tag: String::new(),
-		}
-	}
+    fn new() -> Context {
+        Context {
+            data: json::JsonValue::new_object(),
+            questions: json::JsonValue::new_array(),
+            cur_keyword_type: None,
+            cur_question: json::JsonValue::new_object(),
+            cur_question_pre: json::JsonValue::new_object(),
+            cur_tag: String::new(),
+            cur_content: Vec::<String>::new(),
+            cur_scope: DataScope::Global,
+            have_new_question: false,
+            last_keyword_type: None,
+            last_tag: String::new(),
+        }
+    }
+}
+
+impl KeywordType {
+    fn from(pattern: &str) -> KeywordType {
+        use KeywordType::*;
+        match pattern {
+            "Мета:" => Ignore,
+            "Чемпионат:" | "Пакет:" => Global,
+            "Тур:" => QuestionPre,
+            "Вопрос " => QuestionStart,
+            "Вопрос:" => QuestionStart,
+            "Ответ:" | "Зачет:" => QuestionContent,
+            _ => CurrentScope,
+            // "URL:" | "Ссылка:" | "Дата:" | "Обработан:" | "Автор:" | "Редактор:" | "Копирайт:" | "Инфо:" |
+            // "Тема:" | "Вид:" | "Тип:" | "Источник:" | "Рейтинг:" | "Комментарий:" | "Комментарии:"
+        }
+    }
 }

 fn parse_file(file: impl io::Read) -> Result<json::JsonValue, Box<std::error::Error>> {
-	let buf = io::BufReader::new(file);
-	let reader = TextReader::new(buf, KOI8_R, DecoderTrap::Ignore);
+    let buf = io::BufReader::new(file);
+    let reader = TextReader::new(buf, KOI8_R, DecoderTrap::Ignore);

-	let patterns = vec![
-		"Чемпионат:",
-		"Пакет:",
-		"URL:",
-		"Ссылка:",
-		"Дата:",
-		"Редактор:",
-		"Обработан:",
-		"Копирайт:",
-		"Инфо:",
-		"Тема:",
-		"Вид:",
-		"Тип:",
-		"Тур:",
-		"Мета:",
-		"Вопрос ",
-		"Вопрос:",
-		"Ответ:",
-		"Зачет:",
-		"Источник:",
-		"Рейтинг:",
-		"Автор:",
-		"Комментарий:",
-		"Комментарии:",
-	];
+    let patterns = vec![
+        "Чемпионат:",
+        "Пакет:",
+        "URL:",
+        "Ссылка:",
+        "Дата:",
+        "Редактор:",
+        "Обработан:",
+        "Копирайт:",
+        "Инфо:",
+        "Тема:",
+        "Вид:",
+        "Тип:",
+        "Тур:",
+        "Мета:",
+        "Вопрос ",
+        "Вопрос:",
+        "Ответ:",
+        "Зачет:",
+        "Источник:",
+        "Рейтинг:",
+        "Автор:",
+        "Комментарий:",
+        "Комментарии:",
+    ];
+    let mut context = Context::new();
+    let mut ctx = &mut context;

-	let mut context = Context::new();
-	let mut ctx = &mut context;
+    reader
+        .lines()
+        .map(|line| String::from(line.unwrap().trim()))
+        .filter(|line| !line.is_empty()) // ignore empty lines
+        .for_each(|line| {
+            match patterns
+                .iter() // find keyword
+                .find(|&&pattern| line.starts_with(pattern) && line.ends_with(':'))
+            {
+                Some(pattern) => {
+                    use KeywordType::*;

-	reader
-		.lines()
-		.map(|line| String::from(line.unwrap().trim()))
-		.filter(|line| !line.is_empty()) // ignore empty lines
-		.for_each(|line| {
-			match patterns
-				.iter() // find keyword
-				.find(|&&pattern| line.starts_with(pattern) && line.ends_with(':'))
-			{
-				Some(pattern) => {
-					use KeywordType::*;
+                    ctx.last_keyword_type = ctx.cur_keyword_type;
+                    ctx.last_tag = ctx.cur_tag.clone();
+                    ctx.cur_keyword_type = Some(KeywordType::from(&pattern));
+                    ctx.cur_tag = pattern.replace(' ', "").replace(':', "");

-					ctx.last_keyword_type = ctx.cur_keyword_type;
-					ctx.last_tag = ctx.cur_tag.clone();
-					ctx.cur_keyword_type = Some(keyword_type(&pattern));
-					ctx.cur_tag = pattern.replace(' ', "").replace(':', "");
+                    // remember question id
+                    if let Some(QuestionStart) = ctx.cur_keyword_type {
+                        ctx.cur_question_pre["id"] = line.replace(':', "").as_str().into();
+                    };

-					// remember question id
-					if let Some(QuestionStart) = ctx.cur_keyword_type {
-						ctx.cur_question_pre["id"] = line.replace(':', "").as_str().into();
-					};
+                    // apply accumulated content when new keyword found
+                    match ctx.last_keyword_type {
+                        Some(Global) => {
+                            ctx.cur_scope = DataScope::Global;
+                            ctx.data[&ctx.last_tag] = ctx.cur_content.join("\n").into()
+                        }
+                        Some(QuestionPre) => {
+                            ctx.cur_scope = DataScope::QuestionPre;
+                            ctx.cur_question_pre[&ctx.last_tag] = ctx.cur_content.join("\n").into();
+                        }
+                        Some(QuestionStart) => {
+                            ctx.cur_scope = DataScope::QuestionContent;
+                            // store prev question before reading new
+                            if ctx.have_new_question {
+                                ctx.questions.push(ctx.cur_question.clone()).unwrap();
+                            }
+                            // prepare to read new question data with cur_question_pre values
+                            ctx.cur_question = ctx.cur_question_pre.clone();
+                            ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into();
+                            ctx.have_new_question = true;
+                        }
+                        Some(QuestionContent) => {
+                            ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into();
+                        }
+                        Some(CurrentScope) => {
+                            // match value to store data
+                            let scope_data = match ctx.cur_scope {
+                                DataScope::Global => &mut ctx.data,
+                                DataScope::QuestionPre => &mut ctx.cur_question_pre,
+                                DataScope::QuestionContent => &mut ctx.cur_question,
+                            };
+                            scope_data[&ctx.last_tag] = ctx.cur_content.join("\n").into();
+                        }
+                        _ => (), //None or Ignore
+                    };
+                    // clear content
+                    ctx.cur_content.clear();
+                }
+                None => {
+                    // accumulate content if line is not a keyword
+                    ctx.cur_content.push(line);
+                }
+            }
+        });

-					// apply accumulated content when new keyword found
-					match ctx.last_keyword_type {
-						Some(Global) => {
-							ctx.cur_scope = DataScope::Global;
-							ctx.data[&ctx.last_tag] = ctx.cur_content.join("\n").into()
-						}
-						Some(QuestionPre) => {
-							ctx.cur_scope = DataScope::QuestionPre;
-							ctx.cur_question_pre[&ctx.last_tag] = ctx.cur_content.join("\n").into();
-						}
-						Some(QuestionStart) => {
-							ctx.cur_scope = DataScope::QuestionContent;
-							// store prev question before reading new
-							if ctx.have_new_question {
-								ctx.questions.push(ctx.cur_question.clone()).unwrap();
-							}
-							// prepare for read new question data with cur_question_pre values
-							ctx.cur_question = ctx.cur_question_pre.clone();
-							// ctx.cur_question_pre = json::JsonValue::new_object(); // uncomment => forget pre at new question
-							ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into();
-							ctx.have_new_question = true;
-						}
-						Some(QuestionContent) => {
-							ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into();
-						}
-						Some(CurrentScope) => {
-							// match value to store data
-							(match ctx.cur_scope {
-								DataScope::Global => &mut ctx.data,
-								DataScope::QuestionPre => &mut ctx.cur_question_pre,
-								DataScope::QuestionContent => &mut ctx.cur_question,
-							})[&ctx.last_tag] = ctx.cur_content.join("\n").into();
-						}
-						_ => (), //None or Ignore
-					};
-					// clear content
-					ctx.cur_content.clear();
-				}
-				None => {
-					// accumulate content if line is not a keyword
-					ctx.cur_content.push(line);
-				}
-			}
-		});
+    // finish reading last question
+    if ctx.have_new_question && !ctx.cur_content.is_empty() {
+        ctx.cur_question[&ctx.cur_tag] = ctx.cur_content.join("\n").into();
+        ctx.questions.push(ctx.cur_question.clone()).unwrap();
+        ctx.have_new_question = false;
+    }

-	// finish reading last question
-	if ctx.have_new_question && !ctx.cur_content.is_empty() {
-		ctx.cur_question[&ctx.cur_tag] = ctx.cur_content.join("\n").as_str().into();
-		ctx.questions.push(ctx.cur_question.clone()).unwrap();
-		ctx.have_new_question = false;
-	}
-
-	ctx.data["Вопросы"] = ctx.questions.clone();
-	Ok(ctx.data.clone())
+    ctx.data["Вопросы"] = ctx.questions.clone();
+    Ok(ctx.data.clone())
 }

 // split slice to a vector of slices
 fn split_vec<'a, T>(src: &'a [T], num: usize) -> Vec<&'a [T]> {
-	let all_len = src.len();
-	let part_len = all_len / num;
-	let add_len = all_len % num;
-	let mut result = Vec::<&'a [T]>::new();
+    let part_len = src.len() / num;
+    let add_len = src.len() % num;
+    let mut result = Vec::<&'a [T]>::with_capacity(num);

-	if 0 == part_len {
-		result.push(src);
-		return result;
-	}
-	for i in 0..num {
-		let size = if (num - 1) == i {
-			part_len + add_len
-		} else {
-			part_len
-		};
-		let start = part_len * i;
-		result.push(&src[start..(start + size)]);
-	}
-	result
+    if 0 == part_len {
+        result.push(src);
+        return result;
+    }
+    for i in 0..num {
+        let size = if (num - 1) == i {
+            part_len + add_len
+        } else {
+            part_len
+        };
+        let start = part_len * i;
+        result.push(&src[start..(start + size)]);
+    }
+    result
 }

 fn process_files(files: &&[PathBuf]) {
-	let zip_file = fs::File::open(BASE_FILENAME).unwrap();
-	let zip_reader = io::BufReader::new(zip_file);
-	let mut archive = zip::ZipArchive::new(zip_reader).unwrap();
+    let zip_file = fs::File::open(BASE_FILENAME).unwrap();
+    let zip_reader = io::BufReader::new(zip_file);
+    let mut archive = zip::ZipArchive::new(zip_reader).unwrap();

-	files.iter().for_each(|name| {
-		let name_str = name.to_str().unwrap();
-		println!("{:}", name_str);
+    files.iter().for_each(|name| {
+        let name_str = name.to_str().unwrap();
+        println!("{:}", name_str);

-		// parse txt file
-		let file = archive.by_name(name_str).unwrap();
-		let data = parse_file(file).unwrap();
+        // parse txt file
+        let file = archive.by_name(name_str).unwrap();
+        let data = parse_file(file).unwrap();

-		// make output filename
-		let mut outfilename = PathBuf::from("./json");
-		outfilename.push(name);
-		outfilename.set_extension("json");
+        // make output filename
+        let mut outfilename = PathBuf::from(OUTPUT_PATH);
+        outfilename.push(name);
+        outfilename.set_extension("json");

-		// save json to file
-		let mut outfile = fs::File::create(outfilename).unwrap();
-		data.write_pretty(&mut outfile, 1).unwrap();
-	});
+        // save json to file
+        let mut outfile = fs::File::create(outfilename).unwrap();
+        data.write_pretty(&mut outfile, 1).unwrap();
+    });
 }

 fn main() -> Result<(), Box<std::error::Error>> {
-	// open archive just to list files
-	let zip_file = fs::File::open(BASE_FILENAME)?;
-	let zip_reader = io::BufReader::new(zip_file);
-	let mut archive = zip::ZipArchive::new(zip_reader)?;
+    // open archive just to list files
+    let zip_file = fs::File::open(BASE_FILENAME)?;
+    let zip_reader = io::BufReader::new(zip_file);
+    let mut archive = zip::ZipArchive::new(zip_reader)?;

-	let source_files: Vec<PathBuf> = (0..archive.len())
-		.map(|i| archive.by_index(i).unwrap().sanitized_name())
-		.filter(|name| {
-			// skip files without "txt" extension
-			match name.extension() {
-				Some(ext) => match ext.to_str() {
-					Some(ext_str) if ext_str.eq_ignore_ascii_case("txt") => true,
-					_ => false, // extension is not valid unicode or not txt
-				},
-				_ => false, // no extension in filename
-			}
-		})
-		.collect();
-	drop(archive);
+    let source_files: Vec<PathBuf> = (0..archive.len())
+        .map(|i| archive.by_index(i).unwrap().sanitized_name())
+        .filter(|name| {
+            // skip files without "txt" extension
+            match name.extension() {
+                Some(ext) => match ext.to_str() {
+                    Some(ext_str) => ext_str.eq_ignore_ascii_case("txt"),
+                    _ => false, // extension is not valid unicode
+                },
+                _ => false, // no extension in filename
+            }
+        })
+        .collect();
+    drop(archive);

-	// split vector and process its parts in parallel
-	split_vec(&source_files, rayon::current_num_threads())
-		.par_iter()
-		.for_each(process_files);
-	Ok(())
+    // split vector and process its parts in parallel
+    split_vec(&source_files, rayon::current_num_threads())
+        .par_iter()
+        .for_each(process_files);
+    Ok(())
 }