first
This commit is contained in:
		
							
								
								
									
										189
									
								
								src/main.rs
									
									
									
									
									
								
							
							
						
						
									
										189
									
								
								src/main.rs
									
									
									
									
									
								
							| @@ -1,3 +1,188 @@ | ||||
| fn main() { | ||||
|     println!("Hello, world!"); | ||||
| extern crate encoding; | ||||
| extern crate json; | ||||
| extern crate textstream; | ||||
| extern crate zip; | ||||
|  | ||||
| use encoding::all::KOI8_R; | ||||
| use encoding::DecoderTrap; | ||||
| use std::fs; | ||||
| use std::io::BufReader; | ||||
| use std::io::Read; | ||||
| use std::path::PathBuf; | ||||
| use textstream::TextReader; | ||||
|  | ||||
/// Classifies a keyword line by where the text that follows it is stored.
#[derive(Clone, Copy, Debug)]
enum KeywordType {
	/// Field stored on the top-level output object.
	Global,
	/// Field seen before a question header; carried into the next question.
	QuestionPre,
	/// Question header; begins a new question.
	QuestionStart,
	/// Field stored on the question currently being read.
	QuestionContent,
}
|  | ||||
| fn keyword_type(pattern: &str) -> KeywordType { | ||||
| 	match pattern { | ||||
| 		"Чемпионат:" | "URL:" | "Дата:" | "Редактор:" | "Вид:" => { | ||||
| 			KeywordType::Global | ||||
| 		} | ||||
| 		"Тур:" => KeywordType::QuestionPre, | ||||
| 		"Вопрос " => KeywordType::QuestionStart, | ||||
| 		_ => KeywordType::QuestionContent, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| struct Context { | ||||
| 	// global output value | ||||
| 	data: json::JsonValue, | ||||
| 	// temp questions array | ||||
| 	questions: json::JsonValue, | ||||
| 	cur_keyword_type: Option<KeywordType>, | ||||
| 	// temp question value | ||||
| 	cur_question: json::JsonValue, | ||||
| 	// temp value for pre'question fields | ||||
| 	cur_question_pre: json::JsonValue, | ||||
| 	// curent json key | ||||
| 	cur_tag: String, | ||||
| 	// current json value | ||||
| 	cur_content: Vec<String>, | ||||
| 	// need to push temp question value if true | ||||
| 	have_new_question: bool, | ||||
| 	// prev. keyword type | ||||
| 	last_keyword_type: Option<KeywordType>, | ||||
| 	// prev. json key (used for store acummulated content when new keyword readed) | ||||
| 	last_tag: String, | ||||
| } | ||||
|  | ||||
| fn parse_file<R: Read>(file: R) -> Result<json::JsonValue, Box<std::error::Error>> { | ||||
| 	let buf = BufReader::new(file); | ||||
| 	let reader = TextReader::new(buf, KOI8_R, DecoderTrap::Ignore); | ||||
|  | ||||
| 	let patterns = vec![ | ||||
| 		"Чемпионат:", | ||||
| 		"URL:", | ||||
| 		"Дата:", | ||||
| 		"Редактор:", | ||||
| 		"Вид:", | ||||
| 		"Тур:", | ||||
| 		"Вопрос ", | ||||
| 		"Ответ:", | ||||
| 		"Зачет:", | ||||
| 		"Источник:", | ||||
| 		"Автор:", | ||||
| 		"Комментарий:", | ||||
| 	]; | ||||
| 	// init context | ||||
| 	let mut context = Context { | ||||
| 		data: json::JsonValue::new_object(), | ||||
| 		questions: json::JsonValue::new_array(), | ||||
| 		cur_keyword_type: None, | ||||
| 		cur_question: json::JsonValue::new_object(), | ||||
| 		cur_question_pre: json::JsonValue::new_object(), | ||||
| 		cur_tag: String::new(), | ||||
| 		cur_content: Vec::<String>::new(), | ||||
| 		have_new_question: false, | ||||
| 		last_keyword_type: None, | ||||
| 		last_tag: String::new(), | ||||
| 	}; | ||||
| 	let mut ctx = &mut context; | ||||
|  | ||||
| 	for line in reader.lines() { | ||||
| 		// ignore empty lines | ||||
| 		let line_str = String::from(line.unwrap().trim()); | ||||
| 		let line_s = &line_str; | ||||
| 		if 0 == line_s.len() { | ||||
| 			continue; | ||||
| 		} | ||||
| 		// find keywords | ||||
| 		match patterns | ||||
| 			.iter() | ||||
| 			.find(|&&pattern| line_s.starts_with(pattern) && line_s.ends_with(":")) | ||||
| 		{ | ||||
| 			Some(pattern) => { | ||||
| 				ctx.last_keyword_type = ctx.cur_keyword_type; | ||||
| 				ctx.last_tag = ctx.cur_tag.clone(); | ||||
| 				ctx.cur_keyword_type = Some(keyword_type(&pattern)); | ||||
| 				ctx.cur_tag = pattern.replace(" ", "").replace(":", ""); | ||||
|  | ||||
| 				// remember question id | ||||
| 				match ctx.cur_keyword_type { | ||||
| 					Some(KeywordType::QuestionStart) => { | ||||
| 						ctx.cur_question_pre["id"] = line_s.replace(":", "").as_str().into() | ||||
| 					} | ||||
| 					_ => (), | ||||
| 				}; | ||||
|  | ||||
| 				// apply accumulated content when new keyword found | ||||
| 				match ctx.last_keyword_type { | ||||
| 					Some(KeywordType::Global) => { | ||||
| 						ctx.data[&ctx.last_tag] = ctx.cur_content.join("\n").into() | ||||
| 					} | ||||
| 					Some(KeywordType::QuestionPre) => { | ||||
| 						ctx.cur_question_pre[&ctx.last_tag] = ctx.cur_content.join("\n").into(); | ||||
| 					} | ||||
| 					Some(KeywordType::QuestionContent) => { | ||||
| 						ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into(); | ||||
| 					} | ||||
| 					Some(KeywordType::QuestionStart) => { | ||||
| 						// store prev question before reading new | ||||
| 						if ctx.have_new_question { | ||||
| 							ctx.questions.push(ctx.cur_question.clone()).unwrap(); | ||||
| 						} | ||||
| 						// prepare for read new question data with cur_question_pre values | ||||
| 						ctx.cur_question = ctx.cur_question_pre.clone(); | ||||
| 						ctx.cur_question_pre = json::JsonValue::new_object(); | ||||
| 						ctx.cur_question[&ctx.last_tag] = ctx.cur_content.join("\n").into(); | ||||
| 						ctx.have_new_question = true; | ||||
| 					} | ||||
| 					None => (), | ||||
| 				}; | ||||
| 				// clear content | ||||
| 				ctx.cur_content.clear(); | ||||
| 			} | ||||
| 			None => { | ||||
| 				// accumulate content if line is not a keyword | ||||
| 				ctx.cur_content.push(String::from(line_s)); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// finish reading last question | ||||
| 	if ctx.have_new_question && !ctx.cur_content.is_empty() { | ||||
| 		ctx.cur_question[&ctx.cur_tag] = ctx.cur_content.join("\n").as_str().into(); | ||||
| 		ctx.questions.push(ctx.cur_question.clone()).unwrap(); | ||||
| 		ctx.have_new_question = false; | ||||
| 	} | ||||
|  | ||||
| 	ctx.data["Вопросы"] = ctx.questions.clone(); | ||||
| 	Ok(ctx.data.clone()) | ||||
| } | ||||
|  | ||||
| fn main() -> Result<(), Box<std::error::Error>> { | ||||
| 	let fname = std::path::Path::new("./baza.zip"); | ||||
| 	let zip_file = fs::File::open(&fname)?; | ||||
| 	let zip_reader = BufReader::new(zip_file); | ||||
|  | ||||
| 	let mut archive = zip::ZipArchive::new(zip_reader)?; | ||||
|  | ||||
| 	for i in 0..archive.len() { | ||||
| 		let file = archive.by_index(i)?; | ||||
| 		// FIXME | ||||
| 		//if ! file.is_file() { | ||||
| 		//    continue; | ||||
| 		//} | ||||
|  | ||||
| 		let name = file.sanitized_name(); | ||||
| 		println!("{}", name.as_path().display()); | ||||
| 		let data: json::JsonValue = parse_file(file)?; | ||||
| 		let mut outfilename = PathBuf::from("./json"); | ||||
| 		outfilename.push(name); | ||||
| 		outfilename.set_extension("json"); | ||||
|  | ||||
| 		let mut outfile = fs::File::create(outfilename)?; | ||||
|  | ||||
| 		data.write_pretty(&mut outfile, 1)?; | ||||
| 		//data.write(&mut outfile)?; | ||||
| 		//debug | ||||
| 		//break; | ||||
| 	} | ||||
| 	Ok(()) | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user