chgk_ledb/lib/src/source.rs

580 lines
18 KiB
Rust
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use serde_derive::{Deserialize, Serialize};
/// One quiz question as stored in the upstream JSON dumps.
///
/// The serde `alias` attributes accept the original Russian key names used
/// by the source files; on serialization the English field names are
/// written, and empty strings / a zero `num` are omitted.
#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq)]
pub struct SourceQuestion {
    /// Question number within its batch (0 means "unset" and is skipped).
    #[serde(default, skip_serializing_if = "u32_is_zero")]
    pub num: u32,
    /// Question identifier (e.g. "Вопрос 1" in the fixtures below).
    #[serde(default)]
    pub id: String,
    /// Question text (alias: "Вопрос" = question).
    #[serde(alias = "Вопрос")]
    pub description: String,
    /// Answer text (alias: "Ответ" = answer).
    #[serde(alias = "Ответ")]
    pub answer: String,
    /// Question author (alias: "Автор" = author).
    #[serde(alias = "Автор", default, skip_serializing_if = "String::is_empty")]
    pub author: String,
    /// Comment on the question (alias: "Комментарий" = comment).
    #[serde(
        default,
        alias = "Комментарий",
        skip_serializing_if = "String::is_empty"
    )]
    pub comment: String,
    /// Secondary comment / info (aliases: "Комментарии" = comments,
    /// "Инфо" = info).
    #[serde(
        default,
        alias = "Комментарии",
        alias = "Инфо",
        skip_serializing_if = "String::is_empty"
    )]
    pub comment1: String,
    /// Tour the question belongs to (alias: "Тур" = tour).
    #[serde(default, alias = "Тур", skip_serializing_if = "String::is_empty")]
    pub tour: String,
    /// Reference link (aliases: "Ссылка" = link, "URL").
    #[serde(
        default,
        alias = "Ссылка",
        alias = "URL",
        skip_serializing_if = "String::is_empty"
    )]
    pub url: String,
    /// Date string, kept verbatim — no date parsing happens here
    /// (alias: "Дата" = date).
    #[serde(default, alias = "Дата", skip_serializing_if = "String::is_empty")]
    pub date: String,
    /// Who processed the entry (alias: "Обработан" = processed).
    #[serde(default, alias = "Обработан", skip_serializing_if = "String::is_empty")]
    pub processed_by: String,
    /// Editor of the entry (alias: "Редактор" = editor).
    #[serde(default, alias = "Редактор", skip_serializing_if = "String::is_empty")]
    pub redacted_by: String,
    /// Copyright notice (alias: "Копирайт" = copyright).
    #[serde(default, alias = "Копирайт", skip_serializing_if = "String::is_empty")]
    pub copyright: String,
    /// Theme of the question (alias: "Тема" = theme).
    #[serde(default, alias = "Тема", skip_serializing_if = "String::is_empty")]
    pub theme: String,
    /// Kind/type of the question (aliases: "Вид" = kind, "Тип" = type).
    #[serde(
        default,
        alias = "Вид",
        alias = "Тип",
        skip_serializing_if = "String::is_empty"
    )]
    pub kind: String,
    /// Source reference (alias: "Источник" = source).
    #[serde(default, alias = "Источник", skip_serializing_if = "String::is_empty")]
    pub source: String,
    /// Rating, kept as a raw string (alias: "Рейтинг" = rating).
    #[serde(default, alias = "Рейтинг", skip_serializing_if = "String::is_empty")]
    pub rating: String,
}
/// A batch (package/championship) of questions, i.e. one JSON file from the
/// source archive, with its metadata and the contained questions.
///
/// As with [`SourceQuestion`], the `alias` attributes accept the Russian
/// key names on input only; empty fields are skipped on output.
#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq)]
pub struct SourceQuestionsBatch {
    /// Name of the file this batch came from; filled by the reader side,
    /// not present in the JSON itself (hence `default`).
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub filename: String,
    /// Batch title (aliases: "Пакет" = package, "Чемпионат" = championship).
    #[serde(alias = "Пакет", alias = "Чемпионат")]
    pub description: String,
    /// Batch author (alias: "Автор" = author).
    #[serde(default, alias = "Автор", skip_serializing_if = "String::is_empty")]
    pub author: String,
    /// Comment / info on the batch (aliases: "Комментарий" = comment,
    /// "Комментарии" = comments, "Инфо" = info).
    #[serde(
        default,
        alias = "Комментарий",
        alias = "Комментарии",
        alias = "Инфо",
        skip_serializing_if = "String::is_empty"
    )]
    pub comment: String,
    /// Reference link (aliases: "Ссылка" = link, "URL").
    #[serde(
        default,
        alias = "Ссылка",
        alias = "URL",
        skip_serializing_if = "String::is_empty"
    )]
    pub url: String,
    /// Date string, kept verbatim (alias: "Дата" = date).
    #[serde(default, alias = "Дата", skip_serializing_if = "String::is_empty")]
    pub date: String,
    /// Who processed the batch (alias: "Обработан" = processed).
    #[serde(default, alias = "Обработан", skip_serializing_if = "String::is_empty")]
    pub processed_by: String,
    /// Editor of the batch (alias: "Редактор" = editor).
    #[serde(default, alias = "Редактор", skip_serializing_if = "String::is_empty")]
    pub redacted_by: String,
    /// Copyright notice (alias: "Копирайт" = copyright).
    #[serde(default, alias = "Копирайт", skip_serializing_if = "String::is_empty")]
    pub copyright: String,
    /// Theme of the batch (alias: "Тема" = theme).
    #[serde(default, alias = "Тема", skip_serializing_if = "String::is_empty")]
    pub theme: String,
    /// Kind/type of the batch (aliases: "Вид" = kind, "Тип" = type).
    #[serde(
        default,
        alias = "Вид",
        alias = "Тип",
        skip_serializing_if = "String::is_empty"
    )]
    pub kind: String,
    /// Source reference (alias: "Источник" = source).
    #[serde(default, alias = "Источник", skip_serializing_if = "String::is_empty")]
    pub source: String,
    /// Rating, kept as a raw string (alias: "Рейтинг" = rating).
    #[serde(default, alias = "Рейтинг", skip_serializing_if = "String::is_empty")]
    pub rating: String,
    /// The questions contained in this batch (alias: "Вопросы" = questions).
    #[serde(alias = "Вопросы")]
    pub questions: Vec<SourceQuestion>,
}
/// Serde helper for `skip_serializing_if`: reports whether a counter is
/// still at its zero default, so the field can be omitted from the output.
fn u32_is_zero(num: &u32) -> bool {
    matches!(*num, 0)
}
#[cfg(any(feature = "convert", feature = "source"))]
pub mod reader_sync {
    use std::io::{Read, Seek};

    use zip::ZipArchive;

    use super::SourceQuestionsBatch;

    /// Iterator over the JSON question batches stored in a ZIP archive.
    ///
    /// Yields one `(entry_name, parse_result)` pair per archive entry, in
    /// archive order. Parse failures are reported per entry instead of
    /// aborting the whole iteration.
    pub struct SourceQuestionsZipReader<R>
    where
        R: Read + Seek,
    {
        zipfile: ZipArchive<R>,
        /// Index of the next entry to yield; `None` until iteration starts
        /// (treated as 0).
        index: Option<usize>,
    }

    impl<R> SourceQuestionsZipReader<R>
    where
        R: Read + Seek,
    {
        fn new(zipfile: ZipArchive<R>) -> Self {
            SourceQuestionsZipReader {
                zipfile,
                index: None,
            }
        }

        /// Number of entries still to be yielded.
        fn remaining(&self) -> usize {
            self.zipfile.len().saturating_sub(self.index.unwrap_or(0))
        }
    }

    impl<R> Iterator for SourceQuestionsZipReader<R>
    where
        R: Read + Seek,
    {
        type Item = (String, Result<SourceQuestionsBatch, serde_json::Error>);

        fn next(&mut self) -> Option<Self::Item> {
            // `nth(0)` consumes and returns exactly one element.
            self.nth(0)
        }

        /// Skips `n` entries from the current position and reads the next one.
        ///
        /// Fixed: the previous implementation interpreted `n` as an *absolute*
        /// archive index, violating the `Iterator::nth` contract, which
        /// requires `n` to be relative to the current position.
        fn nth(&mut self, n: usize) -> Option<Self::Item> {
            let len = self.zipfile.len();
            let target = self.index.unwrap_or(0).checked_add(n)?;
            if target >= len {
                // Mark the iterator as exhausted so `size_hint` reports 0.
                self.index = Some(len);
                return None;
            }
            self.index = Some(target + 1);
            let file = self
                .zipfile
                .by_index(target)
                .expect("index was checked against the archive length");
            let name = file.mangled_name();
            let name_str = name
                .to_str()
                .expect("archive entry name is not valid UTF-8");
            let data: Result<SourceQuestionsBatch, _> = serde_json::from_reader(file);
            Some((String::from(name_str), data))
        }

        /// Exact bounds on the number of remaining entries.
        ///
        /// Fixed: the previous version computed `len - (index + 1)` and thus
        /// under-reported by one (e.g. `(0, Some(0))` for a fresh iterator
        /// over a single-entry archive).
        fn size_hint(&self) -> (usize, Option<usize>) {
            let rem = self.remaining();
            (rem, Some(rem))
        }
    }

    // `len()` and `count()` come from the default implementations, which rely
    // on the exact `size_hint` above. The previous explicit overrides always
    // returned the *total* entry count, which is wrong once the iterator has
    // been partially consumed (ExactSizeIterator requires `len()` to agree
    // with `size_hint`).
    impl<R> ExactSizeIterator for SourceQuestionsZipReader<R> where R: Read + Seek {}

    /// Extension trait turning a [`ZipArchive`] into a batch iterator.
    pub trait ReadSourceQuestionsBatches<R>
    where
        R: Read + Seek,
    {
        fn source_questions(self) -> SourceQuestionsZipReader<R>;
    }

    impl<R> ReadSourceQuestionsBatches<R> for ZipArchive<R>
    where
        R: Read + Seek,
    {
        fn source_questions(self) -> SourceQuestionsZipReader<R> {
            SourceQuestionsZipReader::new(self)
        }
    }

    #[cfg(test)]
    mod test {
        use super::super::test::sample_batch;
        use super::*;
        use std::fs;
        use std::{io::Write, iter, path::Path};
        use tempfile::tempdir;

        /// Writes a single-entry ZIP (entry "test.json") containing the
        /// serialized sample batch.
        fn write_sample_zip<P>(path: P)
        where
            P: AsRef<Path>,
        {
            let batch = sample_batch();
            let z_file = fs::File::create(path).expect("create zip file");
            let mut zip_file = zip::ZipWriter::new(z_file);
            let options =
                zip::write::FileOptions::default().compression_method(zip::CompressionMethod::Zstd);
            zip_file
                .start_file("test.json", options)
                .expect("zip start file");
            let data = serde_json::to_vec(&batch).unwrap();
            let amount = zip_file.write(data.as_slice()).expect("write entry");
            assert_eq!(amount, data.len());
            zip_file.finish().expect("finish zip file");
        }

        #[test]
        fn test_source_questions_get() {
            let expected_batch = sample_batch();
            let dir = tempdir().expect("tempdir");
            // write sample
            let tmpfile_zip = dir.path().join("test.zip");
            write_sample_zip(&tmpfile_zip);
            let z_file = fs::File::open(tmpfile_zip).expect("open zip file");
            let zip_file = zip::ZipArchive::new(z_file).expect("open zip file reader");
            let mut source = zip_file.source_questions();
            assert_eq!(source.len(), 1);
            let actual = source.next().expect("get batch");
            assert_eq!(actual.0, "test.json");
            assert_eq!(actual.1.expect("parse batch"), expected_batch);
        }

        #[test]
        fn test_source_questions_iter() {
            let expected_batch = sample_batch();
            let dir = tempdir().expect("tempdir");
            // write sample
            let tmpfile_zip = dir.path().join("test.zip");
            write_sample_zip(&tmpfile_zip);
            let z_file = fs::File::open(tmpfile_zip).expect("open zip file");
            let zip_file = zip::ZipArchive::new(z_file).expect("open zip file reader");
            let source = zip_file.source_questions();
            assert_eq!(source.len(), 1);
            let expected_iter = iter::once((String::from("test.json"), Ok(expected_batch)));
            assert!(source
                .map(|x| (x.0, x.1.map_err(|e| e.to_string())))
                .eq(expected_iter));
        }
    }
}
#[cfg(any(feature = "convert", feature = "source"))]
pub use reader_sync::{ReadSourceQuestionsBatches, SourceQuestionsZipReader};
#[cfg(any(feature = "convert_async", feature = "source_async"))]
pub mod reader_async {
    use async_stream::stream;
    use async_zip::tokio::read::seek::ZipFileReader;
    use futures_core::stream::Stream;
    use futures_util::AsyncReadExt;
    use tokio::io::{AsyncRead, AsyncSeek};

    use super::SourceQuestionsBatch;

    /// Async counterpart of the sync ZIP reader: reads JSON question batches
    /// out of a ZIP archive one entry at a time.
    pub struct SourceQuestionsZipReaderAsync<R>
    where
        R: AsyncRead + AsyncSeek + Unpin,
    {
        zipfile: ZipFileReader<R>,
        /// Index of the next entry to yield; `None` until iteration starts
        /// (treated as 0).
        index: Option<usize>,
    }

    impl<R> SourceQuestionsZipReaderAsync<R>
    where
        R: AsyncRead + AsyncSeek + Unpin,
    {
        pub fn new(zipfile: ZipFileReader<R>) -> Self {
            SourceQuestionsZipReaderAsync {
                zipfile,
                index: None,
            }
        }

        /// Total number of entries in the archive (not the number remaining).
        pub fn len(&self) -> usize {
            self.zipfile.file().entries().len()
        }

        pub fn is_empty(&self) -> bool {
            self.len() == 0
        }

        /// Reads and parses the entry at `index`.
        ///
        /// The outer `Err(String)` reports an out-of-range index or an I/O /
        /// archive error; a JSON parse failure is carried in the inner
        /// `Result` so callers can distinguish the two.
        pub async fn get(
            &mut self,
            index: usize,
        ) -> Result<(String, Result<SourceQuestionsBatch, serde_json::Error>), String> {
            let len = self.len();
            if index >= len {
                return Err(format!("get index={index}, when len={len}"));
            }
            let reader = self.zipfile.reader_with_entry(index).await;
            if let Err(error) = reader {
                return Err(format!("reader_with_entry: {error:?}"));
            }
            let mut reader = reader.unwrap();
            let filename = reader.entry().filename().clone().into_string().unwrap();
            let mut data: Vec<u8> = Vec::new();
            let readed = reader.read_to_end(&mut data).await;
            if let Err(error) = readed {
                return Err(format!("read_to_end: {error:?}"));
            }
            let parsed: Result<SourceQuestionsBatch, _> = serde_json::from_slice(&data);
            Ok((filename, parsed))
        }

        /// Reads the next entry in order, or `None` once the archive is
        /// exhausted.
        ///
        /// Fixed: the previous version only initialized `self.index` when the
        /// archive was non-empty and then called `self.index.unwrap()`, so
        /// the first call on an *empty* archive panicked instead of
        /// returning `None`.
        pub async fn get_next(
            &mut self,
        ) -> Option<Result<(String, Result<SourceQuestionsBatch, serde_json::Error>), String>> {
            let current = self.index.unwrap_or(0);
            if current >= self.len() {
                return None;
            }
            let item = self.get(current).await;
            self.index = Some(current + 1);
            Some(item)
        }

        /// Streams all readable entries in archive order.
        ///
        /// NOTE(review): the stream ends silently at the first *read* error
        /// (`Some(Err(_))` from [`Self::get_next`] stops the loop); per-entry
        /// JSON parse errors are still yielded to the consumer.
        pub fn stream(
            &mut self,
        ) -> impl Stream<Item = (String, Result<SourceQuestionsBatch, serde_json::Error>)> + '_
        {
            stream! {
                while let Some(Ok(item)) = self.get_next().await {
                    yield item
                }
            }
        }
    }

    /// Extension trait turning a [`ZipFileReader`] into an async batch reader.
    pub trait ReadSourceQuestionsBatchesAsync<R>
    where
        R: AsyncRead + AsyncSeek + Unpin,
    {
        fn source_questions(self) -> SourceQuestionsZipReaderAsync<R>;
    }

    impl<R> ReadSourceQuestionsBatchesAsync<R> for ZipFileReader<R>
    where
        R: AsyncRead + AsyncSeek + Unpin,
    {
        fn source_questions(self) -> SourceQuestionsZipReaderAsync<R> {
            SourceQuestionsZipReaderAsync::new(self)
        }
    }

    #[cfg(test)]
    mod test {
        use crate::source::SourceQuestion;

        use super::super::test::sample_batch;
        use super::*;
        use async_zip::{base::write::ZipFileWriter, ZipEntryBuilder};
        use core::fmt::Debug;
        use futures_util::StreamExt;
        use std::path::Path;
        use tempfile::tempdir;
        use tokio::fs;

        /// Writes a single-entry ZIP (entry "test.json") containing the
        /// serialized sample batch.
        async fn write_sample_zip<P>(path: P)
        where
            P: AsRef<Path>,
        {
            let batch = sample_batch();
            let z_file = fs::File::create(path).await.expect("create zip file");
            let mut zip_file = ZipFileWriter::with_tokio(z_file);
            let entry =
                ZipEntryBuilder::new("test.json".into(), async_zip::Compression::Zstd).build();
            zip_file
                .write_entry_whole(entry, serde_json::to_vec(&batch).unwrap().as_slice())
                .await
                .expect("write entry");
            zip_file.close().await.expect("close zip");
        }

        async fn assert_data_rref_eq<T>((x, y): (T, &T))
        where
            T: PartialEq + Debug,
        {
            assert_eq!(x, *y);
        }

        #[tokio::test]
        async fn test_source_questions_stream() {
            let expected_batch = sample_batch();
            let dir = tempdir().expect("tempdir");
            // write sample
            let tmpfile_zip = dir.path().join("test.zip");
            write_sample_zip(&tmpfile_zip).await;
            let mut z_file = fs::File::open(tmpfile_zip).await.expect("open zip file");
            let zip_file = ZipFileReader::with_tokio(&mut z_file)
                .await
                .expect("open zip file reader");
            let expected_count = expected_batch.questions.len();
            let expected_stream = futures::stream::iter(expected_batch.questions.iter());
            let mut actual_source = zip_file.source_questions();
            let actual_stream = actual_source.stream();
            let mut actual_count: usize = 0;
            actual_stream
                .flat_map(|x| futures::stream::iter(x.1.expect("parse batch").questions))
                .zip(expected_stream)
                .map(|x| {
                    actual_count += 1;
                    x
                })
                .for_each(assert_data_rref_eq::<SourceQuestion>)
                .await;
            assert_eq!(actual_count, expected_count);
        }

        #[tokio::test]
        async fn test_source_questions_get() {
            let expected_batch = sample_batch();
            let dir = tempdir().expect("tempdir");
            // write sample
            let tmpfile_zip = dir.path().join("test.zip");
            write_sample_zip(&tmpfile_zip).await;
            let mut z_file = fs::File::open(tmpfile_zip).await.expect("open zip file");
            let zip_file = ZipFileReader::with_tokio(&mut z_file)
                .await
                .expect("open zip file reader");
            let mut source = zip_file.source_questions();
            assert_eq!(source.len(), 1);
            let actual = source.get(0).await.expect("get batch");
            assert_eq!(actual.0, "test.json");
            assert_eq!(actual.1.expect("parse batch"), expected_batch);
        }
    }
}
#[cfg(any(feature = "convert_async", feature = "source_async"))]
pub use reader_async::{ReadSourceQuestionsBatchesAsync, SourceQuestionsZipReaderAsync};
#[cfg(test)]
mod test {
    use super::*;
    use insta::assert_yaml_snapshot;
    use serde_json::json;

    /// Fixture: a two-question batch shared by the snapshot tests below and
    /// by the ZIP-reader tests in the sibling `reader_sync`/`reader_async`
    /// modules.
    pub fn sample_batch() -> SourceQuestionsBatch {
        SourceQuestionsBatch {
            description: "Тестовый".into(),
            date: "00-000-2000".into(),
            questions: vec![
                SourceQuestion {
                    id: "Вопрос 1".into(),
                    description: "Сколько будет (2 * 2 * 2 + 2) * 2 * 2 + 2".into(),
                    answer: "42".into(),
                    ..Default::default()
                },
                SourceQuestion {
                    id: "Вопрос 2".into(),
                    description: "Зимой и летом одним цветом".into(),
                    answer: "ёлка".into(),
                    ..Default::default()
                },
            ],
            ..Default::default()
        }
    }

    /// Serialization snapshot: English field names are written and
    /// empty/default fields are skipped per the `skip_serializing_if`
    /// attributes.
    #[test]
    fn test_batch_ser() {
        let batch = sample_batch();
        assert_yaml_snapshot!(batch, @r#"
---
description: Тестовый
date: 00-000-2000
questions:
- id: Вопрос 1
description: Сколько будет (2 * 2 * 2 + 2) * 2 * 2 + 2
answer: "42"
- id: Вопрос 2
description: Зимой и летом одним цветом
answer: ёлка
"#);
    }

    /// Deserialization: the Russian aliases ("Чемпионат", "Дата", "Вопросы",
    /// "Вопрос", "Ответ") are accepted on input and map to the English
    /// fields, as shown by the snapshot.
    #[test]
    fn test_batch_de() {
        let batch_from_json: Result<SourceQuestionsBatch, _> = serde_json::from_value(json!({
            "Чемпионат": "Тестовый",
            "Дата": "00-000-2000",
            "Вопросы": [
                {
                    "id": "Вопрос 1",
                    "Вопрос": "Сколько будет (2 * 2 * 2 + 2) * 2 * 2 + 2",
                    "Ответ": "42",
                },
                {
                    "id": "Вопрос 2",
                    "Вопрос": "Зимой и летом одним цветом",
                    "Ответ": "ёлка",
                },
            ]
        }));
        assert!(batch_from_json.is_ok());
        assert_yaml_snapshot!(batch_from_json.unwrap(), @r#"
---
description: Тестовый
date: 00-000-2000
questions:
- id: Вопрос 1
description: Сколько будет (2 * 2 * 2 + 2) * 2 * 2 + 2
answer: "42"
- id: Вопрос 2
description: Зимой и летом одним цветом
answer: ёлка
"#);
    }
}