check for utf-8 encodings

This commit is contained in:
Dmitry Belyaev 2022-09-24 01:32:24 +03:00
parent 0b98540e35
commit c95c1fb785
Signed by: b4tman
GPG Key ID: 41A00BF15EA7E5F3
1 changed file with 25 additions and 8 deletions

View File

@@ -7,10 +7,10 @@ use async_zip::write::{EntryOptions, ZipFileWriter};
use async_zip::Compression;
use clap::{Parser, ValueEnum};
use encoding::label::encoding_from_whatwg_label;
use encoding::EncodingRef;
use encoding::{all::UTF_8, Encoding, EncodingRef};
use encoding::{DecoderTrap, EncoderTrap};
use regex::Regex;
use tokio::io::{AsyncReadExt, BufWriter, AsyncWriteExt};
use tokio::io::{AsyncReadExt, AsyncWriteExt, BufWriter};
use tokio::sync::mpsc::{self, UnboundedReceiver, UnboundedSender};
use tokio::{fs, task};
@@ -119,13 +119,30 @@ async fn transcoder_task(
encoding_from: EncodingRef,
encoding_to: EncodingRef,
) {
let is_encodings_same = encoding_from.name() == encoding_to.name();
let is_src_encodig_native = UTF_8.name() == encoding_from.name();
let is_dst_encodig_native = UTF_8.name() == encoding_to.name();
while let Some(FileData { name, data }) = rx.recv().await {
let new_data = task::block_in_place(move || {
let text = encoding_from.decode(&data, DecoderTrap::Ignore).unwrap();
encoding_to
.encode(text.as_str(), EncoderTrap::Ignore)
.unwrap()
});
let new_data = if is_encodings_same {
data
} else {
task::block_in_place(move || {
let text = if is_src_encodig_native {
String::from_utf8(data).unwrap()
} else {
encoding_from.decode(&data, DecoderTrap::Ignore).unwrap()
};
if is_dst_encodig_native {
text.into_bytes()
} else {
encoding_to
.encode(text.as_str(), EncoderTrap::Ignore)
.unwrap()
}
})
};
tx.send(FileData {
name,