Solutions to set 1
This commit is contained in:
75
src/english.rs
Normal file
75
src/english.rs
Normal file
@ -0,0 +1,75 @@
|
||||
/// Score returned for input that contains any non-ASCII byte.
const NON_ASCII_SCORE: i64 = -1000;

/// Common English words (each followed by a space) and the bonus granted for
/// every occurrence. Capitalised spellings earn a slightly larger bonus.
const COMMON_WORDS: [(&str, i64); 14] = [
    ("the ", 40),
    ("The ", 50),
    ("be ", 40),
    ("Be ", 50),
    ("to ", 40),
    ("To ", 50),
    ("of ", 40),
    ("Of ", 50),
    ("and ", 40),
    ("And ", 50),
    ("have ", 40),
    ("Have ", 50),
    ("it ", 40),
    ("It ", 50),
];

/// Heuristic contribution of a single character to the English-likeness score.
///
/// Arms are checked in order, so lowercase vowels win over the generic
/// lowercase arm and whitespace (tab, newline, ...) wins over the control arm.
fn char_score(c: char) -> i64 {
    match c {
        'a' | 'e' | 'i' | 'o' | 'u' => 10,
        _ if c.is_ascii_lowercase() => 5,
        _ if c.is_ascii_uppercase() || c.is_ascii_digit() => 2,
        _ if c.is_whitespace() || c.is_ascii_punctuation() => 1,
        _ if c.is_control() => -10,
        // Not reachable for ASCII input; kept as a defensive default.
        _ => -20,
    }
}

/// Rates how much `bs` looks like English text; higher means more English-like.
///
/// Returns `-1000` as soon as the input contains a byte outside the ASCII
/// range. Otherwise the result is the sum of a per-character score (see
/// [`char_score`]) and a bonus for every non-overlapping occurrence of the
/// words listed in [`COMMON_WORDS`]. Empty input scores `0`.
pub fn score(bs: &[u8]) -> i64 {
    // One guard covers both "valid UTF-8" and "ASCII only": pure ASCII is
    // always valid UTF-8, so no separate decoding failure path is needed.
    let Some(text) = std::str::from_utf8(bs).ok().filter(|s| s.is_ascii()) else {
        return NON_ASCII_SCORE;
    };

    let char_total: i64 = text.chars().map(char_score).sum();
    let word_total: i64 = COMMON_WORDS
        .iter()
        .map(|&(word, bonus)| text.matches(word).map(|_| bonus).sum::<i64>())
        .sum();

    char_total + word_total
}
|
||||
|
||||
/// Relative frequency of every possible byte value in a piece of text, used to
/// compare two byte distributions via the Bhattacharyya coefficient.
#[derive(Debug, Clone, PartialEq)]
pub struct Bhattacharyya {
    /// `weights[b]` is the fraction of the analysed bytes that were equal to `b`.
    pub weights: [f32; 256],
}

impl Bhattacharyya {
    /// Builds the byte-frequency distribution of `text`.
    ///
    /// Empty input has no frequencies to measure, so it yields the uniform
    /// distribution (`1/256` for every byte value).
    pub fn compute(text: &[u8]) -> Self {
        if text.is_empty() {
            return Self {
                weights: [1.0 / 256.0; 256],
            };
        }

        let mut counts = [0usize; 256];
        for &byte in text {
            counts[usize::from(byte)] += 1;
        }

        let total = text.len() as f32;
        Self {
            weights: counts.map(|count| count as f32 / total),
        }
    }

    /// Returns the Bhattacharyya coefficient between `self` and `other`: the
    /// sum over all byte values of `sqrt(self.weights[i] * other.weights[i])`.
    ///
    /// This is the coefficient, not the distance (which would be the negated
    /// natural logarithm of this value). Distributions that share no byte
    /// values score `0.0`; larger values mean more overlap.
    pub fn score(&self, other: &Bhattacharyya) -> f32 {
        self.weights
            .iter()
            .zip(&other.weights)
            .fold(0.0, |sum, (p, q)| sum + (p * q).sqrt())
    }
}
|
2
src/lib.rs
Normal file
2
src/lib.rs
Normal file
@ -0,0 +1,2 @@
|
||||
pub mod english;
|
||||
pub mod prelude;
|
113
src/prelude.rs
Normal file
113
src/prelude.rs
Normal file
@ -0,0 +1,113 @@
|
||||
use base64::Engine;
|
||||
|
||||
/// Embeds the file at path `$s` at compile time and decodes its contents from
/// hex into a `Vec<u8>`.
///
/// Whitespace in the file is ignored by the decoder. Panics if the contents
/// are not valid hex.
///
/// The trait is named through `$crate`, so callers do not need to import
/// `ByteBased` themselves for the macro to expand correctly.
#[macro_export]
macro_rules! bvec {
    ($s:expr) => {
        <Vec<u8> as $crate::prelude::ByteBased>::from_hex(include_str!($s)).unwrap()
    };
}
|
||||
|
||||
/// Embeds the file at path `$s` at compile time and decodes its contents from
/// base64 into a `Vec<u8>`.
///
/// Whitespace in the file is ignored by the decoder. Panics if the contents
/// are not valid base64.
///
/// The trait is named through `$crate`, so callers do not need to import
/// `ByteBased` themselves for the macro to expand correctly.
#[macro_export]
macro_rules! bvec64 {
    ($s:expr) => {
        <Vec<u8> as $crate::prelude::ByteBased>::from_b64(include_str!($s)).unwrap()
    };
}
|
||||
|
||||
/// Embeds the file at path `$s` at compile time and decodes every non-blank
/// line from hex, producing a `Vec<Vec<u8>>` with one entry per line.
///
/// Lines are split with `str::lines`, so both `\n` and `\r\n` endings work,
/// and lines containing only whitespace are skipped rather than decoded into
/// empty vectors. Panics if any remaining line is not valid hex.
///
/// The trait is named through `$crate`, so callers do not need to import
/// `ByteBased` themselves for the macro to expand correctly.
#[macro_export]
macro_rules! bvecs {
    ($s:expr) => {
        include_str!($s)
            .lines()
            .filter(|line| !line.trim().is_empty())
            .map(|line| <Vec<u8> as $crate::prelude::ByteBased>::from_hex(line).unwrap())
            .collect::<Vec<Vec<u8>>>()
    };
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum PreludeError {
|
||||
B64Err(base64::DecodeError),
|
||||
FHErr(hex::FromHexError),
|
||||
Utf8Err(std::str::Utf8Error),
|
||||
Error(&'static str),
|
||||
}
|
||||
|
||||
pub type CPals<T> = std::result::Result<T, PreludeError>;
|
||||
|
||||
pub trait ByteBased: Sized {
|
||||
fn from_hex(h: &str) -> CPals<Self>;
|
||||
fn to_hex(&self) -> String;
|
||||
|
||||
fn from_b64(b: &str) -> CPals<Self>;
|
||||
fn to_b64(&self) -> String;
|
||||
|
||||
fn from_text(t: &str) -> Self;
|
||||
fn to_text(&self) -> CPals<&str>;
|
||||
|
||||
fn xor_with(&self, other: &Self) -> CPals<Self>;
|
||||
fn xor_repeating(&self, other: &Self) -> CPals<Self>;
|
||||
|
||||
fn hamming(&self, other: &Self) -> CPals<u64>;
|
||||
}
|
||||
|
||||
impl ByteBased for Vec<u8> {
|
||||
fn from_hex(h: &str) -> CPals<Self> {
|
||||
hex::decode(remove_ws(h)).map_err(PreludeError::FHErr)
|
||||
}
|
||||
|
||||
fn to_hex(&self) -> String {
|
||||
hex::encode(self)
|
||||
}
|
||||
|
||||
fn from_b64(b: &str) -> CPals<Self> {
|
||||
base64::engine::general_purpose::STANDARD.decode(remove_ws(b)).map_err(PreludeError::B64Err)
|
||||
}
|
||||
|
||||
fn to_b64(&self) -> String {
|
||||
base64::engine::general_purpose::STANDARD.encode(self)
|
||||
}
|
||||
|
||||
fn from_text(tx: &str) -> Self {
|
||||
tx.as_bytes().to_vec()
|
||||
}
|
||||
|
||||
fn to_text(&self) -> CPals<&str> {
|
||||
std::str::from_utf8(self).map_err(PreludeError::Utf8Err)
|
||||
}
|
||||
|
||||
fn xor_with(&self, other: &Self) -> CPals<Self> {
|
||||
if other.len() != self.len() { return Err(PreludeError::Error("couldn't xor, different lengths")) }
|
||||
|
||||
let mut vec2 = vec![0; self.len()];
|
||||
for i in 0..self.len() {
|
||||
vec2[i] = self[i] ^ other[i];
|
||||
};
|
||||
Ok(vec2)
|
||||
}
|
||||
|
||||
fn xor_repeating(&self, other: &Self) -> CPals<Self> {
|
||||
if other.len() == 0 { return Err(PreludeError::Error("couldn't xor, key was empty")) }
|
||||
let mut vec2 = vec![0; self.len()];
|
||||
for i in 0..self.len() {
|
||||
vec2[i] = self[i] ^ other[i % other.len()];
|
||||
};
|
||||
Ok(vec2)
|
||||
}
|
||||
|
||||
fn hamming(&self, other: &Self) -> CPals<u64> {
|
||||
if other.len() != self.len() { return Err(PreludeError::Error("couldn't ham, different lengths")) }
|
||||
|
||||
let mut bdist: u64 = 0;
|
||||
for i in 0..self.len() {
|
||||
bdist += (self[i] ^ other[i]).count_ones() as u64;
|
||||
}
|
||||
Ok(bdist)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a copy of `s` with every whitespace character removed.
///
/// Whitespace is whatever `char::is_whitespace` reports, so this covers
/// spaces, tabs, line breaks and non-ASCII Unicode whitespace alike.
pub fn remove_ws(s: &str) -> String {
    // The result can never be longer than the input, so reserve once up front.
    let mut stripped = String::with_capacity(s.len());
    stripped.extend(s.chars().filter(|c| !c.is_whitespace()));
    stripped
}
|
Reference in New Issue
Block a user