|
#![feature(let_chains)]
|
|
|
|
use std::{env, fs};
|
|
|
|
/// Returns a copy of `content` that keeps only the allow-listed characters.
///
/// The allow-list is: ASCII digits `0`-`9`, lowercase ASCII letters `a`-`z`,
/// the space and newline characters, and the punctuation `.`, `,`, `!`, `?`.
/// Every other character is removed (not replaced) — that includes uppercase
/// letters, tabs and all non-ASCII text.
fn clean_content(content: &str) -> String {
    content
        .chars()
        .filter(|&c| matches!(c, '0'..='9' | 'a'..='z' | ' ' | '\n' | '.' | ',' | '!' | '?'))
        .collect()
}
|
|
|
|
/// Splits `content` into its dot-terminated sentences.
///
/// The text is cut at every `.`; the dots themselves are not part of the
/// returned slices, and leading whitespace is trimmed from each piece
/// (trailing whitespace is kept). Consecutive dots yield empty sentences.
///
/// Only text that is followed by a dot counts as a sentence, so the text after
/// the final dot is discarded. Input without any dot yields an empty vector.
fn get_sentences(content: &str) -> Vec<&str> {
    let mut sentences: Vec<&str> = content.split('.').map(str::trim_start).collect();

    // `split` always yields one final piece holding whatever follows the last
    // dot: an empty remainder when the text ends with a dot, or an unterminated
    // fragment otherwise. Neither is a sentence, so it is dropped
    // unconditionally. (Checking that piece for a trailing '.' can never
    // succeed, because the separator is not included in the pieces.)
    sentences.pop();

    sentences
}
|
|
|
|
/// Yields the whitespace-separated words of `sentences`.
///
/// The returned iterator is lazy, borrows from the input and can be cloned to
/// walk the words more than once. Runs of whitespace never produce empty
/// words; punctuation attached to a word stays part of it.
fn get_words(sentences: &str) -> impl Iterator<Item = &str> + Clone {
    str::split_whitespace(sentences)
}
|
|
|
|
/// Reports whether `word` is written entirely in capital letters.
///
/// A word qualifies when it contains at least one ASCII uppercase letter and
/// no ASCII lowercase letter or ASCII digit. Other characters (punctuation,
/// non-ASCII text) are ignored, so `"FREE!!!"` qualifies.
///
/// The empty string and tokens without any uppercase letter (such as `"-"` or
/// `"..."`) do not qualify; without that requirement every punctuation-only
/// token would be counted as a capitalized word.
fn is_fully_capitalized_word(word: &str) -> bool {
    let mut has_uppercase = false;

    for c in word.chars() {
        if c.is_ascii_uppercase() {
            has_uppercase = true;
        } else if c.is_ascii_alphanumeric() {
            // A lowercase letter or a digit disqualifies the whole word.
            return false;
        }
    }

    has_uppercase
}
|
|
|
|
fn get_capitalized_words(content: &str) -> Vec<&str> {
|
|
let sentences = get_sentences(content);
|
|
let mut cap_words = vec![];
|
|
|
|
for sentence in sentences {
|
|
// Always skip the first word since sentences start with
|
|
for word in get_words(sentence).skip(1) {
|
|
if is_fully_capitalized_word(word) {
|
|
cap_words.push(word);
|
|
}
|
|
}
|
|
}
|
|
|
|
cap_words
|
|
}
|
|
|
|
fn get_numbers(content: &str) -> Vec<String> {
|
|
let clean = clean_content(content);
|
|
|
|
clean.split(|c: char| c.is_ascii_digit())
|
|
.map(|n| n.to_string())
|
|
.collect()
|
|
}
|
|
|
|
fn get_forbidden_words(content: &str) -> Vec<&str> {
|
|
fn check_forbidden(w: &str) -> bool {
|
|
FORBIDDEN_WORDS.iter()
|
|
.find(|fw| str::eq_ignore_ascii_case(w, fw))
|
|
.is_some()
|
|
}
|
|
|
|
get_words(content)
|
|
.filter(|w| check_forbidden(w))
|
|
.collect()
|
|
}
|
|
|
|
fn analyze(data: &str) {
|
|
let clean_data = clean_content(data);
|
|
drop(clean_data); // You aren't actually using clean_data :O
|
|
|
|
// All capitalized words
|
|
let cap_words = get_capitalized_words(data);
|
|
println!("All capitalized words: {}", cap_words.len());
|
|
|
|
// All sentences
|
|
let sentences = get_sentences(data);
|
|
println!("Sentences: {}", sentences.len());
|
|
|
|
// All words
|
|
let words = get_words(data);
|
|
println!("Words: {}", words.clone().count());
|
|
|
|
// Numbers
|
|
let numbers = get_numbers(data);
|
|
println!("Numbers: {}", numbers.len());
|
|
|
|
// Forbidden words
|
|
let fw = get_forbidden_words(data);
|
|
println!("Forbidden words: {}", fw.len());
|
|
|
|
let word_count_per_sentence = words.count() / sentences.len();
|
|
println!("Word count per sentence: {}", word_count_per_sentence);
|
|
}
|
|
|
|
fn main() {
|
|
// Read in files from args
|
|
for arg in env::args().skip(1) { // skip program arg
|
|
let Ok(text) = fs::read_to_string(&arg) else {
|
|
eprintln!("{arg} isn't a valid file or couldn't be read");
|
|
continue;
|
|
};
|
|
|
|
analyze(&text);
|
|
}
|
|
|
|
// analyze(&SPAM1);
|
|
}
|
|
|
|
/// Lowercase tokens that are treated as spam indicators.
///
/// `get_forbidden_words` compares every whitespace-separated token of the
/// analyzed text against these entries as a whole, ignoring ASCII case.
static FORBIDDEN_WORDS: &[&str] = &[
    "recovery", "techie", "http", "https", "digital", "hack", "::", "//", "com",
    "@", "crypto", "bitcoin", "wallet", "hacker", "welcome", "whatsapp", "email", "cryptocurrency",
    "stolen", "freeze", "quick", "crucial", "tracing", "scammers",
    // NOTE(review): "expers" looks like a misspelling of "experts". The
    // correctly spelled word is added; the original entry is kept so that
    // anything matched before is still matched — confirm and remove if unwanted.
    "expers", "experts",
    "hire", "century",
    "transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds",
];
|
|
|