// isspam/isspam_v1.rs at main

 #![feature(let_chains)]
 use std::{env, fs};
 /// Returns a copy of `content` with every character outside the allow-list removed.
 ///
 /// The allow-list is: ASCII digits, lowercase ASCII letters, space, newline,
 /// and the punctuation `.`, `,`, `!`, `?`. Everything else, uppercase letters
 /// included, is dropped.
 fn clean_content(content: &str) -> String {
     let mut kept = String::with_capacity(content.len());
     for c in content.chars() {
         if matches!(c, '0'..='9' | 'a'..='z' | ' ' | '\n' | '.' | ',' | '!' | '?') {
             kept.push(c);
         }
     }
     kept
 }
 /// Splits `content` into sentences, a sentence being the text in front of each `.`.
 ///
 /// Leading whitespace is trimmed from every sentence. Whatever follows the
 /// final `.` (or the whole input when it contains no `.` at all) is not a
 /// terminated sentence and is discarded. Consecutive dots produce empty
 /// sentences, which are kept.
 fn get_sentences(content: &str) -> Vec<&str> {
     let mut sentences: Vec<&str> = content.split('.').map(str::trim_start).collect();
     // `split` always yields one final piece after the last dot: an empty string
     // when the text ends with `.`, an unterminated fragment otherwise. Pieces
     // never contain a dot themselves, so that piece is dropped unconditionally.
     sentences.pop();
     sentences
 }
 /// Lazily yields the whitespace-separated words of `sentences`.
 ///
 /// Runs of whitespace never produce empty words. The returned iterator is
 /// cloneable, so callers can traverse it more than once.
 fn get_words(sentences: &str) -> impl Iterator<Item = &str> + Clone {
     sentences
         .split(char::is_whitespace)
         .filter(|word| !word.is_empty())
 }
 /// Reports whether `word` is written entirely in capitals.
 ///
 /// A word qualifies when it contains at least one ASCII uppercase letter and
 /// no ASCII lowercase letter or digit. Punctuation and non-ASCII characters
 /// are ignored, so `"FREE!!!"` qualifies. Tokens without any uppercase letter
 /// (`""`, `"!!!"`, `"--"`) do not.
 fn is_fully_capitalized_word(word: &str) -> bool {
     let mut has_upper = false;
     for c in word.chars() {
         if c.is_ascii_uppercase() {
             has_upper = true;
         } else if c.is_ascii_alphanumeric() {
             // A lowercase letter or a digit disqualifies the whole word.
             return false;
         }
     }
     // Without this, punctuation-only tokens would pass vacuously.
     has_upper
 }
 /// Collects every fully capitalized word found in the sentences of `content`.
 ///
 /// The first word of each sentence is always skipped (presumably because
 /// sentences normally open with a capital anyway). The remaining words are
 /// kept when `is_fully_capitalized_word` accepts them.
 fn get_capitalized_words(content: &str) -> Vec<&str> {
     get_sentences(content)
         .into_iter()
         .flat_map(|sentence| get_words(sentence).skip(1))
         .filter(|word| is_fully_capitalized_word(word))
         .collect()
 }
 /// Extracts every maximal run of ASCII digits from the cleaned `content`.
 ///
 /// The text is first passed through `clean_content`, then cut at every
 /// non-digit character. Empty pieces between adjacent separators are dropped,
 /// so the result holds only the digit runs, in order of appearance.
 fn get_numbers(content: &str) -> Vec<String> {
     let clean = clean_content(content);
     clean
         // Split on everything that is NOT a digit; splitting on the digits
         // themselves would return the text between the numbers instead.
         .split(|c: char| !c.is_ascii_digit())
         .filter(|run| !run.is_empty())
         .map(String::from)
         .collect()
 }
 /// Returns the whitespace-separated words of `content` that appear in
 /// `FORBIDDEN_WORDS`, compared ASCII-case-insensitively.
 ///
 /// A word is reported once per occurrence, and only whole-word matches count.
 fn get_forbidden_words(content: &str) -> Vec<&str> {
     get_words(content)
         .filter(|word| {
             FORBIDDEN_WORDS
                 .iter()
                 .any(|forbidden| word.eq_ignore_ascii_case(forbidden))
         })
         .collect()
 }
 /// Prints the spam-related statistics of `data` to standard output.
 ///
 /// Reported, in order: fully capitalized words, sentences, words, numbers,
 /// forbidden words, and the (integer) average word count per sentence. When
 /// the text contains no complete sentence the average is reported as `0`.
 fn analyze(data: &str) {
     // All capitalized words
     let cap_words = get_capitalized_words(data);
     println!("All capitalized words: {}", cap_words.len());

     // All sentences
     let sentences = get_sentences(data);
     println!("Sentences: {}", sentences.len());

     // All words (counted once and reused for the average below)
     let word_count = get_words(data).count();
     println!("Words: {}", word_count);

     // Numbers
     let numbers = get_numbers(data);
     println!("Numbers: {}", numbers.len());

     // Forbidden words
     let fw = get_forbidden_words(data);
     println!("Forbidden words: {}", fw.len());

     // `checked_div` guards against a text with zero sentences, which would
     // otherwise panic with a division by zero.
     let word_count_per_sentence = word_count.checked_div(sentences.len()).unwrap_or(0);
     println!("Word count per sentence: {}", word_count_per_sentence);
 }
 /// Entry point: analyzes every file named on the command line.
 ///
 /// Files that cannot be read as UTF-8 text are reported on standard error and
 /// skipped; the remaining arguments are still processed.
 fn main() {
     // The first argument is the program name, not an input file.
     for arg in env::args().skip(1) {
         match fs::read_to_string(&arg) {
             Ok(text) => analyze(&text),
             Err(_) => eprintln!("{arg} isn't a valid file or couldn't be read"),
         }
     }
 }
 /// Words whose presence is counted as a spam indicator.
 ///
 /// `get_forbidden_words` compares each entry ASCII-case-insensitively against
 /// whole whitespace-separated words. NOTE(review): fragments such as `"@"`,
 /// `"//"`, `"::"` or `"com"` therefore only match when they stand alone as a
 /// token; confirm whether substring matching was intended.
 static FORBIDDEN_WORDS: &[&str] = &[
     "recovery",    "techie",    "http",     "https",   "digital",    "hack",          "::",       "//",    "com",
     "@",           "crypto",    "bitcoin",  "wallet",  "hacker",     "welcome",       "whatsapp", "email", "cryptocurrency",
     "stolen",      "freeze",    "quick",    "crucial", "tracing",    "scammers",      "experts",  "hire",  "century",
     "transaction", "essential", "managing", "contact", "contacting", "understanding", "assets",   "funds"
 ];