// Simple spam-heuristics analyzer: for every file given on the command line it
// prints counts of capitalized words, sentences, words, numbers and forbidden
// words, plus the average number of words per sentence.

use std::{env, fs};

/// Returns `content` with every character removed that is not an ASCII digit,
/// a lowercase ASCII letter, a space, a newline, or one of `.`, `,`, `!`, `?`.
///
/// Note that uppercase letters are *removed*, not lowercased.
fn clean_content(content: &str) -> String {
    let allowed_chars = "0123456789abcdefghijklmnopqrstuvwxyz \n.,!?";

    content
        .chars()
        .filter(|&c| allowed_chars.contains(c))
        .collect()
}

/// Splits `content` into sentences, where a sentence is the text preceding a `.`.
///
/// Leading whitespace of each sentence is trimmed. Text after the last `.`
/// (i.e. an unterminated trailing fragment) is not returned.
fn get_sentences(content: &str) -> Vec<&str> {
    let mut sentences: Vec<&str> = content.split('.').map(str::trim_start).collect();

    // `split` always yields one final fragment that follows the last '.': it is
    // either empty (the text ended with a dot) or an unterminated tail. In both
    // cases it is not a sentence, so it is dropped unconditionally.
    sentences.pop();

    sentences
}

/// Returns an iterator over the whitespace-separated words of `sentences`.
fn get_words(sentences: &str) -> impl Iterator<Item = &str> + Clone {
    sentences.split_whitespace()
}

/// Returns `true` if `word` contains at least one ASCII uppercase letter and no
/// ASCII lowercase letters or digits.
///
/// Punctuation and non-ASCII characters are ignored, so `"URGENT!"` qualifies,
/// while tokens without any uppercase letter (such as `"--"`) do not.
fn is_fully_capitalized_word(word: &str) -> bool {
    let has_uppercase = word.chars().any(|c| c.is_ascii_uppercase());
    let only_uppercase = word
        .chars()
        .all(|c| !c.is_ascii_alphanumeric() || c.is_ascii_uppercase());

    has_uppercase && only_uppercase
}

/// Collects every fully capitalized word of `content`, sentence by sentence.
///
/// The first word of each sentence is always skipped.
fn get_capitalized_words(content: &str) -> Vec<&str> {
    get_sentences(content)
        .into_iter()
        // Always skip the first word, since sentences start with a capital
        // letter anyway.
        .flat_map(|sentence| get_words(sentence).skip(1))
        .filter(|word| is_fully_capitalized_word(word))
        .collect()
}

/// Returns every maximal run of ASCII digits found in the cleaned content.
///
/// The text is first passed through [`clean_content`], so characters removed by
/// the cleaning step do not separate digits (e.g. `"3-4"` yields `"34"`).
fn get_numbers(content: &str) -> Vec<String> {
    let clean = clean_content(content);

    clean
        // Split on everything that is NOT a digit, so the pieces are the numbers.
        .split(|c: char| !c.is_ascii_digit())
        .filter(|digits| !digits.is_empty())
        .map(String::from)
        .collect()
}

/// Returns every whitespace-separated word of `content` that equals an entry of
/// [`FORBIDDEN_WORDS`], ignoring ASCII case.
fn get_forbidden_words(content: &str) -> Vec<&str> {
    fn check_forbidden(w: &str) -> bool {
        FORBIDDEN_WORDS.iter().any(|fw| w.eq_ignore_ascii_case(fw))
    }

    get_words(content)
        .filter(|w| check_forbidden(w))
        .collect()
}

/// Integer average of words per sentence; `0` when there are no sentences.
fn words_per_sentence(word_count: usize, sentence_count: usize) -> usize {
    word_count.checked_div(sentence_count).unwrap_or(0)
}

/// Prints the statistics for `data` to standard output, one per line.
fn analyze(data: &str) {
    // All capitalized words
    let cap_words = get_capitalized_words(data);
    println!("All capitalized words: {}", cap_words.len());

    // All sentences
    let sentence_count = get_sentences(data).len();
    println!("Sentences: {}", sentence_count);

    // All words
    let word_count = get_words(data).count();
    println!("Words: {}", word_count);

    // Numbers
    let numbers = get_numbers(data);
    println!("Numbers: {}", numbers.len());

    // Forbidden words
    let fw = get_forbidden_words(data);
    println!("Forbidden words: {}", fw.len());

    // Text without a single '.' has zero sentences; avoid dividing by zero.
    let word_count_per_sentence = words_per_sentence(word_count, sentence_count);
    println!("Word count per sentence: {}", word_count_per_sentence);
}

/// Analyzes every file passed as a command-line argument; unreadable files are
/// reported on standard error and skipped.
fn main() {
    // Read in files from args
    for arg in env::args().skip(1) {
        // skip program arg
        let Ok(text) = fs::read_to_string(&arg) else {
            eprintln!("{arg} isn't a valid file or couldn't be read");
            continue;
        };

        analyze(&text);
    }
}

/// Words that [`get_forbidden_words`] looks for (compared ignoring ASCII case
/// against whole whitespace-separated tokens).
// NOTE(review): "expers" looks like a typo for "experts", and entries such as
// "::", "//", "@" and "com" only match when they appear as standalone tokens --
// confirm whether substring matching was intended.
static FORBIDDEN_WORDS: &[&str] = &[
    "recovery",
    "techie",
    "http",
    "https",
    "digital",
    "hack",
    "::",
    "//",
    "com",
    "@",
    "crypto",
    "bitcoin",
    "wallet",
    "hacker",
    "welcome",
    "whatsapp",
    "email",
    "cryptocurrency",
    "stolen",
    "freeze",
    "quick",
    "crucial",
    "tracing",
    "scammers",
    "expers",
    "hire",
    "century",
    "transaction",
    "essential",
    "managing",
    "contact",
    "contacting",
    "understanding",
    "assets",
    "funds",
];