#![feature(let_chains)]
use rayon::prelude::*;
use std::{env, fs};

fn clean_content(content: &str) -> String {
    let allowed_chars = "0123456789abcdefghijklmnopqrstuvwxyz \n.,!?";
    content.chars()
        .filter(|&c| allowed_chars.contains(c))
        .collect::<String>()
}

fn get_sentences(content: &str) -> Vec<&str> {
    let mut sentences = content.split('.')
        .map(|s| s.trim_start()) // Remove leading whitespace
        .collect::<Vec<_>>();

    // `split('.')` always leaves a trailing fragment: an empty string if the
    // content ended with a dot, or an unterminated "sentence" otherwise.
    // Drop it in either case.
    if let Some(last) = sentences.last()
        && (last.is_empty() || !content.trim_end().ends_with('.'))
    {
        sentences.pop();
    }

    sentences
}

fn get_words(text: &str) -> impl Iterator<Item = &str> + Clone {
    text.split_whitespace()
}

/// A word counts as fully capitalized when none of its alphanumeric
/// characters are lowercase (digits and punctuation are ignored).
fn is_fully_capitalized_word(word: &str) -> bool {
    word.chars()
        .all(|c| !c.is_ascii_alphanumeric() || c.is_ascii_uppercase())
}

fn get_capitalized_words(content: &str) -> Vec<&str> {
    let sentences = get_sentences(content);
    let mut cap_words = vec![];

    for sentence in sentences {
        // Always skip the first word, since sentences start with a capital letter anyway
        for word in get_words(sentence).skip(1) {
            if is_fully_capitalized_word(word) {
                cap_words.push(word);
            }
        }
    }

    cap_words
}

fn get_numbers(clean_content: &str) -> Vec<&str> {
    // Split on everything that is *not* a digit, keeping the runs of digits.
    clean_content.split(|c: char| !c.is_ascii_digit())
        .filter(|s| !s.is_empty())
        .collect()
}

fn get_forbidden_words(content: &str) -> Vec<&str> {
    fn is_forbidden(w: &str) -> bool {
        FORBIDDEN_WORDS.iter().any(|fw| w.eq_ignore_ascii_case(fw))
    }

    get_words(content)
        .filter(|w| is_forbidden(w))
        .collect()
}

fn analyze(data: &str) {
    let clean_data = clean_content(data);

    // All capitalized words
    let cap_words = get_capitalized_words(data);
    println!("All capitalized words: {}", cap_words.len());

    // All sentences
    let sentences = get_sentences(data);
    println!("Sentences: {}", sentences.len());

    // All words
    let words = get_words(data);
    println!("Words: {}", words.clone().count());

    // Numbers
    let numbers = get_numbers(&clean_data);
    println!("Numbers: {}", numbers.len());

    // Forbidden words
    let fw = get_forbidden_words(data);
    println!("Forbidden words: {}", fw.len());

    if !sentences.is_empty() {
        let word_count_per_sentence = words.count() / sentences.len();
        println!("Word count per sentence: {}", word_count_per_sentence);
    }
}

fn main() {
    // Read in files from args; `-p` enables parallel processing via rayon.
    let mut files = Vec::with_capacity(env::args().len());
    let mut do_parallel = false;
    for arg in env::args().skip(1) { // skip the program name
        if arg == "-p" {
            do_parallel = true;
        } else {
            files.push(arg);
        }
    }

    // Do the work
    let work = |file: &String| {
        let Ok(text) = fs::read_to_string(file) else {
            eprintln!("{file} isn't a valid file or couldn't be read");
            return;
        };
        analyze(&text);
    };

    if !do_parallel {
        files.iter().for_each(work);
    } else {
        files.par_iter().for_each(work);
    }
}

static FORBIDDEN_WORDS: &[&str] = &[
    "recovery", "techie", "http", "https", "digital", "hack", "::", "//", "com", "@",
    "crypto", "bitcoin", "wallet", "hacker", "welcome", "whatsapp", "email",
    "cryptocurrency", "stolen", "freeze", "quick", "crucial", "tracing", "scammers",
    "expers", "hire", "century", "transaction", "essential", "managing", "contact",
    "contacting", "understanding", "assets", "funds",
];
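
// A minimal sketch of sanity-check unit tests for the helpers above; the
// expected values assume the allowed-character set and the sentence/number
// handling defined in this file. Run with `cargo test`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn clean_content_drops_disallowed_chars() {
        // Uppercase letters are not in the allowed set, so they are removed.
        assert_eq!(clean_content("Hello, World! 42"), "ello, orld! 42");
    }

    #[test]
    fn get_sentences_drops_trailing_fragment() {
        // The unterminated "trailing" fragment is not counted as a sentence.
        assert_eq!(get_sentences("First. Second. trailing"), ["First", "Second"]);
        // A trailing dot leaves an empty fragment, which is also dropped.
        assert_eq!(get_sentences("First. Second."), ["First", "Second"]);
    }

    #[test]
    fn get_numbers_extracts_digit_runs() {
        assert_eq!(get_numbers("call 555 now, 24 7"), ["555", "24", "7"]);
    }

    #[test]
    fn fully_capitalized_words_are_detected() {
        assert!(is_fully_capitalized_word("FBI"));
        assert!(!is_fully_capitalized_word("Fbi"));
    }
}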