#![feature(let_chains)]
use rayon::prelude::*;
//use rayon::prelude::*;
use std::{env, fs};
/// Returns `content` with every character outside the allowed set removed.
///
/// Kept characters: ASCII letters (both cases), ASCII digits, space, newline
/// and the punctuation `.,!?`. Everything else (tabs, other symbols, non-ASCII
/// characters) is dropped, not replaced.
///
/// Uppercase letters are kept on purpose: stripping them would mangle words
/// ("Hello" -> "ello") and hide capitalization from any later analysis.
fn clean_content(content: &str) -> String {
    // Whitespace and punctuation that survive cleaning.
    const ALLOWED_SYMBOLS: &str = " \n.,!?";

    content
        .chars()
        .filter(|&c| c.is_ascii_alphanumeric() || ALLOWED_SYMBOLS.contains(c))
        .collect()
}
/// Counts the sentences in `content`.
///
/// The text is split on `.` and every piece that contains at least one
/// non-whitespace character counts as one sentence. Empty pieces (the tail
/// after a final dot, consecutive dots, or an empty input) are ignored, so an
/// empty or dot-only text yields `0`. A trailing fragment without a closing
/// dot still counts as a sentence.
fn get_sentences(content: &str) -> usize {
    content
        .split('.')
        .filter(|segment| !segment.trim().is_empty())
        .count()
}
/// Tallies word statistics for `content` into the caller's counters.
///
/// Words are the whitespace-separated tokens of `content`. The counters are
/// incremented, never reset, so a caller may accumulate over several calls.
///
/// * `words` - incremented once per token.
/// * `caps` - incremented for each token accepted by
///   `is_fully_capitalized_word`.
/// * `fw` - incremented for each token that equals an entry of
///   `FORBIDDEN_WORDS`, ignoring ASCII case.
fn get_words(content: &str, words: &mut usize, caps: &mut usize, fw: &mut usize) {
    /// Returns `true` when `w` equals a forbidden word, ignoring ASCII case.
    fn check_forbidden(w: &str) -> bool {
        FORBIDDEN_WORDS
            .iter()
            .any(|forbidden| w.eq_ignore_ascii_case(forbidden))
    }

    for word in content.split_whitespace() {
        *words += 1;
        if is_fully_capitalized_word(word) {
            *caps += 1;
        }
        if check_forbidden(word) {
            *fw += 1;
        }
    }
}
/// Reports whether `word` is written entirely in capital letters.
///
/// A word qualifies when it contains at least one ASCII uppercase letter and
/// no ASCII lowercase letter or digit. Other characters (punctuation,
/// non-ASCII) are ignored, so `"HELLO!"` qualifies. Tokens without any
/// uppercase letter, such as `"..."` or the empty string, do not qualify.
fn is_fully_capitalized_word(word: &str) -> bool {
    let mut saw_uppercase = false;
    for c in word.chars() {
        if c.is_ascii_uppercase() {
            saw_uppercase = true;
        } else if c.is_ascii_alphanumeric() {
            // A lowercase letter or a digit disqualifies the whole word.
            return false;
        }
    }
    // Guards against vacuous truth for tokens that contain no letters at all.
    saw_uppercase
}
//fn get_capitalized_words(content: &str) -> usize {
// let sentences = get_sentences(content);
//// let mut cap_words = vec![];
// let mut count = 0;
//
// for sentence in sentences {
// // Always skip the first word, since sentences start with a capital letter
// for word in get_words(sentence).skip(1) {
// if is_fully_capitalized_word(word) {
// count += 1;
// }
// }
// }
//
// count
//}
/// Counts the numbers in `clean_content`.
///
/// A number is a maximal run of consecutive ASCII digits; any other character
/// acts as a separator. Empty pieces produced by adjacent separators (or by an
/// empty input) are not counted, so text without digits yields `0`.
fn get_numbers(clean_content: &str) -> usize {
    clean_content
        .split(|c: char| !c.is_ascii_digit())
        .filter(|run| !run.is_empty())
        .count()
}
//fn get_forbidden_words(content: &str) -> usize {
// fn check_forbidden(w: &str) -> bool {
// FORBIDDEN_WORDS.iter()
// .find(|fw| str::eq_ignore_ascii_case(w, fw))
// .is_some()
// }
//
// get_words(content)
// .filter(|w| check_forbidden(w))
// .collect()
//}
/// Prints text statistics for `data` to standard output.
///
/// Word-level figures (word count, fully capitalized words, forbidden words)
/// and the number count are computed on the output of `clean_content`;
/// sentences are counted on the raw `data`. The average word count per
/// sentence uses integer division and is only printed when at least one
/// sentence was found.
fn analyze(data: &str) {
    let clean_data = clean_content(data);

    let mut words = 0;
    let mut cap_words = 0;
    let mut fw = 0;
    // The argument order must follow `get_words(content, words, caps, fw)`;
    // swapping the last two exchanges the reported counts.
    get_words(&clean_data, &mut words, &mut cap_words, &mut fw);

    // All capitalized words
    println!("All capitalized words: {}", cap_words);

    // All sentences
    let sentences = get_sentences(data);
    println!("Sentences: {}", sentences);

    // All words
    println!("Words: {}", words);

    // Numbers
    let numbers = get_numbers(&clean_data);
    println!("Numbers: {}", numbers);

    // Forbidden words
    println!("Forbidden words: {}", fw);

    if sentences > 0 {
        let word_count_per_sentence = words / sentences;
        println!("Word count per sentence: {}", word_count_per_sentence);
    }
}
/// Entry point: analyzes every file named on the command line.
///
/// Each argument after the program name is treated as a file path, except
/// `-p`, which switches to processing the files in parallel via rayon.
/// Files that cannot be read are reported on standard error and skipped.
fn main() {
    // Separate the `-p` switch from the file paths, keeping path order.
    let (flags, files): (Vec<String>, Vec<String>) = env::args()
        .skip(1) // skip program arg
        .partition(|arg| arg.as_str() == "-p");
    let do_parallel = !flags.is_empty();

    // Reads one file and prints its statistics, or reports the failure.
    let work = |file: &String| match fs::read_to_string(&file) {
        Ok(text) => analyze(&text),
        Err(_) => eprintln!("{file} isn't a valid file or couldn't be read"),
    };

    if do_parallel {
        files.par_iter().for_each(work);
    } else {
        files.iter().for_each(work);
    }
}
/// Terms that `get_words` counts as forbidden.
///
/// Each whitespace-separated word of the analyzed text is compared against
/// these entries as a whole, ignoring ASCII case.
// NOTE(review): "expers" looks like a typo for "experts" — confirm before changing.
static FORBIDDEN_WORDS: &[&str] = &[
    "recovery", "techie", "http", "https", "digital", "hack", "::", "//", "com",
    "@", "crypto", "bitcoin", "wallet", "hacker", "welcome", "whatsapp", "email", "cryptocurrency",
    "stolen", "freeze", "quick", "crucial", "tracing", "scammers", "expers", "hire", "century",
    "transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds",
];