#![feature(let_chains)]

use std::{env, fs};

/// Returns a copy of `content` containing only the permitted characters.
///
/// Permitted are the ASCII digits, the lowercase ASCII letters, space,
/// newline and the punctuation marks `.`, `,`, `!` and `?`. Everything else
/// is removed, which includes uppercase letters, tabs and all non-ASCII
/// characters.
fn clean_content(content: &str) -> String {
	let is_allowed = |c: &char| {
		matches!(*c, '0'..='9' | 'a'..='z' | ' ' | '\n' | '.' | ',' | '!' | '?')
	};

	content.chars().filter(is_allowed).collect()
}

/// Splits `content` into its dot-terminated sentences.
///
/// Only text followed by a `.` counts as a sentence, so whatever comes after
/// the last dot is discarded; input without any dot yields an empty vector.
/// Leading whitespace is stripped from every sentence, the terminating dot is
/// not part of the returned slices, and fragments that are empty after
/// trimming (as produced by `...` or `. .`) are skipped.
fn get_sentences(content: &str) -> Vec<&str> {
	match content.rsplit_once('.') {
		// Everything left of the final dot consists of terminated sentences;
		// the right-hand side is an unterminated tail and is ignored.
		Some((terminated, _unterminated_tail)) => terminated
			.split('.')
			.map(str::trim_start)
			.filter(|sentence| !sentence.is_empty())
			.collect(),
		None => Vec::new(),
	}
}

/// Returns an iterator over the whitespace-separated words of `sentences`.
///
/// This is a thin wrapper around [`str::split_whitespace`]: runs of
/// whitespace act as a single separator and no empty items are produced.
/// The iterator is `Clone`, so it can be counted and still reused.
fn get_words(sentences: &str) -> impl Iterator<Item = &str> + Clone {
	str::split_whitespace(sentences)
}

/// Reports whether `word` is written entirely in uppercase ASCII letters.
///
/// Characters that are not ASCII alphanumeric (punctuation, non-ASCII text)
/// are ignored, so `"HELLO!"` qualifies. A lowercase letter or a digit
/// disqualifies the word. A token without any ASCII alphanumeric character
/// at all (e.g. `"-"` or the empty string) is not a capitalized word.
fn is_fully_capitalized_word(word: &str) -> bool {
	let mut alphanumerics = word
		.chars()
		.filter(|c| c.is_ascii_alphanumeric())
		.peekable();

	// `all` alone is vacuously true for an empty sequence, which would let
	// pure-punctuation tokens through; insist on at least one character.
	alphanumerics.peek().is_some() && alphanumerics.all(|c| c.is_ascii_uppercase())
}

/// Collects every fully capitalized word found in the sentences of `content`.
///
/// The text is split with `get_sentences`, each sentence is split with
/// `get_words`, and a word is kept when `is_fully_capitalized_word` accepts
/// it. The first word of every sentence is never considered.
fn get_capitalized_words(content: &str) -> Vec<&str> {
	get_sentences(content)
		.into_iter()
		// The opening word of a sentence is always skipped.
		// NOTE(review): the original remark here was cut off; presumably the
		// reason is that sentences start with a capital anyway. Confirm.
		.flat_map(|sentence| get_words(sentence).skip(1))
		.filter(|word| is_fully_capitalized_word(word))
		.collect()
}

/// Extracts every maximal run of ASCII digits from `content`.
///
/// Each run is returned as its own `String`, in order of appearance. Any
/// non-digit character ends a run, so `"10-12"` yields `"10"` and `"12"`,
/// and `"3.14"` yields `"3"` and `"14"`. Text without digits yields an
/// empty vector.
fn get_numbers(content: &str) -> Vec<String> {
	content
		// Split on everything that is NOT a digit, so the pieces are the
		// digit runs themselves rather than the text between them.
		.split(|c: char| !c.is_ascii_digit())
		// Adjacent separators produce empty pieces; those are not numbers.
		.filter(|run| !run.is_empty())
		.map(String::from)
		.collect()
}

/// Returns the words of `content` that appear in `FORBIDDEN_WORDS`.
///
/// Words are the whitespace-separated tokens produced by `get_words`. A token
/// matches only when it equals a list entry as a whole, ignoring ASCII case;
/// a token with attached punctuation (e.g. `"wallet,"`) therefore does not
/// match. Every occurrence is returned, so duplicates are preserved.
fn get_forbidden_words(content: &str) -> Vec<&str> {
	get_words(content)
		.filter(|word| {
			FORBIDDEN_WORDS
				.iter()
				.any(|forbidden| word.eq_ignore_ascii_case(forbidden))
		})
		.collect()
}

/// Prints a set of text statistics for `data` to standard output.
///
/// Reported, one per line: the number of fully capitalized words, sentences,
/// words, numbers and forbidden words, followed by the average word count
/// per sentence (integer division, rounded down). When `data` contains no
/// sentence the average is reported as `0`.
fn analyze(data: &str) {
	// All capitalized words
	let cap_words = get_capitalized_words(data);
	println!("All capitalized words: {}", cap_words.len());

	// All sentences
	let sentences = get_sentences(data);
	println!("Sentences: {}", sentences.len());

	// All words; counted once and reused for the average below
	let word_count = get_words(data).count();
	println!("Words: {}", word_count);

	// Numbers
	let numbers = get_numbers(data);
	println!("Numbers: {}", numbers.len());

	// Forbidden words
	let fw = get_forbidden_words(data);
	println!("Forbidden words: {}", fw.len());

	// Text without a single sentence would make a plain `/` panic with a
	// division by zero, so fall back to 0 in that case.
	let word_count_per_sentence = word_count.checked_div(sentences.len()).unwrap_or(0);
	println!("Word count per sentence: {}", word_count_per_sentence);
}

/// Entry point: treats every command-line argument as a file path, reads the
/// file as text and prints its statistics via `analyze`.
///
/// A path that cannot be read is reported on standard error and skipped; the
/// remaining arguments are still processed.
fn main() {
	// The first argument is the program itself, not an input file.
	let paths = env::args().skip(1);

	for arg in paths {
		match fs::read_to_string(&arg) {
			Ok(text) => analyze(&text),
			Err(_) => eprintln!("{arg} isn't a valid file or couldn't be read"),
		}
	}
}

/// Terms looked up by `get_forbidden_words`, compared ignoring ASCII case.
///
/// NOTE(review): "expers" looks like a typo for "experts" — confirm before
/// changing, since the entry is matched verbatim at runtime.
static FORBIDDEN_WORDS: &[&str] = &[
	"recovery", "techie", "http", "https", "digital", "hack", "::",
	"//", "com", "@", "crypto", "bitcoin", "wallet", "hacker",
	"welcome", "whatsapp", "email", "cryptocurrency", "stolen", "freeze", "quick",
	"crucial", "tracing", "scammers", "expers", "hire", "century", "transaction",
	"essential", "managing", "contact", "contacting", "understanding", "assets", "funds",
];