Compare commits

..

No commits in common. "b6cce442c28c469956b7d18daaacda78bf88b1c6" and "c26927a265f02916ef61bafb9ba33fa0d48b242c" have entirely different histories.

4 changed files with 49 additions and 72 deletions

View File

@ -5,7 +5,3 @@ edition = "2021"
[dependencies] [dependencies]
rayon = "1.10.0" rayon = "1.10.0"
[profile.release]
lto = "thin"
panic = "abort"

View File

@ -14,36 +14,21 @@ fn clean_content(content: &str) -> String {
clean_content clean_content
} }
fn get_sentences(content: &str) -> usize { fn get_sentences(content: &str) -> Vec<&str> {
let sentences = content.split('.') let mut sentences = content.split('.')
.map(|s| s.trim_start()) // Remove leading whitespace .map(|s| s.trim_start()) // Remove leading whitespace
.count(); .collect::<Vec<_>>();
// // Remove last "sentence" if didn't end with a dot // Remove last "sentence" if didn't end with a dot
// if let Some(last) = sentences.last() && !last.ends_with('.') { if let Some(last) = sentences.last() && !last.ends_with('.') {
// sentences.pop(); sentences.pop();
// } }
sentences sentences
} }
fn get_words(content: &str, words: &mut usize, caps: &mut usize, fw: &mut usize) { fn get_words(sentences: &str) -> impl Iterator<Item = &str> + Clone {
fn check_forbidden(w: &str) -> bool { sentences.split_whitespace()
FORBIDDEN_WORDS.iter()
.find(|fw| str::eq_ignore_ascii_case(w, fw))
.is_some()
}
for word in content.split_whitespace() {
*words += 1;
if is_fully_capitalized_word(word) {
*caps += 1;
}
if check_forbidden(word) {
*fw += 1;
}
}
} }
fn is_fully_capitalized_word(word: &str) -> bool { fn is_fully_capitalized_word(word: &str) -> bool {
@ -51,68 +36,65 @@ fn is_fully_capitalized_word(word: &str) -> bool {
.all(|c| !c.is_ascii_alphanumeric() || c.is_ascii_uppercase()) .all(|c| !c.is_ascii_alphanumeric() || c.is_ascii_uppercase())
} }
//fn get_capitalized_words(content: &str) -> usize { fn get_capitalized_words(content: &str) -> Vec<&str> {
// let sentences = get_sentences(content); let sentences = get_sentences(content);
//// let mut cap_words = vec![]; let mut cap_words = vec![];
// let mut count = 0;
// for sentence in sentences {
// for sentence in sentences { // Always skip the first word since sentences start with
// // Always skip the first word since sentences start with for word in get_words(sentence).skip(1) {
// for word in get_words(sentence).skip(1) { if is_fully_capitalized_word(word) {
// if is_fully_capitalized_word(word) { cap_words.push(word);
// count += 1; }
// } }
// } }
// }
// cap_words
// count
//}
fn get_numbers(clean_content: &str) -> usize {
clean_content.split(|c: char| !c.is_ascii_digit())
.count()
} }
//fn get_forbidden_words(content: &str) -> usize { fn get_numbers(clean_content: &str) -> Vec<&str> {
// fn check_forbidden(w: &str) -> bool { clean_content.split(|c: char| c.is_ascii_digit())
// FORBIDDEN_WORDS.iter() .collect()
// .find(|fw| str::eq_ignore_ascii_case(w, fw)) }
// .is_some()
// } fn get_forbidden_words(content: &str) -> Vec<&str> {
// fn check_forbidden(w: &str) -> bool {
// get_words(content) FORBIDDEN_WORDS.iter()
// .filter(|w| check_forbidden(w)) .find(|fw| str::eq_ignore_ascii_case(w, fw))
// .collect() .is_some()
//} }
get_words(content)
.filter(|w| check_forbidden(w))
.collect()
}
fn analyze(data: &str) { fn analyze(data: &str) {
let clean_data = clean_content(data); let clean_data = clean_content(data);
// drop(clean_data); // You aren't actually using clean_data :O // drop(clean_data); // You aren't actually using clean_data :O
// All capitalized words // All capitalized words
let mut words = 0; let cap_words = get_capitalized_words(data);
let mut fw = 0; println!("All capitalized words: {}", cap_words.len());
let mut cap_words = 0;
get_words(&clean_data, &mut words, &mut fw, &mut cap_words);
println!("All capitalized words: {}", cap_words);
// All sentences // All sentences
let sentences = get_sentences(data); let sentences = get_sentences(data);
println!("Sentences: {}", sentences); println!("Sentences: {}", sentences.len());
// All words // All words
println!("Words: {}", words); let words = get_words(data);
println!("Words: {}", words.clone().count());
// Numbers // Numbers
let numbers = get_numbers(&clean_data); let numbers = get_numbers(&clean_data);
println!("Numbers: {}", numbers); println!("Numbers: {}", numbers.len());
// Forbidden words // Forbidden words
println!("Forbidden words: {}", fw); let fw = get_forbidden_words(data);
println!("Forbidden words: {}", fw.len());
if sentences > 0 { if sentences.len() > 0 {
let word_count_per_sentence = words / sentences; let word_count_per_sentence = words.count() / sentences.len();
println!("Word count per sentence: {}", word_count_per_sentence); println!("Word count per sentence: {}", word_count_per_sentence);
} }
} }
@ -154,4 +136,3 @@ static FORBIDDEN_WORDS: &'static [&'static str] = &[
"transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds" "transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds"
]; ];

BIN
risspam

Binary file not shown.