Compare commits
No commits in common. "b6cce442c28c469956b7d18daaacda78bf88b1c6" and "c26927a265f02916ef61bafb9ba33fa0d48b242c" have entirely different histories.
b6cce442c2
...
c26927a265
@ -5,7 +5,3 @@ edition = "2021"
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
rayon = "1.10.0"
|
rayon = "1.10.0"
|
||||||
|
|
||||||
[profile.release]
|
|
||||||
lto = "thin"
|
|
||||||
panic = "abort"
|
|
||||||
|
@ -14,36 +14,21 @@ fn clean_content(content: &str) -> String {
|
|||||||
clean_content
|
clean_content
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_sentences(content: &str) -> usize {
|
fn get_sentences(content: &str) -> Vec<&str> {
|
||||||
let sentences = content.split('.')
|
let mut sentences = content.split('.')
|
||||||
.map(|s| s.trim_start()) // Remove leading whitespace
|
.map(|s| s.trim_start()) // Remove leading whitespace
|
||||||
.count();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
// // Remove last "sentence" if didn't end with a dot
|
// Remove last "sentence" if didn't end with a dot
|
||||||
// if let Some(last) = sentences.last() && !last.ends_with('.') {
|
if let Some(last) = sentences.last() && !last.ends_with('.') {
|
||||||
// sentences.pop();
|
sentences.pop();
|
||||||
// }
|
}
|
||||||
|
|
||||||
sentences
|
sentences
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_words(content: &str, words: &mut usize, caps: &mut usize, fw: &mut usize) {
|
fn get_words(sentences: &str) -> impl Iterator<Item = &str> + Clone {
|
||||||
fn check_forbidden(w: &str) -> bool {
|
sentences.split_whitespace()
|
||||||
FORBIDDEN_WORDS.iter()
|
|
||||||
.find(|fw| str::eq_ignore_ascii_case(w, fw))
|
|
||||||
.is_some()
|
|
||||||
}
|
|
||||||
|
|
||||||
for word in content.split_whitespace() {
|
|
||||||
*words += 1;
|
|
||||||
|
|
||||||
if is_fully_capitalized_word(word) {
|
|
||||||
*caps += 1;
|
|
||||||
}
|
|
||||||
if check_forbidden(word) {
|
|
||||||
*fw += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_fully_capitalized_word(word: &str) -> bool {
|
fn is_fully_capitalized_word(word: &str) -> bool {
|
||||||
@ -51,68 +36,65 @@ fn is_fully_capitalized_word(word: &str) -> bool {
|
|||||||
.all(|c| !c.is_ascii_alphanumeric() || c.is_ascii_uppercase())
|
.all(|c| !c.is_ascii_alphanumeric() || c.is_ascii_uppercase())
|
||||||
}
|
}
|
||||||
|
|
||||||
//fn get_capitalized_words(content: &str) -> usize {
|
fn get_capitalized_words(content: &str) -> Vec<&str> {
|
||||||
// let sentences = get_sentences(content);
|
let sentences = get_sentences(content);
|
||||||
//// let mut cap_words = vec![];
|
let mut cap_words = vec![];
|
||||||
// let mut count = 0;
|
|
||||||
//
|
for sentence in sentences {
|
||||||
// for sentence in sentences {
|
// Always skip the first word since sentences start with
|
||||||
// // Always skip the first word since sentences start with
|
for word in get_words(sentence).skip(1) {
|
||||||
// for word in get_words(sentence).skip(1) {
|
if is_fully_capitalized_word(word) {
|
||||||
// if is_fully_capitalized_word(word) {
|
cap_words.push(word);
|
||||||
// count += 1;
|
}
|
||||||
// }
|
}
|
||||||
// }
|
}
|
||||||
// }
|
|
||||||
//
|
cap_words
|
||||||
// count
|
|
||||||
//}
|
|
||||||
|
|
||||||
fn get_numbers(clean_content: &str) -> usize {
|
|
||||||
clean_content.split(|c: char| !c.is_ascii_digit())
|
|
||||||
.count()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//fn get_forbidden_words(content: &str) -> usize {
|
fn get_numbers(clean_content: &str) -> Vec<&str> {
|
||||||
// fn check_forbidden(w: &str) -> bool {
|
clean_content.split(|c: char| c.is_ascii_digit())
|
||||||
// FORBIDDEN_WORDS.iter()
|
.collect()
|
||||||
// .find(|fw| str::eq_ignore_ascii_case(w, fw))
|
}
|
||||||
// .is_some()
|
|
||||||
// }
|
fn get_forbidden_words(content: &str) -> Vec<&str> {
|
||||||
//
|
fn check_forbidden(w: &str) -> bool {
|
||||||
// get_words(content)
|
FORBIDDEN_WORDS.iter()
|
||||||
// .filter(|w| check_forbidden(w))
|
.find(|fw| str::eq_ignore_ascii_case(w, fw))
|
||||||
// .collect()
|
.is_some()
|
||||||
//}
|
}
|
||||||
|
|
||||||
|
get_words(content)
|
||||||
|
.filter(|w| check_forbidden(w))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
fn analyze(data: &str) {
|
fn analyze(data: &str) {
|
||||||
let clean_data = clean_content(data);
|
let clean_data = clean_content(data);
|
||||||
// drop(clean_data); // You aren't actually using clean_data :O
|
// drop(clean_data); // You aren't actually using clean_data :O
|
||||||
|
|
||||||
// All capitalized words
|
// All capitalized words
|
||||||
let mut words = 0;
|
let cap_words = get_capitalized_words(data);
|
||||||
let mut fw = 0;
|
println!("All capitalized words: {}", cap_words.len());
|
||||||
let mut cap_words = 0;
|
|
||||||
get_words(&clean_data, &mut words, &mut fw, &mut cap_words);
|
|
||||||
|
|
||||||
println!("All capitalized words: {}", cap_words);
|
|
||||||
|
|
||||||
// All sentences
|
// All sentences
|
||||||
let sentences = get_sentences(data);
|
let sentences = get_sentences(data);
|
||||||
println!("Sentences: {}", sentences);
|
println!("Sentences: {}", sentences.len());
|
||||||
|
|
||||||
// All words
|
// All words
|
||||||
println!("Words: {}", words);
|
let words = get_words(data);
|
||||||
|
println!("Words: {}", words.clone().count());
|
||||||
|
|
||||||
// Numbers
|
// Numbers
|
||||||
let numbers = get_numbers(&clean_data);
|
let numbers = get_numbers(&clean_data);
|
||||||
println!("Numbers: {}", numbers);
|
println!("Numbers: {}", numbers.len());
|
||||||
|
|
||||||
// Forbidden words
|
// Forbidden words
|
||||||
println!("Forbidden words: {}", fw);
|
let fw = get_forbidden_words(data);
|
||||||
|
println!("Forbidden words: {}", fw.len());
|
||||||
|
|
||||||
if sentences > 0 {
|
if sentences.len() > 0 {
|
||||||
let word_count_per_sentence = words / sentences;
|
let word_count_per_sentence = words.count() / sentences.len();
|
||||||
println!("Word count per sentence: {}", word_count_per_sentence);
|
println!("Word count per sentence: {}", word_count_per_sentence);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -154,4 +136,3 @@ static FORBIDDEN_WORDS: &'static [&'static str] = &[
|
|||||||
"transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds"
|
"transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds"
|
||||||
];
|
];
|
||||||
|
|
||||||
|
|
||||||
|
Binary file not shown.
Loading…
Reference in New Issue
Block a user