#include "rmalloc.h"
#include <ctype.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "rstr.h"
#include "rstring_list.h"
// Short aliases used in this file; each macro expands to the identifier on
// its right-hand side (presumably declared by the project headers included
// above - confirm).
// String-list aliases: type, free, add, new.
#define sl rstring_list_t
#define slf rstring_list_free
#define sla rstring_list_add
#define sln rstring_list_new
// Buffer aliases: type, free, to-string, write, new.
#define rb rbuffer_t
#define rbf rbuffer_free
#define rbs rbuffer_to_string
#define rbw rbuffer_write
#define rbn rbuffer_new
// NULL-terminated list of words that is_forbidden_word() compares against
// (case-insensitively, whole string) to flag suspicious content.
// "expers" looks like a misspelling of "experts"; it is kept so existing
// matches do not change, and the correctly spelled word is listed as well.
char *forbidden_words[] = {
    "recovery", "techie", "http", "https", "digital", "hack", "::", "//", "com",
    "@", "crypto", "bitcoin", "wallet", "hacker", "welcome", "whatsapp", "email", "cryptocurrency",
    "stolen", "freeze", "quick", "crucial", "tracing", "scammers", "expers", "experts", "hire", "century",
    "transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds", NULL};
// Case-insensitive equality test for two NUL-terminated strings.
// Returns true when both strings have the same length and every pair of
// characters is equal after tolower(); returns false otherwise.
// Note: this returns a boolean, not an ordering like strcmp().
bool stricmp(char *word1, char *word2) {
    // <ctype.h> functions need a value representable as unsigned char (or
    // EOF); a plain char can be negative for bytes >= 0x80, so cast first.
    while (*word1 && tolower((unsigned char)*word1) == tolower((unsigned char)*word2)) {
        word1++;
        word2++;
    }
    // The loop stops at the end of word1 or at the first mismatch; the
    // strings are equal only if both ended at the same position.
    return *word1 == *word2;
}
// Debug helper: writes every entry of lst to stdout, one per line, in the
// form "<index:string>".
void sld(sl *lst) {
    ulonglong index = 0;
    while (index < lst->count) {
        printf("<%llu:%s>\n", index, lst->strings[index]);
        index++;
    }
}
// Returns a newly allocated copy of content with every '<', '>' and ':'
// removed (the characters slds() uses as framing). The caller owns the
// returned string and must free it.
char *remove_preserved_chars(char *content) {
    char *stripped = malloc(strlen(content) + 1);
    char *out = stripped;
    *out = 0;
    for (const char *in = content; *in != 0; in++) {
        switch (*in) {
        case '<':
        case '>':
        case ':':
            // Framing character: drop it.
            break;
        default:
            *out++ = *in;
            break;
        }
    }
    *out = 0;
    return stripped;
}
// Memory usage: 29 TB, 213.322.618 (re)allocated, 106.670.251 unique free'd, 0 in use.
// Serializes lst into one string: each entry becomes a line of the form
// "<index:string>" with '<', '>' and ':' stripped from the entry text.
// The pieces are collected with the project's strn/stra/strc string helpers.
// NOTE(review): ownership of `buffer` after strc() is not visible from this
// file - confirm whether strc() releases it or the result aliases it.
char *slds(sl *lst) {
    str_t *buffer = strn(1337);
    for (ulonglong i = 0; i < lst->count; i++) {
        char *cc = remove_preserved_chars(lst->strings[i]);
        // Framing needs at most 25 bytes: '<', up to 20 index digits, ':',
        // '>', '\n' and the terminating NUL. Size from the cleaned text and
        // bound the write so the buffer can never be overrun.
        size_t capacity = strlen(cc) + 32;
        char *temp = malloc(capacity);
        snprintf(temp, capacity, "<%llu:%s>\n", i, cc);
        free(cc);
        stra(buffer, temp);
        free(temp);
    }
    return strc(buffer);
}
// Returns true for the separator characters tab, newline, space and comma.
bool isws(char c) {
    switch (c) {
    case '\t':
    case '\n':
    case ' ':
    case ',':
        return true;
    default:
        return false;
    }
}
// Reads f one character at a time until end-of-file and returns the
// collected text as produced by the project's strc() helper.
char *fread_till_eof(FILE *f) {
    // fgetc() returns an int so that EOF can be told apart from every valid
    // byte. Storing it in a char either never matches EOF (unsigned char) or
    // stops early on a 0xFF byte (signed char).
    int c;
    str_t *buffer = strn(1337);
    while ((c = fgetc(f)) != EOF) {
        strac(buffer, (char)c);
    }
    return strc(buffer);
}
// Counts sentences in content, where a sentence is anything terminated by
// a '.' character. Every '.' counts, so "..." yields 3, and trailing text
// without a final '.' is not counted.
// No scratch memory is needed: the count depends only on the number of
// periods, so the function cannot fail on allocation.
int get_sentences(char *content) {
    int count = 0;
    for (const char *p = content; *p != 0; p++) {
        if (*p == '.') {
            count++;
        }
    }
    return count;
}
// Returns true when word equals, ignoring case, any entry of the
// NULL-terminated forbidden_words table.
bool is_forbidden_word(char *word) {
    char **entry = forbidden_words;
    while (*entry != NULL) {
        if (stricmp(word, *entry)) {
            return true;
        }
        entry++;
    }
    return false;
}
// Splits content into words separated by space, tab or newline and returns
// the number of words found.
//   count_caps: incremented once for every word that contains no lowercase
//               letter (so "HELLO", "123" and "--" all qualify).
//   fw_count:   incremented once for every word accepted by
//               is_forbidden_word().
// Both counters are added to, not reset. Returns 0 without touching the
// counters if the scratch buffer cannot be allocated.
int get_words(char *content, int * count_caps, int *fw_count) {
    int count = 0;
    char *word_buffer = malloc(strlen(content) + 1);
    if (word_buffer == NULL) {
        return 0;
    }
    size_t word_len = 0;
    bool has_lcase = false;
    for (;; content++) {
        char c = *content;
        bool at_end = (c == '\0');
        if (at_end || c == ' ' || c == '\t' || c == '\n') {
            // A separator, or the end of the text, closes the current word.
            // Treating the terminator as a separator ensures the last word
            // is counted even without trailing whitespace.
            if (word_len > 0) {
                word_buffer[word_len] = 0;
                if (!has_lcase) {
                    (*count_caps)++;
                }
                count++;
                if (is_forbidden_word(word_buffer)) {
                    (*fw_count)++;
                }
                word_len = 0;
            }
            has_lcase = false;
            if (at_end) {
                break;
            }
            continue;
        }
        word_buffer[word_len++] = c;
        // islower() only promises a non-zero result for lowercase letters,
        // so test its truthiness; the cast avoids passing a negative char.
        if (islower((unsigned char)c)) {
            has_lcase = true;
        }
    }
    free(word_buffer);
    return count;
}
// Returns true when word contains no lowercase alphanumeric character,
// i.e. every alphanumeric character is unchanged by toupper(). Punctuation
// and other non-alphanumeric characters are ignored; the empty string
// yields true.
bool is_fully_capitalized_word(char *word) {
    for (; *word != 0; word++) {
        // <ctype.h> functions are only defined for unsigned char values
        // (or EOF), so convert before classifying.
        unsigned char ch = (unsigned char)*word;
        if (isalnum(ch) && toupper(ch) != ch) {
            return false;
        }
    }
    return true;
}
// Returns how many entries of all_words are accepted by
// is_fully_capitalized_word().
int get_capitalized_words(sl *all_words) {
    int capitalized = 0;
    uint idx = 0;
    while (idx < all_words->count) {
        if (is_fully_capitalized_word(all_words->strings[idx])) {
            capitalized++;
        }
        idx++;
    }
    return capitalized;
}
// Returns a newly allocated copy of content that keeps only ASCII letters
// (either case, original case preserved), digits and the punctuation
// ". , ! ?". Everything else - including all whitespace - is dropped.
// The caller owns the result and must free it. Returns NULL if the copy
// cannot be allocated.
char *clean_content(char *content) {
    // Allow-list is matched against the lowercased character.
    static const char allowed_ichars[] = "0123456789abcdefghijklmnopqrstuvwxyz.,!?";
    char *cleaned = malloc(strlen(content) + 1);
    if (cleaned == NULL) {
        return NULL;
    }
    char *out = cleaned;
    for (; *content != 0; content++) {
        // tolower() requires an unsigned char value; bytes >= 0x80 (for
        // example UTF-8 sequences) would otherwise be passed as negatives.
        unsigned char ch = (unsigned char)*content;
        if (strchr(allowed_ichars, tolower(ch)) != NULL) {
            *out++ = *content;
        }
    }
    *out = 0;
    return cleaned;
}
// Counts the numbers in cc, where a number is a maximal run of consecutive
// decimal digits ("a12b3" contains two). A run that reaches the end of the
// string is counted as well.
int get_numbers(char *cc) {
    int count = 0;
    bool in_number = false;
    for (const char *p = cc; *p != 0; p++) {
        // Cast first: isdigit() is only defined for unsigned char values.
        bool digit = isdigit((unsigned char)*p) != 0;
        // Count each run when it starts, so a run at the very end of the
        // text is not missed.
        if (digit && !in_number) {
            count++;
        }
        in_number = digit;
    }
    return count;
}
// Number of analyze() calls so far; analyze() increments it and prints the
// new value as the "#N" header of each report. It is a single process-wide
// counter, shared by every thread that runs analyze().
unsigned int total = 0;
// Reads the remaining content of f into a newly allocated, NUL-terminated
// buffer that the caller must free.
// For a seekable stream the read starts at the beginning of the file and
// the file size is used as the initial buffer size. For a non-seekable
// stream (pipe, terminal) the data is read from the current position into
// a buffer that grows as needed.
// Returns NULL on allocation failure or on a read/positioning error.
// The stream is never closed here: it belongs to the caller.
char *readall(FILE *f) {
    size_t capacity = 4096;
    if (fseek(f, 0, SEEK_END) == 0) {
        long end = ftell(f);
        if (end < 0 || fseek(f, 0, SEEK_SET) != 0) {
            return NULL;
        }
        // +1 for the terminator and +1 so that the first fread() comes back
        // short at end-of-file instead of forcing a pointless grow.
        capacity = (size_t)end + 2;
    } else {
        // Not seekable; make sure the failed probe leaves no error flag
        // behind that would be mistaken for a read error below.
        clearerr(f);
    }
    char *buffer = malloc(capacity);
    if (buffer == NULL) {
        return NULL;
    }
    size_t length = 0;
    for (;;) {
        // Always keep one byte in reserve for the terminating NUL.
        size_t room = capacity - length - 1;
        if (room == 0) {
            if (capacity > ((size_t)-1) / 2) {
                free(buffer);
                return NULL;
            }
            char *grown = realloc(buffer, capacity * 2);
            if (grown == NULL) {
                free(buffer);
                return NULL;
            }
            buffer = grown;
            capacity *= 2;
            continue;
        }
        size_t got = fread(buffer + length, 1, room, f);
        length += got;
        if (got < room) {
            // Short read: end-of-file or error.
            break;
        }
    }
    if (ferror(f)) {
        free(buffer);
        return NULL;
    }
    buffer[length] = 0;
    return buffer;
}
// Reads all text from f, computes word/sentence/number statistics and
// prints one report to stdout. Does nothing when f is NULL.
// The report starts with "#N", where N is the updated global `total`; when
// the input cannot be read only that header is printed.
// main() runs this function on several threads at once, so the counter
// update and the whole report are emitted under one lock: numbering stays
// consistent and lines of different reports are not interleaved.
// The stream is not closed here.
void analyze(FILE *f) {
    static pthread_mutex_t report_lock = PTHREAD_MUTEX_INITIALIZER;
    if (!f) {
        // File doesn't exist
        return;
    }
    // Do all reading and counting before taking the lock so threads only
    // serialize on the printing.
    char *data = readall(f);
    char *clean_data = data ? clean_content(data) : NULL;
    int words = 0;
    int capitalized_words = 0;
    int fw = 0;
    int sentences = 0;
    int numbers = 0;
    if (clean_data) {
        words = get_words(data, &capitalized_words, &fw);
        sentences = get_sentences(data);
        numbers = get_numbers(clean_data);
    }

    pthread_mutex_lock(&report_lock);
    total = total + 1;
    printf("#%u\n", total);
    if (clean_data) {
        // All words
        printf("Words: %d\n", words);
        // All capitalized words
        printf("Capitalized words: %d\n", capitalized_words);
        // All sentences
        printf("Sentences: %i\n", sentences);
        // Numbers
        printf("Numbers: %d\n", numbers);
        // Forbidden words
        printf("Forbidden words: %d\n", fw);
        if (words) {
            double capitalized_word_percentage = 100 * ((double)capitalized_words / (double)words);
            printf("Capitalized percentage: %f%%\n", capitalized_word_percentage);
            double forbidden_word_percentage = 100 * ((double)fw / (double)words);
            printf("Forbidden percentage: %f%%\n", forbidden_word_percentage);
            // Guard against division by zero when no '.' was found.
            unsigned long long word_count_per_sentence = words / (sentences ? sentences : 1);
            printf("Word count per sentence: %llu\n", word_count_per_sentence);
        }
    }
    pthread_mutex_unlock(&report_lock);

    free(clean_data);
    free(data);
}
// Opens path for reading, runs analyze() on it and closes the file again.
// If the file cannot be opened (for any reason) a "File doesn't exist"
// message is printed instead.
void analyze_file(char *path) {
    FILE *f = fopen(path, "r");
    if (!f) {
        printf("File doesn't exist: %s\n", path);
        return;
    }
    analyze(f);
    fclose(f);
}
// Thread entry point with the signature pthread_create() expects: treats
// its argument as a file path, analyzes that file and returns NULL.
void *analyze_file_thread(void *path) {
    char *file_path = path;
    analyze_file(file_path);
    return NULL;
}
// Entry point.
// With file arguments: analyzes every file, one thread per file, and waits
// for all of them. If the thread table cannot be allocated or a thread
// cannot be started, that file is analyzed in the calling thread instead,
// so no input is skipped.
// Without arguments: analyzes stdin and prints the rmalloc statistics.
int main(int argc, char *argv[]) {
    if (argc > 1) {
        size_t file_count = (size_t)(argc - 1);
        pthread_t *threads = malloc(file_count * sizeof *threads);
        // Only successfully created threads are stored, packed at the
        // front, so the join loop never touches an uninitialized handle.
        size_t started = 0;
        for (int i = 1; i < argc; i++) {
            if (threads == NULL ||
                pthread_create(&threads[started], NULL, analyze_file_thread, (void *)argv[i]) != 0) {
                analyze_file(argv[i]);
                continue;
            }
            started++;
        }
        for (size_t t = 0; t < started; t++) {
            pthread_join(threads[t], NULL);
        }
        free(threads);
        return 0;
    }
    analyze(stdin);
    printf("%s\n", rmalloc_stats());
    return 0;
}