/*
 * Text statistics tool: counts words, sentences, numbers, fully upper-case
 * words and "forbidden" (spam-indicating) words in text files or stdin.
 *
 * NOTE(review): the names of the system headers below were missing from the
 * original source (bare `#include` directives). They are reconstructed from
 * the library functions this file calls -- confirm against the build.
 */
#include "rmalloc.h"
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "rstr.h"
#include "rstring_list.h"
#include <sys/types.h>
#include <unistd.h>

/* Short aliases for the rstring_list / rbuffer APIs. */
#define sl rstring_list_t
#define slf rstring_list_free
#define sla rstring_list_add
#define sln rstring_list_new
#define rb rbuffer_t
#define rbf rbuffer_free
#define rbs rbuffer_to_string
#define rbw rbuffer_write
#define rbn rbuffer_new

/*
 * NULL-terminated list of words that count as "forbidden".
 * Entries are compared case-insensitively against whole words.
 *
 * NOTE(review): "expers" looks like a typo for "experts" -- confirm before
 * changing it, since that alters what is matched.
 */
char *forbidden_words[] = {
    "recovery", "techie", "http", "https", "digital", "hack", "::", "//",
    "com", "@", "crypto", "bitcoin", "wallet", "hacker", "welcome",
    "whatsapp", "email", "cryptocurrency", "stolen", "freeze", "quick",
    "crucial", "tracing", "scammers", "expers", "hire", "century",
    "transaction", "essential", "managing", "contact", "contacting",
    "understanding", "assets", "funds", NULL};

/*
 * Case-insensitive string equality.
 *
 * Returns true when both strings have the same length and match character
 * by character ignoring case, false otherwise. Note that, unlike the
 * similarly named platform function, this returns a boolean and not an
 * ordering.
 */
bool stricmp(char *word1, char *word2) {
    /* <ctype.h> functions require a value representable as unsigned char;
     * passing a negative plain char is undefined behaviour. */
    while (*word1 &&
           tolower((unsigned char)*word1) == tolower((unsigned char)*word2)) {
        word1++;
        word2++;
    }
    /* Equal only if both strings ended at the same position. */
    return *word1 == '\0' && *word2 == '\0';
}

/* Print every entry of the list to stdout as "<index:string>", one per line. */
void sld(sl *lst) {
    for (ulonglong i = 0; i < lst->count; i++) {
        printf("<%llu:%s>\n", i, lst->strings[i]);
    }
}

/*
 * Return a newly allocated copy of `content` with the characters '<', '>'
 * and ':' removed (they delimit entries in the "<index:string>" dump
 * format).
 *
 * The caller owns the result and must free() it. Returns NULL if the
 * allocation fails.
 */
char *remove_preserved_chars(char *content) {
    /* The result is never longer than the input. */
    char *cc = malloc(strlen(content) + 1);
    if (!cc) {
        return NULL;
    }
    char *ccp = cc;
    for (; *content; content++) {
        if (*content == '<' || *content == '>' || *content == ':') {
            continue;
        }
        *ccp++ = *content;
    }
    *ccp = '\0';
    return cc;
}
// Memory usage: 29 TB, 213.322.618 (re)allocated, 106.670.251 unique free'd, 0 in use.
char *slds(sl *lst) { str_t *buffer = strn(1337); for (ulonglong i = 0; i < lst->count; i++) { char *temp = (char *)malloc(strlen(lst->strings[i]) + 20); char *cc = remove_preserved_chars(lst->strings[i]); sprintf(temp, "<%llu:%s>\n", i, cc); free(cc); stra(buffer, temp); free(temp); } return strc(buffer); } bool isws(char c) { return c == '\t' || c == '\n' || c == ' ' || c == ','; } char *fread_till_eof(FILE *f) { char c; str_t *buffer = strn(1337); while ((c = fgetc(f)) != EOF) { strac(buffer, c); } char *content = strc(buffer); return content; } int get_sentences(char *content) { int count = 0; char *sentence_buffer = (char *)malloc(strlen(content) + 1); char *sentence_buffer_p = sentence_buffer; bool in_line = false; while (*content) { if ((*content == ' ' || *content == '\t' || *content == '\n') && !in_line) { content++; continue; } else { in_line = true; } if (*content == '.') { *sentence_buffer_p = *content; sentence_buffer_p++; *sentence_buffer_p = 0; count++; sentence_buffer_p = sentence_buffer; *sentence_buffer = 0; content++; in_line = false; continue; } *sentence_buffer_p = *content; sentence_buffer_p++; *sentence_buffer_p = 0; content++; } free(sentence_buffer); return count; } bool is_forbidden_word(char *word) { for (int j = 0; forbidden_words[j] != NULL; j++) { if (stricmp(word, forbidden_words[j])) { return true; } } return false; } int get_words(char *content, int * count_caps, int *fw_count) { int count = 0; char *word_buffer = (char *)malloc(strlen(content) + 1); char *word_buffer_p = word_buffer; *word_buffer_p = 0; bool has_lcase = false; // rbuffer_t * buffer = rbuffer_new(NULL,0); while (*content) { if (*content == ' ' || *content == '\t' || *content == '\n') { if (word_buffer_p != word_buffer) { if(!has_lcase) { (*count_caps)++; } count++; if(is_forbidden_word(word_buffer)){ (*fw_count)++; } word_buffer_p = word_buffer; *word_buffer = 0; } has_lcase = false; content++; continue; } *word_buffer_p = *content; if(islower(*content) == *content) 
has_lcase = true; word_buffer_p++; *word_buffer_p = 0; content++; } free(word_buffer); return count; } bool is_fully_capitalized_word(char *word) { while (*word) { if (isalnum(*word) && toupper(*word) != *word) return false; word++; } return true; } int get_capitalized_words(sl *all_words) { int count = 0; for (uint i = 0; i < all_words->count; i++) { if (is_fully_capitalized_word(all_words->strings[i])) { count++; } } return count; } char *clean_content(char *content) { char *allowed_ichars = "01234567891abcdefghijklmnopqrstuvwxyz.,!?"; char *clean_content = (char *)malloc(strlen(content) + 1); char *clean_content_p = clean_content; *clean_content_p = 0; while (*content) { if (strchr(allowed_ichars, tolower(*content))) { *clean_content_p = *content; clean_content_p++; *clean_content_p = 0; } content++; } return clean_content; } int get_numbers(char *cc) { int count = 0; char *ccc = cc; char *cccp = ccc; char *number_buffer = (char *)malloc(strlen(ccc) + 1); *number_buffer = 0; char *number_buffer_p = number_buffer; while (*cccp) { if (isdigit((*cccp))) { *number_buffer_p = *cccp; number_buffer_p++; *number_buffer_p = 0; } else if (number_buffer != number_buffer_p) { count++; *number_buffer = 0; number_buffer_p = number_buffer; } cccp++; } free(number_buffer); return count; } unsigned int total = 0; char *readall(FILE *f) { if (fseek(f, 0, SEEK_END) != 0) { fclose(f); return NULL; } size_t file_size = ftell(f); if (file_size == (size_t)-1L) { fclose(f); return NULL; } if (fseek(f, 0, SEEK_SET) != 0) { fclose(f); return NULL; } char *buffer = (char *)malloc(file_size + 1); if (!buffer) { fclose(f); return NULL; } size_t bytes_read = fread(buffer, 1, file_size, f); buffer[bytes_read] = 0; return buffer; } void analyze(FILE *f) { if(!f){ // File doesn't exist return; } total = total + 1; printf("#%u\n", total); char *data = readall(f); if(!data) return; char *clean_data = clean_content(data); int capitalized_words = 0; int fw = 0; int words = 
get_words(data,&capitalized_words,&fw); int sentences = get_sentences(data); int numbers = get_numbers(clean_data); // All words printf("Words: %d\n", words); // All capitalized words printf("Capitalized words: %d\n", capitalized_words); // All sentences printf("Sentences: %i\n", sentences); // Numbers printf("Numbers: %d\n", numbers); // Forbidden words printf("Forbidden words: %d\n", fw); if (words) { double capitalized_word_percentage = 100 * ((double)capitalized_words / (double)words); printf("Capitalized percentage: %f%%\n", capitalized_word_percentage); double forbidden_word_percentage = 100 * ((double)fw / (double)words); printf("Forbidden percentage: %f%%\n", forbidden_word_percentage); ulonglong word_count_per_sentence = words / (sentences ? sentences : 1); printf("Word count per sentence: %llu\n", word_count_per_sentence); } free(clean_data); free(data); } void analyze_file(char *path) { FILE *f = fopen(path, "r"); if(f){ analyze(f); fclose(f); }else{ printf("File doesn't exist: %s\n",path); } } int main(int argc, char *argv[]) { if (argc > 1) { for (int i = 1; i < argc; i++) { pid_t pid = fork(); if(!pid){ printf("File: %s\n", argv[i]); analyze_file(argv[i]); printf("\n"); return 0; } } return 0; } analyze(stdin); printf("%s\n", rmalloc_stats()); return 0; }