Back to project.

Raw source file available here.

// Author: retoor@molodetz.nl

// This program analyzes text files for word counts, capitalized words, sentences, numbers, and forbidden words.

/*
MIT License

Copyright (c) 2025 retoor

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

... (full license text)
*/

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <pthread.h>

#define MAX_TEXT_LENGTH 1024
#define FORBIDDEN_WORDS_COUNT 40

// Words counted as "forbidden" by is_forbidden(). That function compares a
// whole token against each entry with strcmp(), so matching is exact and
// case-sensitive: an entry such as "http", "//" or "@" only matches a token
// that consists of exactly that text.
// The list is NULL-terminated; FORBIDDEN_WORDS_COUNT is the array capacity,
// and any slots after the last initializer are implicitly NULL as well.
// NOTE(review): "expers" looks like a typo for "experts" — confirm before
// changing it, since editing an entry changes what is matched at runtime.
const char* forbidden_words[FORBIDDEN_WORDS_COUNT] = {
"recovery", "techie", "http", "https", "digital", "hack", "::", "//", "com",
"@", "crypto", "bitcoin", "wallet", "hacker", "welcome", "whatsapp", "email", "cryptocurrency",
"stolen", "freeze", "quick", "crucial", "tracing", "scammers", "expers", "hire", "century",
"transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds",
NULL
};

// Per-file input/output record for analyze_file(): the caller sets `filename`
// and analyze_file() stores the counters once the file has been read.
typedef struct {
char *filename; // Path of the file to analyze (main() stores an argv entry here)
long long total_word_count; // Number of whitespace-separated tokens
long long total_capitalized_count; // Tokens whose first character is uppercase
long long total_sentence_count; // Number of '.' characters in the file
long long total_number_count; // Tokens containing at least one digit
long long total_forbidden_count; // Tokens for which is_forbidden() returns 1
} AnalysisResult;

/*
 * Reports whether `word` appears in the forbidden_words table.
 *
 * Each table entry is compared with strcmp(), so the match is exact and
 * case-sensitive. The scan stops at the table's NULL sentinel.
 *
 * Returns 1 if the word is forbidden, 0 otherwise.
 */
int is_forbidden(const char* word) {
    const char *const *entry = forbidden_words;

    while (*entry != NULL) {
        if (strcmp(word, *entry) == 0) {
            return 1;
        }
        entry++;
    }

    return 0;
}

/*
 * Reads the whole of `filename` into a newly allocated, NUL-terminated buffer.
 *
 * The file is read in chunks of MAX_TEXT_LENGTH bytes and the buffer is grown
 * geometrically, so large files do not trigger one reallocation per chunk.
 *
 * Returns the buffer on success; the caller owns it and must free() it.
 * Returns NULL, after printing a diagnostic, when the file cannot be opened,
 * memory cannot be allocated, or a read error occurs.
 */
char* read_file(const char* filename) {
    FILE *file = fopen(filename, "r");
    if (!file) {
        printf("File doesn't exist: %s\n", filename);
        return NULL;
    }

    char *content = NULL;
    size_t capacity = 0;     /* bytes currently allocated for content */
    size_t content_size = 0; /* bytes of file data stored so far */
    size_t bytes_read;

    do {
        /* Guarantee room for one full chunk plus the terminating NUL. */
        if (capacity - content_size < (size_t)MAX_TEXT_LENGTH + 1) {
            size_t new_capacity = capacity ? capacity * 2 : (size_t)MAX_TEXT_LENGTH + 1;
            /* new_capacity <= capacity means the doubling wrapped around. */
            char *new_content = (new_capacity > capacity) ? realloc(content, new_capacity) : NULL;
            if (!new_content) {
                free(content);
                fclose(file);
                printf("Memory allocation failed while reading file: %s\n", filename);
                return NULL;
            }
            content = new_content;
            capacity = new_capacity;
        }

        bytes_read = fread(content + content_size, 1, MAX_TEXT_LENGTH, file);
        content_size += bytes_read;
    } while (bytes_read == MAX_TEXT_LENGTH);

    /* A short read means either end-of-file or an I/O error; only EOF is success. */
    if (ferror(file)) {
        free(content);
        fclose(file);
        printf("Error while reading file: %s\n", filename);
        return NULL;
    }

    content[content_size] = '\0';
    fclose(file);
    return content;
}

void* analyze_file(void* arg) {
AnalysisResult *result = (AnalysisResult *)arg;
char *text = read_file(result->filename);
if (text) {
long long word_count = 0;
long long capitalized_count = 0;
long long sentence_count = 0;
long long number_count = 0;
long long forbidden_count = 0;

for (size_t i = 0; text[i] != '\0'; i++) {
if (text[i] == '.') {
sentence_count++;
}
}

char *saveptr;
char* token = strtok_r(text, " \f\v\r\n\t", &saveptr);
while (token != NULL) {
word_count++;

if (isupper(token[0])) {
capitalized_count++;
}

for (size_t i = 0; token[i] != '\0'; i++) {
if (isdigit(token[i])) {
number_count++;
break;
}
}

if (is_forbidden(token)) {
forbidden_count++;
}

token = strtok_r(NULL, " \f\v\r\n\t", &saveptr);
}

result->total_word_count = word_count;
result->total_capitalized_count = capitalized_count;
result->total_sentence_count = sentence_count;
result->total_number_count = number_count;
result->total_forbidden_count = forbidden_count;

free(text);
}
return NULL;
}

int main(int argc, char *argv[]) {
if (argc < 2) {
printf("Usage: %s <file1> <file2> ... <fileN>\n", argv[0]);
return 1;
}

pthread_t threads[argc - 1];
AnalysisResult results[argc - 1];

for (size_t i = 1; i < argc; i++) {
results[i - 1].filename = argv[i];
if (pthread_create(&threads[i - 1], NULL, analyze_file, &results[i - 1]) != 0) {
printf("Error creating thread for file: %s\n", argv[i]);
return 1;
}
}

for (size_t i = 1; i < argc; i++) {
pthread_join(threads[i - 1], NULL);
}

long long total_word_count = 0;
long long total_capitalized_count = 0;
long long total_sentence_count = 0;
long long total_number_count = 0;
long long total_forbidden_count = 0;

for (size_t i = 0; i < argc - 1; i++) {
total_word_count += results[i].total_word_count;
total_capitalized_count += results[i].total_capitalized_count;
total_sentence_count += results[i].total_sentence_count;
total_number_count += results[i].total_number_count;
total_forbidden_count += results[i].total_forbidden_count;
}

double capitalized_percentage = (total_word_count > 0) ? (double)total_capitalized_count / total_word_count * 100.0 : 0;
double forbidden_percentage = (total_word_count > 0) ? (double)total_forbidden_count / total_word_count * 100.0 : 0;
double word_count_per_sentence = (total_sentence_count > 0) ? (double)total_word_count / total_sentence_count : 0;

printf("\nTotal Words: %lld\n", total_word_count);
printf("Total Capitalized words: %lld\n", total_capitalized_count);
printf("Total Sentences: %lld\n", total_sentence_count);
printf("Total Numbers: %lld\n", total_number_count);
printf("Total Forbidden words: %lld\n", total_forbidden_count);
printf("Capitalized percentage: %.6f%%\n", capitalized_percentage);
printf("Forbidden percentage: %.6f%%\n", forbidden_percentage);
printf("Word count per sentence: %.6f\n", word_count_per_sentence);
printf("Total files read: %d\n", (int)(argc - 1));
return 0;
}