Back to project.

Raw source file available here.

#include <algorithm>
#include <array>
#include <codecvt>
#include <cstddef>
#include <cwctype>
#include <execution>
#include <format>
#include <fstream>
#include <iostream>
#include <iterator>
#include <limits>
#include <locale>
#include <numeric>
#include <ranges>
#include <string>
#include <string_view>

#ifdef __cpp_lib_print
#include <print>
#else
// Fallback used when the standard library does not advertise <print>:
// minimal stand-ins for std::print / std::println built on std::vformat,
// writing to std::cout.
//
// NOTE(review): adding templates to namespace std is not sanctioned by the
// standard; it is kept so that std::print / std::println call sites compile
// unchanged on both branches of this #ifdef.
namespace std {
/// Formats `args` according to `format` (parsed at run time by std::vformat)
/// and writes the result to std::cout.
template <typename T, typename... Args>
inline void print(T format, Args &&... args) {
    // make_format_args takes its arguments as lvalues, so they are passed
    // by name rather than forwarded.
    std::cout << std::vformat(format, std::make_format_args(args...));
}

/// Same as print(), followed by a single newline character.
/// The stream is not flushed on every call; std::cout is flushed at exit.
template <typename T, typename... Args>
inline void println(T format, Args &&... args) {
    std::cout << std::vformat(format, std::make_format_args(args...)) << '\n';
}
}
#endif

// Lower-case tokens that are counted as "forbidden" when they appear in a
// parsed file. check_word() compares a whole whitespace-delimited token
// against these entries, so an entry only matches a token that is exactly
// equal to it after lower-casing.
//
// NOTE(review): "expers" looks like a typo for "experts" - confirm before
// changing, since it alters which tokens are counted.
constexpr std::array<std::wstring_view, 35> BAD_WORDS{
    L"recovery",  L"techie",     L"http",          L"https",          L"digital",
    L"hack",      L"::",         L"//",            L"com",            L"@",
    L"crypto",    L"bitcoin",    L"wallet",        L"hacker",         L"welcome",
    L"whatsapp",  L"email",      L"cryptocurrency", L"stolen",        L"freeze",
    L"quick",     L"crucial",    L"tracing",       L"scammers",       L"expers",
    L"hire",      L"century",    L"transaction",   L"essential",      L"managing",
    L"contact",   L"contacting", L"understanding", L"assets",         L"funds",
};

// Length (in wide characters) of the shortest and of the longest entry in
// BAD_WORDS, evaluated at compile time. check_word() uses them to reject
// tokens whose length cannot match any entry.
constexpr auto SHORTEST_BAD_WORD =
    std::ranges::min(BAD_WORDS, {}, [](const std::wstring_view &entry) { return entry.size(); }).size();
constexpr auto LONGEST_BAD_WORD =
    std::ranges::max(BAD_WORDS, {}, [](const std::wstring_view &entry) { return entry.size(); }).size();

// Statistics accumulated by parseFile() and reported by main().
std::size_t totalWordCount{0};         // whitespace-delimited tokens seen
std::size_t totalCapitalizedCount{0};  // tokens whose first character is upper-case
std::size_t totalSentenceCount{0};     // '.' characters seen
std::size_t totalNumberCount{0};       // tokens containing at least one digit
std::size_t totalForbiddenCount{0};    // tokens counted by check_word()
std::size_t fileCount{1};              // value printed as "File Count"

std::size_t failCount{0};              // files whose reading stopped for a reason other than EOF

/// Increments `forbiddenCount` when `word` equals an entry of BAD_WORDS.
///
/// @param word            Token to test. It is lower-cased in place (towlower)
///                        whenever its length lies between SHORTEST_BAD_WORD
///                        and LONGEST_BAD_WORD; otherwise it is left untouched.
/// @param forbiddenCount  Counter incremented by one on a match.
void check_word(std::wstring &word, std::size_t &forbiddenCount) {
    // A token shorter or longer than every list entry cannot be equal to one.
    const std::size_t length = word.size();
    const bool lengthInRange = length >= SHORTEST_BAD_WORD && length <= LONGEST_BAD_WORD;
    if (!lengthInRange) {
        return;
    }

    for (wchar_t &ch : word) {
        ch = static_cast<wchar_t>(::towlower(ch));
    }

    // Whole-token equality. Disabled alternative (substring matching):
    //   std::ranges::find_if(BAD_WORDS, [&word](const std::wstring_view &badWord) {
    //       return word.contains(badWord);
    //   }) != BAD_WORDS.end()
    const std::wstring_view needle{word};
    const bool listed = std::ranges::any_of(
        BAD_WORDS, [needle](const std::wstring_view &badWord) { return badWord == needle; });
    if (listed) {
        ++forbiddenCount;
    }
}

/// Reads one UTF-8 text file and adds its statistics to the global counters.
///
/// A token is a maximal run of non-whitespace characters. Per file this updates:
///   - totalSentenceCount:    +1 for every '.' character
///   - totalWordCount:        +1 for every token
///   - totalCapitalizedCount: +1 for every token starting with an upper-case character
///   - totalNumberCount:      +1 for every token containing at least one digit
///   - totalForbiddenCount:   via check_word() for every token
///   - failCount:             +1 when reading stopped for a reason other than end of file
///
/// If the file cannot be opened a message is printed and no counter changes.
///
/// Not thread-safe: the counters are plain globals updated without
/// synchronization.
///
/// @param filename  Path of the file to read.
void parseFile(const std::string_view &filename) {
    std::wifstream file;

    // Decode the file's bytes as UTF-8 while reading wide characters. The
    // facet is handed over to std::locale, which manages its lifetime.
    // The pragmas silence the MSVC deprecation warning (C4996) for codecvt_utf8.
#pragma warning(push)
#pragma warning(suppress : 4996)
    file.imbue(std::locale(std::locale(), new std::codecvt_utf8<wchar_t>));
#pragma warning(pop)

    file.open(std::string(filename));
    if (!file.is_open()) {
        std::println("File doesn't exist: {}", filename);
        return;
    }

    bool tokenHasDigit = false;  // ensures a token is counted as a number at most once
    std::wstring word;           // characters of the token currently being read
    wchar_t c;

    while (file.get(c)) {
        if (c == L'.') {
            totalSentenceCount++;
        }

        // Wide classification functions are required here: the narrow
        // <cctype> ones are undefined for values that do not fit in an
        // unsigned char, which decoded UTF-8 text routinely produces.
        if (std::iswspace(c)) {
            tokenHasDigit = false;
            if (!word.empty()) {
                check_word(word, totalForbiddenCount);
                word.clear();
            }
            continue;
        }

        // An empty buffer means this character starts a new token.
        if (word.empty()) {
            totalWordCount++;
            if (std::iswupper(c)) {
                totalCapitalizedCount++;
            }
        }

        if (!tokenHasDigit && std::iswdigit(c)) {
            totalNumberCount++;
            tokenHasDigit = true;
        }

        word.push_back(c);
    }

    // The last token is not followed by whitespace when the file does not
    // end with one.
    if (!word.empty()) {
        check_word(word, totalForbiddenCount);
    }

    // Reaching end of file sets eofbit together with failbit; a failure
    // without eofbit means reading was aborted. The stream is closed by its
    // destructor.
    if (file.fail() && !file.eof()) {
        failCount++;
    }
}

/// Parses every file named on the command line and prints the aggregated
/// statistics.
///
/// @return 1 when no file argument is given (a usage line is printed),
///         0 otherwise.
int main(const int argc, char *argv[]) {
    if (argc < 2) {
        std::println("Usage: {} <file1> <file2> ... <fileN>", argv[0]);
        return 1;
    }

    // Sequential on purpose: parseFile updates the shared global counters
    // without synchronization, so running it under a parallel execution
    // policy is a data race and yields unreliable totals.
    std::for_each(std::next(argv), argv + argc, parseFile);

    // Every argument after the program name is one input file.
    fileCount = static_cast<std::size_t>(argc - 1);

    // Guard the ratios against division by zero for empty input.
    const double capitalizedPercentage = (totalWordCount > 0)
        ? static_cast<double>(totalCapitalizedCount) / totalWordCount * 100.0
        : 0.0;
    const double forbiddenPercentage = (totalWordCount > 0)
        ? static_cast<double>(totalForbiddenCount) / totalWordCount * 100.0
        : 0.0;
    const double wordCountPerSentence = (totalSentenceCount > 0)
        ? static_cast<double>(totalWordCount) / totalSentenceCount
        : 0.0;

    std::println(
        "Word Count: {}\nCapitalized Count: {}\nSentence Count: {}\nNumber Count: {}\nForbidden Count: {}\nFile Count: {}\nFail Count: {}\nCapitalized Percentage: {}%\nForbidden Percentage: {}%\nWord Count Per Sentence: {}",
        totalWordCount, totalCapitalizedCount, totalSentenceCount, totalNumberCount, totalForbiddenCount, fileCount, failCount,
        capitalizedPercentage, forbiddenPercentage, wordCountPerSentence
    );

    return 0;
}