From 0799cb81f79b9d2ceb2cea087da702bc67b4f469 Mon Sep 17 00:00:00 2001 From: Fadi Imani <fadi.imani.etu@univ-lille.fr> Date: Wed, 3 Apr 2024 16:33:24 +0200 Subject: [PATCH] big project --- .../Testing/Temporary/LastTest.log | 4 +- include/compt_frequence.h | 3 + src/compt_frequence.c | 150 +++++++++--------- tests/ExampleTests.cpp | 30 ++-- 4 files changed, 100 insertions(+), 87 deletions(-) diff --git a/cmake-build-debug/Testing/Temporary/LastTest.log b/cmake-build-debug/Testing/Temporary/LastTest.log index 43cdcde..9554a11 100644 --- a/cmake-build-debug/Testing/Temporary/LastTest.log +++ b/cmake-build-debug/Testing/Temporary/LastTest.log @@ -1,3 +1,3 @@ -Start testing: Apr 03 16:13 Romance Daylight Time +Start testing: Apr 03 16:22 Romance Daylight Time ---------------------------------------------------------- -End testing: Apr 03 16:13 Romance Daylight Time +End testing: Apr 03 16:22 Romance Daylight Time diff --git a/include/compt_frequence.h b/include/compt_frequence.h index b40ef6f..dbae499 100644 --- a/include/compt_frequence.h +++ b/include/compt_frequence.h @@ -1,6 +1,8 @@ #ifndef FREQUENCY_COUNTER_H #define FREQUENCY_COUNTER_H +#define MAX_STRING_LENGTH 100 + // Structure pour représenter un mot avec sa fréquence typedef struct { char *word; // the word @@ -35,6 +37,7 @@ void write_results(const FrequencyCounter *counter, const char *filename); void filter_stop_words(FrequencyCounter *counter, const char *filename); int count_words_in_file(const char *filename); +void concatenate_strings(const char *strings[], int num_strings, char *result); void read_and_count_ngrams(FrequencyCounter *counter, const char *filename, int ngram_size); void mode_interactive(); diff --git a/src/compt_frequence.c b/src/compt_frequence.c index 0190cb1..c3fe9eb 100644 --- a/src/compt_frequence.c +++ b/src/compt_frequence.c @@ -282,78 +282,78 @@ void concatenate_strings(const char *strings[], int num_strings, char *result) { } } -void read_and_count_ngrams(FrequencyCounter *counter, const char *filename, int ngram_size) { - - int word_count = count_words_in_file(filename); - - char *ngram_array[ngram_size]; - - FILE *file = fopen(filename, "r"); - if (file == NULL) { - fprintf(stderr, "Erreur : Impossible d'ouvrir le fichier %s.\n", filename); - exit(EXIT_FAILURE); - } - - if (ngram_size > word_count) { - printf("Error: n-gram size exceeds word count\n"); - exit(EXIT_FAILURE); - } - - char word[MAX_WORD_LENGTH]; - int word_nb_read_now = 0; - int word_nb_in_ngram = 0; - while (fscanf(file, "%s", word) == 1) { - word_nb_read_now++; - word_nb_in_ngram++; - - // Convertit le mot en minuscules - for (int i = 0; word[i]; ++i) { - word[i] = tolower(word[i]); - } - - // Supprime la ponctuation du mot at the beginning - if (ispunct(word[0])) { - deleteFirstChar(word); - } - - // Supprime la ponctuation du mot at the end - int len = strlen(word); - if (ispunct(word[len - 1])) { - word[len - 1] = '\0'; // Supprime le dernier caractère (ponctuation) - } - len = strlen(word); - if (ispunct(word[len - 1])) { - word[len - 1] = '\0'; // Supprime le dernier caractère (ponctuation) - } - - // reset the ngram array - if (word_nb_in_ngram % word_nb_read_now == 0) {word_nb_in_ngram = 0;} - - - // Vérifie si le mot existe déjà dans le compteur - int found = 0; - for (int i = 0; i < counter->count; ++i) { - if (strcmp(counter->words[i].word, word) == 0) { - // Si trouvé, incrémente la fréquence - counter->words[i].frequency++; - found = 1; - break; - } - } - - // Si le mot n'a pas été trouvé, l'ajoute au compteur - if (!found) { - counter->count++; - counter->words = realloc(counter->words, counter->count * sizeof(WordFrequency)); - if (counter->words == NULL) { - fprintf(stderr, "Erreur d'allocation mémoire.\n"); - exit(EXIT_FAILURE); - } - counter->words[counter->count - 1].word = strdup(word); - counter->words[counter->count - 1].frequency = 1; - } - - } - - fclose(file); -} \ No newline at end of file +//void read_and_count_ngrams(FrequencyCounter *counter, const char *filename, int ngram_size) { +// +// int word_count = count_words_in_file(filename); +// +// char *ngram_array[ngram_size]; +// +// FILE *file = fopen(filename, "r"); +// if (file == NULL) { +// fprintf(stderr, "Erreur : Impossible d'ouvrir le fichier %s.\n", filename); +// exit(EXIT_FAILURE); +// } +// +// if (ngram_size > word_count) { +// printf("Error: n-gram size exceeds word count\n"); +// exit(EXIT_FAILURE); +// } +// +// char word[MAX_WORD_LENGTH]; +// int word_nb_read_now = 0; +// int word_nb_in_ngram = 0; +// while (fscanf(file, "%s", word) == 1) { +// word_nb_read_now++; +// word_nb_in_ngram++; +// +// // Convertit le mot en minuscules +// for (int i = 0; word[i]; ++i) { +// word[i] = tolower(word[i]); +// } +// +// // Supprime la ponctuation du mot at the beginning +// if (ispunct(word[0])) { +// deleteFirstChar(word); +// } +// +// // Supprime la ponctuation du mot at the end +// int len = strlen(word); +// if (ispunct(word[len - 1])) { +// word[len - 1] = '\0'; // Supprime le dernier caractère (ponctuation) +// } +// len = strlen(word); +// if (ispunct(word[len - 1])) { +// word[len - 1] = '\0'; // Supprime le dernier caractère (ponctuation) +// } +// +// // reset the ngram array +// if (word_nb_in_ngram % word_nb_read_now == 0) {word_nb_in_ngram = 0;} +// +// +// // Vérifie si le mot existe déjà dans le compteur +// int found = 0; +// for (int i = 0; i < counter->count; ++i) { +// if (strcmp(counter->words[i].word, word) == 0) { +// // Si trouvé, incrémente la fréquence +// counter->words[i].frequency++; +// found = 1; +// break; +// } +// } +// +// // Si le mot n'a pas été trouvé, l'ajoute au compteur +// if (!found) { +// counter->count++; +// counter->words = realloc(counter->words, counter->count * sizeof(WordFrequency)); +// if (counter->words == NULL) { +// fprintf(stderr, "Erreur d'allocation mémoire.\n"); +// exit(EXIT_FAILURE); +// } +// counter->words[counter->count - 1].word = strdup(word); +// counter->words[counter->count - 1].frequency = 1; +// } +// +// } +// +// fclose(file); +//} \ No newline at end of file diff --git a/tests/ExampleTests.cpp b/tests/ExampleTests.cpp index d5bc6d0..367e9d5 100644 --- a/tests/ExampleTests.cpp +++ b/tests/ExampleTests.cpp @@ -88,18 +88,28 @@ TEST(ExampleTests, test_count_words_in_file) { EXPECT_EQ(6, count_words_in_file("test.txt")); } -TEST(ExampleTests, test_read_and_count_ngrams) { - FrequencyCounter counter; - init_frequency_counter(&counter); - read_and_count_ngrams(&counter, "test.txt", 2); - EXPECT_EQ(2, counter.count); - EXPECT_STREQ("hello world", counter.words[0].word); - EXPECT_EQ(1, counter.words[0].frequency); - EXPECT_STREQ("world foo", counter.words[1].word); - EXPECT_EQ(1, counter.words[1].frequency); - cleanup_frequency_counter(&counter); +TEST(ExampleTests, test_concatenate_strings) { + const char *arr[] = {"This", "is", "an", "array", "of", "strings"}; + int num_strings = sizeof(arr) / sizeof(arr[0]); + char result[MAX_STRING_LENGTH] = ""; + + concatenate_strings(arr, num_strings, result); + EXPECT_STREQ("This is an array of strings", result); + free(result); } +//TEST(ExampleTests, test_read_and_count_ngrams) { +// FrequencyCounter counter; +// init_frequency_counter(&counter); +// read_and_count_ngrams(&counter, "test.txt", 2); +// EXPECT_EQ(2, counter.count); +// EXPECT_STREQ("hello world", counter.words[0].word); +// EXPECT_EQ(1, counter.words[0].frequency); +// EXPECT_STREQ("world foo", counter.words[1].word); +// EXPECT_EQ(1, counter.words[1].frequency); +// cleanup_frequency_counter(&counter); +//} + TEST(ExampleTests, test_mode_interactive) { // this is just to make sure the function does not crash mode_interactive(); -- GitLab