Skip to content
Snippets Groups Projects
Commit 0799cb81 authored by Fadi Imani's avatar Fadi Imani
Browse files

big project

parent afe0362d
No related branches found
No related tags found
No related merge requests found
Pipeline #40545 failed
Start testing: Apr 03 16:13 Romance Daylight Time
Start testing: Apr 03 16:22 Romance Daylight Time
----------------------------------------------------------
End testing: Apr 03 16:13 Romance Daylight Time
End testing: Apr 03 16:22 Romance Daylight Time
#ifndef FREQUENCY_COUNTER_H
#define FREQUENCY_COUNTER_H
#define MAX_STRING_LENGTH 100
// Structure pour représenter un mot avec sa fréquence
typedef struct {
char *word; // the word
......@@ -35,6 +37,7 @@ void write_results(const FrequencyCounter *counter, const char *filename);
void filter_stop_words(FrequencyCounter *counter, const char *filename);
int count_words_in_file(const char *filename);
void concatenate_strings(const char *strings[], int num_strings, char *result);
void read_and_count_ngrams(FrequencyCounter *counter, const char *filename, int ngram_size);
void mode_interactive();
......
......@@ -282,78 +282,78 @@ void concatenate_strings(const char *strings[], int num_strings, char *result) {
}
}
void read_and_count_ngrams(FrequencyCounter *counter, const char *filename, int ngram_size) {
int word_count = count_words_in_file(filename);
char *ngram_array[ngram_size];
FILE *file = fopen(filename, "r");
if (file == NULL) {
fprintf(stderr, "Erreur : Impossible d'ouvrir le fichier %s.\n", filename);
exit(EXIT_FAILURE);
}
if (ngram_size > word_count) {
printf("Error: n-gram size exceeds word count\n");
exit(EXIT_FAILURE);
}
char word[MAX_WORD_LENGTH];
int word_nb_read_now = 0;
int word_nb_in_ngram = 0;
while (fscanf(file, "%s", word) == 1) {
word_nb_read_now++;
word_nb_in_ngram++;
// Convertit le mot en minuscules
for (int i = 0; word[i]; ++i) {
word[i] = tolower(word[i]);
}
// Supprime la ponctuation du mot at the beginning
if (ispunct(word[0])) {
deleteFirstChar(word);
}
// Supprime la ponctuation du mot at the end
int len = strlen(word);
if (ispunct(word[len - 1])) {
word[len - 1] = '\0'; // Supprime le dernier caractère (ponctuation)
}
len = strlen(word);
if (ispunct(word[len - 1])) {
word[len - 1] = '\0'; // Supprime le dernier caractère (ponctuation)
}
// reset the ngram array
if (word_nb_in_ngram % word_nb_read_now == 0) {word_nb_in_ngram = 0;}
// Vérifie si le mot existe déjà dans le compteur
int found = 0;
for (int i = 0; i < counter->count; ++i) {
if (strcmp(counter->words[i].word, word) == 0) {
// Si trouvé, incrémente la fréquence
counter->words[i].frequency++;
found = 1;
break;
}
}
// Si le mot n'a pas été trouvé, l'ajoute au compteur
if (!found) {
counter->count++;
counter->words = realloc(counter->words, counter->count * sizeof(WordFrequency));
if (counter->words == NULL) {
fprintf(stderr, "Erreur d'allocation mémoire.\n");
exit(EXIT_FAILURE);
}
counter->words[counter->count - 1].word = strdup(word);
counter->words[counter->count - 1].frequency = 1;
}
}
fclose(file);
}
\ No newline at end of file
//void read_and_count_ngrams(FrequencyCounter *counter, const char *filename, int ngram_size) {
//
// int word_count = count_words_in_file(filename);
//
// char *ngram_array[ngram_size];
//
// FILE *file = fopen(filename, "r");
// if (file == NULL) {
// fprintf(stderr, "Erreur : Impossible d'ouvrir le fichier %s.\n", filename);
// exit(EXIT_FAILURE);
// }
//
// if (ngram_size > word_count) {
// printf("Error: n-gram size exceeds word count\n");
// exit(EXIT_FAILURE);
// }
//
// char word[MAX_WORD_LENGTH];
// int word_nb_read_now = 0;
// int word_nb_in_ngram = 0;
// while (fscanf(file, "%s", word) == 1) {
// word_nb_read_now++;
// word_nb_in_ngram++;
//
// // Convertit le mot en minuscules
// for (int i = 0; word[i]; ++i) {
// word[i] = tolower(word[i]);
// }
//
// // Supprime la ponctuation du mot at the beginning
// if (ispunct(word[0])) {
// deleteFirstChar(word);
// }
//
// // Supprime la ponctuation du mot at the end
// int len = strlen(word);
// if (ispunct(word[len - 1])) {
// word[len - 1] = '\0'; // Supprime le dernier caractère (ponctuation)
// }
// len = strlen(word);
// if (ispunct(word[len - 1])) {
// word[len - 1] = '\0'; // Supprime le dernier caractère (ponctuation)
// }
//
// // reset the ngram array
// if (word_nb_in_ngram % word_nb_read_now == 0) {word_nb_in_ngram = 0;}
//
//
// // Vérifie si le mot existe déjà dans le compteur
// int found = 0;
// for (int i = 0; i < counter->count; ++i) {
// if (strcmp(counter->words[i].word, word) == 0) {
// // Si trouvé, incrémente la fréquence
// counter->words[i].frequency++;
// found = 1;
// break;
// }
// }
//
// // Si le mot n'a pas été trouvé, l'ajoute au compteur
// if (!found) {
// counter->count++;
// counter->words = realloc(counter->words, counter->count * sizeof(WordFrequency));
// if (counter->words == NULL) {
// fprintf(stderr, "Erreur d'allocation mémoire.\n");
// exit(EXIT_FAILURE);
// }
// counter->words[counter->count - 1].word = strdup(word);
// counter->words[counter->count - 1].frequency = 1;
// }
//
// }
//
// fclose(file);
//}
\ No newline at end of file
......@@ -88,18 +88,28 @@ TEST(ExampleTests, test_count_words_in_file) {
EXPECT_EQ(6, count_words_in_file("test.txt"));
}
TEST(ExampleTests, test_read_and_count_ngrams) {
FrequencyCounter counter;
init_frequency_counter(&counter);
read_and_count_ngrams(&counter, "test.txt", 2);
EXPECT_EQ(2, counter.count);
EXPECT_STREQ("hello world", counter.words[0].word);
EXPECT_EQ(1, counter.words[0].frequency);
EXPECT_STREQ("world foo", counter.words[1].word);
EXPECT_EQ(1, counter.words[1].frequency);
cleanup_frequency_counter(&counter);
TEST(ExampleTests, test_concatenate_strings) {
const char *arr[] = {"This", "is", "an", "array", "of", "strings"};
int num_strings = sizeof(arr) / sizeof(arr[0]);
char result[MAX_STRING_LENGTH] = "";
concatenate_strings(arr, num_strings, result);
EXPECT_STREQ("This is an array of strings", result);
free(result);
}
//TEST(ExampleTests, test_read_and_count_ngrams) {
// FrequencyCounter counter;
// init_frequency_counter(&counter);
// read_and_count_ngrams(&counter, "test.txt", 2);
// EXPECT_EQ(2, counter.count);
// EXPECT_STREQ("hello world", counter.words[0].word);
// EXPECT_EQ(1, counter.words[0].frequency);
// EXPECT_STREQ("world foo", counter.words[1].word);
// EXPECT_EQ(1, counter.words[1].frequency);
// cleanup_frequency_counter(&counter);
//}
TEST(ExampleTests, test_mode_interactive) {
// this is just to make sure the function does not crash
mode_interactive();
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment