diff --git a/Module_correlation.Rmd b/Module_correlation.Rmd
index 448926d4e18fefdc657d8c3725d8f163cea0e5cd..281bc0f66a0f7f8a42994014c9dcc98699869708 100644
--- a/Module_correlation.Rmd
+++ b/Module_correlation.Rmd
@@ -3,9 +3,18 @@ title: '"Module d''analyse des corrélations des annotations des échantillons"'
 output: html_document
 ---
 
+
+```{r setup, include=FALSE}
+
+# Global options: hide code, warnings and messages in the final report.
+knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
+
+```
+
+
+
 ```{r load_packages, include=FALSE}
 # Installer et charger les packages nécessaires 
-
 if(!require(GGally)) install.packages("GGally")
 if(!require(readr)) install.packages("readr")
 if(!require(dplyr)) install.packages("dplyr")
@@ -28,24 +37,31 @@ library(ComplexHeatmap)
 library(RColorBrewer)
 library(reshape2)
 
+
 ```
 
+
+1. Data Import and Exploration
 ```{r}
-# Importer le fichier d'annotations (ex: CSV)
-setwd("C:/Users/User/Desktop/projet_visualisation")
-annotations <- read.csv("design_test.csv", sep=",", stringsAsFactors = FALSE)
+# Import the design file (CSV)
+annotations <- read.csv(design_file, sep = ",", stringsAsFactors = FALSE)
+
+# Convert specified categorical variables to factors.
+annotations <- annotations %>%
+  mutate(across(all_of(categorical_vars), as.factor))
 
-# Aperçu de la structure et résumé des données
+# Overview of data structure and summary.
 glimpse(annotations)
 summary(annotations)
 
-# Vérification des valeurs manquantes par colonne
+# Check missing values per column.
 missing_values <- colSums(is.na(annotations))
 print(missing_values)
 
+
 ```
 
-2. Séparation et Conversion des Variables
+2. Variable Separation and Conversion
 
 
 ```{r}
@@ -61,94 +77,80 @@ cat("Variables catégorielles détectées : ", paste(colnames(cat_data), collaps
 
 ```
 
-2.1 Conversion Automatisée pour le Calcul de Corrélation
-
+2.1 Automated Conversion for Correlation Analysis
+Note: For correlation analysis, categorical variables are encoded as numeric level indices and dates as day counts; 'sample' is excluded.
 ```{r}
-convert_to_numeric <- function(df, date_cols = NULL, drop_cols = NULL, date_format = "%d.%m.%Y") {
-  # Supprime les colonnes non désirées
-  if (!is.null(drop_cols)) {
-    df <- df %>% select(-all_of(drop_cols))
-  }
-  
-  df_numeric <- df %>% mutate(across(everything(), ~ {
-    if (is.numeric(.)) {
-      return(.)
-    } else if (is.character(.)) {
-      # Si la colonne est spécifiée comme date ou peut être parsée en date
-      if (!is.null(date_cols) && cur_column() %in% date_cols) {
-        return(as.numeric(as.Date(., format = date_format)))
-      } else {
-        parsed_date <- suppressWarnings(as.Date(., format = date_format))
-        if (all(!is.na(parsed_date))) {
-          return(as.numeric(parsed_date))
-        } else {
-          return(as.numeric(factor(.)))
-        }
-      }
-    } else if (is.factor(.)) {
-      return(as.numeric(.))
-    } else {
-      return(as.numeric(.))
-    }
-  }))
-  
-  return(df_numeric)
-}
+# Encode annotations numerically for the correlation matrix: categories become
+# level indices, dates become day counts. 'sample' (a row ID) is dropped first.
+data_numeric <- annotations %>%
+  select(-sample) %>%
+  mutate(
+    across(c(condition, animal), ~ as.numeric(factor(.x))),
+    across(c(experiment, extraction),
+           ~ as.numeric(as.Date(.x, format = "%d.%m.%Y")))
+  )
 
-# Exemple : traiter "experiment" et "extraction" comme dates, et supprimer "sample"
-data_numeric <- convert_to_numeric(annotations,
-                                   date_cols = c("experiment", "extraction"),
-                                   drop_cols = c("sample"))
 str(data_numeric)
 
 
 ```
-3. Visualisations de Corrélation et Distributions
-3.1 Matrice de Corrélation avec corrplot
+3. Visualizations of Correlations and Distributions
+3.1 Correlation Matrix with corrplot
+Figure: Correlation matrix showing pairwise Pearson correlations among quantitative variables.
 ```{r}
 if(ncol(data_numeric) > 1){
+  # Compute the Pearson correlation matrix; "complete.obs" drops rows with any NA
   cor_matrix <- cor(data_numeric, use = "complete.obs", method = "pearson")
+  
+  # Draw the lower triangle of the correlation matrix as colored circles
   corrplot(cor_matrix, method = "circle", type = "lower",
            tl.col = "black", tl.cex = 0.8,
            col = colorRampPalette(c("blue", "white", "red"))(200))
 } else {
-  cat("Pas assez de colonnes numériques pour calculer une matrice de corrélation.\n")
+  cat("Not enough numeric columns to compute a correlation matrix.\n")
 }
 
+
 ```
 
-3.2 Heatmap avec ComplexHeatmap
+3.2 Heatmap with ComplexHeatmap
+Figure: Heatmap of the correlation matrix.
 ```{r}
 if(ncol(data_numeric) > 1){
-  Heatmap(cor_matrix, name = "Corrélation",
+  Heatmap(cor_matrix, name = "Correlation",
           col = colorRampPalette(brewer.pal(8, "RdYlBu"))(50),
-          column_title = "Heatmap de la matrice de corrélation")
+          column_title = "Heatmap of Correlation Matrix")
 } else {
-  cat("Pas assez de variables numériques pour générer une heatmap.\n")
+  cat("Not enough numeric variables to generate a heatmap.\n")
 }
 
+
 ```
 
-3.3 Pairs Plot des Variables Numériques
+3.3 Pairs Plot of Numeric Variables
+Figure: Pairs plot (scatterplot matrix) of quantitative variables.
+
+
 ```{r}
 if(ncol(num_data) > 1){
-  ggpairs(num_data, title = "Pairs Plot des variables numériques", progress = FALSE)
+  ggpairs(num_data, title = "Pairs Plot of Numeric Variables", progress = FALSE)
 } else {
-  cat("Pas assez de variables numériques pour générer un pairs plot.\n")
+  cat("Not enough numeric variables to generate a pairs plot.\n")
 }
 
+
 ```
 
-3.4 Distribution des Variables Numériques
-Pour chaque variable numérique, un histogramme et une courbe de densité sont affichés.
+3.4 Distribution of Numeric Variables
+Figure: Histograms with density curves for each quantitative variable.
 ```{r}
 numeric_vars <- names(data_numeric)
 for (var in numeric_vars) {
   p <- ggplot(data_numeric, aes_string(x = var)) +
     geom_histogram(aes(y = ..density..), bins = 30, fill = "blue", alpha = 0.5) +
     geom_density(color = "red", size = 1) +
-    labs(title = paste("Distribution de", var),
-         x = var, y = "Densité") +
+    labs(title = paste("Distribution of", var),
+         x = var, y = "Density") +
     theme_minimal()
   print(p)
 }
@@ -156,8 +158,9 @@ for (var in numeric_vars) {
 
 ```
 
-3.5 Scatter Plots pour Paires de Variables Fortement Corrélées
-On identifie les paires de variables dont la corrélation absolue dépasse un seuil (ici 0.7) et on affiche un scatter plot avec une droite de régression.
+3.5 Scatter Plots for Highly Correlated Pairs
+Figure: Scatter plots with linear regression lines for pairs of variables with |correlation| > 0.7.
+
 ```{r}
 cor_threshold <- 0.7
 if(ncol(data_numeric) > 1){
@@ -171,89 +174,152 @@ if(ncol(data_numeric) > 1){
       p <- ggplot(data_numeric, aes_string(x = var1, y = var2)) +
            geom_point(alpha = 0.6) +
            geom_smooth(method = "lm", se = FALSE, color = "red") +
-           labs(title = paste("Scatter Plot de", var1, "vs", var2),
-                subtitle = paste("Corrélation =", round(cor_matrix[high_cor_pairs[i,1], high_cor_pairs[i,2]], 2))) +
+           labs(title = paste("Scatter Plot:", var1, "vs", var2),
+                subtitle = paste("Correlation =", round(cor_matrix[high_cor_pairs[i,1], high_cor_pairs[i,2]], 2))) +
            theme_minimal()
       print(p)
     }
   } else {
-    cat("Aucune paire de variables avec une corrélation absolue supérieure à", cor_threshold, "\n")
+    cat("No variable pairs with an absolute correlation above", cor_threshold, "\n")
   }
 } else {
-  cat("Pas assez de variables numériques pour générer des scatter plots.\n")
+  cat("Not enough numeric variables to generate scatter plots.\n")
 }
-
 ```
-3.6 Distribution des Variables Catégorielles
+
+3.6 Distribution of Categorical Variables
+Figure: Frequency distributions (bar charts) for each categorical variable.
+
 ```{r}
 cat_vars <- names(cat_data)
 for (var in cat_vars) {
   p <- ggplot(annotations, aes_string(x = var)) +
        geom_bar(fill = "blue", alpha = 0.7) +
-       labs(title = paste("Distribution de la variable catégorielle :", var),
-            x = var, y = "Fréquence") +
+       labs(title = paste("Frequency Distribution of", var),
+            x = var, y = "Count") +
        theme_minimal()
   print(p)
 }
 
 
 ```
-3.7 Visualisation de la Variable sample
+3.7 Detailed Visualization of the 'sample' Variable
+Even though the unique sample identifiers are not necessarily used in every figure, it is important to retain and examine their distribution.
+Here we provide a dedicated bar chart for the sample variable.
+
 ```{r}
 if("sample" %in% names(annotations)){
-  p <- ggplot(annotations, aes(x = factor(sample))) +
+  # Ensure 'sample' is treated as a factor and preserve its order.
+  annotations$sample <- factor(annotations$sample, levels = unique(annotations$sample))
+  
+  # Bar chart for 'sample'
+  p <- ggplot(annotations, aes(x = sample)) +
        geom_bar(fill = "steelblue", alpha = 0.7) +
-       labs(title = "Distribution de la variable 'sample'",
-            x = "Sample", y = "Fréquence") +
+       labs(title = "Frequency Distribution of 'sample'",
+            x = "Sample ID", y = "Count") +
        theme_minimal()
   print(p)
 } else {
-  cat("La variable 'sample' n'existe pas dans les données.\n")
+  cat("The variable 'sample' does not exist in the dataset.\n")
 }
 
-
 ```
 
-
-3.8 Visualisation Avancée (Matrice Améliorée)
+3.8 Advanced Visualization: Improved Pairs Plot
+Figure: Advanced pairs plot of selected categorical variables (excluding 'sample') to provide a global view of group relationships.
 
 ```{r}
-# Créer une copie des données pour conversion en facteurs
-data_factors <- annotations
-
-# Conversion des colonnes spécifiques en facteurs si elles existent
-cols_to_factor <- c("condition", "animal", "experiment", "extraction")
-for (col in cols_to_factor) {
-  if (col %in% names(data_factors)) {
-    data_factors[[col]] <- as.factor(data_factors[[col]])
+# Create a copy of the data for the pairs plot.
+# Keep 'sample' in data_factors but drop it (only if present) from the plot data.
+data_factors <- annotations %>% select(any_of(c(display_vars, "sample")))
+data_pairs <- data_factors %>% select(-any_of("sample"))
+
+# Ensure that the variables to be displayed are factors.
+for (col in display_vars) {
+  if(col %in% names(data_pairs)){
+    data_pairs[[col]] <- as.factor(data_pairs[[col]])
   }
 }
 
-# Supprimer la colonne "sample" pour cette visualisation
-if("sample" %in% names(data_factors)) {
-  data_factors <- data_factors %>% select(-sample)
-}
-
-# Définition des couleurs personnalisées
-custom_colors <- c("#1B9E77", "#D95F02", "#7570B3")  # Vert, Orange, Bleu
+# Define custom colors for the 'condition' variable.
+custom_colors <- c("#1B9E77", "#D95F02", "#7570B3")
 
-# Génération de la matrice de plots avec ggpairs
-ggpairs(data_factors, 
+advanced_plot <- ggpairs(data_pairs, 
         mapping = aes(color = condition),
         lower = list(
-          continuous = wrap("points", alpha = 0.7, color = custom_colors[1]),
-          combo = wrap("box_no_facet", outlier.colour = "red")
+          continuous = wrap("points", alpha = 0.7, size = 2),
+          combo = wrap("box_no_facet", outlier.colour = "red", size = 0.5)
         ),
         diag = list(
           continuous = wrap("densityDiag", alpha = 0.5, fill = custom_colors[2])
         ),
         upper = list(
-          continuous = wrap("cor", size = 5, color = "black")
+          continuous = wrap("cor", size = 4, color = "black")
         )
 ) +
+  scale_color_manual(values = custom_colors) +
   theme_bw() +
   theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
         axis.text.y = element_text(size = 10),
-        legend.position = "bottom")
+        legend.position = "bottom") +
+  ggtitle("Advanced Pairs Plot of Selected Variables")
+advanced_plot
+
+
+```
+
+4. Frequency Distributions of Categorical Variables (Tabset)
+The following section uses R Markdown tabsets to display the frequency histograms of each categorical variable in separate tabs.
+This helps in quickly verifying that each level of a variable is represented by a similar number of samples—a key point for balanced downstream analyses.
+
+<!-- The {.tabset} attribute on the header below renders each sub-header as a tab -->
+## Frequency Distributions for Categorical Variables {.tabset}
+### condition
+
+```{r}
+ggplot(annotations, aes(x = condition)) +
+  geom_bar(fill = "blue", alpha = 0.7) +
+  labs(title = "Frequency Distribution: condition",
+       x = "Condition", y = "Count") +
+  theme_minimal()
+
+```
+
+### sample
+```{r}
+ggplot(annotations, aes(x = sample)) +
+  geom_bar(fill = "steelblue", alpha = 0.7) +
+  labs(title = "Frequency Distribution: sample",
+       x = "Sample ID", y = "Count") +
+  theme_minimal()
+```
+
+### animal
+```{r}
+ggplot(annotations, aes(x = animal)) +
+  geom_bar(fill = "blue", alpha = 0.7) +
+  labs(title = "Frequency Distribution: animal",
+       x = "Animal", y = "Count") +
+  theme_minimal()
+
+```
+
+### experiment
+```{r}
+ggplot(annotations, aes(x = experiment)) +
+  geom_bar(fill = "blue", alpha = 0.7) +
+  labs(title = "Frequency Distribution: experiment",
+       x = "Experiment", y = "Count") +
+  theme_minimal()
+```
+
+### extraction
+```{r}
+ggplot(annotations, aes(x = extraction)) +
+  geom_bar(fill = "blue", alpha = 0.7) +
+  labs(title = "Frequency Distribution: extraction",
+       x = "Extraction", y = "Count") +
+  theme_minimal()
+
 ```