Commit b2365d4c authored by Mohamed Sebabti

model rn

parent 13e7783f
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.neural_network import MLPRegressor
def load_and_describe_data(file_path):
    """
    ...  (body collapsed in the diff view)

df = load_and_describe_data('data_sup_0popularity.csv')
print(df.info())  # info() prints its summary itself and returns None (hence the trailing "None" in the log below)
def train_mlp(df):
    """
    Trains a multi-layer perceptron regressor on the data and evaluates its performance.

    Arguments:
        df -- DataFrame containing the explanatory variables and the target.

    Returns:
        The DataFrame with a 'pred_mlp' column holding the test-set predictions.
    """
    # 1. Split features and target: drop the target itself, identifier and raw
    #    text/date columns, and duration_ms (duree_minute already encodes it)
    X = df.drop(columns=["popularity", "id", "artists", "name", "release_date",
                         "date_sortie", "duration_ms", "nom_artiste"])
    y = df['popularity']
    # 2. Train/test split (20% held out, fixed seed for reproducibility)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    # 3. Standardize the features (required for an MLP): fit the scaler on the
    #    training data only, then apply the same transform to the test data
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    # 4. Define and train the MLP: two hidden ReLU layers (100 and 50 units),
    #    adam optimizer, at most 500 iterations
    mlp = MLPRegressor(hidden_layer_sizes=(100, 50), activation='relu',
                       solver='adam', max_iter=500, random_state=42)
    mlp.fit(X_train_scaled, y_train)
    # 5. Predict on the held-out test set
    y_pred = mlp.predict(X_test_scaled)

    # 6. Evaluate: mean absolute error, root mean squared error, R²
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    print(f"📊 MLPRegressor - MAE: {mae:.2f}, RMSE: {rmse:.2f}, R²: {r2:.3f}")
    # 7. Write the test-set predictions back to the original DataFrame
    #    (rows used for training keep NaN in 'pred_mlp')
    df.loc[X_test.index, 'pred_mlp'] = y_pred
    print(df.head(40))

    return df

train_mlp(df)
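For comparison, the side removed by this commit trained the same kind of model with Keras instead of scikit-learn. Cleaned up and translated (with the deprecated squared=False RMSE call replaced by an explicit square root), it looked roughly like this; note that it drops only 'popularity', so it assumes every remaining column in df is numeric:

import time
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

def train_neural_network(df):
    """
    Trains a neural network on the data and evaluates its performance.

    Returns:
        - A dictionary with the model's metrics.
        - A DataFrame with the predictions.
    """
    # Features (X) and target (y); assumes df holds only numeric columns here
    X = df.drop(columns=['popularity'])
    y = df['popularity']

    # Train/test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Standardize: fit on the training data, transform the test data
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Two hidden ReLU layers and one linear output unit for regression
    model = Sequential([
        Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
        Dense(64, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

    # Train, timing the whole run
    start_time = time.time()
    model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=1,
              validation_data=(X_test, y_test))
    training_time = time.time() - start_time

    # Predict and flatten to a 1-D array
    y_pred = model.predict(X_test).flatten()

    # Metrics (RMSE computed as the root of the MSE)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    predictions_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
    print(f"📊 Neural network - MAE: {mae:.2f}, RMSE: {rmse:.2f}, R²: {r2:.3f}, Time: {training_time:.2f} s")

    return {"MAE": mae, "RMSE": rmse, "R2": r2, "Training Time": training_time}, predictions_df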
nohup: ignoring input
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 98159 entries, 0 to 98158
Data columns (total 27 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 id 98159 non-null object
1 artists 98159 non-null object
2 name 98159 non-null object
3 year 98159 non-null int64
4 acousticness 98159 non-null float64
5 danceability 98159 non-null float64
6 duration_ms 98159 non-null int64
7 energy 98159 non-null float64
8 explicit 98159 non-null int64
9 instrumentalness 98159 non-null float64
10 key 98159 non-null int64
11 liveness 98159 non-null float64
12 loudness 98159 non-null float64
13 mode 98159 non-null int64
14 release_date 98159 non-null object
15 speechiness 98159 non-null float64
16 tempo 98159 non-null float64
17 valence 98159 non-null float64
18 popularity 98159 non-null int64
19 date_sortie 98159 non-null object
20 nom_artiste 98159 non-null object
21 nb_caracteres_sans_espaces 98159 non-null int64
22 nb_artistes 98159 non-null int64
23 featuring 98159 non-null int64
24 duree_minute 98159 non-null float64
25 categorie_annee 98159 non-null int64
26 categorie_tempo 98159 non-null int64
dtypes: float64(10), int64(11), object(6)
memory usage: 20.2+ MB
None
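The info() dump above shows six object-dtype columns (id, artists, name, release_date, date_sortie, nom_artiste); these are exactly the columns train_mlp drops, plus duration_ms since duree_minute already carries that information. A more defensive alternative, a sketch that is not in the commit, selects features by dtype so a future text column cannot reach the scaler:

# Sketch (not in the commit): pick features by dtype instead of by name.
X = df.select_dtypes(include='number').drop(columns=['popularity', 'duration_ms'])
y = df['popularity']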
/home/mohamed.sebabti/.local/lib/python3.10/site-packages/sklearn/neural_network/_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
warnings.warn(
📊 MLPRegressor - MAE: 9.27, RMSE: 12.67, R²: 0.564
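The ConvergenceWarning above means the adam optimizer used its full 500-iteration budget before the loss settled, so the reported MAE of 9.27 and R² of 0.564 come from a model that had not finished training. A minimal sketch of one way to let it converge, assuming the same scaled data as in train_mlp (not part of this commit):

# Sketch (not in the commit): raise the iteration cap and stop on a
# validation plateau instead of a fixed budget.
mlp = MLPRegressor(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    max_iter=2000,          # higher cap than the 500 that triggered the warning
    early_stopping=True,    # hold out 10% of the training data for validation
    n_iter_no_change=10,    # stop once the validation score stops improving
    random_state=42,
)
mlp.fit(X_train_scaled, y_train)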