Skip to content
Snippets Groups Projects
Commit d7df2bcb authored by Mohamed Sebabti
Browse files

model rn

parent fea72d79
No related branches found
No related tags found
No related merge requests found
import time

import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import MinMaxScaler, StandardScaler
def load_and_describe_data(file_path):
    """Load a CSV file and print a basic summary of its contents.

    :param file_path: Path of the CSV file to read.
    :return: pandas DataFrame holding the file's contents.
    """
    df = pd.read_csv(file_path)
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so wrapping it in print() would emit a stray "None" line.
    df.info()
    return df
def train_mlp(df):
    """Grid-search an MLPRegressor on ``popularity`` and store test predictions.

    The function drops the identifier/text/date columns, splits the rows
    80/20 into train and test sets, min-max scales the features, runs a
    3-fold ``GridSearchCV`` scored on R², evaluates the best estimator on the
    test set and writes its predictions into a ``pred_mlp`` column.

    :param df: DataFrame containing the ``popularity`` target, the columns
        dropped below and the feature columns. It is modified in place
        (a ``pred_mlp`` column is written for the test rows).
    :return: The same DataFrame, with ``pred_mlp`` filled for the test rows.
    """
    # Monotonic clock: elapsed-time measurement must not be affected by
    # system clock adjustments during a long grid search.
    start_time = time.perf_counter()

    # 1. Separate the features from the target.
    X = df.drop(
        columns=[
            "popularity",
            "id",
            "artists",
            "name",
            "release_date",
            "date_sortie",
            "duration_ms",
            "nom_artiste",
        ]
    )
    y = df["popularity"]

    # 2. Train/test split (fixed seed for reproducibility).
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # 3. Feature scaling: the scaler is fitted on the training rows only and
    #    then applied unchanged to the test rows.
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # 4. Hyperparameter grid to explore.
    param_grid = {
        "hidden_layer_sizes": [(50,), (100,), (100, 50), (100, 100)],
        "activation": ["relu", "tanh"],
        "solver": ["adam", "sgd"],
        "learning_rate_init": [0.001, 0.01, 0.1],
        "max_iter": [500],
        # Stop training when the validation score stops improving.
        "early_stopping": [True],
    }

    # 5. Search for the best hyperparameters.
    mlp = MLPRegressor(random_state=42)
    grid_search = GridSearchCV(mlp, param_grid, cv=3, scoring="r2", verbose=2)
    grid_search.fit(X_train_scaled, y_train)

    # 6. Report the best parameters.
    best_params = grid_search.best_params_
    print("\n✅ Meilleurs paramètres :", best_params)

    # 7. Predict on the held-out rows with the best model.
    best_mlp = grid_search.best_estimator_
    y_pred = best_mlp.predict(X_test_scaled)

    # 8. Evaluate the model.
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    print(f"\n📊 MLPRegressor - MAE: {mae:.2f}, RMSE: {rmse:.2f}, R²: {r2:.3f}")

    # 9. Store the predictions on the test rows of the DataFrame.
    df.loc[X_test.index, "pred_mlp"] = y_pred

    # Execution time.
    elapsed_time = time.perf_counter() - start_time
    print(f"\n⏱️ Temps d'exécution : {elapsed_time:.2f} secondes")

    return df
# Load the dataset, then train the model. The data must be loaded before
# train_mlp is called, and training should run exactly once.
df = load_and_describe_data("data_sup_0popularity.csv")
df = train_mlp(df)
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment