# ml_rn.py (2.49 KiB)
# Forked from Jean-Marie Place / SCODOC_R6A06 (source project has limited visibility).
# Emmanuel Viennet authored.
import numpy as np
import pandas as pd
import time
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.neural_network import MLPRegressor
def load_and_describe_data(file_path):
    """
    Load a CSV file into a DataFrame and print its basic structure.

    Args:
        file_path: Path (or any source accepted by ``pandas.read_csv``) of
            the CSV file to load.

    Returns:
        The loaded ``pandas.DataFrame``.
    """
    df = pd.read_csv(file_path)
    # DataFrame.info() writes its summary to stdout itself and returns None,
    # so wrapping it in print() would emit a spurious "None" line.
    df.info()
    return df
def train_mlp(df):
    """
    Tune, train and evaluate an MLPRegressor predicting the ``popularity`` column.

    Steps:
      1. Build the feature matrix by dropping the target and the identifier /
         date / duration columns listed below; ``popularity`` is the target.
      2. Split into 80 % train / 20 % test with a fixed random seed.
      3. Grid-search the MLP hyper-parameters with 3-fold cross-validation
         (R² scoring) on the training split.
      4. Evaluate the best model on the test split (MAE, RMSE, R²) and print
         the results and the elapsed time.

    Feature scaling is part of the cross-validated pipeline, so the
    MinMaxScaler is re-fitted on the training portion of every CV fold and
    never sees the corresponding validation fold.

    Args:
        df: DataFrame containing ``popularity`` plus every column named in
            the drop list. NOTE(review): the remaining columns are assumed
            to be numeric, since they are fed to MinMaxScaler — confirm
            against the dataset.

    Returns:
        The same DataFrame object, modified in place: the column
        ``pred_mlp`` holds the predictions for the rows of the test split
        (rows outside the test split are not assigned).

    Raises:
        KeyError: If ``popularity`` or one of the dropped columns is missing.
    """
    # Local import: Pipeline is not among this file's top-level imports.
    from sklearn.pipeline import Pipeline

    # perf_counter() is monotonic, unlike time.time(), so the measured
    # duration cannot be distorted by system clock adjustments.
    start_time = time.perf_counter()

    # 1. Separate the features from the target
    X = df.drop(
        columns=[
            "popularity",
            "id",
            "artists",
            "name",
            "release_date",
            "date_sortie",
            "duration_ms",
            "nom_artiste",
        ]
    )
    y = df["popularity"]

    # 2. Train/test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # 3. Scaling + model in a single estimator, so that scaling statistics
    #    are learned only from the data each fit actually trains on.
    pipeline = Pipeline(
        steps=[
            ("scaler", MinMaxScaler()),
            ("mlp", MLPRegressor(random_state=42)),
        ]
    )

    # 4. Hyper-parameter grid (the "mlp__" prefix routes each entry to the
    #    MLP step of the pipeline)
    param_grid = {
        "mlp__hidden_layer_sizes": [(50,), (100,), (100, 50), (100, 100)],
        "mlp__activation": ["relu", "tanh"],
        "mlp__solver": ["adam", "sgd"],
        "mlp__learning_rate_init": [0.001, 0.01, 0.1],
        "mlp__max_iter": [500],
        # Stop training when the internal validation score stops improving
        "mlp__early_stopping": [True],
    }

    # 5. Search for the best hyper-parameters
    grid_search = GridSearchCV(pipeline, param_grid, cv=3, scoring="r2", verbose=2)
    grid_search.fit(X_train, y_train)

    # 6. Report the best parameters under their plain MLPRegressor names
    best_params = {
        name.split("__", 1)[1]: value
        for name, value in grid_search.best_params_.items()
    }
    print("\n✅ Meilleurs paramètres :", best_params)

    # 7. Predict with the best pipeline (refitted on the whole training split)
    best_mlp = grid_search.best_estimator_
    y_pred = best_mlp.predict(X_test)

    # 8. Evaluate on the held-out test split
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    print(f"\n📊 MLPRegressor - MAE: {mae:.2f}, RMSE: {rmse:.2f}, R²: {r2:.3f}")

    # 9. Store the predictions on the rows of the test split
    df.loc[X_test.index, "pred_mlp"] = y_pred

    # Elapsed time
    elapsed_time = time.perf_counter() - start_time
    print(f"\n⏱️ Temps d'exécution : {elapsed_time:.2f} secondes")
    return df
# Load the dataset, then tune/train the MLP and keep the frame with predictions
df = train_mlp(load_and_describe_data("data_sup_0popularity.csv"))