Commit d7df2bcb authored by Mohamed Sebabti

model rn

parent fea72d79
import time

import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import MinMaxScaler
def load_and_describe_data(file_path):
    """
    Load a CSV file and print basic information about it.

    :param file_path: Path to the CSV file
    :return: Pandas DataFrame
    """
    df = pd.read_csv(file_path)
    df.info()  # info() prints directly; wrapping it in print() would emit a stray "None"
    return df
def train_mlp(df):
    start_time = time.time()  # ⏳ timer

    # 1️⃣ Split features and target
    X = df.drop(columns=["popularity", "id", "artists", "name", "release_date",
                         "date_sortie", "duration_ms", "nom_artiste"])
    y = df["popularity"]

    # 2️⃣ Train/test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
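    # A fixed random_state keeps the 80/20 split reproducible between runs.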

    # 3️⃣ Feature scaling
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
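    # The scaler is fit on the training split only, then applied to the test split,
    # so no test-set statistics leak into training. MinMaxScaler maps each feature
    # to [0, 1], a range well suited to the MLP's gradient-based optimisation.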

    # 4️⃣ Hyperparameter grid
    param_grid = {
        "hidden_layer_sizes": [(50,), (100,), (100, 50), (100, 100)],
        "activation": ["relu", "tanh"],
        "solver": ["adam", "sgd"],
        "learning_rate_init": [0.001, 0.01, 0.1],
        "max_iter": [500],
        "early_stopping": [True],  # stop training when the validation score stops improving
    }
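    # The grid spans 4 × 2 × 2 × 3 = 48 candidates; with cv=3 that means 144 fits.
    # With early_stopping, each fit reserves a share of its training data as a
    # validation set (MLPRegressor's validation_fraction defaults to 0.1).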

    # 5️⃣ Search for the best hyperparameters with GridSearchCV
    mlp = MLPRegressor(random_state=42)
    grid_search = GridSearchCV(mlp, param_grid, cv=3, scoring="r2", verbose=2)
    grid_search.fit(X_train_scaled, y_train)
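    # GridSearchCV's default refit=True retrains the best candidate on the full
    # training set, so best_estimator_ below is ready to use directly.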

    # 6️⃣ Report the best parameters
    best_params = grid_search.best_params_
    print("\n✅ Best parameters:", best_params)

    # 7️⃣ Predict with the best model
    best_mlp = grid_search.best_estimator_
    y_pred = best_mlp.predict(X_test_scaled)

    # 8️⃣ Model evaluation
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    print(f"\n📊 MLPRegressor - MAE: {mae:.2f}, RMSE: {rmse:.2f}, R²: {r2:.3f}")

    # 9️⃣ Add the predictions to the DataFrame
    df.loc[X_test.index, "pred_mlp"] = y_pred
    print(df.head(40))
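    # Only rows from the test split receive a "pred_mlp" value; rows from the
    # training split are left as NaN in that column.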

    # ⏳ Execution time
    elapsed_time = time.time() - start_time
    print(f"\n⏱️ Execution time: {elapsed_time:.2f} seconds")

    return df

# 📂 Load the data and train the model
df = load_and_describe_data("data_sup_0popularity.csv")
df = train_mlp(df)
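
# The fitted grid_search object is local to train_mlp, so the per-candidate CV
# results are discarded when the function returns. A minimal sketch of how one
# might inspect and persist the tuned model, assuming train_mlp were modified to
# also return grid_search (a hypothetical change, not part of this commit):
#
#     import joblib
#     cv_results = pd.DataFrame(grid_search.cv_results_)
#     print(cv_results[["params", "mean_test_score", "std_test_score"]]
#           .sort_values("mean_test_score", ascending=False)
#           .head())
#     joblib.dump(grid_search.best_estimator_, "best_mlp.joblib")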