Select Git revision
app_glob.py
-
Simon Majorczyk authored
mlsup0final.py 2.16 KiB
import numpy as np
import pandas as pd
import pickle
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# Training script: fit a RandomForestRegressor that predicts the "popularity"
# column from a fixed list of feature columns, persist the fitted model and
# scaler with pickle, then print MAE / RMSE / R² measured on a held-out split.
print('HELLO')

# Load the data.
df = pd.read_csv("data_sup_0popularity.csv")

# 1) Feature columns the model is trained on. X is built by selecting these
#    columns in this order, so the list also fixes the column order of X.
expected_features = [
    'year', 'acousticness', 'danceability', 'energy', 'explicit',
    'instrumentalness', 'key', 'liveness', 'loudness', 'mode',
    'speechiness', 'tempo', 'valence', 'nb_caracteres_sans_espaces',
    'nb_artistes', 'featuring', 'duree_minute', 'categorie_annee', 'categorie_tempo',
]

# Fail early, with one readable message, if any required column is absent.
# The target column is checked together with the features so that a missing
# "popularity" column is reported here rather than as a bare KeyError below.
missing_features = [col for col in expected_features if col not in df.columns]
missing_columns = list(missing_features)
if "popularity" not in df.columns:
    missing_columns.append("popularity")
if missing_columns:
    raise ValueError(f"⚠️ Il manque ces colonnes dans le dataset : {missing_columns}")

# 2) Separate the target from the features.
y = df["popularity"]
X = df[expected_features]  # keep only the expected feature columns

# 3) Train/test split: 20% of the rows are held out; the seed is fixed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4) Standardize the features. The scaler is fitted on the training split
#    only and then applied as-is to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5) Train the model on the scaled training features.
rf = RandomForestRegressor(n_estimators=200, max_depth=20, min_samples_split=2, random_state=42)
rf.fit(X_train_scaled, y_train)

# 6) Persist the fitted model and the fitted scaler as two separate pickles.
with open("random_forest_model_sup0.pkl", "wb") as model_file:
    pickle.dump(rf, model_file)
with open("scaler_sup0.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

# 7) Evaluate on the held-out split.
y_pred = rf.predict(X_test_scaled)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))  # RMSE = sqrt(MSE)
r2 = r2_score(y_test, y_pred)
print(f"\n📊 RandomForestRegressor - MAE: {mae:.2f}, RMSE: {rmse:.2f}, R²: {r2:.3f}")
print("\n✅ Modèle et scaler sauvegardés avec succès !")