Skip to content
Snippets Groups Projects
Select Git revision
  • d86681b268fdc14a2ae8aadffe3875e6596325b7
  • master default protected
2 results

scodoc.py

Blame
  • Forked from Jean-Marie Place / SCODOC_R6A06
    Source project has a limited visibility.
    mlsup0final.py 2.16 KiB
    import numpy as np
    import pandas as pd
    import pickle
    import time
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
    
    print('HELLO')

    # Load the dataset of tracks with engineered features.
    songs = pd.read_csv("data_sup_0popularity.csv")

    # Feature columns the model was designed around; order matters for the
    # scaler/model pair that gets pickled below.
    expected_features = ['year', 'acousticness', 'danceability', 'energy', 'explicit',
                         'instrumentalness', 'key', 'liveness', 'loudness', 'mode',
                         'speechiness', 'tempo', 'valence', 'nb_caracteres_sans_espaces',
                         'nb_artistes', 'featuring', 'duree_minute', 'categorie_annee', 'categorie_tempo']

    # Fail fast with an explicit message if the CSV is missing any expected column.
    absent = [name for name in expected_features if name not in songs.columns]
    if absent:
        raise ValueError(f"⚠️ Il manque ces colonnes dans le dataset : {absent}")

    # Target is track popularity; predictors are exactly the expected columns.
    target = songs["popularity"]
    features = songs[expected_features]

    # Hold out 20% for evaluation; fixed seed keeps the split reproducible.
    features_train, features_test, target_train, target_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # Standardize features: fit on the training fold only, then apply to both.
    feature_scaler = StandardScaler()
    train_scaled = feature_scaler.fit_transform(features_train)
    test_scaled = feature_scaler.transform(features_test)

    # Train the random-forest regressor (same hyperparameters as deployed).
    model = RandomForestRegressor(n_estimators=200, max_depth=20, min_samples_split=2, random_state=42)
    model.fit(train_scaled, target_train)

    # Persist the fitted model and its scaler so inference can reuse both.
    with open("random_forest_model_sup0.pkl", "wb") as model_out:
        pickle.dump(model, model_out)

    with open("scaler_sup0.pkl", "wb") as scaler_out:
        pickle.dump(feature_scaler, scaler_out)

    # Evaluate on the held-out fold: MAE, RMSE, and R².
    predictions = model.predict(test_scaled)
    mae = mean_absolute_error(target_test, predictions)
    rmse = np.sqrt(mean_squared_error(target_test, predictions))
    r2 = r2_score(target_test, predictions)

    print(f"\n📊 RandomForestRegressor - MAE: {mae:.2f}, RMSE: {rmse:.2f}, R²: {r2:.3f}")
    print("\n✅ Modèle et scaler sauvegardés avec succès !")