Skip to content
Snippets Groups Projects
Select Git revision
  • c065abd0bb5ccc208b62c198dc081fad5c4e7908
  • master default protected
2 results

test_api_permissions.py

Blame
  • Forked from Jean-Marie Place / SCODOC_R6A06
    Source project has a limited visibility.
    ml_rn.py 2.49 KiB
    import numpy as np
    import pandas as pd
    import time
    from sklearn.model_selection import train_test_split, GridSearchCV
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
    from sklearn.neural_network import MLPRegressor
    
    def load_and_describe_data(file_path):
        """
        Charge un fichier CSV et affiche les informations de base.
        """
        df = pd.read_csv(file_path)
        print(df.info())
        return df
    
    def train_mlp(df):
        start_time = time.time()  # ⏳ Timer
    
        # 1️⃣ Séparation des features et de la cible
        X = df.drop(columns=["popularity", "id", "artists", "name", "release_date", "date_sortie", "duration_ms", "nom_artiste"])
        y = df["popularity"]
    
        # 2️⃣ Split train/test
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
        # 3️⃣ Normalisation des features
        scaler = MinMaxScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
    
        # 4️⃣ Définition des hyperparamètres
        param_grid = {
            "hidden_layer_sizes": [(50,), (100,), (100, 50), (100, 100)],
            "activation": ["relu", "tanh"],
            "solver": ["adam", "sgd"],
            "learning_rate_init": [0.001, 0.01, 0.1],
            "max_iter": [500],
            "early_stopping": [True]  # Arrête si la validation ne s'améliore pas
        }
    
        # 5️⃣ Recherche des meilleurs hyperparamètres
        mlp = MLPRegressor(random_state=42)
        grid_search = GridSearchCV(mlp, param_grid, cv=3, scoring="r2", verbose=2)
        grid_search.fit(X_train_scaled, y_train)
    
        # 6️⃣ Affichage des meilleurs paramètres
        best_params = grid_search.best_params_
        print("\n✅ Meilleurs paramètres :", best_params)
    
        # 7️⃣ Prédiction avec le meilleur modèle
        best_mlp = grid_search.best_estimator_
        y_pred = best_mlp.predict(X_test_scaled)
    
        # 8️⃣ Évaluation du modèle
        mae = mean_absolute_error(y_test, y_pred)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        r2 = r2_score(y_test, y_pred)
    
        print(f"\n📊 MLPRegressor - MAE: {mae:.2f}, RMSE: {rmse:.2f}, R²: {r2:.3f}")
    
        # 9️⃣ Ajout des prédictions au DataFrame
        df.loc[X_test.index, "pred_mlp"] = y_pred
    
        # ⏳ Temps d'exécution
        elapsed_time = time.time() - start_time
        print(f"\n⏱️ Temps d'exécution : {elapsed_time:.2f} secondes")
    
        return df
    
    # 📂 Chargement et entraînement
    df = load_and_describe_data("data_sup_0popularity.csv")
    df = train_mlp(df)