"""Predict popularity for every track in ``data_sup_0popularity.csv``.

The script loads a pickled model and a pickled scaler, checks that the
dataset contains the ``id`` column and every expected feature column,
scales the features, predicts, prints the first ten rows and writes the
result to ``predictions_sup0.csv``.
"""
import pickle
from typing import TYPE_CHECKING

import numpy as np
import pandas as pd

if TYPE_CHECKING:
    # Only needed for annotations; unpickling the scaler imports
    # scikit-learn on its own at runtime.
    from sklearn.preprocessing import StandardScaler

DATASET_PATH = "data_sup_0popularity.csv"
MODEL_PATH = "random_forest_model_sup0.pkl"
SCALER_PATH = "scaler_sup0.pkl"
OUTPUT_PATH = "predictions_sup0.csv"

ID_COLUMN = "id"

# Feature columns handed to the scaler and the model, in this order.
EXPECTED_FEATURES = (
    'year', 'acousticness', 'danceability', 'energy', 'explicit',
    'instrumentalness', 'key', 'liveness', 'loudness', 'mode',
    'speechiness', 'tempo', 'valence', 'nb_caracteres_sans_espaces',
    'nb_artistes', 'featuring', 'duree_minute', 'categorie_annee',
    'categorie_tempo',
)


def _load_pickle(path):
    """Return the object stored in the pickle file at *path*."""
    # NOTE(security): pickle.load can execute arbitrary code. Only point
    # this at model/scaler files produced by a trusted training run.
    with open(path, "rb") as handle:
        return pickle.load(handle)


def build_predictions(df, model, scaler: "StandardScaler",
                      features=EXPECTED_FEATURES, id_column=ID_COLUMN):
    """Return a new frame with the id, the features and a ``predictions`` column.

    Args:
        df: Input ``pandas.DataFrame``; it is not modified.
        model: Object exposing ``predict(X)``.
        scaler: Object exposing ``transform(X)``; it receives the feature
            columns as a DataFrame.
        features: Names of the feature columns, in the order passed to
            the scaler.
        id_column: Name of the identifier column carried into the output.

    Returns:
        A copy of ``df`` restricted to ``[id_column, *features]`` with an
        extra ``predictions`` column holding ``model.predict`` output.

    Raises:
        ValueError: If ``id_column`` or any feature is absent from ``df``.
    """
    feature_cols = list(features)
    required = [id_column] + feature_cols

    # The id column is validated together with the features so that a
    # missing id yields the same explicit error instead of a bare KeyError.
    missing_features = [col for col in required if col not in df.columns]
    if missing_features:
        raise ValueError(f"⚠️ Il manque ces colonnes dans le dataset : {missing_features}")

    # Explicit copy: adding a column to a plain column selection of `df`
    # triggers SettingWithCopyWarning on pandas versions without
    # copy-on-write.
    result = df[required].copy()

    scaled = scaler.transform(result[feature_cols])
    result["predictions"] = model.predict(scaled)
    return result


def main():
    """Load the dataset, model and scaler, then predict, print and save."""
    df = pd.read_csv(DATASET_PATH)
    model = _load_pickle(MODEL_PATH)
    scaler = _load_pickle(SCALER_PATH)

    predictions = build_predictions(df, model, scaler)

    # Show the first 10 rows as a quick sanity check.
    print(predictions.head(10))

    predictions.to_csv(OUTPUT_PATH, index=False)
    print("\n✅ Prédictions sauvegardées dans 'predictions_sup0.csv' !")


if __name__ == "__main__":
    main()