diff --git a/app_sup0.py b/app_sup0.py index 25c6beaa5127ab90b958c2bd2ac36da219d371f8..5051f829cf650d67a5e953f5ad467eee1cc32598 100644 --- a/app_sup0.py +++ b/app_sup0.py @@ -22,19 +22,74 @@ def predict(): # Récupérer les données du formulaire data = request.form.to_dict() - # Convertir en DataFrame et en float - df_input = pd.DataFrame([data]) - df_input = df_input.astype(float) - - # Appliquer le scaler - df_input_scaled = scaler.transform(df_input) - - # Faire la prédiction - prediction = rf.predict(df_input_scaled) - - return jsonify({"predictions": prediction.tolist()}) + # Calculer les features automatiquement + if 'name' in data: + data['nb_caracteres_sans_espaces'] = len(data['name'].replace(" ", "")) + if 'artists' in data: + data['nb_artistes'] = data['artists'].count(',') + 1 + data['featuring'] = int(data['nb_artistes'] > 1) + if 'duration_ms' in data: + duration_ms = float(data['duration_ms']) + data['duree_minute'] = float(f"{int(duration_ms // 60000)}.{int((duration_ms % 60000) // 1000):02d}") + if 'year' in data: + year = int(data['year']) + data['categorie_annee'] = 3 if year < 1954 else 2 if year < 2002 else 1 + if 'tempo' in data: + tempo = float(data['tempo']) + if 40 <= tempo < 60: + data['categorie_tempo'] = 1 + elif 60 <= tempo < 66: + data['categorie_tempo'] = 2 + elif 66 <= tempo < 76: + data['categorie_tempo'] = 3 + elif 76 <= tempo < 108: + data['categorie_tempo'] = 4 + elif 108 <= tempo < 120: + data['categorie_tempo'] = 5 + elif 120 <= tempo < 163: + data['categorie_tempo'] = 6 + elif 163 <= tempo < 200: + data['categorie_tempo'] = 7 + elif 200 <= tempo <= 208: + data['categorie_tempo'] = 8 + else: + data['categorie_tempo'] = 9 + + # Supprimer les clés inutiles + data.pop('name', None) + data.pop('artists', None) + data.pop('duration_ms', None) + + # Convertir les valeurs en float si possible + for key in data: + try: + data[key] = float(data[key]) + except ValueError: + pass # Garder les valeurs non convertibles (ex: texte) + + # Liste des features dans le bon ordre (comme lors de l'entraînement) + expected_features = ['year', 'acousticness', 'danceability', 'energy', 'explicit', + 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', + 'speechiness', 'tempo', 'valence', 'nb_caracteres_sans_espaces', + 'nb_artistes', 'featuring', 'duree_minute', 'categorie_annee', 'categorie_tempo'] + + # Construire le DataFrame avec les features dans le bon ordre + input_data = pd.DataFrame([[data.get(key, 0) for key in expected_features]], columns=expected_features) + + # Vérifier que toutes les colonnes attendues sont présentes + missing_cols = [col for col in expected_features if col not in input_data.columns] + if missing_cols: + return jsonify({'error': f'Missing features: {missing_cols}'}), 400 + + # Normalisation des features + input_data_scaled = scaler.transform(input_data) + + # Prédiction + predictions = rf.predict(input_data_scaled) + + return jsonify({'predictions': predictions.tolist()}) except Exception as e: - return jsonify({"error": str(e)}) + return jsonify({'error': str(e)}) if __name__ == '__main__': app.run(debug=True) diff --git a/mlsup0final.py b/mlsup0final.py index 437db5d062cceb5d11c000f77f04d25d9bee467a..5f196339fe7f14a5fe78485cd68a710c090aa025 100644 --- a/mlsup0final.py +++ b/mlsup0final.py @@ -6,41 +6,51 @@ from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler from sklearn.ensemble import RandomForestRegressor from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score + print('HELLO') + # 📂 Charger les données df = pd.read_csv("data_sup_0popularity.csv") -# 1️⃣ Séparation des features et de la cible -X = df.drop(columns=["popularity", "id", "artists", "name", "release_date", "date_sortie", "duration_ms", "nom_artiste"]) +# 1️⃣ Vérifier que les features sont bien présentes +expected_features = ['year', 'acousticness', 'danceability', 'energy', 'explicit', + 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', + 'speechiness', 'tempo', 'valence', 'nb_caracteres_sans_espaces', + 'nb_artistes', 'featuring', 'duree_minute', 'categorie_annee', 'categorie_tempo'] + +# Vérifier que toutes les colonnes attendues sont présentes +missing_features = [col for col in expected_features if col not in df.columns] +if missing_features: + raise ValueError(f"⚠️ Il manque ces colonnes dans le dataset : {missing_features}") + +# 2️⃣ Séparation des features et de la cible y = df["popularity"] +X = df[expected_features] # On garde uniquement les features déjà présentes -# 2️⃣ Split train/test +# 3️⃣ Split train/test X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) -# 3️⃣ Normalisation des features +# 4️⃣ Normalisation des features scaler = StandardScaler() X_train_scaled = scaler.fit_transform(X_train) X_test_scaled = scaler.transform(X_test) -# 4️⃣ Entraînement du modèle +# 5️⃣ Entraînement du modèle rf = RandomForestRegressor(n_estimators=200, max_depth=20, min_samples_split=2, random_state=42) rf.fit(X_train_scaled, y_train) -# 5️⃣ Sauvegarde du modèle et du scaler +# 6️⃣ Sauvegarde du modèle et du scaler with open("random_forest_model_sup0.pkl", "wb") as model_file: pickle.dump(rf, model_file) with open("scaler_sup0.pkl", "wb") as scaler_file: pickle.dump(scaler, scaler_file) -# 6️⃣ Évaluation du modèle +# 7️⃣ Évaluation du modèle y_pred = rf.predict(X_test_scaled) mae = mean_absolute_error(y_test, y_pred) rmse = np.sqrt(mean_squared_error(y_test, y_pred)) r2 = r2_score(y_test, y_pred) print(f"\n📊 RandomForestRegressor - MAE: {mae:.2f}, RMSE: {rmse:.2f}, R²: {r2:.3f}") -print("\n✅ Modèle et scaler sauvegardés !") - -#nohup python3 -u mlsup0final.py > modelsup0final.txt 2>&1 & -#ps aux | grep python +print("\n✅ Modèle et scaler sauvegardés avec succès !") diff --git a/templates/index-sup0.html b/templates/index-sup0.html index bda319f822f9d94cce5eef0e470197328e98eb22..1563b0bc8ab42013f44232e2c4946201a1435d45 100644 --- a/templates/index-sup0.html +++ b/templates/index-sup0.html @@ -50,62 +50,47 @@ <body> <h1>Prédire la Popularité d'une Chanson</h1> <form action="/predict_sup0" method="post"> + <label for="name">Titre de la chanson :</label> + <input type="text" id="name" name="name" required> + <label for="year">Année :</label> <input type="number" id="year" name="year" required> <label for="acousticness">Acousticness :</label> - <input type="number" step="0.01" id="acousticness" name="acousticness" required> + <input type="number" step="0.0001" id="acousticness" name="acousticness" required> <label for="danceability">Danceability :</label> - <input type="number" step="0.01" id="danceability" name="danceability" required> + <input type="number" step="0.0001" id="danceability" name="danceability" required> <label for="energy">Energy :</label> - <input type="number" step="0.01" id="energy" name="energy" required> + <input type="number" step="0.0001" id="energy" name="energy" required> <label for="explicit">Explicit (0 ou 1) :</label> <input type="number" id="explicit" name="explicit" required> <label for="instrumentalness">Instrumentalness :</label> - <input type="number" step="0.01" id="instrumentalness" name="instrumentalness" required> + <input type="number" step="0.0001" id="instrumentalness" name="instrumentalness" required> <label for="key">Key :</label> <input type="number" id="key" name="key" required> <label for="liveness">Liveness :</label> - <input type="number" step="0.01" id="liveness" name="liveness" required> + <input type="number" step="0.0001" id="liveness" name="liveness" required> <label for="loudness">Loudness :</label> - <input type="number" step="0.1" id="loudness" name="loudness" required> + <input type="number" step="0.0001" id="loudness" name="loudness" required> <label for="mode">Mode :</label> <input type="number" id="mode" name="mode" required> <label for="speechiness">Speechiness :</label> - <input type="number" step="0.01" id="speechiness" name="speechiness" required> + <input type="number" step="0.0001" id="speechiness" name="speechiness" required> <label for="tempo">Tempo :</label> - <input type="number" step="0.1" id="tempo" name="tempo" required> + <input type="number" step="0.0001" id="tempo" name="tempo" required> <label for="valence">Valence :</label> - <input type="number" step="0.01" id="valence" name="valence" required> - - <label for="nb_caracteres_sans_espaces">Nombre de caractères sans espaces :</label> - <input type="number" id="nb_caracteres_sans_espaces" name="nb_caracteres_sans_espaces" required> - - <label for="nb_artistes">Nombre d'artistes :</label> - <input type="number" id="nb_artistes" name="nb_artistes" required> - - <label for="featuring">Featuring :</label> - <input type="number" id="featuring" name="featuring" required> - - <label for="duree_minute">Durée (minutes) :</label> - <input type="number" step="0.01" id="duree_minute" name="duree_minute" required> - - <label for="categorie_annee">Catégorie année :</label> - <input type="number" id="categorie_annee" name="categorie_annee" required> - - <label for="categorie_tempo">Catégorie tempo :</label> - <input type="number" id="categorie_tempo" name="categorie_tempo" required> + <input type="number" step="0.0001" id="valence" name="valence" required> <input type="submit" value="Prédire"> </form>