diff --git a/app.py b/app.py index 587200ea0a37aa92a117cdeb228b6776352c5721..e7fc28fd41005228d2d67d2c4687e29056c503d5 100644 --- a/app.py +++ b/app.py @@ -23,12 +23,43 @@ def predict(): # Récupérer les données du formulaire data = request.form.to_dict() - # Calculer automatiquement le nombre de caractères sans espaces + # Calculer automatiquement les features if 'name' in data: data['nb_caracteres_sans_espaces'] = len(data['name'].replace(" ", "")) + if 'artists' in data: + data['nb_artistes'] = data['artists'].count(',') + 1 + data['featuring'] = int(data['nb_artistes'] > 1) + if 'duration_ms' in data: + duration_ms = float(data['duration_ms']) + data['duree_minute'] = float(f"{int(duration_ms // 60000)}.{int((duration_ms % 60000) // 1000):02d}") + if 'year' in data: + year = int(data['year']) + data['categorie_annee'] = 3 if year < 1954 else 2 if year < 2002 else 1 + if 'tempo' in data: + tempo = float(data['tempo']) + if 40 <= tempo < 60: + data['categorie_tempo'] = 1 + elif 60 <= tempo < 66: + data['categorie_tempo'] = 2 + elif 66 <= tempo < 76: + data['categorie_tempo'] = 3 + elif 76 <= tempo < 108: + data['categorie_tempo'] = 4 + elif 108 <= tempo < 120: + data['categorie_tempo'] = 5 + elif 120 <= tempo < 163: + data['categorie_tempo'] = 6 + elif 163 <= tempo < 200: + data['categorie_tempo'] = 7 + elif 200 <= tempo <= 208: + data['categorie_tempo'] = 8 + else: + data['categorie_tempo'] = 9 - # Supprimer la clé 'name' qui n'est pas une feature + # Supprimer les clés non utilisées directement data.pop('name', None) + data.pop('artists', None) + data.pop('duration_ms', None) # Convertir les valeurs en float si possible for key in data: @@ -40,8 +71,8 @@ def predict(): # Liste des features dans le bon ordre (comme lors de l'entraînement) expected_features = ['year', 'acousticness', 'danceability', 'energy', 'explicit', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', - 'speechiness', 'tempo', 'valence', 'nb_artistes', 'featuring', - 'duree_minute', 'categorie_annee', 'categorie_tempo', 'nb_caracteres_sans_espaces'] + 'speechiness', 'tempo', 'valence', 'nb_caracteres_sans_espaces', + 'nb_artistes', 'featuring', 'duree_minute', 'categorie_annee', 'categorie_tempo'] # S'assurer que les colonnes du DataFrame correspondent à celles du modèle, dans le bon ordre input_data = pd.DataFrame([[data.get(key, 0) for key in expected_features]], columns=expected_features) @@ -60,4 +91,4 @@ def predict(): return jsonify({'predictions': int(predictions[0])}) if __name__ == '__main__': - app.run(debug=True) \ No newline at end of file + app.run(debug=True) diff --git a/ml_binairefinal.py b/ml_binairefinal.py index 16f67a598790f19e66aeaeff94d44d32230dfe0a..83ed42b319b2c4d1fb6b40609e3731a4c4cc99e1 100644 --- a/ml_binairefinal.py +++ b/ml_binairefinal.py @@ -7,14 +7,12 @@ from sklearn.ensemble import RandomForestClassifier from sklearn.metrics import accuracy_score, classification_report import pickle - def load_and_describe_data(file_path): """Charge un fichier CSV et affiche les informations de base.""" df = pd.read_csv(file_path) print(df.info()) return df - def train_random_forest(df): start_time = time.time() # ⏳ Timer @@ -26,10 +24,10 @@ def train_random_forest(df): # Liste des features dans le bon ordre (doit être identique dans l'API Flask) expected_features = ['year', 'acousticness', 'danceability', 'energy', 'explicit', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', - 'speechiness', 'tempo', 'valence', 'nb_artistes', 'featuring', - 'duree_minute', 'categorie_annee', 'categorie_tempo', 'nb_caracteres_sans_espaces'] - - # Vérification des colonnes pour éviter les erreurs + 'speechiness', 'tempo', 'valence', 'nb_caracteres_sans_espaces', + 'nb_artistes', 'featuring', 'duree_minute', 'categorie_annee', 'categorie_tempo'] + + # Vérification des colonnes pour éviter les erreurs de mismatch X = X.reindex(columns=expected_features, fill_value=0) # 2️⃣ Split train/test @@ -67,7 +65,6 @@ def train_random_forest(df): return df - # 📂 Chargement et entraînement df = load_and_describe_data("data_binaire.csv") -df = train_random_forest(df) +df = train_random_forest(df) \ No newline at end of file diff --git a/random_forest_model_binaire.pkl b/random_forest_model_binaire.pkl index 234110c4bf1057de8989392de18bc3ace42a9fda..15c9b8599437cbd56cb6ad961a44277b31343d7a 100644 Binary files a/random_forest_model_binaire.pkl and b/random_forest_model_binaire.pkl differ diff --git a/scaler_binaire.pkl b/scaler_binaire.pkl index 19ab04faf34282436158f3112f223e18033a08ff..7081d6a4697e8d126c81ef4eb851b5a1967923a6 100644 Binary files a/scaler_binaire.pkl and b/scaler_binaire.pkl differ diff --git a/templates/index.html b/templates/index.html index a79ed2f7a26f5ceaf7cca8285c4708fe53db9cf3..14f4dd4620959466aa08a29eb7b7fb3a1204c5b7 100644 --- a/templates/index.html +++ b/templates/index.html @@ -54,58 +54,43 @@ <input type="text" id="name" name="name" required> <label for="year">Année :</label> - <input type="number" id="year" name="year" required> + <input type="number" step="0.0001" id="year" name="year" required> <label for="acousticness">Acousticness :</label> - <input type="number" step="0.01" id="acousticness" name="acousticness" required> + <input type="number" step="0.0001" id="acousticness" name="acousticness" required> <label for="danceability">Danceability :</label> - <input type="number" step="0.01" id="danceability" name="danceability" required> + <input type="number" step="0.0001" id="danceability" name="danceability" required> <label for="energy">Energy :</label> - <input type="number" step="0.01" id="energy" name="energy" required> + <input type="number" step="0.0001" id="energy" name="energy" required> <label for="explicit">Explicit :</label> <input type="number" id="explicit" name="explicit" required> <label for="instrumentalness">Instrumentalness :</label> - <input type="number" step="0.01" id="instrumentalness" name="instrumentalness" required> + <input type="number" step="0.0001" id="instrumentalness" name="instrumentalness" required> <label for="key">Key :</label> <input type="number" id="key" name="key" required> <label for="liveness">Liveness :</label> - <input type="number" step="0.01" id="liveness" name="liveness" required> + <input type="number" step="0.0001" id="liveness" name="liveness" required> <label for="loudness">Loudness :</label> - <input type="number" step="0.01" id="loudness" name="loudness" required> + <input type="number" step="0.0001" id="loudness" name="loudness" required> <label for="mode">Mode :</label> <input type="number" id="mode" name="mode" required> <label for="speechiness">Speechiness :</label> - <input type="number" step="0.01" id="speechiness" name="speechiness" required> + <input type="number" step="0.0001" id="speechiness" name="speechiness" required> <label for="tempo">Tempo :</label> - <input type="number" step="0.01" id="tempo" name="tempo" required> + <input type="number" step="0.0001" id="tempo" name="tempo" required> <label for="valence">Valence :</label> - <input type="number" step="0.01" id="valence" name="valence" required> - - <label for="nb_artistes">Nombre d'artistes :</label> - <input type="number" id="nb_artistes" name="nb_artistes" required> - - <label for="featuring">Featuring :</label> - <input type="number" id="featuring" name="featuring" required> - - <label for="duree_minute">Durée (minutes) :</label> - <input type="number" step="0.01" id="duree_minute" name="duree_minute" required> - - <label for="categorie_annee">Catégorie année :</label> - <input type="number" id="categorie_annee" name="categorie_annee" required> - - <label for="categorie_tempo">Catégorie tempo :</label> - <input type="number" id="categorie_tempo" name="categorie_tempo" required> + <input type="number" step="0.0001" id="valence" name="valence" required> <input type="submit" value="Prédire"> </form> @@ -126,4 +111,4 @@ }); </script> </body> -</html> +</html> \ No newline at end of file