Skip to content
Snippets Groups Projects
Commit 16660a38 authored by Mohamed Sebabti
Browse files

model sup0 formulaire

parent 41b540c5
No related branches found
No related tags found
No related merge requests found
...@@ -22,19 +22,74 @@ def predict(): ...@@ -22,19 +22,74 @@ def predict():
# Récupérer les données du formulaire # Récupérer les données du formulaire
data = request.form.to_dict() data = request.form.to_dict()
# Convertir en DataFrame et en float # Calculer les features automatiquement
df_input = pd.DataFrame([data]) if 'name' in data:
df_input = df_input.astype(float) data['nb_caracteres_sans_espaces'] = len(data['name'].replace(" ", ""))
if 'artists' in data:
data['nb_artistes'] = data['artists'].count(',') + 1
data['featuring'] = int(data['nb_artistes'] > 1)
if 'duration_ms' in data:
duration_ms = float(data['duration_ms'])
data['duree_minute'] = float(f"{int(duration_ms // 60000)}.{int((duration_ms % 60000) // 1000):02d}")
if 'year' in data:
year = int(data['year'])
data['categorie_annee'] = 3 if year < 1954 else 2 if year < 2002 else 1
if 'tempo' in data:
tempo = float(data['tempo'])
if 40 <= tempo < 60:
data['categorie_tempo'] = 1
elif 60 <= tempo < 66:
data['categorie_tempo'] = 2
elif 66 <= tempo < 76:
data['categorie_tempo'] = 3
elif 76 <= tempo < 108:
data['categorie_tempo'] = 4
elif 108 <= tempo < 120:
data['categorie_tempo'] = 5
elif 120 <= tempo < 163:
data['categorie_tempo'] = 6
elif 163 <= tempo < 200:
data['categorie_tempo'] = 7
elif 200 <= tempo <= 208:
data['categorie_tempo'] = 8
else:
data['categorie_tempo'] = 9
# Appliquer le scaler # Supprimer les clés inutiles
df_input_scaled = scaler.transform(df_input) data.pop('name', None)
data.pop('artists', None)
data.pop('duration_ms', None)
# Faire la prédiction # Convertir les valeurs en float si possible
prediction = rf.predict(df_input_scaled) for key in data:
try:
data[key] = float(data[key])
except ValueError:
pass # Garder les valeurs non convertibles (ex: texte)
# Liste des features dans le bon ordre (comme lors de l'entraînement)
expected_features = ['year', 'acousticness', 'danceability', 'energy', 'explicit',
'instrumentalness', 'key', 'liveness', 'loudness', 'mode',
'speechiness', 'tempo', 'valence', 'nb_caracteres_sans_espaces',
'nb_artistes', 'featuring', 'duree_minute', 'categorie_annee', 'categorie_tempo']
# Construire le DataFrame avec les features dans le bon ordre
input_data = pd.DataFrame([[data.get(key, 0) for key in expected_features]], columns=expected_features)
# Vérifier que toutes les colonnes attendues sont présentes
missing_cols = [col for col in expected_features if col not in input_data.columns]
if missing_cols:
return jsonify({'error': f'Missing features: {missing_cols}'}), 400
# Normalisation des features
input_data_scaled = scaler.transform(input_data)
# Prédiction
predictions = rf.predict(input_data_scaled)
return jsonify({"predictions": prediction.tolist()}) return jsonify({'predictions': predictions.tolist()})
except Exception as e: except Exception as e:
return jsonify({"error": str(e)}) return jsonify({'error': str(e)})
if __name__ == '__main__': if __name__ == '__main__':
app.run(debug=True) app.run(debug=True)
...@@ -6,41 +6,51 @@ from sklearn.model_selection import train_test_split ...@@ -6,41 +6,51 @@ from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
print('HELLO') print('HELLO')
# 📂 Charger les données # 📂 Charger les données
df = pd.read_csv("data_sup_0popularity.csv") df = pd.read_csv("data_sup_0popularity.csv")
# 1️⃣ Séparation des features et de la cible # 1️⃣ Vérifier que les features sont bien présentes
X = df.drop(columns=["popularity", "id", "artists", "name", "release_date", "date_sortie", "duration_ms", "nom_artiste"]) expected_features = ['year', 'acousticness', 'danceability', 'energy', 'explicit',
'instrumentalness', 'key', 'liveness', 'loudness', 'mode',
'speechiness', 'tempo', 'valence', 'nb_caracteres_sans_espaces',
'nb_artistes', 'featuring', 'duree_minute', 'categorie_annee', 'categorie_tempo']
# Vérifier que toutes les colonnes attendues sont présentes
missing_features = [col for col in expected_features if col not in df.columns]
if missing_features:
raise ValueError(f"⚠️ Il manque ces colonnes dans le dataset : {missing_features}")
# 2️⃣ Séparation des features et de la cible
y = df["popularity"] y = df["popularity"]
X = df[expected_features] # On garde uniquement les features déjà présentes
# 2️⃣ Split train/test # 3️⃣ Split train/test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# 3️⃣ Normalisation des features # 4️⃣ Normalisation des features
scaler = StandardScaler() scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train) X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test) X_test_scaled = scaler.transform(X_test)
# 4️⃣ Entraînement du modèle # 5️⃣ Entraînement du modèle
rf = RandomForestRegressor(n_estimators=200, max_depth=20, min_samples_split=2, random_state=42) rf = RandomForestRegressor(n_estimators=200, max_depth=20, min_samples_split=2, random_state=42)
rf.fit(X_train_scaled, y_train) rf.fit(X_train_scaled, y_train)
# 5️⃣ Sauvegarde du modèle et du scaler # 6️⃣ Sauvegarde du modèle et du scaler
with open("random_forest_model_sup0.pkl", "wb") as model_file: with open("random_forest_model_sup0.pkl", "wb") as model_file:
pickle.dump(rf, model_file) pickle.dump(rf, model_file)
with open("scaler_sup0.pkl", "wb") as scaler_file: with open("scaler_sup0.pkl", "wb") as scaler_file:
pickle.dump(scaler, scaler_file) pickle.dump(scaler, scaler_file)
# 6️⃣ Évaluation du modèle # 7️⃣ Évaluation du modèle
y_pred = rf.predict(X_test_scaled) y_pred = rf.predict(X_test_scaled)
mae = mean_absolute_error(y_test, y_pred) mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred)) rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred) r2 = r2_score(y_test, y_pred)
print(f"\n📊 RandomForestRegressor - MAE: {mae:.2f}, RMSE: {rmse:.2f}, R²: {r2:.3f}") print(f"\n📊 RandomForestRegressor - MAE: {mae:.2f}, RMSE: {rmse:.2f}, R²: {r2:.3f}")
print("\n✅ Modèle et scaler sauvegardés !") print("\n✅ Modèle et scaler sauvegardés avec succès !")
#nohup python3 -u mlsup0final.py > modelsup0final.txt 2>&1 &
#ps aux | grep python
...@@ -50,62 +50,47 @@ ...@@ -50,62 +50,47 @@
<body> <body>
<h1>Prédire la Popularité d'une Chanson</h1> <h1>Prédire la Popularité d'une Chanson</h1>
<form action="/predict_sup0" method="post"> <form action="/predict_sup0" method="post">
<label for="name">Titre de la chanson :</label>
<input type="text" id="name" name="name" required>
<label for="year">Année :</label> <label for="year">Année :</label>
<input type="number" id="year" name="year" required> <input type="number" id="year" name="year" required>
<label for="acousticness">Acousticness :</label> <label for="acousticness">Acousticness :</label>
<input type="number" step="0.01" id="acousticness" name="acousticness" required> <input type="number" step="0.0001" id="acousticness" name="acousticness" required>
<label for="danceability">Danceability :</label> <label for="danceability">Danceability :</label>
<input type="number" step="0.01" id="danceability" name="danceability" required> <input type="number" step="0.0001" id="danceability" name="danceability" required>
<label for="energy">Energy :</label> <label for="energy">Energy :</label>
<input type="number" step="0.01" id="energy" name="energy" required> <input type="number" step="0.0001" id="energy" name="energy" required>
<label for="explicit">Explicit (0 ou 1) :</label> <label for="explicit">Explicit (0 ou 1) :</label>
<input type="number" id="explicit" name="explicit" required> <input type="number" id="explicit" name="explicit" required>
<label for="instrumentalness">Instrumentalness :</label> <label for="instrumentalness">Instrumentalness :</label>
<input type="number" step="0.01" id="instrumentalness" name="instrumentalness" required> <input type="number" step="0.0001" id="instrumentalness" name="instrumentalness" required>
<label for="key">Key :</label> <label for="key">Key :</label>
<input type="number" id="key" name="key" required> <input type="number" id="key" name="key" required>
<label for="liveness">Liveness :</label> <label for="liveness">Liveness :</label>
<input type="number" step="0.01" id="liveness" name="liveness" required> <input type="number" step="0.0001" id="liveness" name="liveness" required>
<label for="loudness">Loudness :</label> <label for="loudness">Loudness :</label>
<input type="number" step="0.1" id="loudness" name="loudness" required> <input type="number" step="0.0001" id="loudness" name="loudness" required>
<label for="mode">Mode :</label> <label for="mode">Mode :</label>
<input type="number" id="mode" name="mode" required> <input type="number" id="mode" name="mode" required>
<label for="speechiness">Speechiness :</label> <label for="speechiness">Speechiness :</label>
<input type="number" step="0.01" id="speechiness" name="speechiness" required> <input type="number" step="0.0001" id="speechiness" name="speechiness" required>
<label for="tempo">Tempo :</label> <label for="tempo">Tempo :</label>
<input type="number" step="0.1" id="tempo" name="tempo" required> <input type="number" step="0.0001" id="tempo" name="tempo" required>
<label for="valence">Valence :</label> <label for="valence">Valence :</label>
<input type="number" step="0.01" id="valence" name="valence" required> <input type="number" step="0.0001" id="valence" name="valence" required>
<label for="nb_caracteres_sans_espaces">Nombre de caractères sans espaces :</label>
<input type="number" id="nb_caracteres_sans_espaces" name="nb_caracteres_sans_espaces" required>
<label for="nb_artistes">Nombre d'artistes :</label>
<input type="number" id="nb_artistes" name="nb_artistes" required>
<label for="featuring">Featuring :</label>
<input type="number" id="featuring" name="featuring" required>
<label for="duree_minute">Durée (minutes) :</label>
<input type="number" step="0.01" id="duree_minute" name="duree_minute" required>
<label for="categorie_annee">Catégorie année :</label>
<input type="number" id="categorie_annee" name="categorie_annee" required>
<label for="categorie_tempo">Catégorie tempo :</label>
<input type="number" id="categorie_tempo" name="categorie_tempo" required>
<input type="submit" value="Prédire"> <input type="submit" value="Prédire">
</form> </form>
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment