Skip to content
Snippets Groups Projects
Commit c1c94127 authored by Mohamed Sebabti
Browse files

modif sup0

parents 144bde86 16660a38
No related branches found
No related tags found
No related merge requests found
......@@ -22,19 +22,74 @@ def predict():
# Récupérer les données du formulaire
data = request.form.to_dict()
# Convertir en DataFrame et en float
df_input = pd.DataFrame([data])
df_input = df_input.astype(float)
# Calculer les features automatiquement
if 'name' in data:
data['nb_caracteres_sans_espaces'] = len(data['name'].replace(" ", ""))
if 'artists' in data:
data['nb_artistes'] = data['artists'].count(',') + 1
data['featuring'] = int(data['nb_artistes'] > 1)
if 'duration_ms' in data:
duration_ms = float(data['duration_ms'])
data['duree_minute'] = float(f"{int(duration_ms // 60000)}.{int((duration_ms % 60000) // 1000):02d}")
if 'year' in data:
year = int(data['year'])
data['categorie_annee'] = 3 if year < 1954 else 2 if year < 2002 else 1
if 'tempo' in data:
tempo = float(data['tempo'])
if 40 <= tempo < 60:
data['categorie_tempo'] = 1
elif 60 <= tempo < 66:
data['categorie_tempo'] = 2
elif 66 <= tempo < 76:
data['categorie_tempo'] = 3
elif 76 <= tempo < 108:
data['categorie_tempo'] = 4
elif 108 <= tempo < 120:
data['categorie_tempo'] = 5
elif 120 <= tempo < 163:
data['categorie_tempo'] = 6
elif 163 <= tempo < 200:
data['categorie_tempo'] = 7
elif 200 <= tempo <= 208:
data['categorie_tempo'] = 8
else:
data['categorie_tempo'] = 9
# Appliquer le scaler
df_input_scaled = scaler.transform(df_input)
# Supprimer les clés inutiles
data.pop('name', None)
data.pop('artists', None)
data.pop('duration_ms', None)
# Faire la prédiction
prediction = rf.predict(df_input_scaled)
# Convertir les valeurs en float si possible
for key in data:
try:
data[key] = float(data[key])
except ValueError:
pass # Garder les valeurs non convertibles (ex: texte)
# Liste des features dans le bon ordre (comme lors de l'entraînement)
expected_features = ['year', 'acousticness', 'danceability', 'energy', 'explicit',
'instrumentalness', 'key', 'liveness', 'loudness', 'mode',
'speechiness', 'tempo', 'valence', 'nb_caracteres_sans_espaces',
'nb_artistes', 'featuring', 'duree_minute', 'categorie_annee', 'categorie_tempo']
# Construire le DataFrame avec les features dans le bon ordre
input_data = pd.DataFrame([[data.get(key, 0) for key in expected_features]], columns=expected_features)
# Vérifier que toutes les colonnes attendues sont présentes
missing_cols = [col for col in expected_features if col not in input_data.columns]
if missing_cols:
return jsonify({'error': f'Missing features: {missing_cols}'}), 400
# Normalisation des features
input_data_scaled = scaler.transform(input_data)
# Prédiction
predictions = rf.predict(input_data_scaled)
return jsonify({"predictions": prediction.tolist()})
return jsonify({'predictions': predictions.tolist()})
except Exception as e:
return jsonify({"error": str(e)})
return jsonify({'error': str(e)})
if __name__ == '__main__':
app.run(debug=True)
......@@ -6,41 +6,51 @@ from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# Training script: fits a StandardScaler and a RandomForestRegressor on the
# CSV below, pickles both, and prints MAE / RMSE / R² on the held-out split.
# NOTE(review): this text looks like a rendered diff hunk in which removed and
# added lines are interleaved (X is assigned twice, the step comments are
# numbered twice, the success message is printed twice) — confirm against the
# repository which lines belong to the current version.
print('HELLO')
# 📂 Load the data
df = pd.read_csv("data_sup_0popularity.csv")
# 1️⃣ Split features and target
X = df.drop(columns=["popularity", "id", "artists", "name", "release_date", "date_sortie", "duration_ms", "nom_artiste"])
# 1️⃣ Check that the features are present
expected_features = ['year', 'acousticness', 'danceability', 'energy', 'explicit',
'instrumentalness', 'key', 'liveness', 'loudness', 'mode',
'speechiness', 'tempo', 'valence', 'nb_caracteres_sans_espaces',
'nb_artistes', 'featuring', 'duree_minute', 'categorie_annee', 'categorie_tempo']
# Check that all expected columns are present
missing_features = [col for col in expected_features if col not in df.columns]
if missing_features:
raise ValueError(f"⚠️ Il manque ces colonnes dans le dataset : {missing_features}")
# 2️⃣ Split features and target
y = df["popularity"]
X = df[expected_features] # Keep only the features that are already present
# 2️⃣ Split train/test
# 3️⃣ Split train/test
# 20% of the rows are held out for evaluation; the seed is fixed at 42.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# 3️⃣ Feature normalization
# 4️⃣ Feature normalization
# The scaler is fitted on the training split only and then applied to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# 4️⃣ Model training
# 5️⃣ Model training
rf = RandomForestRegressor(n_estimators=200, max_depth=20, min_samples_split=2, random_state=42)
rf.fit(X_train_scaled, y_train)
# 5️⃣ Save the model and the scaler
# 6️⃣ Save the model and the scaler
with open("random_forest_model_sup0.pkl", "wb") as model_file:
pickle.dump(rf, model_file)
with open("scaler_sup0.pkl", "wb") as scaler_file:
pickle.dump(scaler, scaler_file)
# 6️⃣ Model evaluation
# 7️⃣ Model evaluation
y_pred = rf.predict(X_test_scaled)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)
print(f"\n📊 RandomForestRegressor - MAE: {mae:.2f}, RMSE: {rmse:.2f}, R²: {r2:.3f}")
print("\n✅ Modèle et scaler sauvegardés !")
# Shell commands kept for reference: run the script in the background with its
# output redirected to a text file, then list the running Python processes.
#nohup python3 -u mlsup0final.py > modelsup0final.txt 2>&1 &
#ps aux | grep python
print("\n✅ Modèle et scaler sauvegardés avec succès !")
......@@ -50,62 +50,47 @@
<body>
<h1>Prédire la Popularité d'une Chanson</h1>
<!-- Song-popularity prediction form: submits its fields via POST to /predict_sup0. -->
<!-- NOTE(review): this fragment looks like a rendered diff hunk; several inputs
     appear twice with the same id/name but a different "step" value, and the
     derived-feature inputs may be removed lines. Confirm against the
     repository which inputs belong to the current template. -->
<form action="/predict_sup0" method="post">
<label for="name">Titre de la chanson :</label>
<input type="text" id="name" name="name" required>
<label for="year">Année :</label>
<input type="number" id="year" name="year" required>
<label for="acousticness">Acousticness :</label>
<input type="number" step="0.01" id="acousticness" name="acousticness" required>
<input type="number" step="0.0001" id="acousticness" name="acousticness" required>
<label for="danceability">Danceability :</label>
<input type="number" step="0.01" id="danceability" name="danceability" required>
<input type="number" step="0.0001" id="danceability" name="danceability" required>
<label for="energy">Energy :</label>
<input type="number" step="0.01" id="energy" name="energy" required>
<input type="number" step="0.0001" id="energy" name="energy" required>
<label for="explicit">Explicit (0 ou 1) :</label>
<input type="number" id="explicit" name="explicit" required>
<label for="instrumentalness">Instrumentalness :</label>
<input type="number" step="0.01" id="instrumentalness" name="instrumentalness" required>
<input type="number" step="0.0001" id="instrumentalness" name="instrumentalness" required>
<label for="key">Key :</label>
<input type="number" id="key" name="key" required>
<label for="liveness">Liveness :</label>
<input type="number" step="0.01" id="liveness" name="liveness" required>
<input type="number" step="0.0001" id="liveness" name="liveness" required>
<label for="loudness">Loudness :</label>
<input type="number" step="0.1" id="loudness" name="loudness" required>
<input type="number" step="0.0001" id="loudness" name="loudness" required>
<label for="mode">Mode :</label>
<input type="number" id="mode" name="mode" required>
<label for="speechiness">Speechiness :</label>
<input type="number" step="0.01" id="speechiness" name="speechiness" required>
<input type="number" step="0.0001" id="speechiness" name="speechiness" required>
<label for="tempo">Tempo :</label>
<input type="number" step="0.1" id="tempo" name="tempo" required>
<input type="number" step="0.0001" id="tempo" name="tempo" required>
<label for="valence">Valence :</label>
<input type="number" step="0.01" id="valence" name="valence" required>
<label for="nb_caracteres_sans_espaces">Nombre de caractères sans espaces :</label>
<input type="number" id="nb_caracteres_sans_espaces" name="nb_caracteres_sans_espaces" required>
<label for="nb_artistes">Nombre d'artistes :</label>
<input type="number" id="nb_artistes" name="nb_artistes" required>
<label for="featuring">Featuring :</label>
<input type="number" id="featuring" name="featuring" required>
<label for="duree_minute">Durée (minutes) :</label>
<input type="number" step="0.01" id="duree_minute" name="duree_minute" required>
<label for="categorie_annee">Catégorie année :</label>
<input type="number" id="categorie_annee" name="categorie_annee" required>
<label for="categorie_tempo">Catégorie tempo :</label>
<input type="number" id="categorie_tempo" name="categorie_tempo" required>
<input type="number" step="0.0001" id="valence" name="valence" required>
<input type="submit" value="Prédire">
</form>
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment