Skip to content
Snippets Groups Projects
Commit 352d262d authored by Simon Majorczyk
Browse files

Merge branch 'dev' of gitlab-ssh.univ-lille.fr:simon.majorczyk.etu/bigdata into dev

parents 9c66eb11 c1c94127
No related branches found
No related tags found
No related merge requests found
......@@ -22,19 +22,74 @@ def predict():
# Récupérer les données du formulaire
data = request.form.to_dict()
# Convertir en DataFrame et en float
df_input = pd.DataFrame([data])
df_input = df_input.astype(float)
# Calculer les features automatiquement
if 'name' in data:
data['nb_caracteres_sans_espaces'] = len(data['name'].replace(" ", ""))
if 'artists' in data:
data['nb_artistes'] = data['artists'].count(',') + 1
data['featuring'] = int(data['nb_artistes'] > 1)
if 'duration_ms' in data:
duration_ms = float(data['duration_ms'])
data['duree_minute'] = float(f"{int(duration_ms // 60000)}.{int((duration_ms % 60000) // 1000):02d}")
if 'year' in data:
year = int(data['year'])
data['categorie_annee'] = 3 if year < 1954 else 2 if year < 2002 else 1
if 'tempo' in data:
tempo = float(data['tempo'])
if 40 <= tempo < 60:
data['categorie_tempo'] = 1
elif 60 <= tempo < 66:
data['categorie_tempo'] = 2
elif 66 <= tempo < 76:
data['categorie_tempo'] = 3
elif 76 <= tempo < 108:
data['categorie_tempo'] = 4
elif 108 <= tempo < 120:
data['categorie_tempo'] = 5
elif 120 <= tempo < 163:
data['categorie_tempo'] = 6
elif 163 <= tempo < 200:
data['categorie_tempo'] = 7
elif 200 <= tempo <= 208:
data['categorie_tempo'] = 8
else:
data['categorie_tempo'] = 9
# Appliquer le scaler
df_input_scaled = scaler.transform(df_input)
# Supprimer les clés inutiles
data.pop('name', None)
data.pop('artists', None)
data.pop('duration_ms', None)
# Faire la prédiction
prediction = rf.predict(df_input_scaled)
# Convertir les valeurs en float si possible
for key in data:
try:
data[key] = float(data[key])
except ValueError:
pass # Garder les valeurs non convertibles (ex: texte)
# Liste des features dans le bon ordre (comme lors de l'entraînement)
expected_features = ['year', 'acousticness', 'danceability', 'energy', 'explicit',
'instrumentalness', 'key', 'liveness', 'loudness', 'mode',
'speechiness', 'tempo', 'valence', 'nb_caracteres_sans_espaces',
'nb_artistes', 'featuring', 'duree_minute', 'categorie_annee', 'categorie_tempo']
# Construire le DataFrame avec les features dans le bon ordre
input_data = pd.DataFrame([[data.get(key, 0) for key in expected_features]], columns=expected_features)
# Vérifier que toutes les colonnes attendues sont présentes
missing_cols = [col for col in expected_features if col not in input_data.columns]
if missing_cols:
return jsonify({'error': f'Missing features: {missing_cols}'}), 400
# Normalisation des features
input_data_scaled = scaler.transform(input_data)
# Prédiction
predictions = rf.predict(input_data_scaled)
return jsonify({"predictions": prediction.tolist()})
return jsonify({'predictions': predictions.tolist()})
except Exception as e:
return jsonify({"error": str(e)})
return jsonify({'error': str(e)})
if __name__ == '__main__':
    # Local import keeps this entry-point block self-contained.
    import os

    # Debug mode turns on the interactive debugger, which lets anyone who can
    # reach the server execute arbitrary code. It is therefore opt-in through
    # the FLASK_DEBUG environment variable instead of being hard-coded on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in {'1', 'true', 'yes', 'on'}
    app.run(debug=debug_enabled)
......@@ -6,41 +6,51 @@ from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# Load the training data
df = pd.read_csv("data_sup_0popularity.csv")

# 1. Features used for training. The column order of this list is the order
#    the scaler and the model are fitted on.
expected_features = [
    'year', 'acousticness', 'danceability', 'energy', 'explicit',
    'instrumentalness', 'key', 'liveness', 'loudness', 'mode',
    'speechiness', 'tempo', 'valence', 'nb_caracteres_sans_espaces',
    'nb_artistes', 'featuring', 'duree_minute', 'categorie_annee', 'categorie_tempo',
]

# Fail early with a single explicit message listing every missing column,
# rather than a KeyError on the first one encountered.
missing_features = [col for col in expected_features if col not in df.columns]
if missing_features:
    raise ValueError(f"⚠️ Il manque ces colonnes dans le dataset : {missing_features}")

# 2. Separate the target from the feature matrix (only the expected features
#    are kept, so no column has to be dropped explicitly).
y = df["popularity"]
X = df[expected_features]
# 3. Hold out 20% of the rows for evaluation (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 4. Standardize the features: the scaler is fitted on the training split
#    only, then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Train the random forest regressor on the scaled training split
rf = RandomForestRegressor(
    n_estimators=200,
    max_depth=20,
    min_samples_split=2,
    random_state=42,
)
rf.fit(X_train_scaled, y_train)
# 6. Persist the trained model and the fitted scaler as pickle files
for artifact, artifact_path in (
    (rf, "random_forest_model_sup0.pkl"),
    (scaler, "scaler_sup0.pkl"),
):
    with open(artifact_path, "wb") as artifact_file:
        pickle.dump(artifact, artifact_file)
# 7. Evaluate the model on the held-out test split
y_pred = rf.predict(X_test_scaled)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)
print(f"\n📊 RandomForestRegressor - MAE: {mae:.2f}, RMSE: {rmse:.2f}, R²: {r2:.3f}")

# Shell commands to run the training in the background and check on it:
#   nohup python3 -u mlsup0final.py > modelsup0final.txt 2>&1 &
#   ps aux | grep python

# Single confirmation message (it used to be printed twice with two wordings).
print("\n✅ Modèle et scaler sauvegardés avec succès !")
......@@ -50,62 +50,47 @@
<body>
<h1>Prédire la Popularité d'une Chanson</h1>
<form action="/predict_sup0" method="post">
    <!-- Each field appears exactly once: ids must be unique within the page,
         and a duplicated name would be submitted twice. -->
    <label for="name">Titre de la chanson :</label>
    <input type="text" id="name" name="name" required>

    <label for="year">Année :</label>
    <input type="number" id="year" name="year" required>

    <label for="acousticness">Acousticness :</label>
    <input type="number" step="0.0001" id="acousticness" name="acousticness" required>

    <label for="danceability">Danceability :</label>
    <input type="number" step="0.0001" id="danceability" name="danceability" required>

    <label for="energy">Energy :</label>
    <input type="number" step="0.0001" id="energy" name="energy" required>

    <label for="explicit">Explicit (0 ou 1) :</label>
    <input type="number" id="explicit" name="explicit" required>

    <label for="instrumentalness">Instrumentalness :</label>
    <input type="number" step="0.0001" id="instrumentalness" name="instrumentalness" required>

    <label for="key">Key :</label>
    <input type="number" id="key" name="key" required>

    <label for="liveness">Liveness :</label>
    <input type="number" step="0.0001" id="liveness" name="liveness" required>

    <label for="loudness">Loudness :</label>
    <input type="number" step="0.0001" id="loudness" name="loudness" required>

    <label for="mode">Mode :</label>
    <input type="number" id="mode" name="mode" required>

    <label for="speechiness">Speechiness :</label>
    <input type="number" step="0.0001" id="speechiness" name="speechiness" required>

    <label for="tempo">Tempo :</label>
    <input type="number" step="0.0001" id="tempo" name="tempo" required>

    <label for="valence">Valence :</label>
    <input type="number" step="0.0001" id="valence" name="valence" required>

    <!-- Derived features, entered by hand in this form -->
    <label for="nb_caracteres_sans_espaces">Nombre de caractères sans espaces :</label>
    <input type="number" id="nb_caracteres_sans_espaces" name="nb_caracteres_sans_espaces" required>

    <label for="nb_artistes">Nombre d'artistes :</label>
    <input type="number" id="nb_artistes" name="nb_artistes" required>

    <label for="featuring">Featuring :</label>
    <input type="number" id="featuring" name="featuring" required>

    <label for="duree_minute">Durée (minutes) :</label>
    <input type="number" step="0.01" id="duree_minute" name="duree_minute" required>

    <label for="categorie_annee">Catégorie année :</label>
    <input type="number" id="categorie_annee" name="categorie_annee" required>

    <label for="categorie_tempo">Catégorie tempo :</label>
    <input type="number" id="categorie_tempo" name="categorie_tempo" required>

    <input type="submit" value="Prédire">
</form>
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment