diff --git a/app.py b/app.py index 04aedb24d021cb846006d38dec988ff2716e73ed..935a508946168fb8311b56ae5056eeb58c4bc727 100644 --- a/app.py +++ b/app.py @@ -2,6 +2,7 @@ from flask import Flask, request, jsonify, render_template import pickle import pandas as pd from sklearn.preprocessing import StandardScaler +import numpy as np app = Flask(__name__) @@ -24,7 +25,7 @@ def predict(): input_data_scaled = scaler.fit_transform(input_data) # Prédiction - predictions = rf.predict(input_data_scaled) + predictions = np.round(rf.predict(input_data_scaled),4) return jsonify({'predictions': predictions.tolist()}) diff --git a/ml_binairefinal.py b/ml_binairefinal.py index c355b6ed4d6aaa0503781678d4f92be1352cf028..0688e9ed318f280dd16008fe2999bfcbf278f8aa 100644 --- a/ml_binairefinal.py +++ b/ml_binairefinal.py @@ -29,7 +29,7 @@ def train_random_forest(df): X_test_scaled = scaler.transform(X_test) # 4️⃣ Initialisation du modèle avec les hyperparamètres optimisés - rf = RandomForestClassifier(n_estimators=200, max_depth=None, criterion="gini") + rf = RandomForestClassifier(n_estimators=200, max_depth=None, criterion="gini",class_weight='balanced') # 5️⃣ Entraînement du modèle rf.fit(X_train_scaled, y_train) @@ -72,4 +72,6 @@ def get_independent_variables(df): return independent_variables # Obtenir la liste des variables indépendantes -independent_variables = get_independent_variables(df) \ No newline at end of file +independent_variables = get_independent_variables(df) + +print(df['popularity_2'].value_counts()) \ No newline at end of file diff --git a/mlsup0final.py b/mlsup0final.py index c4b60104aab69bcc110af65eee50ee309b589888..d430629ab13a422ec4386b6e597e77ad6f57ea76 100644 --- a/mlsup0final.py +++ b/mlsup0final.py @@ -31,6 +31,10 @@ def train_random_forest(df): rf = RandomForestRegressor(n_estimators=200, max_depth=20, min_samples_split=2, random_state=42) rf.fit(X_train_scaled, y_train) + # Sauvegarder le modèle + with open('random_forest_model_sup.pkl', 'wb') as model_file: + pickle.dump(rf, model_file) + # 5️⃣ Prédiction sur les données de test y_pred = rf.predict(X_test_scaled) diff --git a/random_forest_model.pkl b/random_forest_model.pkl index 53681515e3a7abfedbdbcc78e0c9bb4dbeeb691a..348339307c70b4af2bf48bbebfc2613bc6cc61b8 100644 Binary files a/random_forest_model.pkl and b/random_forest_model.pkl differ