Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
BigData
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Simon Majorczyk
BigData
Commits
d7df2bcb
Commit
d7df2bcb
authored
4 months ago
by
Mohamed Sebabti
Browse files
Options
Downloads
Patches
Plain Diff
model rn
parent
fea72d79
No related branches found
No related tags found
No related merge requests found
Changes
2
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
ml_rn.py
+35
-32
35 additions, 32 deletions
ml_rn.py
model_rn.txt
+0
-495
0 additions, 495 deletions
model_rn.txt
with
35 additions
and
527 deletions
ml_rn.py
+
35
−
32
View file @
d7df2bcb
import
numpy
as
np
import
pandas
as
pd
import
time
from
sklearn.model_selection
import
train_test_split
,
GridSearchCV
from
sklearn.preprocessing
import
Standard
Scaler
from
sklearn.preprocessing
import
MinMax
Scaler
from
sklearn.metrics
import
mean_absolute_error
,
mean_squared_error
,
r2_score
import
time
from
sklearn.neural_network
import
MLPRegressor
def load_and_describe_data(file_path):
    """
    Load a CSV file into a pandas DataFrame.

    :param file_path: path to the CSV file to read
    :return: DataFrame with the file contents
    """
    # read_csv infers dtypes from the data; no parsing options needed here.
    # (Dead code removed: the original had an unreachable recursive
    # self-call, a print(df.info()) and a second return after this return.)
    df = pd.read_csv(file_path)
    return df
def train_mlp(df):
    """
    Train an MLPRegressor to predict track popularity via grid search.

    Splits *df* into train/test sets, scales the features, runs a
    GridSearchCV over a small MLP hyper-parameter grid, prints the
    evaluation metrics and writes the test-set predictions back into
    the DataFrame.

    :param df: DataFrame containing a ``popularity`` target column plus
        the identifier/text columns dropped below; every remaining
        column is assumed numeric — TODO confirm against the CSV schema.
    :return: the same DataFrame, with a ``pred_mlp`` column filled for
        the rows that landed in the test split.
    """
    start_time = time.time()  # ⏳ wall-clock timer for the whole run

    # 1) Features / target — drop the target and the non-numeric
    #    identifier columns the MLP cannot consume.
    X = df.drop(columns=["popularity", "id", "artists", "name",
                         "release_date", "date_sortie", "duration_ms",
                         "nom_artiste"])
    y = df["popularity"]

    # 2) Train/test split (fixed seed for reproducibility)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # 3) Feature scaling — fit on the training set only to avoid leakage
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # 4) Hyper-parameter grid (4 * 2 * 2 * 3 = 48 candidates)
    param_grid = {
        "hidden_layer_sizes": [(50,), (100,), (100, 50), (100, 100)],
        "activation": ["relu", "tanh"],
        "solver": ["adam", "sgd"],
        "learning_rate_init": [0.001, 0.01, 0.1],
        "max_iter": [500],
        "early_stopping": [True],  # stop when the validation score plateaus
    }

    # 5) Grid search: 3-fold CV, R² scoring, verbose progress output
    mlp = MLPRegressor(random_state=42)
    grid_search = GridSearchCV(mlp, param_grid, cv=3, scoring="r2", verbose=2)
    grid_search.fit(X_train_scaled, y_train)

    # 6) Report the best hyper-parameters found
    best_params = grid_search.best_params_
    print("\n✅ Meilleurs paramètres :", best_params)

    # 7) Predict with the best estimator (refit on the full training set
    #    by GridSearchCV's default refit=True)
    best_mlp = grid_search.best_estimator_
    y_pred = best_mlp.predict(X_test_scaled)

    # 8) Evaluate on the held-out test set
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    print(f"\n📊 MLPRegressor - MAE: {mae:.2f}, RMSE: {rmse:.2f}, R²: {r2:.3f}")

    # 9) Write the predictions back for the test rows only; train rows
    #    keep NaN in "pred_mlp".
    df.loc[X_test.index, "pred_mlp"] = y_pred
    print(df.head(40))

    # ⏳ Elapsed time
    elapsed_time = time.time() - start_time
    print(f"\n⏱️ Temps d'exécution : {elapsed_time:.2f} secondes")

    return df
# 📂 Load the data and train the model.
# NOTE(review): guarded so that importing this module no longer triggers
# a CSV read and a full grid-search training run as a side effect.
if __name__ == "__main__":
    df = load_and_describe_data("data_sup_0popularity.csv")
    df = train_mlp(df)
This diff is collapsed.
Click to expand it.
model_rn.txt
deleted
100644 → 0
+
0
−
495
View file @
fea72d79
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment