Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
BigData
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Simon Majorczyk
BigData
Commits
b2365d4c
Commit
b2365d4c
authored
4 months ago
by
Mohamed Sebabti
Browse files
Options
Downloads
Patches
Plain Diff
model rn
parent
13e7783f
No related branches found
No related tags found
No related merge requests found
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
ml_rn.py
+23
-49
23 additions, 49 deletions
ml_rn.py
model_rn.txt
+39
-0
39 additions, 0 deletions
model_rn.txt
with
62 additions
and
49 deletions
ml_rn.py
+
23
−
49
View file @
b2365d4c
import
numpy
as
np
import
pandas
as
pd
import
tensorflow
as
tf
from
tensorflow
import
keras
from
tensorflow.keras.models
import
Sequential
from
tensorflow.keras.layers
import
Dense
from
tensorflow.keras.optimizers
import
Adam
from
sklearn.model_selection
import
train_test_split
from
sklearn.preprocessing
import
StandardScaler
from
sklearn.metrics
import
mean_absolute_error
,
mean_squared_error
,
r2_score
import
time
from
sklearn.neural_network
import
MLPRegressor
def
load_and_describe_data
(
file_path
):
"""
...
...
@@ -24,59 +20,37 @@ def load_and_describe_data(file_path):
# Load the track dataset once at module level and show its structure
# (column dtypes, non-null counts) before any modelling happens.
df = load_and_describe_data('data_sup_0popularity.csv')
print(df.info())
def train_mlp(df):
    """Train an MLPRegressor on the track-popularity data and evaluate it.

    Arguments:
        df -- DataFrame containing the feature columns and the
              'popularity' target (schema as produced by
              load_and_describe_data).

    Side effects:
        Prints evaluation metrics and the head of the DataFrame; adds a
        'pred_mlp' column holding predictions for the test rows.

    Returns:
        The same DataFrame, augmented with the 'pred_mlp' predictions.
    """
    # 1. Split features and target: drop the target plus identifier /
    #    raw-text / redundant columns the model cannot consume directly.
    X = df.drop(columns=["popularity", "id", "artists", "name",
                         "release_date", "date_sortie", "duration_ms",
                         "nom_artiste"])
    y = df['popularity']

    # 2. Train/test split (fixed seed for reproducibility)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    # 3. Feature scaling (required for stable MLP training)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # 4. Define and train the MLP model.
    # NOTE(review): the recorded run hit ConvergenceWarning at
    # max_iter=500 — consider raising max_iter or enabling
    # early_stopping; left unchanged to preserve reported results.
    mlp = MLPRegressor(hidden_layer_sizes=(100, 50), activation='relu',
                       solver='adam', max_iter=500, random_state=42)
    mlp.fit(X_train_scaled, y_train)

    # 5. Predictions on the held-out test set
    y_pred = mlp.predict(X_test_scaled)

    # 6. Model evaluation
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    print(f"📊 MLPRegressor - MAE: {mae:.2f}, RMSE: {rmse:.2f}, R²: {r2:.3f}")

    # 7. Attach predictions to the original DataFrame (test rows only;
    #    train rows keep NaN in 'pred_mlp')
    df.loc[X_test.index, 'pred_mlp'] = y_pred
    print(df.head(40))

    return df


train_mlp(df)
This diff is collapsed.
Click to expand it.
model_rn.txt
0 → 100644
+
39
−
0
View file @
b2365d4c
nohup: ignoring input
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 98159 entries, 0 to 98158
Data columns (total 27 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 id 98159 non-null object
1 artists 98159 non-null object
2 name 98159 non-null object
3 year 98159 non-null int64
4 acousticness 98159 non-null float64
5 danceability 98159 non-null float64
6 duration_ms 98159 non-null int64
7 energy 98159 non-null float64
8 explicit 98159 non-null int64
9 instrumentalness 98159 non-null float64
10 key 98159 non-null int64
11 liveness 98159 non-null float64
12 loudness 98159 non-null float64
13 mode 98159 non-null int64
14 release_date 98159 non-null object
15 speechiness 98159 non-null float64
16 tempo 98159 non-null float64
17 valence 98159 non-null float64
18 popularity 98159 non-null int64
19 date_sortie 98159 non-null object
20 nom_artiste 98159 non-null object
21 nb_caracteres_sans_espaces 98159 non-null int64
22 nb_artistes 98159 non-null int64
23 featuring 98159 non-null int64
24 duree_minute 98159 non-null float64
25 categorie_annee 98159 non-null int64
26 categorie_tempo 98159 non-null int64
dtypes: float64(10), int64(11), object(6)
memory usage: 20.2+ MB
None
/home/mohamed.sebabti/.local/lib/python3.10/site-packages/sklearn/neural_network/_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
warnings.warn(
📊 MLPRegressor - MAE: 9.27, RMSE: 12.67, R²: 0.564
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment