Skip to content
Snippets Groups Projects
Commit 377f6880 authored by Matthieu Dabrowski's avatar Matthieu Dabrowski
Browse files

publish branch

parents
Branches
No related tags found
No related merge requests found
Showing
with 788 additions and 0 deletions
# Default ignored files
/shelf/
/workspace.xml
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="$USER_HOME$/anaconda3" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="$USER_HOME$/anaconda3" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/multiVariateModels.iml" filepath="$PROJECT_DIR$/.idea/multiVariateModels.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="PLAIN" />
<option name="myDocStringFormat" value="Plain" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/.." vcs="Git" />
</component>
</project>
\ No newline at end of file
This diff is collapsed.
import argparse
import json
import os
import sys
import numpy as np
import matplotlib
# Import libraries
import matplotlib.pyplot as plt
# PyTorch's libraries and modules
import netCDF4
import torch
from torch.utils.data import DataLoader
from piq import VSILoss, MDSILoss, MultiScaleGMSDLoss, FSIMLoss
import cupy as cp
import datetime
import time
import pickle
from sympy import S, symbols, printing
from dataset_generation import PlotPrep, DatasetCams, Filter, Distrib, DatasetAladinEur
import warnings
warnings.filterwarnings("ignore", 'The original MDSI supports only RGB images. The input images were converted to RGB by copying the grey channel 3 times.')
warnings.filterwarnings("ignore", 'The original VSI supports only RGB images. The input images were converted to RGB by copying the grey channel 3 times.')
warnings.filterwarnings("ignore", "torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument.")
warnings.filterwarnings("ignore", 'Matplotlib is currently using agg, which is a non-GUI backend, so cannot show the figure.')
matplotlib.use('Agg')
plt.ioff()
epsilon = sys.float_info.epsilon
# If you can, run this example on a GPU, it will be a lot faster.
cuda = torch.cuda.is_available()
device_name = "cuda" if cuda else "cpu"
device = torch.device(device_name)
parser = argparse.ArgumentParser()
# Choosing the version of the model (useful for saving)
parser.add_argument("--version", type=str, default="", help="version of the model")
# Choose degree of interpolation
parser.add_argument("--D", type=int, default=1, help="degree of the interpolation")
# Choose number of samples for 'training'
parser.add_argument("--N", type=int, default=10, help="number of samples for 'training'")
# Choosing the type of aerosol we consider
aer_arg = parser.add_argument("--aer", type=str, default='pm', help="type of aerosol we consider (du, bc, om, ss, su, pm). ")
# Choosing the prediction horizon (must be 0 or a multiple of 3 as data is obtained every 3 hours)
hor_arg2 = parser.add_argument("--horizon", type=int, default=0, help="prediction horizon (must be 0 or a multiple of 3 as data is obtained every 3 hours)")
# Choosing whether or not to normalize all inputs
parser.add_argument("--normal", type=bool, default=False, help="normalize all inputs")
# Choosing if we apply a log to the inputs and targets or not
parser.add_argument("--log", type=bool, default=False, help="apply log scale to the inputs and targets")
# Choosing if we display the error in black & white or not
parser.add_argument("--e_bw", type=bool, default=True, help="display the error in black & white or not")
# # Choosing train period (season & year)
# trainPeriod_arg = parser.add_argument("--trainPeriod", type=str, default="sum20", help="choosing train period (season & year or YEAR)")
# # Choosing the test period
# testPeriod_arg = parser.add_argument("--testPeriod", type=str, default="next", help="choosing test period (next or all)")
# Choosing the time step for the test set
parser.add_argument("--testStep", type=int, default=3, help="time step for test set")
# Choosing origin of data
parser.add_argument("--origData", type=str, default="Cams", help="data origin")
# Choosing whether to take the same complete training periods for semi-supervised and supervised
parser.add_argument("--matchTrainPeriods", type=bool, default=False, help="take the same complete training periods for semi-supervised and supervised")
# Dividing the dimensions by a ratio (Cams Forecast)
parser.add_argument("--ratio", type=int, default=1, help="dividing dim by ratio (Cams Forecast)")
# Choosing whether to load a pre-existing model or to train a new one
parser.add_argument("--load", type=bool, default=False, help="choosing whether to load a pre-existing model or to train a new one")
args = parser.parse_args()
print(args)
# if args.testPeriod not in ["next", "all"]:
# raise argparse.ArgumentError(testPeriod_arg, "testPeriod should be next or all. \n "
# "Got "+str(args.testPeriod))
dict_aer = {"du": (["aermr05", "aermr06"], "duaod550"),
"bc": (["aermr09", "aermr10"], "bcaod550"),
"om": (["aermr07", "aermr08"], "omaod550"),
"ss": (["aermr02", "aermr03"], "ssaod550"),
"su": (["aermr11"], "suaod550"),
"pm": (["pm2p5"], "aod550")}
dict_Data = {'YEAR':{'train':{'sfc':'Cams/cams-global-reanalysis-year_2020-07-01_2021-05-31_levtype_sfc.nc',
'pl':'Cams/cams-global-reanalysis-year_2020-07-01_2021-05-31_levtype_pl.nc'},
'trainGen':{'sfc':'Cams/cams-global-reanalysis_2021-06-01_2021-06-30_levtype_sfc.nc',
'pl':'Cams/cams-global-reanalysis_2021-06-01_2021-06-30_levtype_pl.nc'},
'test':[{'sfc':'Cams/cams_reanalysis_year-testSet_2021-07-01_2022-07-01_levtype_sfc.nc',
'pl':'Cams/cams_reanalysis_year-testSet_2021-07-01_2022-07-01_levtype_pl.nc'}],
'testPeriod':['YEAR'],
'trainStartDate':datetime.datetime(2019, 7, 1), 'trainEndDate':datetime.datetime(2020, 6, 1),
'trainGenStartDate':datetime.datetime(2020, 6, 1), 'trainGenEndDate':datetime.datetime(2020, 7, 1),
'testStartDate':datetime.datetime(2020, 7, 1), 'testEndDate':datetime.datetime(2021, 7, 1),
'testDates' : ["202007010100-202010010000", "202010010100-202101010000", "202101010100-202104010000",
"202104010100-202107010000"],
'trainingDates' : ["201907010100-201910010000", "201910010100-202001010000", "202001010100-202004010000",
"202004010100-202007010000"]}
}
testP = 'YEAR'
dict_Path = dict_Data[testP]
sfc_test_path = dict_Path['test'][0]['sfc']
pl_test_path = dict_Path['test'][0]['pl']
sfc_train_path = dict_Path['train']['sfc']
pl_train_path = dict_Path['train']['pl']
sfc_train_pathGen = dict_Path['trainGen']['sfc']
pl_train_pathGen = dict_Path['trainGen']['pl']
trainStartDate = dict_Path['trainStartDate']
trainEndDate = dict_Path['trainEndDate']
trainGenStartDate = dict_Path['trainGenStartDate']
trainGenEndDate = dict_Path['trainGenEndDate']
testStartDate = dict_Path['testStartDate']
testEndDate = dict_Path['testEndDate']
trainingDates = dict_Path['trainingDates']
testDates = dict_Path['testDates']
# if args.trainPeriod not in dict_Data.keys():
# raise argparse.ArgumentError(trainPeriod_arg, "trainPeriod should be one of [sum20, aut20, win21, spr21, sum21, YEAR]. \n"
# "Got "+str(args.trainPeriod))
args.version = "_" + args.origData + args.version
if args.horizon != 0:
args.version = "_pred"+str(args.horizon)+"h"+args.version
if args.log:
args.version = "_log"+args.version
args.version = "_"+args.aer+args.version
if args.matchTrainPeriods:
args.version = "_" + 'Match' + args.version
else:
args.version = "_" +args.version
name_model = "interp_deg"+str(args.D)+args.version
if args.origData == "Cams":
if args.matchTrainPeriods:
train_set = DatasetCams(pathSfcList=[sfc_train_pathGen, sfc_train_path],
pathPlList=[pl_train_pathGen, pl_train_path],
var=args.aer, horizon=args.horizon, log=args.log, normal=args.normal)
else:
train_set = DatasetCams(pathSfcList=[sfc_train_path],
pathPlList=[pl_train_path],
var=args.aer, horizon=args.horizon, log=args.log, normal=args.normal)
elif args.origData == "Al-EUR":
if args.matchTrainPeriods:
train_set = DatasetAladinEur(list_dates=trainingDates, end_date=None, log=args.log, normal=args.normal)
else:
train_set = DatasetAladinEur(list_dates=trainingDates, end_date=trainEndDate, log=args.log, normal=args.normal)
train_set = DatasetCamsForecast(type="train", end_date=endSupTrainDate, log=args.log, horizon1=args.horizon1, horizon2=args.horizon2, aer=args.aer, inputTarget=True, normal=args.normal, ratio=args.ratio)
train_loader = DataLoader(train_set, batch_size=1, shuffle=True)
xS = symbols("x")
MSE_loss = torch.nn.MSELoss(reduction='none')
pPrep = PlotPrep()
if args.log:
dis = Distrib()
if not args.load:
x = cp.array([])
y = cp.array([])
for idx_batch, (inputTrain_i, targetTrain_i) in enumerate(train_loader):
if idx_batch >= args.N:
break
inputTrain_i = inputTrain_i.float()
targetTrain_i = targetTrain_i.float()
if inputTrain_i.shape != targetTrain_i.shape:
raise Exception("inputTrain_i and targetTrain_i have different shapes. inputTrain_i.shape : ", inputTrain_i.shape, " targetTrain_i.shape : ", targetTrain_i.shape)
inputTrain_i = cp.asarray(inputTrain_i)
targetTrain_i = cp.asarray(targetTrain_i)
if inputTrain_i.shape != targetTrain_i.shape:
raise Exception("inputTrain_i and targetTrain_i have different shapes. inputTrain_i.shape : ", inputTrain_i.shape, " targetTrain_i.shape : ", targetTrain_i.shape)
x_i = cp.ravel(inputTrain_i)
y_i = cp.ravel(targetTrain_i)
if x_i.shape != y_i.shape:
raise Exception("x_i and y_i have different shapes. x_i.shape : ", x_i.shape, " y_i.shape : ", y_i.shape)
x = cp.concatenate((x, x_i))
y = cp.concatenate((y, y_i))
if x.shape != y.shape:
raise Exception("x and y have different shapes. x.shape : ", x.shape, " y.shape : ", y.shape)
model = cp.polyfit(x, y, args.D)
os.makedirs("savedModels/" + name_model + "/", exist_ok=True)
pickle.dump(model, open("savedModels/" + name_model + "/model.pkl", 'wb'))
else:
model = pickle.load(open("savedModels/" + name_model + "/model.pkl", 'rb'))
deg = args.D
c = 0
while (c < len(model)) and (model[c]==0):
deg = deg-1
c = c+1
p = cp.poly1d(model)
eq_model = sum(S("{:6.2f}".format(v)) * xS ** i for i, v in enumerate(model[::-1]))
eq_model_print = printing.latex(eq_model)
losses = []
times = []
r_losses = []
c_losses = []
MBE = []
MBE_R = []
# Ds = []
sim_idces = []
def test(sfc_test_path, pl_test_path, testDates, origData):
# Beginning test
if origData == "Cams":
test_set = DatasetCams(pathSfcList=[sfc_test_path],
pathPlList=[pl_test_path],
var=args.aer, horizon=args.horizon, log=args.log, normal=args.normal)
newMax = (test_set.maxTest - test_set.minTest)
elif origData == "Al-EUR":
test_set = DatasetAladinEur(list_dates=testDates, end_date=None, step=args.testStep,
log=args.log, normal=args.normal)
newMax = (test_set.maxTest - test_set.minTest)
simLoss = FSIMLoss(chromatic=False, data_range=newMax)
test_loader = DataLoader(test_set, batch_size=1, shuffle=False)
os.makedirs("results/" + name_model + "/", exist_ok=True)
# os.makedirs("results/" + name_model + "/" + testPeriod + "/", exist_ok=True)
for idx_batch_test, (input_, target) in enumerate(test_loader):
input_ = input_.float()
target = target.float()
input_ = input_.to(device)
target = target.to(device)
input_ = input_[0]
target = target[0]
path_save_img = "results/" + name_model + "/results_test_img" + str(idx_batch_test + 1) + ".pdf"
path_save_img2 = "results/" + name_model + "/error_test_img" + str(idx_batch_test + 1) + ".pdf"
start_time = time.time()
input1 = cp.asarray(input_)
predic = p(input1)
pred_time = time.time() - start_time
times.append(pred_time)
predic = torch.from_numpy(cp.asnumpy(predic))
predic = predic.to(device)
if args.log:
input_ = dis.reverse(input_)
target = dis.reverse(target)
predic = dis.reverse(predic)
predic = Filter(predic)
target = Filter(target)
mt = torch.mean(target).item()
loss = torch.mean(torch.sqrt(MSE_loss(predic, target))).item()
relative_loss = (loss / mt)
losses.append(loss)
r_losses.append(relative_loss)
mbe = torch.mean(predic - target).item()
mbe_r = (mbe / mt)
# d1 = torch.sum(torch.pow((predic - target), 2)).item()
# d2 = torch.sum(torch.pow((torch.abs(predic - mt) + torch.abs(target - mt)), 2)).item()
# d = 1 - d1/d2
# Ds.append(d)
predF1 = predic - torch.min(predic)
predF = ((predF1 > 0) * (predF1 < newMax)) * predF1 + (predF1 >= newMax) * newMax
targetF1 = target - test_set.minmaxs['Conc'][0]
targetF = ((targetF1 > 0) * (targetF1 < newMax)) * targetF1 + (targetF1 >= newMax) * newMax
sim_idx = simLoss(predF[None, :], targetF[None, :]).item()
sim_idces.append(sim_idx)
MBE.append(mbe)
MBE_R.append(mbe_r)
pred = pPrep(predic, norm=True)
targ = pPrep(target, norm=True)
inp = pPrep(input_, norm=True)
if args.D < 5:
plt.suptitle("Test results. \n "+eq_model_print)
else:
plt.suptitle("Test results. \n Degree is "+str(deg))
# Each file contains two images : one for the prediction and one for the target
ax0 = plt.subplot(2, 2, 1)
ax0.imshow(inp)
ax0.set_title("Input (AOD)")
ax1 = plt.subplot(2, 2, 3)
ax1.imshow(pred)
ax1.set_title(f"Prediction. Test error : {loss:1.3f}\u03BCg/m\N{superscript three} or {relative_loss*100:1.3f}%."
f" \n MBE : {mbe:1.3f}\u03BCg/m\N{superscript three} or {mbe_r*100:1.3f}%."
f" \n sim-index loss : {sim_idx*100:1.3f}%.")
ax2 = plt.subplot(2, 2, 4)
ax2.imshow(targ)
ax2.set_title("Target.")
plt.savefig(path_save_img)
plt.close()
q1, q2, q3 = torch.quantile(target, 0.25), torch.quantile(target, 0.5), torch.quantile(target, 0.75)
ct = torch.where(target > q1, 1, 0) + torch.where(target > q2, 1, 0) + torch.where(target > q3, 1, 0)
cpred = torch.where(predic > q1, 1, 0) + torch.where(predic > q2, 1, 0) + torch.where(predic > q3, 1, 0)
ce = torch.mean(torch.abs(ct - cpred).float()).item()
c_losses.append(ce)
ct = pPrep(ct/4)
cpred = pPrep(cpred/4)
error = torch.abs(target - predic)
error = pPrep(error, error=True, bw=args.e_bw, norm=True)
if args.D < 5:
plt.suptitle("Error visualisation. \n " + eq_model_print)
else:
plt.suptitle("Error visualisation. \n Degree is " + str(deg))
ax0 = plt.subplot(2, 2, 1)
if args.e_bw:
ax0.imshow(error, cmap='gray')
else:
ax0.imshow(error)
ax0.set_title(f"Test error : {loss:1.3f}\u03BCg/m\N{superscript three} or {relative_loss*100:1.3f}%.")
ax1 = plt.subplot(2, 2, 3)
ax1.imshow(cpred)
ax1.set_title(f"Segmented prediction. \n Classification error : {ce:1.3f}.")
ax2 = plt.subplot(2, 2, 4)
ax2.imshow(ct)
ax2.set_title("Segmented target.")
plt.savefig(path_save_img2)
plt.close()
dict_perf = {"Average inference time (s)": np.mean(times),
"STD of inference time (s)": np.std(times),
"Average prediction error (\u03BCg/m\N{superscript three})": np.mean(losses),
"STD of prediction error (\u03BCg/m\N{superscript three})": np.std(losses),
"Average relative error (%)": 100 * np.mean(r_losses),
"STD of relative error (%)": 100 * np.std(r_losses),
"Average classification error": np.mean(c_losses),
"STD of classification error": np.std(c_losses),
"Average bias error (\u03BCg/m\N{superscript three})": np.mean(MBE),
"STD of bias error (\u03BCg/m\N{superscript three})": np.std(MBE),
"Average relative bias error (%)": 100 * np.mean(MBE_R),
"STD of relative bias error (%)": 100 * np.std(MBE_R),
# "Average index of agreement (%)": 100 * np.mean(Ds),
# "STD of index of agreement (%)": 100 * np.std(Ds),
"Average sim-index loss (%)": 100 * np.mean(sim_idces),
"STD of sim-index loss (%)": 100 * np.std(sim_idces)
}
with open("results/" + name_model + "/perf_test.txt", "w+") as f:
json.dump(dict_perf, f, indent=2)
f.close()
return dict_perf
# End test
perf = test(sfc_test_path, pl_test_path, testDates, args.origData)
# Saving the arguments and the equation
os.makedirs("savedModels/" + name_model + "/", exist_ok=True)
with open("savedModels/" + name_model + "/arguments.txt", "w+") as f:
json.dump(args.__dict__, f, indent=2)
f.close()
\ No newline at end of file
ML.py 0 → 100644
import argparse
import json
import os
import sys
import matplotlib
# Import libraries
import matplotlib.pyplot as plt
# PyTorch's libraries and modules
import netCDF4
import sklearn.linear_model
import torch
from torch.utils.data import DataLoader
from piq import VSILoss, MDSILoss, MultiScaleGMSDLoss, FSIMLoss
import numpy as np
import cupy as cp
import time
import pickle
import datetime
from sklearn.ensemble import RandomForestRegressor
from sklearn import config_context
from dataset_generation import PlotPrep, DatasetCams, Filter, Distrib, DatasetAladinEur
import warnings
# If you can, run this example on a GPU, it will be a lot faster.
cuda = torch.cuda.is_available()
device_name = "cuda" if cuda else "cpu"
device = torch.device(device_name)
warnings.filterwarnings("ignore", 'The original MDSI supports only RGB images. The input images were converted to RGB by copying the grey channel 3 times.')
warnings.filterwarnings("ignore", 'The original VSI supports only RGB images. The input images were converted to RGB by copying the grey channel 3 times.')
warnings.filterwarnings("ignore", "torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument.")
warnings.filterwarnings("ignore", 'Matplotlib is currently using agg, which is a non-GUI backend, so cannot show the figure.')
matplotlib.use('Agg')
plt.ioff()
epsilon = sys.float_info.epsilon
# If you can, run this example on a GPU, it will be a lot faster.
cuda = torch.cuda.is_available()
device_name = "cuda" if cuda else "cpu"
device = torch.device(device_name)
cpu = torch.device("cpu")
parser = argparse.ArgumentParser()
# Choosing the version of the model (useful for saving)
parser.add_argument("--version", type=str, default="", help="version of the model")
# # Choosing the type of model we want to use
model_arg = parser.add_argument("--model", type=str, default="RandomForest", help="choosing the type of model we want to use")
# Choosing the type of aerosol we consider
aer_arg = parser.add_argument("--aer", type=str, default='pm', help="type of aerosol we consider (du, bc, om, ss, su, pm). ")
# Choosing the prediction horizon (must be 0 or a multiple of 3 as data is obtained every 3 hours)
hor_arg2 = parser.add_argument("--horizon", type=int, default=0, help="prediction horizon (must be 0 or a multiple of 3 as data is obtained every 3 hours)")
# Choosing if we apply a log to the inputs and targets or not
parser.add_argument("--log", type=bool, default=False, help="apply log scale to the inputs and targets")
# Choosing whether or not to normalize all inputs
parser.add_argument("--normal", type=bool, default=False, help="normalize all inputs")
# Choosing if we display the error in black & white or not
parser.add_argument("--e_bw", type=bool, default=True, help="display the error in black & white or not")
# # Choosing train period (season & year)
# trainPeriod_arg = parser.add_argument("--trainPeriod", type=str, default="sum20", help="choosing train period (season & year or YEAR)")
# # Choosing the test period
# testPeriod_arg = parser.add_argument("--testPeriod", type=str, default="next", help="choosing test period (next or all)")
# Choosing the time step for the test set
parser.add_argument("--testStep", type=int, default=3, help="time step for test set")
# Choosing origin of data
parser.add_argument("--origData", type=str, default="Cams", help="data origin")
# Choosing whether to take the same complete training periods for semi-supervised and supervised
parser.add_argument("--matchTrainPeriods", type=bool, default=False, help="take the same complete training periods for semi-supervised and supervised")
# Dividing the dimensions by a ratio (Cams Forecast)
parser.add_argument("--ratio", type=int, default=1, help="dividing dim by ratio (Cams Forecast)")
# Choosing whether to load a pre-existing model or to train a new one
parser.add_argument("--load", type=bool, default=False, help="choosing whether to load a pre-existing model or to train a new one")
args = parser.parse_args()
print(args)
# if args.testPeriod not in ["next", "all"]:
# raise argparse.ArgumentError(testPeriod_arg, "testPeriod should be next or all. \n "
# "Got "+str(args.testPeriod))
# if args.horizon%3 != 0:
# raise argparse.ArgumentError(hor_arg, "prediction horizon should be 0 or a multiple of 3 as data is obtained every 3 hours. \n"
# "Got "+str(args.horizon))
dict_aer = {"du": (["aermr05", "aermr06"], "duaod550"),
"bc": (["aermr09", "aermr10"], "bcaod550"),
"om": (["aermr07", "aermr08"], "omaod550"),
"ss": (["aermr02", "aermr03"], "ssaod550"),
"su": (["aermr11"], "suaod550"),
"pm": (["pm2p5"], "aod550")}
dict_Data = {'YEAR':{'train':{'sfc':'Cams/cams-global-reanalysis-year_2020-07-01_2021-05-31_levtype_sfc.nc',
'pl':'Cams/cams-global-reanalysis-year_2020-07-01_2021-05-31_levtype_pl.nc'},
'trainGen':{'sfc':'Cams/cams-global-reanalysis_2021-06-01_2021-06-30_levtype_sfc.nc',
'pl':'Cams/cams-global-reanalysis_2021-06-01_2021-06-30_levtype_pl.nc'},
'test':[{'sfc':'Cams/cams_reanalysis_year-testSet_2021-07-01_2022-07-01_levtype_sfc.nc',
'pl':'Cams/cams_reanalysis_year-testSet_2021-07-01_2022-07-01_levtype_pl.nc'}],
'testPeriod':['YEAR'],
'trainStartDate':datetime.datetime(2019, 7, 1), 'trainEndDate':datetime.datetime(2020, 6, 1),
'trainGenStartDate':datetime.datetime(2020, 6, 1), 'trainGenEndDate':datetime.datetime(2020, 7, 1),
'testStartDate':datetime.datetime(2020, 7, 1), 'testEndDate':datetime.datetime(2021, 7, 1),
'testDates' : ["202007010100-202010010000", "202010010100-202101010000", "202101010100-202104010000",
"202104010100-202107010000"],
'trainingDates' : ["201907010100-201910010000", "201910010100-202001010000", "202001010100-202004010000",
"202004010100-202007010000"]}
}
testP = 'YEAR'
dict_Path = dict_Data[testP]
sfc_test_path = dict_Path['test'][0]['sfc']
pl_test_path = dict_Path['test'][0]['pl']
sfc_train_path = dict_Path['train']['sfc']
pl_train_path = dict_Path['train']['pl']
sfc_train_pathGen = dict_Path['trainGen']['sfc']
pl_train_pathGen = dict_Path['trainGen']['pl']
trainStartDate = dict_Path['trainStartDate']
trainEndDate = dict_Path['trainEndDate']
trainGenStartDate = dict_Path['trainGenStartDate']
trainGenEndDate = dict_Path['trainGenEndDate']
testStartDate = dict_Path['testStartDate']
testEndDate = dict_Path['testEndDate']
trainingDates = dict_Path['trainingDates']
testDates = dict_Path['testDates']
# if args.trainPeriod not in dict_Data.keys():
# raise argparse.ArgumentError(trainPeriod_arg, "trainPeriod should be one of [sum20, aut20, win21, spr21, sum21, YEAR]. \n"
# "Got "+str(args.trainPeriod))
args.version = "_" + args.origData + args.version
if args.horizon != 0:
args.version = "_pred"+str(args.horizon)+"h"+args.version
if args.log:
args.version = "_log"+args.version
args.version = "_"+args.aer+args.version
if args.matchTrainPeriods:
args.version = "_" + 'M' + args.version
else:
args.version = "_"+args.version
name_model = "ML-regression_"+args.model+args.version
if args.origData=="Cams":
if args.matchTrainPeriods:
train_set = DatasetCams(pathSfcList=[sfc_train_pathGen, sfc_train_path],
pathPlList=[pl_train_pathGen, pl_train_path],
var=args.aer, horizon=args.horizon, log=args.log, normal=args.normal)
else:
train_set = DatasetCams(pathSfcList=[sfc_train_path],
pathPlList=[pl_train_path],
var=args.aer, horizon=args.horizon, log=args.log, normal=args.normal)
elif args.origData == "Al-EUR":
if args.matchTrainPeriods:
train_set = DatasetAladinEur(list_dates=trainingDates, end_date=None, log=args.log, normal=args.normal)
else:
train_set = DatasetAladinEur(list_dates=trainingDates, end_date=trainEndDate, log=args.log, normal=args.normal)
train_loader = DataLoader(train_set, batch_size=1, shuffle=True)
MSE_loss = torch.nn.MSELoss(reduction='none')
pPrep = PlotPrep()
if args.log:
dis = Distrib()
if not args.load:
N = len(train_loader)
L = train_set.height * train_set.width
x = cp.zeros((N, L))
y = cp.zeros((N, L))
for idx_batch, (inputTrain_i, targetTrain_i) in enumerate(train_loader):
# if idx_batch >= args.N:
# break
inputTrain_i = inputTrain_i.float()
targetTrain_i = targetTrain_i.float()
inputTrain_i = cp.asarray(inputTrain_i)
targetTrain_i = cp.asarray(targetTrain_i)
x_i = cp.ravel(inputTrain_i)
y_i = cp.ravel(targetTrain_i)
x[idx_batch] = x_i
y[idx_batch] = y_i
x = x.reshape(-1, 1)
y = cp.ravel(y)
with config_context(array_api_dispatch=True):
if args.model == "RandomForest":
model = RandomForestRegressor(random_state=0, verbose=True)
else:
raise argparse.ArgumentError(model_arg, "unkown model")
os.makedirs("savedModels/" + name_model + "/", exist_ok=True)
print("\n Starting training")
model = model.fit(X=x.get(), y=y.get())
print("\n End of training")
pickle.dump(model, open("savedModels/" + name_model + "/model.pkl", 'wb'))
else:
model = pickle.load(open("savedModels/" + name_model + "/model.pkl", 'rb'))
p = model.predict
losses = []
times = []
r_losses = []
c_losses = []
MBE = []
MBE_R = []
# Ds = []
sim_idces = []
def test(sfc_test_path, pl_test_path, testDates, origData):
# Beginning test
if origData=="Cams":
test_set = DatasetCams(pathSfcList=[sfc_test_path],
pathPlList=[pl_test_path],
var=args.aer, horizon=args.horizon, log=args.log, normal=args.normal)
maxT = test_set.maxTest
minT = test_set.minTest
elif origData == "Al-EUR":
test_set = DatasetAladinEur(list_dates=testDates, end_date=None, step=args.testStep, normal=args.normal)
maxT = test_set.maxTest
minT = test_set.minTest
newMax = (maxT - minT)
simLoss = FSIMLoss(chromatic=False, data_range=newMax)
test_loader = DataLoader(test_set, batch_size=1, shuffle=False)
os.makedirs("results/" + name_model + "/", exist_ok=True)
# os.makedirs("results/" + name_model + "/" + testPeriod + "/", exist_ok=True)
for idx_batch_test, (input_, target) in enumerate(test_loader):
input_ = input_.float()
target = target.float()
input_ = input_.to(device)
target = target.to(device)
input_ = input_[0]
target = target[0]
path_save_img = "results/" + name_model + "/results_test_img" + str(idx_batch_test + 1) + ".pdf"
path_save_img2 = "results/" + name_model + "/error_test_img" + str(idx_batch_test + 1) + ".pdf"
start_time = time.time()
input0 = cp.asarray(input_)
input1 = input0.reshape(-1, 1)
with config_context(array_api_dispatch=True):
predic0 = p(input1.get())
predic = predic0.reshape(input0.shape)
pred_time = time.time() - start_time
times.append(pred_time)
predic = torch.from_numpy(predic)
predic = predic.to(device)
if args.log:
input_ = dis.reverse(input_)
target = dis.reverse(target)
predic = dis.reverse(predic)
predic = Filter(predic)
target = Filter(target)
mt = torch.mean(target).item()
loss = torch.mean(torch.sqrt(MSE_loss(predic, target))).item()
relative_loss = (loss / mt)
losses.append(loss)
r_losses.append(relative_loss)
mbe = torch.mean(predic - target).item()
mbe_r = (mbe / mt)
# d1 = torch.sum(torch.pow((predic - target), 2)).item()
# d2 = torch.sum(torch.pow((torch.abs(predic - mt) + torch.abs(target - mt)), 2)).item()
# d = 1 - d1/d2
# Ds.append(d)
predF1 = predic - torch.min(predic)
predF = ((predF1 > 0) * (predF1 < newMax)) * predF1 + (predF1 >= newMax) * newMax
targetF1 = target - minT
targetF = ((targetF1 > 0) * (targetF1 < newMax)) * targetF1 + (targetF1 >= newMax) * newMax
sim_idx = simLoss(predF[None, :], targetF[None, :]).item()
sim_idces.append(sim_idx)
MBE.append(mbe)
MBE_R.append(mbe_r)
pred = pPrep(predic, norm=True)
targ = pPrep(target, norm=True)
inp = pPrep(input_, norm=True)
plt.suptitle("Test results.")
# Each file contains two images : one for the prediction and one for the target
ax0 = plt.subplot(2, 2, 1)
ax0.imshow(inp)
ax0.set_title("Input (AOD)")
ax1 = plt.subplot(2, 2, 3)
ax1.imshow(pred)
ax1.set_title(f"Prediction. Test error : {loss:1.3f}\u03BCg/m\N{superscript three} or {relative_loss*100:1.3f}%."
f" \n MBE : {mbe:1.3f}\u03BCg/m\N{superscript three} or {mbe_r*100:1.3f}%."
f" \n sim-index loss : {sim_idx*100:1.3f}%.")
ax2 = plt.subplot(2, 2, 4)
ax2.imshow(targ)
ax2.set_title("Target.")
plt.savefig(path_save_img)
plt.close()
q1, q2, q3 = torch.quantile(target, 0.25), torch.quantile(target, 0.5), torch.quantile(target, 0.75)
ct = torch.where(target > q1, 1, 0) + torch.where(target > q2, 1, 0) + torch.where(target > q3, 1, 0)
cpred = torch.where(predic > q1, 1, 0) + torch.where(predic > q2, 1, 0) + torch.where(predic > q3, 1, 0)
ce = torch.mean(torch.abs(ct - cpred).float()).item()
c_losses.append(ce)
ct = pPrep(ct/4)
cpred = pPrep(cpred/4)
error = torch.abs(target - predic)
error = pPrep(error, error=True, bw=args.e_bw, norm=True)
plt.suptitle("Error visualisation.")
ax0 = plt.subplot(2, 2, 1)
if args.e_bw:
ax0.imshow(error, cmap='gray')
else:
ax0.imshow(error)
ax0.set_title(f"Test error : {loss:1.3f}\u03BCg/m\N{superscript three} or {relative_loss*100:1.3f}%.")
ax1 = plt.subplot(2, 2, 3)
ax1.imshow(cpred)
ax1.set_title(f"Segmented prediction. \n Classification error : {ce:1.3f}.")
ax2 = plt.subplot(2, 2, 4)
ax2.imshow(ct)
ax2.set_title("Segmented target.")
plt.savefig(path_save_img2)
plt.close()
dict_perf = {"Average inference time (s)": np.mean(times),
"STD of inference time (s)": np.std(times),
"Average prediction error (\u03BCg/m\N{superscript three})": np.mean(losses),
"STD of prediction error (\u03BCg/m\N{superscript three})": np.std(losses),
"Average relative error (%)": 100 * np.mean(r_losses),
"STD of relative error (%)": 100 * np.std(r_losses),
"Average classification error": np.mean(c_losses),
"STD of classification error": np.std(c_losses),
"Average bias error (\u03BCg/m\N{superscript three})": np.mean(MBE),
"STD of bias error (\u03BCg/m\N{superscript three})": np.std(MBE),
"Average relative bias error (%)": 100 * np.mean(MBE_R),
"STD of relative bias error (%)": 100 * np.std(MBE_R),
# "Average index of agreement (%)": 100 * np.mean(Ds),
# "STD of index of agreement (%)": 100 * np.std(Ds),
"Average sim-index loss (%)": 100 * np.mean(sim_idces),
"STD of sim-index loss (%)": 100 * np.std(sim_idces)
}
with open("results/" + name_model + "/perf_test.txt", "w+") as f:
json.dump(dict_perf, f, indent=2)
f.close()
return dict_perf
# End test
# if args.testPeriod != "all":
perf = test(sfc_test_path, pl_test_path, testDates, args.origData)
# else:
# for i in range(len(dict_Path['test'])):
# perfPer = test(dict_Path['test'][i]['sfc'], dict_Path['test'][i]['pl'], dict_Path['testPeriod'][i])
# Saving the arguments and the equation
os.makedirs("savedModels/" + name_model + "/", exist_ok=True)
with open("savedModels/" + name_model + "/arguments.txt", "w+") as f:
json.dump(args.__dict__, f, indent=2)
f.close()
\ No newline at end of file
Code corresponding to the paper : (include DOI)
Files kriging.py, ML.py and LinearPred.py correspond to baseline methods used as comparison.
File Aod2SfcAer_2DED.py is used for all experiments on models using several variables.
command_Aladin.txt and command_Cams.txt contain all commands for experiments on the Aladin and Cams datasets, respectively. Less experiments were realised on Aladin than on Cams, as they take a very long time.
Regarding datasets paths, the Aladin dataset is considered to be located in the AeroClim2 subfolder of the current folder, and the Cams dataset in the Cams subfolder. However they could not be included in this repository because of the large size of the corresponding files. The data can be found at : (include link/DOI)
########## Baseline
nohup python3 LinearPred.py --D=3 --log=True --origData=Al-EUR >>stdout.txt 2>>stderr.txt &
nohup python3 ML.py --log=True --origData=Al-EUR >>stdout.txt 2>>stderr.txt &
nohup python3 kriging.py --type=ordinary --variogram=hole-effect --bc_type=random --prob_bc=5 --log=True --origData=Al-EUR >>stdout.txt 2>>stderr.txt &
############## Cams best results
nohup python3 Aod2SfcAer_2DED.py --skip_co=True --linear_layer=True --aer=pm --testPeriod=next --trainPeriod=YEAR --log=True --FSIMloss=True --wind=True --extract_norm_wind=True --humidity=True --pressure=True --temperature=True --angstrom=True --origData=Al-EUR >>stdout.txt 2>>stderr.txt &
nohup python3 Aod2SfcAer_2DED.py --skip_co=True --linear_layer=True --aer=pm --testPeriod=next --trainPeriod=YEAR --log=True --FSIMloss=True --wind=True --extract_norm_wind=True --humidity=True --pressure=True --temperature=True --origData=Al-EUR >>stdout.txt 2>>stderr.txt & disown
nohup python3 Aod2SfcAer_2DED.py --skip_co=True --linear_layer=True --aer=pm --testPeriod=next --trainPeriod=YEAR --log=True --FSIMloss=True --wind=True --extract_norm_wind=True --humidity=True --pressure=True --temperature=True --angstrom=True --origData=Al-EUR --fusion='DF' >>stdout.txt 2>>stderr.txt & disown
######## Hybrid
nohup python3 Aod2SfcAer_2DED.py --skip_co=True --linear_layer=True --aer=pm --testPeriod=next --trainPeriod=YEAR --log=True --FSIMloss=True --wind=True --extract_norm_wind=True --humidity=True --pressure=True --temperature=True --angstrom=True --origData=Al-EUR --fusion='Hybrid1F' >>stdout.txt 2>>stderr.txt & disown
nohup python3 Aod2SfcAer_2DED.py --skip_co=True --linear_layer=True --aer=pm --testPeriod=next --trainPeriod=YEAR --log=True --FSIMloss=True --wind=True --extract_norm_wind=True --humidity=True --pressure=True --temperature=True --angstrom=True --origData=Al-EUR --fusion='Hybrid2F' >>stdout.txt 2>>stderr.txt & disown
This diff is collapsed.
from .dataset_Cams import DatasetCams, DatasetCamsGAN, DatasetCamsHybrid
from .transforms import BCnaive, BCrandom, BCrandomLand, BCfromFile, ApplyBC, PlotPrep, ApplyBC, MRtoConc3D, MRtoConc2D, BCloss, Filter, FilterAod, MultAod, Distrib, PrepKriDataFromSparse, ConvPM, AirQualityQuantif
from .dataset_kri import DatasetCamsKri, DatasetAladinEurKri
from .dataset_AeroClim import DatasetAladinEur, DatasetAladinEurGAN
\ No newline at end of file
File added
File added
File added
File added
File added
File added
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment