publish branch

377f6880 · Matthieu Dabrowski · 377f6880 · 377f6880 · 377f6880 · 377f6880
Commit 377f6880 authored 1 year ago by Matthieu Dabrowski
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
+# Default ignored files
+/shelf/
+/workspace.xml
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ b/.idea/inspectionProfiles/profiles_settings.xml
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="sdkName" value="$USER_HOME$/anaconda3" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="$USER_HOME$/anaconda3" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/multiVariateModels.iml" filepath="$PROJECT_DIR$/.idea/multiVariateModels.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
--- a/.idea/multiVariateModels.iml
+++ b/.idea/multiVariateModels.iml
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="PyDocumentationSettings">
+    <option name="format" value="PLAIN" />
+    <option name="myDocStringFormat" value="Plain" />
+  </component>
+</module>
\ No newline at end of file
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
--- a/Aod2SfcAer_2DED.py
+++ b/Aod2SfcAer_2DED.py
--- a/LinearPred.py
+++ b/LinearPred.py
+import argparse
+import json
+import os
+import sys
+import numpy as np
+import matplotlib
+# Import libraries
+import matplotlib.pyplot as plt
+# PyTorch's libraries and modules
+import netCDF4
+import torch
+from torch.utils.data import DataLoader
+from piq import VSILoss, MDSILoss, MultiScaleGMSDLoss, FSIMLoss
+import cupy as cp
+import datetime
+import time
+import pickle
+from sympy import S, symbols, printing
+from dataset_generation import PlotPrep, DatasetCams, Filter, Distrib, DatasetAladinEur
+import warnings
+warnings.filterwarnings("ignore", 'The original MDSI supports only RGB images. The input images were converted to RGB by copying the grey channel 3 times.')
+warnings.filterwarnings("ignore", 'The original VSI supports only RGB images. The input images were converted to RGB by copying the grey channel 3 times.')
+warnings.filterwarnings("ignore", "torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument.")
+warnings.filterwarnings("ignore", 'Matplotlib is currently using agg, which is a non-GUI backend, so cannot show the figure.')
+
+matplotlib.use('Agg')
+plt.ioff()
+epsilon = sys.float_info.epsilon
+# If you can, run this example on a GPU, it will be a lot faster.
+cuda = torch.cuda.is_available()
+device_name = "cuda" if cuda else "cpu"
+device = torch.device(device_name)
+
+parser = argparse.ArgumentParser()
+# Choosing the version of the model (useful for saving)
+parser.add_argument("--version", type=str, default="", help="version of the model")
+# Choose degree of interpolation
+parser.add_argument("--D", type=int, default=1, help="degree of the interpolation")
+# Choose number of samples for 'training'
+parser.add_argument("--N", type=int, default=10, help="number of samples for 'training'")
+# Choosing the type of aerosol we consider
+aer_arg = parser.add_argument("--aer", type=str, default='pm', help="type of aerosol we consider (du, bc, om, ss, su, pm). ")
+# Choosing the prediction horizon (must be 0 or a multiple of 3 as data is obtained every 3 hours)
+hor_arg2 = parser.add_argument("--horizon", type=int, default=0, help="prediction horizon (must be 0 or a multiple of 3 as data is obtained every 3 hours)")
+# Choosing whether or not to normalize all inputs
+parser.add_argument("--normal", type=bool, default=False, help="normalize all inputs")
+# Choosing if we apply a log to the inputs and targets or not
+parser.add_argument("--log", type=bool, default=False, help="apply log scale to the inputs and targets")
+# Choosing if we display the error in black & white or not
+parser.add_argument("--e_bw", type=bool, default=True, help="display the error in black & white or not")
+# # Choosing train period (season & year)
+# trainPeriod_arg = parser.add_argument("--trainPeriod", type=str, default="sum20", help="choosing train period (season & year or YEAR)")
+# # Choosing the test period
+# testPeriod_arg = parser.add_argument("--testPeriod", type=str, default="next", help="choosing test period (next or all)")
+# Choosing the time step for the test set
+parser.add_argument("--testStep", type=int, default=3, help="time step for test set")
+# Choosing origin of data
+parser.add_argument("--origData", type=str, default="Cams", help="data origin")
+# Choosing whether to take the same complete training periods for semi-supervised and supervised
+parser.add_argument("--matchTrainPeriods", type=bool, default=False, help="take the same complete training periods for semi-supervised and supervised")
+# Dividing the dimensions by a ratio (Cams Forecast)
+parser.add_argument("--ratio", type=int, default=1, help="dividing dim by ratio (Cams Forecast)")
+# Choosing whether to load a pre-existing model or to train a new one
+parser.add_argument("--load", type=bool, default=False, help="choosing whether to load a pre-existing model or to train a new one")
+args = parser.parse_args()
+print(args)
+
+# if args.testPeriod not in ["next", "all"]:
+#     raise argparse.ArgumentError(testPeriod_arg, "testPeriod should be next or all. \n "
+#                                                 "Got "+str(args.testPeriod))
+
+dict_aer = {"du": (["aermr05", "aermr06"], "duaod550"),
+            "bc": (["aermr09", "aermr10"], "bcaod550"),
+            "om": (["aermr07", "aermr08"], "omaod550"),
+            "ss": (["aermr02", "aermr03"], "ssaod550"),
+            "su": (["aermr11"], "suaod550"),
+            "pm": (["pm2p5"], "aod550")}
+
+dict_Data = {'YEAR':{'train':{'sfc':'Cams/cams-global-reanalysis-year_2020-07-01_2021-05-31_levtype_sfc.nc',
+                               'pl':'Cams/cams-global-reanalysis-year_2020-07-01_2021-05-31_levtype_pl.nc'},
+                     'trainGen':{'sfc':'Cams/cams-global-reanalysis_2021-06-01_2021-06-30_levtype_sfc.nc',
+                                   'pl':'Cams/cams-global-reanalysis_2021-06-01_2021-06-30_levtype_pl.nc'},
+                     'test':[{'sfc':'Cams/cams_reanalysis_year-testSet_2021-07-01_2022-07-01_levtype_sfc.nc',
+                                'pl':'Cams/cams_reanalysis_year-testSet_2021-07-01_2022-07-01_levtype_pl.nc'}],
+                     'testPeriod':['YEAR'],
+                      'trainStartDate':datetime.datetime(2019, 7, 1), 'trainEndDate':datetime.datetime(2020, 6, 1),
+                      'trainGenStartDate':datetime.datetime(2020, 6, 1), 'trainGenEndDate':datetime.datetime(2020, 7, 1),
+                      'testStartDate':datetime.datetime(2020, 7, 1), 'testEndDate':datetime.datetime(2021, 7, 1),
+    'testDates' : ["202007010100-202010010000", "202010010100-202101010000", "202101010100-202104010000",
+                 "202104010100-202107010000"],
+'trainingDates' : ["201907010100-201910010000", "201910010100-202001010000", "202001010100-202004010000",
+                 "202004010100-202007010000"]}
+             }
+
+testP = 'YEAR'
+dict_Path = dict_Data[testP]
+sfc_test_path = dict_Path['test'][0]['sfc']
+pl_test_path = dict_Path['test'][0]['pl']
+
+sfc_train_path = dict_Path['train']['sfc']
+pl_train_path = dict_Path['train']['pl']
+
+sfc_train_pathGen = dict_Path['trainGen']['sfc']
+pl_train_pathGen = dict_Path['trainGen']['pl']
+
+trainStartDate = dict_Path['trainStartDate']
+trainEndDate = dict_Path['trainEndDate']
+trainGenStartDate = dict_Path['trainGenStartDate']
+trainGenEndDate = dict_Path['trainGenEndDate']
+testStartDate = dict_Path['testStartDate']
+testEndDate = dict_Path['testEndDate']
+
+trainingDates = dict_Path['trainingDates']
+testDates = dict_Path['testDates']
+
+# if args.trainPeriod not in dict_Data.keys():
+#     raise argparse.ArgumentError(trainPeriod_arg, "trainPeriod should be one of [sum20, aut20, win21, spr21, sum21, YEAR]. \n"
+#                                                   "Got "+str(args.trainPeriod))
+
+args.version = "_" + args.origData + args.version
+if args.horizon != 0:
+    args.version = "_pred"+str(args.horizon)+"h"+args.version
+if args.log:
+    args.version = "_log"+args.version
+args.version = "_"+args.aer+args.version
+if args.matchTrainPeriods:
+    args.version = "_" + 'Match' + args.version
+else:
+    args.version = "_" +args.version
+name_model = "interp_deg"+str(args.D)+args.version
+
+if args.origData == "Cams":
+    if args.matchTrainPeriods:
+        train_set = DatasetCams(pathSfcList=[sfc_train_pathGen, sfc_train_path],
+                                                pathPlList=[pl_train_pathGen, pl_train_path],
+                                                var=args.aer, horizon=args.horizon, log=args.log, normal=args.normal)
+    else:
+        train_set = DatasetCams(pathSfcList=[sfc_train_path],
+                                            pathPlList=[pl_train_path],
+                                            var=args.aer, horizon=args.horizon, log=args.log, normal=args.normal)
+elif args.origData == "Al-EUR":
+    if args.matchTrainPeriods:
+        train_set = DatasetAladinEur(list_dates=trainingDates, end_date=None, log=args.log, normal=args.normal)
+    else:
+        train_set = DatasetAladinEur(list_dates=trainingDates, end_date=trainEndDate, log=args.log, normal=args.normal)
+    train_set = DatasetCamsForecast(type="train", end_date=endSupTrainDate, log=args.log, horizon1=args.horizon1, horizon2=args.horizon2, aer=args.aer, inputTarget=True, normal=args.normal, ratio=args.ratio)
+train_loader = DataLoader(train_set, batch_size=1, shuffle=True)
+xS = symbols("x")
+
+MSE_loss = torch.nn.MSELoss(reduction='none')
+
+pPrep = PlotPrep()
+if args.log:
+    dis = Distrib()
+
+if not args.load:
+    x = cp.array([])
+    y = cp.array([])
+
+    for idx_batch, (inputTrain_i, targetTrain_i) in enumerate(train_loader):
+
+        if idx_batch >= args.N:
+            break
+
+        inputTrain_i = inputTrain_i.float()
+        targetTrain_i = targetTrain_i.float()
+        if inputTrain_i.shape != targetTrain_i.shape:
+            raise Exception("inputTrain_i and targetTrain_i have different shapes. inputTrain_i.shape : ", inputTrain_i.shape, " targetTrain_i.shape : ", targetTrain_i.shape)
+
+        inputTrain_i = cp.asarray(inputTrain_i)
+        targetTrain_i = cp.asarray(targetTrain_i)
+        if inputTrain_i.shape != targetTrain_i.shape:
+            raise Exception("inputTrain_i and targetTrain_i have different shapes. inputTrain_i.shape : ", inputTrain_i.shape, " targetTrain_i.shape : ", targetTrain_i.shape)
+
+        x_i = cp.ravel(inputTrain_i)
+        y_i = cp.ravel(targetTrain_i)
+        if x_i.shape != y_i.shape:
+            raise Exception("x_i and y_i have different shapes. x_i.shape : ", x_i.shape, " y_i.shape : ", y_i.shape)
+
+        x = cp.concatenate((x, x_i))
+        y = cp.concatenate((y, y_i))
+        if x.shape != y.shape:
+            raise Exception("x and y have different shapes. x.shape : ", x.shape, " y.shape : ", y.shape)
+
+    model = cp.polyfit(x, y, args.D)
+    os.makedirs("savedModels/" + name_model + "/", exist_ok=True)
+    pickle.dump(model, open("savedModels/" + name_model + "/model.pkl", 'wb'))
+else:
+    model = pickle.load(open("savedModels/" + name_model + "/model.pkl", 'rb'))
+deg = args.D
+c = 0
+while (c < len(model)) and (model[c]==0):
+    deg = deg-1
+    c = c+1
+p = cp.poly1d(model)
+eq_model = sum(S("{:6.2f}".format(v)) * xS ** i for i, v in enumerate(model[::-1]))
+eq_model_print = printing.latex(eq_model)
+
+losses = []
+times = []
+r_losses = []
+c_losses = []
+MBE = []
+MBE_R = []
+# Ds = []
+sim_idces = []
+
+def test(sfc_test_path, pl_test_path, testDates, origData):
+    # Beginning test
+    if origData == "Cams":
+        test_set = DatasetCams(pathSfcList=[sfc_test_path],
+                    pathPlList=[pl_test_path],
+                    var=args.aer, horizon=args.horizon, log=args.log, normal=args.normal)
+        newMax = (test_set.maxTest - test_set.minTest)
+    elif origData == "Al-EUR":
+        test_set = DatasetAladinEur(list_dates=testDates, end_date=None, step=args.testStep,
+                                        log=args.log, normal=args.normal)
+        newMax = (test_set.maxTest - test_set.minTest)
+    simLoss = FSIMLoss(chromatic=False, data_range=newMax)
+    test_loader = DataLoader(test_set, batch_size=1, shuffle=False)
+    os.makedirs("results/" + name_model + "/", exist_ok=True)
+    # os.makedirs("results/" + name_model + "/" + testPeriod + "/", exist_ok=True)
+    for idx_batch_test, (input_, target) in enumerate(test_loader):
+
+        input_ = input_.float()
+        target = target.float()
+
+        input_ = input_.to(device)
+        target = target.to(device)
+        input_ = input_[0]
+        target = target[0]
+
+        path_save_img = "results/" + name_model + "/results_test_img" + str(idx_batch_test + 1) + ".pdf"
+        path_save_img2 = "results/" + name_model + "/error_test_img" + str(idx_batch_test + 1) + ".pdf"
+        start_time = time.time()
+        input1 = cp.asarray(input_)
+        predic = p(input1)
+        pred_time = time.time() - start_time
+        times.append(pred_time)
+        predic = torch.from_numpy(cp.asnumpy(predic))
+        predic = predic.to(device)
+
+        if args.log:
+            input_ = dis.reverse(input_)
+            target = dis.reverse(target)
+            predic = dis.reverse(predic)
+
+        predic = Filter(predic)
+        target = Filter(target)
+        mt = torch.mean(target).item()
+        loss = torch.mean(torch.sqrt(MSE_loss(predic, target))).item()
+        relative_loss = (loss / mt)
+        losses.append(loss)
+        r_losses.append(relative_loss)
+        mbe = torch.mean(predic - target).item()
+        mbe_r = (mbe / mt)
+        # d1 = torch.sum(torch.pow((predic - target), 2)).item()
+        # d2 = torch.sum(torch.pow((torch.abs(predic - mt) + torch.abs(target - mt)), 2)).item()
+        # d = 1 - d1/d2
+        # Ds.append(d)
+        predF1 = predic - torch.min(predic)
+        predF = ((predF1 > 0) * (predF1 < newMax)) * predF1 + (predF1 >= newMax) * newMax
+        targetF1 = target - test_set.minmaxs['Conc'][0]
+        targetF = ((targetF1 > 0) * (targetF1 < newMax)) * targetF1 + (targetF1 >= newMax) * newMax
+        sim_idx = simLoss(predF[None, :], targetF[None, :]).item()
+        sim_idces.append(sim_idx)
+        MBE.append(mbe)
+        MBE_R.append(mbe_r)
+        pred = pPrep(predic, norm=True)
+        targ = pPrep(target, norm=True)
+        inp = pPrep(input_, norm=True)
+        if args.D < 5:
+            plt.suptitle("Test results. \n "+eq_model_print)
+        else:
+            plt.suptitle("Test results. \n Degree is "+str(deg))
+        # Each file contains two images : one for the prediction and one for the target
+        ax0 = plt.subplot(2, 2, 1)
+        ax0.imshow(inp)
+        ax0.set_title("Input (AOD)")
+        ax1 = plt.subplot(2, 2, 3)
+        ax1.imshow(pred)
+        ax1.set_title(f"Prediction. Test error : {loss:1.3f}\u03BCg/m\N{superscript three} or {relative_loss*100:1.3f}%."
+                      f" \n MBE : {mbe:1.3f}\u03BCg/m\N{superscript three} or {mbe_r*100:1.3f}%."
+                      f" \n sim-index loss : {sim_idx*100:1.3f}%.")
+        ax2 = plt.subplot(2, 2, 4)
+        ax2.imshow(targ)
+        ax2.set_title("Target.")
+        plt.savefig(path_save_img)
+        plt.close()
+
+        q1, q2, q3 = torch.quantile(target, 0.25), torch.quantile(target, 0.5), torch.quantile(target, 0.75)
+        ct = torch.where(target > q1, 1, 0) + torch.where(target > q2, 1, 0) + torch.where(target > q3, 1, 0)
+        cpred = torch.where(predic > q1, 1, 0) + torch.where(predic > q2, 1, 0) + torch.where(predic > q3, 1, 0)
+        ce = torch.mean(torch.abs(ct - cpred).float()).item()
+        c_losses.append(ce)
+        ct = pPrep(ct/4)
+        cpred = pPrep(cpred/4)
+        error = torch.abs(target - predic)
+        error = pPrep(error, error=True, bw=args.e_bw, norm=True)
+        if args.D < 5:
+            plt.suptitle("Error visualisation. \n " + eq_model_print)
+        else:
+            plt.suptitle("Error visualisation. \n Degree is " + str(deg))
+        ax0 = plt.subplot(2, 2, 1)
+        if args.e_bw:
+            ax0.imshow(error, cmap='gray')
+        else:
+            ax0.imshow(error)
+        ax0.set_title(f"Test error : {loss:1.3f}\u03BCg/m\N{superscript three} or {relative_loss*100:1.3f}%.")
+        ax1 = plt.subplot(2, 2, 3)
+        ax1.imshow(cpred)
+        ax1.set_title(f"Segmented prediction. \n Classification error : {ce:1.3f}.")
+        ax2 = plt.subplot(2, 2, 4)
+        ax2.imshow(ct)
+        ax2.set_title("Segmented target.")
+        plt.savefig(path_save_img2)
+        plt.close()
+
+    dict_perf = {"Average inference time (s)": np.mean(times),
+                 "STD of inference time (s)": np.std(times),
+                 "Average prediction error (\u03BCg/m\N{superscript three})": np.mean(losses),
+                 "STD of prediction error  (\u03BCg/m\N{superscript three})": np.std(losses),
+                 "Average relative error (%)": 100 * np.mean(r_losses),
+                 "STD of relative error (%)": 100 * np.std(r_losses),
+                 "Average classification error": np.mean(c_losses),
+                 "STD of classification error": np.std(c_losses),
+                 "Average bias error (\u03BCg/m\N{superscript three})": np.mean(MBE),
+                 "STD of bias error (\u03BCg/m\N{superscript three})": np.std(MBE),
+                 "Average relative bias error (%)": 100 * np.mean(MBE_R),
+                 "STD of relative bias error (%)": 100 * np.std(MBE_R),
+                 # "Average index of agreement (%)": 100 * np.mean(Ds),
+                 # "STD of index of agreement (%)": 100 * np.std(Ds),
+                 "Average sim-index loss (%)": 100 * np.mean(sim_idces),
+                 "STD of sim-index loss (%)": 100 * np.std(sim_idces)
+                 }
+    with open("results/" + name_model + "/perf_test.txt", "w+") as f:
+        json.dump(dict_perf, f, indent=2)
+        f.close()
+    return dict_perf
+    # End test
+
+perf = test(sfc_test_path, pl_test_path, testDates, args.origData)
+
+# Saving the arguments and the equation
+os.makedirs("savedModels/" + name_model + "/", exist_ok=True)
+with open("savedModels/" + name_model + "/arguments.txt", "w+") as f:
+    json.dump(args.__dict__, f, indent=2)
+    f.close()
\ No newline at end of file
--- a/ML.py
+++ b/ML.py
+import argparse
+import json
+import os
+import sys
+import matplotlib
+# Import libraries
+import matplotlib.pyplot as plt
+# PyTorch's libraries and modules
+import netCDF4
+import sklearn.linear_model
+import torch
+from torch.utils.data import DataLoader
+from piq import VSILoss, MDSILoss, MultiScaleGMSDLoss, FSIMLoss
+import numpy as np
+import cupy as cp
+import time
+import pickle
+import datetime
+from sklearn.ensemble import RandomForestRegressor
+from sklearn import config_context
+from dataset_generation import PlotPrep, DatasetCams, Filter, Distrib, DatasetAladinEur
+import warnings
+# If you can, run this example on a GPU, it will be a lot faster.
+cuda = torch.cuda.is_available()
+device_name = "cuda" if cuda else "cpu"
+device = torch.device(device_name)
+warnings.filterwarnings("ignore", 'The original MDSI supports only RGB images. The input images were converted to RGB by copying the grey channel 3 times.')
+warnings.filterwarnings("ignore", 'The original VSI supports only RGB images. The input images were converted to RGB by copying the grey channel 3 times.')
+warnings.filterwarnings("ignore", "torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument.")
+warnings.filterwarnings("ignore", 'Matplotlib is currently using agg, which is a non-GUI backend, so cannot show the figure.')
+
+matplotlib.use('Agg')
+plt.ioff()
+epsilon = sys.float_info.epsilon
+# If you can, run this example on a GPU, it will be a lot faster.
+cuda = torch.cuda.is_available()
+device_name = "cuda" if cuda else "cpu"
+device = torch.device(device_name)
+cpu = torch.device("cpu")
+
+parser = argparse.ArgumentParser()
+# Choosing the version of the model (useful for saving)
+parser.add_argument("--version", type=str, default="", help="version of the model")
+# # Choosing the type of model we want to use
+model_arg = parser.add_argument("--model", type=str, default="RandomForest", help="choosing the type of model we want to use")
+# Choosing the type of aerosol we consider
+aer_arg = parser.add_argument("--aer", type=str, default='pm', help="type of aerosol we consider (du, bc, om, ss, su, pm). ")
+# Choosing the prediction horizon (must be 0 or a multiple of 3 as data is obtained every 3 hours)
+hor_arg2 = parser.add_argument("--horizon", type=int, default=0, help="prediction horizon (must be 0 or a multiple of 3 as data is obtained every 3 hours)")
+# Choosing if we apply a log to the inputs and targets or not
+parser.add_argument("--log", type=bool, default=False, help="apply log scale to the inputs and targets")
+# Choosing whether or not to normalize all inputs
+parser.add_argument("--normal", type=bool, default=False, help="normalize all inputs")
+# Choosing if we display the error in black & white or not
+parser.add_argument("--e_bw", type=bool, default=True, help="display the error in black & white or not")
+# # Choosing train period (season & year)
+# trainPeriod_arg = parser.add_argument("--trainPeriod", type=str, default="sum20", help="choosing train period (season & year or YEAR)")
+# # Choosing the test period
+# testPeriod_arg = parser.add_argument("--testPeriod", type=str, default="next", help="choosing test period (next or all)")
+# Choosing the time step for the test set
+parser.add_argument("--testStep", type=int, default=3, help="time step for test set")
+# Choosing origin of data
+parser.add_argument("--origData", type=str, default="Cams", help="data origin")
+# Choosing whether to take the same complete training periods for semi-supervised and supervised
+parser.add_argument("--matchTrainPeriods", type=bool, default=False, help="take the same complete training periods for semi-supervised and supervised")
+# Dividing the dimensions by a ratio (Cams Forecast)
+parser.add_argument("--ratio", type=int, default=1, help="dividing dim by ratio (Cams Forecast)")
+# Choosing whether to load a pre-existing model or to train a new one
+parser.add_argument("--load", type=bool, default=False, help="choosing whether to load a pre-existing model or to train a new one")
+args = parser.parse_args()
+print(args)
+
+# if args.testPeriod not in ["next", "all"]:
+#     raise argparse.ArgumentError(testPeriod_arg, "testPeriod should be next or all. \n "
+#                                                 "Got "+str(args.testPeriod))
+
+# if args.horizon%3 != 0:
+#     raise argparse.ArgumentError(hor_arg, "prediction horizon should be 0 or a multiple of 3 as data is obtained every 3 hours. \n"
+#                                           "Got "+str(args.horizon))
+
+dict_aer = {"du": (["aermr05", "aermr06"], "duaod550"),
+            "bc": (["aermr09", "aermr10"], "bcaod550"),
+            "om": (["aermr07", "aermr08"], "omaod550"),
+            "ss": (["aermr02", "aermr03"], "ssaod550"),
+            "su": (["aermr11"], "suaod550"),
+            "pm": (["pm2p5"], "aod550")}
+
+dict_Data = {'YEAR':{'train':{'sfc':'Cams/cams-global-reanalysis-year_2020-07-01_2021-05-31_levtype_sfc.nc',
+                               'pl':'Cams/cams-global-reanalysis-year_2020-07-01_2021-05-31_levtype_pl.nc'},
+                     'trainGen':{'sfc':'Cams/cams-global-reanalysis_2021-06-01_2021-06-30_levtype_sfc.nc',
+                                   'pl':'Cams/cams-global-reanalysis_2021-06-01_2021-06-30_levtype_pl.nc'},
+                     'test':[{'sfc':'Cams/cams_reanalysis_year-testSet_2021-07-01_2022-07-01_levtype_sfc.nc',
+                                'pl':'Cams/cams_reanalysis_year-testSet_2021-07-01_2022-07-01_levtype_pl.nc'}],
+                     'testPeriod':['YEAR'],
+                      'trainStartDate':datetime.datetime(2019, 7, 1), 'trainEndDate':datetime.datetime(2020, 6, 1),
+                      'trainGenStartDate':datetime.datetime(2020, 6, 1), 'trainGenEndDate':datetime.datetime(2020, 7, 1),
+                      'testStartDate':datetime.datetime(2020, 7, 1), 'testEndDate':datetime.datetime(2021, 7, 1),
+    'testDates' : ["202007010100-202010010000", "202010010100-202101010000", "202101010100-202104010000",
+                 "202104010100-202107010000"],
+'trainingDates' : ["201907010100-201910010000", "201910010100-202001010000", "202001010100-202004010000",
+                 "202004010100-202007010000"]}
+             }
+
+testP = 'YEAR'
+dict_Path = dict_Data[testP]
+sfc_test_path = dict_Path['test'][0]['sfc']
+pl_test_path = dict_Path['test'][0]['pl']
+
+sfc_train_path = dict_Path['train']['sfc']
+pl_train_path = dict_Path['train']['pl']
+
+sfc_train_pathGen = dict_Path['trainGen']['sfc']
+pl_train_pathGen = dict_Path['trainGen']['pl']
+
+trainStartDate = dict_Path['trainStartDate']
+trainEndDate = dict_Path['trainEndDate']
+trainGenStartDate = dict_Path['trainGenStartDate']
+trainGenEndDate = dict_Path['trainGenEndDate']
+testStartDate = dict_Path['testStartDate']
+testEndDate = dict_Path['testEndDate']
+
+trainingDates = dict_Path['trainingDates']
+testDates = dict_Path['testDates']
+
+# if args.trainPeriod not in dict_Data.keys():
+#     raise argparse.ArgumentError(trainPeriod_arg, "trainPeriod should be one of [sum20, aut20, win21, spr21, sum21, YEAR]. \n"
+#                                                   "Got "+str(args.trainPeriod))
+
+args.version = "_" + args.origData + args.version
+if args.horizon != 0:
+    args.version = "_pred"+str(args.horizon)+"h"+args.version
+if args.log:
+    args.version = "_log"+args.version
+args.version = "_"+args.aer+args.version
+if args.matchTrainPeriods:
+    args.version = "_" + 'M' + args.version
+else:
+    args.version = "_"+args.version
+name_model = "ML-regression_"+args.model+args.version
+
+if args.origData=="Cams":
+    if args.matchTrainPeriods:
+        train_set = DatasetCams(pathSfcList=[sfc_train_pathGen, sfc_train_path],
+                                                pathPlList=[pl_train_pathGen, pl_train_path],
+                                                var=args.aer, horizon=args.horizon, log=args.log, normal=args.normal)
+    else:
+        train_set = DatasetCams(pathSfcList=[sfc_train_path],
+                                            pathPlList=[pl_train_path],
+                                            var=args.aer, horizon=args.horizon, log=args.log, normal=args.normal)
+elif args.origData == "Al-EUR":
+    if args.matchTrainPeriods:
+        train_set = DatasetAladinEur(list_dates=trainingDates, end_date=None, log=args.log, normal=args.normal)
+    else:
+        train_set = DatasetAladinEur(list_dates=trainingDates, end_date=trainEndDate, log=args.log, normal=args.normal)
+train_loader = DataLoader(train_set, batch_size=1, shuffle=True)
+
+MSE_loss = torch.nn.MSELoss(reduction='none')
+
+pPrep = PlotPrep()
+if args.log:
+    dis = Distrib()
+
+if not args.load:
+
+    N = len(train_loader)
+    L = train_set.height * train_set.width
+    x = cp.zeros((N, L))
+    y = cp.zeros((N, L))
+
+    for idx_batch, (inputTrain_i, targetTrain_i) in enumerate(train_loader):
+
+        # if idx_batch >= args.N:
+        #     break
+
+        inputTrain_i = inputTrain_i.float()
+        targetTrain_i = targetTrain_i.float()
+
+        inputTrain_i = cp.asarray(inputTrain_i)
+        targetTrain_i = cp.asarray(targetTrain_i)
+
+        x_i = cp.ravel(inputTrain_i)
+        y_i = cp.ravel(targetTrain_i)
+
+        x[idx_batch] = x_i
+        y[idx_batch] = y_i
+
+    x = x.reshape(-1, 1)
+    y = cp.ravel(y)
+    with config_context(array_api_dispatch=True):
+        if args.model == "RandomForest":
+            model = RandomForestRegressor(random_state=0, verbose=True)
+        else:
+            raise argparse.ArgumentError(model_arg, "unkown model")
+        os.makedirs("savedModels/" + name_model + "/", exist_ok=True)
+        print("\n Starting training")
+        model = model.fit(X=x.get(), y=y.get())
+        print("\n End of training")
+    pickle.dump(model, open("savedModels/" + name_model + "/model.pkl", 'wb'))
+else:
+    model = pickle.load(open("savedModels/" + name_model + "/model.pkl", 'rb'))
+p = model.predict
+
+
+losses = []
+times = []
+r_losses = []
+c_losses = []
+MBE = []
+MBE_R = []
+# Ds = []
+sim_idces = []
+
+def test(sfc_test_path, pl_test_path, testDates, origData):
+    # Beginning test
+    if origData=="Cams":
+        test_set = DatasetCams(pathSfcList=[sfc_test_path],
+                               pathPlList=[pl_test_path],
+                               var=args.aer, horizon=args.horizon, log=args.log, normal=args.normal)
+        maxT = test_set.maxTest
+        minT = test_set.minTest
+    elif origData == "Al-EUR":
+        test_set = DatasetAladinEur(list_dates=testDates, end_date=None, step=args.testStep, normal=args.normal)
+        maxT = test_set.maxTest
+        minT = test_set.minTest
+    newMax = (maxT - minT)
+    simLoss = FSIMLoss(chromatic=False, data_range=newMax)
+    test_loader = DataLoader(test_set, batch_size=1, shuffle=False)
+    os.makedirs("results/" + name_model + "/", exist_ok=True)
+    # os.makedirs("results/" + name_model + "/" + testPeriod + "/", exist_ok=True)
+    for idx_batch_test, (input_, target) in enumerate(test_loader):
+
+        input_ = input_.float()
+        target = target.float()
+
+        input_ = input_.to(device)
+        target = target.to(device)
+        input_ = input_[0]
+        target = target[0]
+
+        path_save_img = "results/" + name_model + "/results_test_img" + str(idx_batch_test + 1) + ".pdf"
+        path_save_img2 = "results/" + name_model + "/error_test_img" + str(idx_batch_test + 1) + ".pdf"
+        start_time = time.time()
+        input0 = cp.asarray(input_)
+        input1 = input0.reshape(-1, 1)
+        with config_context(array_api_dispatch=True):
+            predic0 = p(input1.get())
+        predic = predic0.reshape(input0.shape)
+        pred_time = time.time() - start_time
+        times.append(pred_time)
+        predic = torch.from_numpy(predic)
+        predic = predic.to(device)
+
+        if args.log:
+            input_ = dis.reverse(input_)
+            target = dis.reverse(target)
+            predic = dis.reverse(predic)
+
+        predic = Filter(predic)
+        target = Filter(target)
+        mt = torch.mean(target).item()
+        loss = torch.mean(torch.sqrt(MSE_loss(predic, target))).item()
+        relative_loss = (loss / mt)
+        losses.append(loss)
+        r_losses.append(relative_loss)
+        mbe = torch.mean(predic - target).item()
+        mbe_r = (mbe / mt)
+        # d1 = torch.sum(torch.pow((predic - target), 2)).item()
+        # d2 = torch.sum(torch.pow((torch.abs(predic - mt) + torch.abs(target - mt)), 2)).item()
+        # d = 1 - d1/d2
+        # Ds.append(d)
+        predF1 = predic - torch.min(predic)
+        predF = ((predF1 > 0) * (predF1 < newMax)) * predF1 + (predF1 >= newMax) * newMax
+        targetF1 = target - minT
+        targetF = ((targetF1 > 0) * (targetF1 < newMax)) * targetF1 + (targetF1 >= newMax) * newMax
+        sim_idx = simLoss(predF[None, :], targetF[None, :]).item()
+        sim_idces.append(sim_idx)
+        MBE.append(mbe)
+        MBE_R.append(mbe_r)
+        pred = pPrep(predic, norm=True)
+        targ = pPrep(target, norm=True)
+        inp = pPrep(input_, norm=True)
+        plt.suptitle("Test results.")
+        # Each file contains two images : one for the prediction and one for the target
+        ax0 = plt.subplot(2, 2, 1)
+        ax0.imshow(inp)
+        ax0.set_title("Input (AOD)")
+        ax1 = plt.subplot(2, 2, 3)
+        ax1.imshow(pred)
+        ax1.set_title(f"Prediction. Test error : {loss:1.3f}\u03BCg/m\N{superscript three} or {relative_loss*100:1.3f}%."
+                      f" \n MBE : {mbe:1.3f}\u03BCg/m\N{superscript three} or {mbe_r*100:1.3f}%."
+                      f" \n sim-index loss : {sim_idx*100:1.3f}%.")
+        ax2 = plt.subplot(2, 2, 4)
+        ax2.imshow(targ)
+        ax2.set_title("Target.")
+        plt.savefig(path_save_img)
+        plt.close()
+
+        q1, q2, q3 = torch.quantile(target, 0.25), torch.quantile(target, 0.5), torch.quantile(target, 0.75)
+        ct = torch.where(target > q1, 1, 0) + torch.where(target > q2, 1, 0) + torch.where(target > q3, 1, 0)
+        cpred = torch.where(predic > q1, 1, 0) + torch.where(predic > q2, 1, 0) + torch.where(predic > q3, 1, 0)
+        ce = torch.mean(torch.abs(ct - cpred).float()).item()
+        c_losses.append(ce)
+        ct = pPrep(ct/4)
+        cpred = pPrep(cpred/4)
+        error = torch.abs(target - predic)
+        error = pPrep(error, error=True, bw=args.e_bw, norm=True)
+        plt.suptitle("Error visualisation.")
+        ax0 = plt.subplot(2, 2, 1)
+        if args.e_bw:
+            ax0.imshow(error, cmap='gray')
+        else:
+            ax0.imshow(error)
+        ax0.set_title(f"Test error : {loss:1.3f}\u03BCg/m\N{superscript three} or {relative_loss*100:1.3f}%.")
+        ax1 = plt.subplot(2, 2, 3)
+        ax1.imshow(cpred)
+        ax1.set_title(f"Segmented prediction. \n Classification error : {ce:1.3f}.")
+        ax2 = plt.subplot(2, 2, 4)
+        ax2.imshow(ct)
+        ax2.set_title("Segmented target.")
+        plt.savefig(path_save_img2)
+        plt.close()
+
+    dict_perf = {"Average inference time (s)": np.mean(times),
+                 "STD of inference time (s)": np.std(times),
+                 "Average prediction error (\u03BCg/m\N{superscript three})": np.mean(losses),
+                 "STD of prediction error  (\u03BCg/m\N{superscript three})": np.std(losses),
+                 "Average relative error (%)": 100 * np.mean(r_losses),
+                 "STD of relative error (%)": 100 * np.std(r_losses),
+                 "Average classification error": np.mean(c_losses),
+                 "STD of classification error": np.std(c_losses),
+                 "Average bias error (\u03BCg/m\N{superscript three})": np.mean(MBE),
+                 "STD of bias error (\u03BCg/m\N{superscript three})": np.std(MBE),
+                 "Average relative bias error (%)": 100 * np.mean(MBE_R),
+                 "STD of relative bias error (%)": 100 * np.std(MBE_R),
+                 # "Average index of agreement (%)": 100 * np.mean(Ds),
+                 # "STD of index of agreement (%)": 100 * np.std(Ds),
+                 "Average sim-index loss (%)": 100 * np.mean(sim_idces),
+                 "STD of sim-index loss (%)": 100 * np.std(sim_idces)
+                 }
+    with open("results/" + name_model + "/perf_test.txt", "w+") as f:
+        json.dump(dict_perf, f, indent=2)
+        f.close()
+    return dict_perf
+    # End test
+
+# if args.testPeriod != "all":
+perf = test(sfc_test_path, pl_test_path, testDates, args.origData)
+# else:
+#     for i in range(len(dict_Path['test'])):
+#         perfPer = test(dict_Path['test'][i]['sfc'], dict_Path['test'][i]['pl'], dict_Path['testPeriod'][i])
+
+# Saving the arguments and the equation
+os.makedirs("savedModels/" + name_model + "/", exist_ok=True)
+with open("savedModels/" + name_model + "/arguments.txt", "w+") as f:
+    json.dump(args.__dict__, f, indent=2)
+    f.close()
\ No newline at end of file
--- a/README.md
+++ b/README.md
+Code corresponding to the paper : (include DOI)
+
+Files kriging.py, ML.py and LinearPred.py correspond to baseline methods used as comparison.
+
+File Aod2SfcAer_2DED.py is used for all experiments on models using several variables.
+
+command_Aladin.txt and command_Cams.txt contain all commands for experiments on the Aladin and Cams datasets, respectively. Less experiments were realised on Aladin than on Cams, as they take a very long time.
+
+Regarding datasets paths, the Aladin dataset is considered to be located in the AeroClim2 subfolder of the current folder, and the Cams dataset in the Cams subfolder. However they could not be included in this repository because of the large size of the corresponding files. The data can be found at : (include link/DOI)
--- a/command_Aladin.txt
+++ b/command_Aladin.txt
+########## Baseline
+
+nohup python3 LinearPred.py --D=3 --log=True --origData=Al-EUR >>stdout.txt 2>>stderr.txt &
+
+nohup python3 ML.py --log=True --origData=Al-EUR >>stdout.txt 2>>stderr.txt &
+
+nohup python3 kriging.py --type=ordinary --variogram=hole-effect --bc_type=random --prob_bc=5 --log=True --origData=Al-EUR >>stdout.txt 2>>stderr.txt &
+
+############## Cams best results
+
+nohup python3 Aod2SfcAer_2DED.py --skip_co=True --linear_layer=True --aer=pm --testPeriod=next --trainPeriod=YEAR --log=True --FSIMloss=True --wind=True --extract_norm_wind=True --humidity=True --pressure=True --temperature=True --angstrom=True --origData=Al-EUR >>stdout.txt 2>>stderr.txt &
+
+nohup python3 Aod2SfcAer_2DED.py --skip_co=True --linear_layer=True --aer=pm --testPeriod=next --trainPeriod=YEAR --log=True --FSIMloss=True --wind=True --extract_norm_wind=True --humidity=True --pressure=True --temperature=True --origData=Al-EUR >>stdout.txt 2>>stderr.txt & disown
+
+nohup python3 Aod2SfcAer_2DED.py --skip_co=True --linear_layer=True --aer=pm --testPeriod=next --trainPeriod=YEAR --log=True --FSIMloss=True --wind=True --extract_norm_wind=True --humidity=True --pressure=True --temperature=True --angstrom=True --origData=Al-EUR --fusion='DF' >>stdout.txt 2>>stderr.txt & disown
+
+######## Hybrid
+
+nohup python3 Aod2SfcAer_2DED.py --skip_co=True --linear_layer=True --aer=pm --testPeriod=next --trainPeriod=YEAR --log=True --FSIMloss=True --wind=True --extract_norm_wind=True --humidity=True --pressure=True --temperature=True --angstrom=True --origData=Al-EUR --fusion='Hybrid1F' >>stdout.txt 2>>stderr.txt & disown
+
+nohup python3 Aod2SfcAer_2DED.py --skip_co=True --linear_layer=True --aer=pm --testPeriod=next --trainPeriod=YEAR --log=True --FSIMloss=True --wind=True --extract_norm_wind=True --humidity=True --pressure=True --temperature=True --angstrom=True --origData=Al-EUR --fusion='Hybrid2F' >>stdout.txt 2>>stderr.txt & disown
--- a/command_Cams.txt
+++ b/command_Cams.txt
--- a/dataset_generation/.attach_pid18459
+++ b/dataset_generation/.attach_pid18459
--- a/dataset_generation/__init__.py
+++ b/dataset_generation/__init__.py
+from .dataset_Cams import DatasetCams, DatasetCamsGAN, DatasetCamsHybrid
+from .transforms import BCnaive, BCrandom, BCrandomLand, BCfromFile, ApplyBC, PlotPrep, ApplyBC, MRtoConc3D, MRtoConc2D, BCloss, Filter, FilterAod, MultAod, Distrib, PrepKriDataFromSparse, ConvPM, AirQualityQuantif
+from .dataset_kri import DatasetCamsKri, DatasetAladinEurKri
+from .dataset_AeroClim import DatasetAladinEur, DatasetAladinEurGAN
\ No newline at end of file
--- a/dataset_generation/__pycache__/__init__.cpython-37.pyc
+++ b/dataset_generation/__pycache__/__init__.cpython-37.pyc
--- a/dataset_generation/__pycache__/__init__.cpython-39.pyc
+++ b/dataset_generation/__pycache__/__init__.cpython-39.pyc
--- a/dataset_generation/__pycache__/dataset_AeroClim.cpython-37.pyc
+++ b/dataset_generation/__pycache__/dataset_AeroClim.cpython-37.pyc
--- a/dataset_generation/__pycache__/dataset_Aladin.cpython-37.pyc
+++ b/dataset_generation/__pycache__/dataset_Aladin.cpython-37.pyc
--- a/dataset_generation/__pycache__/dataset_Cams.cpython-37.pyc
+++ b/dataset_generation/__pycache__/dataset_Cams.cpython-37.pyc
--- a/dataset_generation/__pycache__/dataset_CamsForecast.cpython-37.pyc
+++ b/dataset_generation/__pycache__/dataset_CamsForecast.cpython-37.pyc