Initial commit

6724705a · Solène · 9cfd5b5e · 6724705a · 6724705a · 6724705a
Commit 6724705a authored 4 years ago by Solène
--- a/models/efficientnet.py
+++ b/models/efficientnet.py
@@ -5,7 +5,7 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F

-def get_net(version_eff, stride=1):
+def get_net(version_eff, stride):

    n_channels_dict = {'efficientnet-b0': 1280, 'efficientnet-b1': 1280, 'efficientnet-b2': 1408,
                           'efficientnet-b3': 1536, 'efficientnet-b4': 1792, 'efficientnet-b5': 2048,

--- a/models/srnet.py
+++ b/models/srnet.py
@@ -6,9 +6,9 @@ import numpy as np

 class get_net(nn.Module):

-    def __init__(self, params):
+    def __init__(self, image_size):
        super(get_net, self).__init__()
-        self.im_size = params.image_size
+        self.im_size = image_size
        
        def _conv2d(in_channels, out_channels, stride=1, kernel_size=3, padding=1):
            return nn.Conv2d(in_channels=in_channels,\

--- a/models/xunet.py
+++ b/models/xunet.py
@@ -7,12 +7,12 @@ import numpy as np

 class get_net(nn.Module):

-    def __init__(self, params):
+    def __init__(self, folder_model, n_loops, image_size):
        super(get_net, self).__init__()
-        self.n_loops = params.n_loops
-        self.DCT4_kernel = torch.tensor(np.load(params.folder_model+'DCT_4.npy')\
+        self.n_loops = n_loops
+        self.DCT4_kernel = torch.tensor(np.load(folder_model+'DCT_4.npy')\
                                         .reshape((4,4,16,1)).transpose((2,3,0,1))).cuda().float()
-        self.im_size = params.image_size
+        self.im_size = image_size
        
        def _conv2d(in_channels, out_channels, stride):
            return nn.Conv2d(in_channels=in_channels,\

--- a/readme.md
+++ b/readme.md
@@ -10,8 +10,8 @@ A run of a protocol need data in input, which are:
 * a data base of cover images, where each cover is saved in a different .npy file.
 * a data base of stego images (each in a different .npy file)
 * a data base of initial costs (each in a different .npy). It can have two shapes:
-	* 3 x image_size x image_size : in that case, first channel for cost of -1, second for 0 and final for +1 
-	* or image_size x image_size:  in that case, only channel for symmetric cost of doing -1 or +1
+	* (3, image_size, image_size): in that case, the first channel holds the cost of -1, the second the cost of 0 and the last the cost of +1
+	* or (image_size, image_size): in that case, the single channel holds the symmetric cost of doing -1 or +1

 # Details

@@ -36,7 +36,6 @@ A run of the protocol will save all values in a folder, which is defined in the
 The organization of this folder is described by the illustration, and in the following.
 At the beginning, it creates the file description.txt, which summarizes all the parameters parsed at the beginning of the run of main.py. 

-
 * data_adv_$k/ 
 	* adv_final/
 		* $i.npy: adversarial stego images of size (image_size, image_size) stored in different .npy files
@@ -51,9 +50,32 @@ At the beginning, it creates file description.txt which resumes all parameters p
 		* probas.npy: output stego class probability obtained from the softmax of logits.npy.
 * data_train_$k/
 	* index.npy: of size (10000,), index of the adversarial stegos picked from the strategy given in input of the protocol. The index of stegos are stored following file --permutation_files.npy. The index are integers i comprised between 0 (for initial stego images stored in --data_dir_stego_0) and k (for adversarial stegos stored in data_adv_$i/adv_final/ for i>=1).
+* train_$model_$k/
+	* error_rate.npy: of size (3,). Error rate given at the end of the training of model $model at iteration $k, on the train set, validation set and test set (in this order).
+	* checkpoints.ckpt: during the training, every time the classifier achieves a higher accuracy on the validation set, the weights of the CNN are saved in a .ckpt file with the value of the epoch in its name. As a result, at the end of the training the best classifier is saved, and it is the one with the highest integer in its name.
+	* log.txt: the values of the error rate on the train and validation sets are saved in this .txt file at each epoch of the training.
+

+### How the classifiers are trained
+Three different CNN architectures are available: XuNet, SRNet and EfficientNet. Each is defined in its own file in the ./models/ folder. To add more, you can add a file defining the function get_net() and the class TrainGlobalConfig(), and modify the file script_train.py accordingly.
+
+EfficientNet is initialized with weights obtained by training on ImageNet, saved in a file in ./models/. You can choose the version of EfficientNet (from B0 to B7) and the stride of the first convolution operation.
+
+The classifiers are trained on cover images and on stegos that are newly generated in each batch with the corresponding cost map. This makes it possible, if desired (depending on parameters --CL and --start_emb_rate), to use curriculum learning during the training, since new stegos embedding a payload of any size can be generated during the training.
+
+### How backpack works
+Multiple versions of backpack are available, which have been or will be introduced in past/future publications. These are:
+* SGE: Softmax Gumbel Estimation
+* Double Tanh
+* Double Compact Tanh. 
+
+To do the Gradient Descent, one needs to compute 
+
+
+# Parameters to pass in main.py
+
+Now that we have discussed all the steps of the protocol, here is an explanation of the parameters to pass to main.py on the command line:

-# Parameters to pass in main.py:
 * begin_step: first iteration of the protocol. Should be equal to 0 if you have never launched it.
 * number_step: number of further iterations for which to launch the protocol
 * folder_model: absolute path leading to the folder './models/' stored in this folder. It contains the model and the architectures of the steganalysts networks. 
@@ -85,8 +107,6 @@ At the beginning, it creates file description.txt which resumes all parameters p
 * lr: float value for the value of the learning rate to use in ADAM optimizer for the gradient descent. Advices: use 0.5 for QF 75 and 0.05 for QF 100.
 	

-### How are trained the classifiers
-The classifiers are trained between cover images, and new stegos are generated in each batch with the corresponding cost map. It allows to train the classifier, if desired (depending on parameters --CL and --start_emb_rate) to use curriculum learning during the training, such as new stegos embedding any size of payload can be generated during the training.


 # Files and folders in the root

--- a/script_evaluate_classif.py
+++ b/script_evaluate_classif.py
@@ -25,36 +25,43 @@ def softmax(array):

 class cover_stego_loader(object):

-    def __init__(self, params, iteration, mode): # mode = stego or cover
-        self.params = params
-        n_images = params.train_size + params.valid_size + params.test_size 
-        self.files = np.load(params.folder_model + 'permutation_files.npy')[:n_images]
+    def __init__(self, iteration, mode, train_size, valid_size, test_size, \
+                        batch_size_eval, QF, image_size, folder_model, \
+                        data_dir_prot, data_dir_cover, data_dir_stego_0): # mode = stego or cover
+        n_images = train_size + valid_size + test_size 
+        self.files = np.load(folder_model + 'permutation_files.npy')[:n_images]
        self.train_counter = 0
        self.train_data_size = len(self.files)
-        self.train_num_batches = int(np.ceil(1.0 * self.train_data_size / params.batch_size_eval))
+        self.train_num_batches = int(np.ceil(1.0 * self.train_data_size / batch_size_eval))
        self.iteration_step = iteration
        self.mode = mode
-        self.c_quant = np.load(params.folder_model + 'c_quant_'+str(params.QF)+'.npy')
+        self.c_quant = np.load(folder_model + 'c_quant_'+str(QF)+'.npy')
+        self.image_size = image_size
+        self.QF = QF
+        self.batch_size_eval = batch_size_eval
+        self.data_dir_prot = data_dir_prot
+        self.data_dir_cover = data_dir_cover
+        self.data_dir_stego_0 = data_dir_stego_0

    def next_batch(self):

-        borne_sup = min(self.train_counter + self.params.batch_size_eval, len(self.files))
+        borne_sup = min(self.train_counter + self.batch_size_eval, len(self.files))
        n_images = borne_sup-self.train_counter

-        next_batch_X = np.zeros((n_images,self.params.image_size,self.params.image_size),dtype=np.float32)
+        next_batch_X = np.zeros((n_images,self.image_size,self.image_size),dtype=np.float32)

        files_batch = self.files[self.train_counter:borne_sup]
        for i,file in enumerate(files_batch):
            if(self.mode=='stego'):
                if(self.iteration_step>0):
                    try:
-                        image = np.load(self.params.data_dir_prot+'data_adv_'+str(self.iteration_step)+'/adv_final/'+file[:-4]+'.npy')
+                        image = np.load(self.data_dir_prot+'data_adv_'+str(self.iteration_step)+'/adv_final/'+file[:-4]+'.npy')
                    except:
-                        image = np.load(self.params.data_dir_stego_0 + file[:-4]+'.npy')
+                        image = np.load(self.data_dir_stego_0 + file[:-4]+'.npy')
                else:
-                    image = np.load(self.params.data_dir_stego_0 + file[:-4]+'.npy')
+                    image = np.load(self.data_dir_stego_0 + file[:-4]+'.npy')
            elif(self.mode=='cover'):
-                image = np.load(self.params.data_dir_cover + file[:-4] + '.npy')
+                image = np.load(self.data_dir_cover + file[:-4] + '.npy')

            spat_image = compute_spatial_from_jpeg(image, self.c_quant)
            spat_image /= 255.0
@@ -62,38 +69,46 @@ class cover_stego_loader(object):
    
        next_batch_X = np.reshape(next_batch_X,(next_batch_X.shape[0], 1, next_batch_X.shape[1],next_batch_X.shape[2]))

-        self.train_counter = (self.train_counter + self.params.batch_size_eval) % self.train_data_size
+        self.train_counter = (self.train_counter + self.batch_size_eval) % self.train_data_size
        return(next_batch_X, files_batch)  
    
    def reset_counter(self):
        self.train_counter = 0


-def evaluate_step_i(params, iteration_f, iteration_adv): # if iteration_adv == -1 : cover
-    if(params.model=='efnet'):
-        net = get_net_ef(params.version_eff, params.stride).cuda()
-    elif(params.model=='xunet'):
-        net = get_net_xu(params).cuda()
-    elif(params.model=='srnet'):
-        net = get_net_sr(params).cuda()
+def evaluate_step_i(iteration_f, iteration_adv, model, data_dir_prot, train_size, valid_size, test_size, \
+                        batch_size_eval, QF, image_size, folder_model, \
+                        data_dir_prot, data_dir_cover, data_dir_stego_0, \
+                        version_eff=None, stride=None, n_loops=None) # if iteration_adv == -1 : cover
+    
+    if(model=='efnet'):
+        net = get_net_ef(version_eff, stride).cuda()
+    elif(model=='xunet'):
+        net = get_net_xu(folder_model, n_loops, image_size).cuda()
+    elif(model=='srnet'):
+        net = get_net_sr(image_size).cuda()

    best_epoch = [int(x.split('-')[-1][:3]) \
-        for x in os.listdir(params.data_dir_prot+'train_'+params.model+'_'+str(iteration_f)+'/') \
+        for x in os.listdir(data_dir_prot+'train_'+model+'_'+str(iteration_f)+'/') \
        if 'best' in x]
    best_epoch.sort()
    best_epoch = str(best_epoch[-1])
-    path = params.data_dir_prot+'train_'+params.model+'_'+str(iteration_f)+'/best-checkpoint-'+'0'*(3-len(best_epoch))+best_epoch+'epoch.bin'
+    path = data_dir_prot+'train_'+model+'_'+str(iteration_f)+'/best-checkpoint-'+'0'*(3-len(best_epoch))+best_epoch+'epoch.bin'
    checkpoint = torch.load(path)
    net.load_state_dict(checkpoint['model_state_dict'])
    net.eval()
   
    # Create directory
    if(iteration_adv==-1):
-        directory = params.data_dir_prot+'cover/eval_'+params.model+'_'+str(iteration_f)+'/'
-        dataloader = cover_stego_loader(params,iteration_adv,'cover')
+        directory = data_dir_prot+'cover/eval_'+model+'_'+str(iteration_f)+'/'
+        mode = 'cover'
    else:
-        directory = params.data_dir_prot+'data_adv_'+str(iteration_adv)+'/eval_'+params.model+'_'+str(iteration_f)+'/'
-        dataloader = cover_stego_loader(params,iteration_adv,'stego')
+        directory = data_dir_prot+'data_adv_'+str(iteration_adv)+'/eval_'+model+'_'+str(iteration_f)+'/'
+        mode = 'stego'
+
+    dataloader = cover_stego_loader(iteration_adv, mode, train_size, valid_size, test_size, \
+                        batch_size_eval, QF, image_size, folder_model, \
+                        data_dir_prot, data_dir_cover, data_dir_stego_0)

    result_fi = np.empty((0,2))
    dataloader.reset_counter()
@@ -104,7 +119,6 @@ def evaluate_step_i(params, iteration_f, iteration_adv): # if iteration_adv == -
        result_fi = np.concatenate((result_fi,l.cpu().detach().numpy()))
    np.save(directory+'probas',softmax(result_fi)[:,1])
    np.save(directory+'logits',result_fi)
-    #return(result_fi, softmax(result_fi)[:,1])


 if __name__ == '__main__':
@@ -141,7 +155,8 @@ if __name__ == '__main__':
    params = argparser.parse_args()

    print('Evaluate model '+params.model+' at iteration ' +str(params.iteration_f)+ ' on the adv images generated at '+ str(params.iteration_adv))
-    evaluate_step_i(params, params.iteration_f, params.iteration_adv)
+    
+    evaluate_step_i(**params)




--- a/script_train.py
+++ b/script_train.py
@@ -33,6 +33,8 @@ def my_collate(batch, pair_training=False):
    else:
        return torch.utils.data.dataloader.default_collate(batch)

+def train(iteration_step, folder_model, data_dir_prot, permutation_files, num_of_threads, \
+        data_dir_cover, cost_dir, train_size, )
    

 if __name__ == '__main__':
@@ -79,9 +81,6 @@ if __name__ == '__main__':
   
    argparser.add_argument('--load_model',type=str, default=None, help='Path to the saved efficient model')

-
-
-
    params = argparser.parse_args()

    params.pair_training = params.pair_training=='yes'
@@ -151,13 +150,13 @@ if __name__ == '__main__':
        )

    if(params.model=='efnet'):
-        net = get_net_ef(params.version_eff, params.stride).cuda()
+        net = get_net_ef(version_eff, stride).cuda()
        trainGlobalConfig = TrainGlobalConfig_ef(params)
    elif(params.model=='xunet'):
-        net = get_net_xu(params).cuda()
+        net = get_net_xu(folder_model, n_loops, image_size).cuda()
        trainGlobalConfig = TrainGlobalConfig_xu(params)
    elif(params.model=='srnet'):
-        net = get_net_sr(params).cuda()
+        net = get_net_sr(image_size).cuda()
        net.init()
        trainGlobalConfig = TrainGlobalConfig_sr(params)