diff --git a/.DS_Store b/.DS_Store index 4129a4dda88d42c9c462c600819e5136fe460e66..8d79c1b716ace8fe115eb5dec90b4d48eb401962 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/blind_steganalysts/DCTR.py b/blind_steganalysts/DCTR.py new file mode 100644 index 0000000000000000000000000000000000000000..87f9115d94f2322a3c40a4ec33200cfd9cab3272 --- /dev/null +++ b/blind_steganalysts/DCTR.py @@ -0,0 +1,217 @@ +from math import sqrt, pi +import numpy as np +from scipy.ndimage.filters import convolve +from scipy.fftpack import idct +from time import time + +import os, sys +# CURDIR = os.path.dirname(os.path.realpath(__file__)) +# JPEGDIR = os.path.join(CURDIR, os.pardir, os.pardir, os.pardir) +# if JPEGDIR not in sys.path: +# sys.path.append(JPEGDIR) + +# from jpeg import jpeg +#from ....jpeg import jpeg + +def unique_rows(a): + a = np.ascontiguousarray(a) + unique_a = np.unique(a.view([('', a.dtype)]*a.shape[1])) + return unique_a.view(a.dtype).reshape((unique_a.shape[0], a.shape[1])) + + +class DCTR_process: + """ + This class represents PHARM algorithm + """ + + def __init__(self, image_path, qual): + """ + Store the image path and the quality factor + """ + try: + self.imagepath = image_path + self.QF = int(qual) + except (ValueError): + "Please enter an integer as quality factor" + raise + + def idct2(self, x): + return idct(idct(x.astype('float'), norm='ortho').T, norm='ortho').T + + def compute_spatial_domain(self, dct_coeffs, c_quant): + """ + Decompress the JPEG Image in floats + """ + w,h = dct_coeffs.shape + sp_im = np.zeros((w,h)) + for bind_i in range(w//8): + for bind_j in range(h//8): + dct_bloc = dct_coeffs[bind_i*8:(bind_i+1)*8,bind_j*8:(bind_j+1)*8] + sp_im[bind_i*8:(bind_i+1)*8,bind_j*8:(bind_j+1)*8] = self.idct2(dct_bloc*c_quant) + return sp_im+128 + + # Computation of the histrogram + def Hist(self, res): + nbins = self.T+1 + M,N = res.shape + flatcount = np.zeros(nbins) + flatcount = np.bincount(res.flatten(1), minlength=nbins) + out = flatcount.T + + return out + + def compute_DCTR(self, color=False) : + """ + Computes features of the image. + + :param color: specify wether the image contains color or not + """ + # if os.path.getsize(self.imagepath) == 0 : + # raise Exception('Empty file : ' + self.imagepath) + + #img = jpeg(self.imagepath, verbosity=0) + img = np.load(self.imagepath) + if not color : + # Force usage of only a layer + #dct_coeff = img.coef_arrays[0] + dct_coeff=img + #c_quant = img.quant_tables[0] + c_quant = np.load('./c_quant_'+str(self.QF)+'.npy') + return self.compute_DCTR_layer(c_quant, dct_coeff) + else : + # Uses the three layers + if len(img.quant_tables) < 3 : + raise Exception('This image seems to be a greyscale image : ' + self.imagepath) + else : + # If there is less that 3 DCT coefficients specified, it means that they are the sames. + # ie. same for both chrominance or same for all layers. + for i in range(1, len(img.quant_tables)) : + if img.quant_tables[i] is None : + # don't have to copy it as it won't be modified + img.quant_tables[i] = img.quant_tables[i-1] + + # concatenates the 3 features + feats = [self.compute_DCTR_layer(img.quant_tables[i], img.coef_arrays[i]) for i in range(3)] + return np.concatenate(feats) + + def compute_DCTR_layer(self, c_quant, dct_coeffs) : + """ + Computes features for one layer. + For example, for a greyscale image it will be the hole features. 
+ """ + self.T = 4 + total_convol_time = 0 + is_empty = False + # Set parameters + # Number of histogram bins + # if os.path.getsize(self.imagepath) != 0: + # I_STRUCT = jpeg(self.imagepath, verbosity=0) + # is_empty = False + + # compute quantization step based on quality factor + if self.QF < 50 : + q = min(8 * (50.0 / self.QF), 100) + else: + q = max(8 * (2 - (self.QF/50.0)), 0.2) + # q = max(8 * (2 - (75/50.0)), 0.2) + + # Prepare for computing DCT bases + k = np.array([0, 1, 2, 3, 4, 5, 6, 7]) + m = np.array([0, 1, 2, 3, 4, 5, 6, 7]) + [K,M] = np.meshgrid(k,m) + A=0.5*np.cos(((2.*K+1)*M*pi)/16) + A[0,:]=A[0,:]/sqrt(2) + A = A.T + + # Compute DCTR locations to be merged + mergedCoordinates = np.empty((25), dtype=object) + for i in range(5): + for j in range(5): + coordinates = np.array([[i,j], [i,8-j], [8-i,j], [8-i,8-j]]) + coordinates = [row for row in coordinates if row[0] < 8 and row[1] < 8] + coordinates = unique_rows(coordinates) + mergedCoordinates[i*5 + j] = coordinates; + #end = time() + #print(str(end-start) + " : OK init") + + # Decompress to spatial domain + # if not is_empty: + # dct_coeffs= I_STRUCT.coef_arrays[0] + # c_quant = I_STRUCT.quant_tables[0] + I_spatial = self.compute_spatial_domain(dct_coeffs, c_quant) + + # Warning: Modfied for NS! + #I_spatial = I_spatial[:-8,:-8] + + #end = time() + #print(str(end-start) + " : OK spatial") + + # Compute features + modeFeaDim = len(mergedCoordinates)*(self.T+1) + F = np.zeros(64*modeFeaDim) # single = 64 bits + if is_empty: + return F + for mode_r in range(8): + for mode_c in range(8): + modeIndex = mode_r*8 + mode_c + # Get DCT base for current mode + DCTbase = A[:,mode_r].reshape(1,-1)*A[:,mode_c].reshape(-1,1) # B(mode_r, mode_c) + + # Obtain DCT residual R by convolution between image in spatial domain and the current DCT base + start_convolve = time() + R = convolve(I_spatial-128, DCTbase) + R = R[np.ix_(list(range(3,R.shape[0]-4)), list(range(3,R.shape[0]-4)))] + end_convolve = time() + total_convol_time = total_convol_time + end_convolve - start_convolve + + # Quantization, rounding, absolute value, thresholding + R = np.absolute((R / q).round()) + R = R.astype(np.int16) + idx = R[:,:] > self.T + R[idx] = self.T + + # Core of the feature extraction + for merged_index in range (len(mergedCoordinates)): # 25 + f_merged = np.zeros(self.T+1) + for coord_index in range(len(mergedCoordinates[merged_index])): # 1, 2, or 4 + r_shift = mergedCoordinates[merged_index][coord_index][0] # a + c_shift = mergedCoordinates[merged_index][coord_index][1] # b + R_sub = R[np.ix_(list(range(r_shift,len(R),8)), list(range(c_shift,len(R[0]),8)))] # sous ensemble de R de coord rel (a, b) + f_merged = f_merged + self.Hist(R_sub) # ajout des bins + F_index_from = modeIndex*modeFeaDim+merged_index*(self.T+1) + F_index_to = modeIndex*modeFeaDim+merged_index*(self.T+1)+self.T+1 + F[F_index_from:F_index_to] = f_merged / sum(f_merged) + #end = time() + #print(str(end-start) + " : OK all") + #print("total convolve time : " + str(total_convol_time)) + return F + +from extractor import Extractor as ParentExtractor + + +class Extractor(ParentExtractor) : + """ + Extract caracteristics from an image using DCTR algorithm. 
+ + self.conf should specify : + - imgs.QF : quality factor of cover images (corresponding to jpeg library, and most open-source softwares, such as imagemagic) + - imgs.color : specify wether image is color or not + """ + + #file_type = 'jpg' + + def run(self, file) : + engine = DCTR_process(file, self.conf.QF) + result = engine.compute_DCTR(color=self.conf.color) + return np.array(result) + + + + +# Main function +if __name__ == '__main__': + dctr = DCTR_process('image/1.jpg',75) + t1 = time() + res = dctr.compute_DCTR() + t2 = time() + print("execution time:", t2-t1, "seconds") diff --git a/blind_steganalysts/GFR.py b/blind_steganalysts/GFR.py new file mode 100644 index 0000000000000000000000000000000000000000..a9fe3fad55ce8855d359c82f55e1a4f639b26cb6 --- /dev/null +++ b/blind_steganalysts/GFR.py @@ -0,0 +1,282 @@ +# -*- coding: utf-8 -*- +# Import modules from ../../.. +import os, sys +# PARDIR = os.path.dirname(os.path.realpath(__file__)) +# PARDIR = os.path.join(PARDIR, os.pardir, os.pardir, os.pardir) +# if PARDIR not in sys.path: +# sys.path.append(PARDIR) + +sys.path.append('../SRNet/') +#from ....jpeg import jpeg +from jpeg import dct +from jpeg import base +#from jpeg import compress + + +def dequantise(C,Q): + """Dequantise JPEG coefficients using the quantisation matrix Q.""" + [k,l] = C.shape + [m,n] = Q.shape + rep = (k//m, l//n) + return C * base.repmat(Q, rep) + + +from math import pi, cos, sin +import numpy as np +from scipy.signal import fftconvolve, convolve2d +import time +from scipy.ndimage.filters import convolve + +TIME = 0 + +def unique_rows(a): + a = np.ascontiguousarray(a) + unique_a = np.unique(a.view([('', a.dtype)]*a.shape[1])) + return unique_a.view(a.dtype).reshape((unique_a.shape[0], a.shape[1])) + +class GFR_process: + """ + This class represents GFR algorithm + """ + def __init__(self, image_path, qual): + """ + Store the image path and the quality factor + ------------------------------------------------------------------------- + Input: image_path ....... path to the JPEG image + qual ............. JPEG quality factor (can be either 75 or 95) + ------------------------------------------------------------------------- + """ + try: + self.imagepath = image_path + self.QF = int(qual) + # NR = number of rotations for Gabor kernel + self.NR = 32 + except (ValueError): + "Please enter an integer as quality factor (QF = 75 or 95)" + raise + + + def gaborkernel(self, sigma, theta, phi, gamma): + """ + sigma represents the scale parameter, the small sigma means high spatial + resolution, the image filtering coefficients reflect local properties in + fine scale, while the large sigma means low spatial resolution, the + coefficient reflect local properties in coarse scale. + theta specifies the orientation of 2D Gabor filters. + phi specifies the phase offset of the cosine factor. + gamma is the spatial aspect ratio ans specifies the ellipticity of + Gaussian factor + """ + # cf. 
page 16 of [1] + # sigma = 0.56 * lambda + # lambda denotes the wavelength of the cosine factor + lbd = sigma / 0.56 + gamma2 = gamma**2 + s = 1 / (2*sigma**2) + f = 2*pi/lbd + # sampling points for Gabor function + xx = np.arange(-7.0/2.0, 4) + x,y = np.meshgrid(xx,xx) + y = np.copy(-y) + + xp = x * cos(theta) + y * sin(theta) + yp = -x * sin(theta) + y * cos(theta) + + kernel = np.exp(-s*(xp*xp + gamma2*(yp*yp))) * np.cos(f*xp + phi) + # normalization + # in order to capture the embedding changes, all the 2D Gabor filters are + # made zero mean bu substracting the kernel mean from all its elements + # to form high-pass filter + kernel = kernel- np.sum(kernel)/np.sum(abs(kernel))*abs(kernel) + return kernel + + def compute_GFR(self,color=False): + """ + This function extracts Gabor features for steganalysis of JPEG images + proposed in [1]. Parameters are exactly set as they are described in the + paper. Total dimensionality of the features with 32 rotations: 17000. + ------------------------------------------------------------------------- + Output: F ... extracted Gabor features + ------------------------------------------------------------------------- + [1] X. Song, F. Liu, C. Yang, X. Luo and Y. Zhang "Steganalysis of + Adaptive JPEG Steganography Using 2D Gabor Filters", Proceedings of + the 3rd ACM Workshop on Information Hiding and Multimedia Security, + Pages 15-23, Portland, OR, June 2015. + ------------------------------------------------------------------------- + """ + + """ + Computes features of the image. + + :param color: specify wether the image contains color or not + """ + + + #img = jpeg(self.imagepath, verbosity=0) + img = np.load(self.imagepath) + + if not color : + # Force usage of only a layer + #dct_coeff = img.coef_arrays[0] + dct_coeff = img + #c_quant = img.quant_tables[0] + c_quant = np.load('./c_quant_'+str(self.QF)+'.npy') + return self.compute_GFR_layer(c_quant, dct_coeff) + else : + # Uses the three layers + if len(img.quant_tables) < 3 : + raise Exception('This image seems to be a greyscale image : ' + self.imagepath) + else : + # If there is less that 3 DCT coefficients specified, it means that they are the sames. + # ie. same for both chrominance or same for all layers. 
+ for i in range(1, len(img.quant_tables)) : + if img.quant_tables[i] is None : + # don't have to copy it as it won't be modified + img.quant_tables[i] = img.quant_tables[i-1] + + # concatenates the 3 features + feats = [self.compute_GFR_layer(img.quant_tables[i], img.coef_arrays[i]) for i in range(3)] + return np.concatenate(feats) + + def compute_GFR_layer(self, c_quant, dct_coeffs) : + + # number of histogram bins + T = 4; + q = [2, 4, 6, 8] + # quantization steps + if self.QF == 75: + q = [2, 4, 6, 8] + elif self.QF == 95 or self.QF == 100: + q = [0.5, 1, 1.5, 2] + elif self.QF == 85: # New + q = [1, 2, 3, 4] + + rotations = np.arange(0, self.NR) * pi / self.NR + sr=len(rotations) + + # Standard deviations + sigma = [0.5, 0.75, 1, 1.25] + ss=len(sigma) + + phase_shift = [0, pi/2] + sp=len(phase_shift) + + aspect_ratio = 0.5 + + # Decompress to spatial domain + # dct_coeffs= img.coef_arrays[0] + # c_quant = img.quant_tables[0] + #I_spatial = compress.dequantise(dct_coeffs, c_quant) + I_spatial = dequantise(dct_coeffs, c_quant) + I_spatial = dct.ibdct(I_spatial) + + # Compute DCTR locations to be merged + merged_coordinates = np.empty((25), dtype=object); + for i in range(5): + for j in range(5): + coordinates = np.array([[i,j], [i,8-j], [8-i,j], [8-i,8-j]]) + coordinates = [row for row in coordinates if row[0] < 8 and row[1] < 8] + coordinates = unique_rows(coordinates) + merged_coordinates[i*5 + j] = coordinates; + + # Load Gabor Kernels : the filter bank includes 2D Gabor filters + # with different scales and orientations + kernel = np.empty((ss, sr, sp), dtype=object); + s_i = 0 + for s in sigma: + r_i = 0 + for r in rotations: + p_i = 0 + for p in phase_shift: + kernel[s_i, r_i, p_i] = self.gaborkernel(s, r, p, aspect_ratio) + p_i += 1 + r_i +=1 + s_i += 1 + + # Compute features + mode_fea_dim = len(merged_coordinates)*(T+1) + dim_f = sp * ss * sr + dim_s = sp * ss * (sr // 2 + 1) + FF = np.zeros(dim_f*mode_fea_dim, dtype=np.float32) + F = np.zeros(dim_s*mode_fea_dim, dtype=np.float32) + for mode_P in range(sp): + for mode_S in range(ss): + for mode_R in range(sr): + + mode_index = mode_P*(sr*ss) + mode_S*sr+ mode_R + 1 + + if TIME: + t1 = time.time() + # 2D Gabor filtering + R = convolve(I_spatial, kernel[mode_S, mode_R, mode_P]) + R = R[np.ix_(range(3,R.shape[0]-4), range(3,R.shape[1]-4))] +# cpt +=1 + if TIME: + t2 = time.time() + print("Time for convolve() %.5f"%(t2 - t1)) + + # Subsample the filtered image + R = np.abs(np.round(R / q[mode_S])) + R[R > T] = T + # feature extraction and merging + for merged_index in range(len(merged_coordinates)): + f_merged = np.zeros(T+1, dtype=np.float32) + for coord_index in range(np.size(merged_coordinates[merged_index], 0)): + # According to the 64 DCt modes in 8*8 DCT block, the filtered + # image is subampling by step 8 to get 64 subimages + r_shift = merged_coordinates[merged_index][coord_index, 0] + c_shift = merged_coordinates[merged_index][coord_index, 1] + R_sub = R[r_shift::8, c_shift::8] + # Extract the histogram features + hist, bin_edges = np.histogram(R_sub, np.arange(T + 2)) + # Merge the histgram features + f_merged = f_merged + hist + F_index_from = (mode_index - 1) * mode_fea_dim + merged_index * (T + 1) + F_index_to = (mode_index - 1) * mode_fea_dim + merged_index * (T + 1) + T + 1 + FF[F_index_from:F_index_to] = f_merged / np.sum(f_merged) + + # merging of symmetrical directions + M_index=np.arange(0, sr//2 - 1) + MS=len(M_index)+2 + SI = (mode_index-sr)*mode_fea_dim + + F_out=FF[SI: SI + MS* mode_fea_dim] + F_M=FF[SI: SI 
+ sr* mode_fea_dim] + # For example, suppose the orientation parameter + # theta = {0, pi/8, 2pi/8, ..., 6pi/8, 7pi/8}, then the histogram + # features of the filtered image with theta = pi/8,7pi/8, + # theta=2pi/8, 6pi/8 and so on should be merged by averaging + for i in M_index: + F_out[(i + 1) * mode_fea_dim:(i + 1) * mode_fea_dim + mode_fea_dim]= (F_M[(i + 1) * mode_fea_dim:(i + 1) * mode_fea_dim + mode_fea_dim]+ F_M[(sr - i - 1) * mode_fea_dim: (sr - i - 1) * mode_fea_dim + mode_fea_dim]) / 2 + ind = mode_P * ss + mode_S + F[ind * MS * mode_fea_dim :ind * MS * mode_fea_dim + MS * mode_fea_dim]= F_out + return F + + +from extractor import Extractor as ParentExtractor + + +class Extractor(ParentExtractor) : + """ + Extract caracteristics from an image using DCTR algorithm. + + self.conf should specify : + - imgs.QF : quality factor of cover images (corresponding to jpeg library, and most open-source softwares, such as imagemagic) + - imgs.color : specify wether image is color or not + """ + + #file_type = 'jpg' + + def run(self, file) : + engine = GFR_process(file, self.conf.QF) + result = engine.compute_GFR(color=self.conf.color) + return np.array(result) + + +# Main function +if __name__ == '__main__': + gfr= GFR_process('image/1.jpg',75) + t1 = time() + res = gfr.compute_GFR() + t2 = time() + print("execution time:", t2-t1, "seconds") diff --git a/blind_steganalysts/JRM.py b/blind_steganalysts/JRM.py new file mode 100755 index 0000000000000000000000000000000000000000..8472686f4420ecea8c9962d220b64f7b89d60991 --- /dev/null +++ b/blind_steganalysts/JRM.py @@ -0,0 +1,846 @@ +# ccJRM - Version 1 + +import numpy as np +from jpeg import jpeg +from scipy.misc import imread +from PIL import Image +import os, sys +import glob +import time +import h5py +import multiprocessing +from multiprocessing import Pool +from scipy.fftpack import dct, idct + +#Preprocessing +def dct2(x): + return dct(dct(x, norm='ortho').T, norm='ortho').T + +def idct2(x): + return idct(idct(x, norm='ortho').T, norm='ortho').T + +def compute_spatial_from_jpeg(jpeg_im,c_quant): + """ + Compute the 8x8 DCT transform of the jpeg representation + """ + w,h = jpeg_im.shape + spatial_im = np.zeros((w,h)) + for bind_i in range(int(w//8)): + for bind_j in range(int(h//8)): + block = idct2(jpeg_im[bind_i*8:(bind_i+1)*8,bind_j*8:(bind_j+1)*8]*(c_quant))+128 + #block[block>255]=255 + #block[block<0]=0 + spatial_im[bind_i*8:(bind_i+1)*8,bind_j*8:(bind_j+1)*8] = block + spatial_im = spatial_im.astype(np.float32) + return(spatial_im) + +#0. Tools + +def IndexDCT(mode): #For each group ('mode'), returns a list which contains the relevant indices of the DCT plane + out = [] + for a in range (0,6): + for b in range (0,6): + if a+b>0: + if (mode =='h' or mode=='ih' or mode=='ix'): + if a+b<6: + out.append([a,b]) + elif (mode=='d1' or mode=='id' or mode=='im'): + if b>=a: + if a+b<6: + out.append([a,b]) + elif (mode=='d2' or mode=='x' or mode=='od2'): + if b>a: + if a+b<6: + out.append([a,b]) + elif (mode=='oh'): + if a+b<5: + out.append([a,b]) + elif (mode=='km'): + if a>0: + if a+b<6: + out.append([a,b]) + elif (mode=='od1'): + if b>=a: + if a+b<5: + out.append([a,b]) + else: + print('Mode error') + + return out + +def SignSym(res,T): #Sign Symmetry used for co-occurrences matrices based on differences of absolute values + res_sym = res[:,:] + res[::-1,::-1] + res_sym = res_sym.flatten() + idx = int(0.5*(2*T+1)*(2*T+1)+0.5) + out = res_sym[:idx] + return out + +# 1. 
DCT-mode specific components +# We have to distinguish whether the co-occurrence matrices are based on +# absolute values or on differences of absolute values. +# In each case, we have to distinguish intra-block ou inter-block relationships. + +# 1.1 Based on absolute values + +# 1.1.1 Intra-block + +def IntraABS1(res,mode,dx,dy,T): #Groups 1,2,3,5,6 + + tx, ty = res.shape + res[res>T] = T + + out1 = [] + out2 = [] + tab = IndexDCT(mode) + + for z in range(0,len(tab)): + x_a1 = [] + y_a1 = [] + x_a2 = [] + y_a2 = [] + + for i in range (0,tx//8): + for j in range (0,ty//8): + x_a1.append(res[8*i+tab[z][0],8*j+tab[z][1]]) + y_a1.append(res[8*i+tab[z][0]+dx,8*j+tab[z][1]+dy]) + + x_a2.append(res[8*i+tab[z][1],8*j+tab[z][0]]) + y_a2.append(res[8*i+tab[z][1]+dy,8*j+tab[z][0]+dx]) + + x_a1 = np.asarray(x_a1) + y_a1 = np.asarray(y_a1) + count1 = np.bincount(x_a1 + (T+1)*y_a1,minlength=(T+1)*(T+1)) + out1 = np.append(out1,count1) + + + x_a2 = np.asarray(x_a2) + y_a2 = np.asarray(y_a2) + count2 = np.bincount(x_a2 + (T+1)*y_a2,minlength=(T+1)*(T+1)) + out2 = np.append(out2,count2) + + out = out1 + out2 + return out + +def IntraABS2(res,mode,T): #Group 4 + + tx, ty = res.shape + res[res>T] = T + + out1 = [] + out2 = [] + tab = IndexDCT(mode) + + for z in range(0,len(tab)): + x_a1 = [] + y_a1 = [] + x_a2 = [] + y_a2 = [] + + dx = tab[z][1] - tab[z][0] + dy = -dx + + for i in range (0,tx/8): + for j in range (0,ty/8): + x_a1.append(res[8*i+tab[z][0],8*j+tab[z][1]]) + y_a1.append(res[8*i+tab[z][0]+dx,8*j+tab[z][1]+dy]) + + x_a2.append(res[8*i+tab[z][1],8*j+tab[z][0]]) + y_a2.append(res[8*i+tab[z][1]+dy,8*j+tab[z][0]+dx]) + + x_a1 = np.asarray(x_a1) + y_a1 = np.asarray(y_a1) + count1 = np.bincount(x_a1 + (T+1)*y_a1,minlength=(T+1)*(T+1)) + out1 = np.append(out1,count1) + + x_a2 = np.asarray(x_a2) + y_a2 = np.asarray(y_a2) + count2 = np.bincount(x_a2 + (T+1)*y_a2,minlength=(T+1)*(T+1)) + out2 = np.append(out2,count2) + + out = out1 + out2 + return out + +# 1.1.2. 
Inter-block + +def InterABS1(res,mode,dx,dy,T): #Groups 7,8,9 + + tx, ty = res.shape + res[res>T] = T + + out1 = [] + out2 = [] + tab = IndexDCT(mode) + + range_i1 = [] + range_j1 = [] + range_i2 = [] + range_j2 = [] + + if dx>=0: + range_i1 = range(0,tx/8-dx) + range_j2 = range(0,ty/8-dx) + else: + range_i1 = range(-dx,tx/8) + range_j2 = range(-dx,ty/8) + + if dy>=0: + range_j1 = range(0,ty/8-dy) + range_i2 = range(0,tx/8-dy) + else: + range_j1 = range(-dy,ty/8) + range_i2 = range(-dy,tx/8) + + for z in range(0,len(tab)): + x_a1 = [] + y_a1 = [] + x_a2 = [] + y_a2 = [] + + for i in range_i1: + for j in range_j1: + x_a1.append(res[8*i+tab[z][0],8*j+tab[z][1]]) + y_a1.append(res[8*(i+dx)+tab[z][0],8*(j+dy)+tab[z][1]]) + + x_a1 = np.asarray(x_a1) + y_a1 = np.asarray(y_a1) + count1 = np.bincount(x_a1 + (T+1)*y_a1,minlength=(T+1)*(T+1)) + out1 = np.append(out1,count1) + + for i in range_i2: + for j in range_j2: + x_a2.append(res[8*i+tab[z][1],8*j+tab[z][0]]) + y_a2.append(res[8*(i+dy)+tab[z][1],8*(j+dx)+tab[z][0]]) + + x_a2 = np.asarray(x_a2) + y_a2 = np.asarray(y_a2) + count2 = np.bincount(x_a2 + (T+1)*y_a2,minlength=(T+1)*(T+1)) + out2 = np.append(out2,count2) + + out = out1 + out2 + return out + +def InterABS2(res,mode,T): #Group 10 + + tx, ty = res.shape + res[res>T] = T + + out1 = [] + out2 = [] + tab = IndexDCT(mode) + + range_i1 = range(0,tx/8) + range_j1 = range(0,ty/8-1) + range_i2 = range(0,tx/8-1) + range_j2 = range(0,ty/8) + + for z in range(0,len(tab)): + x_a1 = [] + y_a1 = [] + x_a2 = [] + y_a2 = [] + + dx = tab[z][1] - tab[z][0] + dy = -dx + 8 + + for i in range_i1: + for j in range_j1: + x_a1.append(res[8*i+tab[z][0],8*j+tab[z][1]]) + y_a1.append(res[8*i+tab[z][0]+dx,8*j+tab[z][1]+dy]) + + x_a1 = np.asarray(x_a1) + y_a1 = np.asarray(y_a1) + count1 = np.bincount(x_a1 + (T+1)*y_a1,minlength=(T+1)*(T+1)) + out1 = np.append(out1,count1) + + for i in range_i2: + for j in range_j2: + x_a2.append(res[8*i+tab[z][1],8*j+tab[z][0]]) + y_a2.append(res[8*i+tab[z][1]+dy,8*j+tab[z][0]+dx]) + + x_a2 = np.asarray(x_a2) + y_a2 = np.asarray(y_a2) + count2 = np.bincount(x_a2 + (T+1)*y_a2,minlength=(T+1)*(T+1)) + out2 = np.append(out2,count2) + + out = out1 + out2 + return out + +# 1.2. Based on differences of absolute values + +# 1.2.1. 
Intra-block + +def IntraOTH1(res1,res2,mode,dx,dy,T): #Groups 1,2,3,5,6 + + tx1, ty1 = res1.shape + tx2, ty2 = res2.shape + + res1[res1>T] = T + res2[res2>T] = T + res1[res1<-T] = -T + res2[res2<-T] = -T + + out = [] + tab = IndexDCT(mode) + + for z in range(0,len(tab)): + x_a1 = [] + y_a1 = [] + x_a2 = [] + y_a2 = [] + + for i in range (0,tx1/8): + for j in range (0,ty1/8): + x_a1.append(res1[8*i+tab[z][0],8*j+tab[z][1]]) + y_a1.append(res1[8*i+tab[z][0]+dx,8*j+tab[z][1]+dy]) + + x_a1 = np.asarray(x_a1) + T + y_a1 = np.asarray(y_a1) + T + count1 = np.bincount(x_a1 + (2*T+1)*y_a1,minlength=(2*T+1)*(2*T+1)) + + for i in range (0,tx2/8): + for j in range (0,ty2/8): + x_a2.append(res2[8*i+tab[z][1],8*j+tab[z][0]]) + y_a2.append(res2[8*i+tab[z][1]+dy,8*j+tab[z][0]+dx]) + + x_a2 = np.asarray(x_a2) + T + y_a2 = np.asarray(y_a2) + T + count2 = np.bincount(x_a2 + (2*T+1)*y_a2,minlength=(2*T+1)*(2*T+1)) + + tmp = np.reshape(count1+count2,(2*T+1,2*T+1)) + tmp = SignSym(tmp,T) + out = np.append(out,tmp) + + return out + +def IntraOTH2(res1,res2,mode,T): #Group 4 + + tx1, ty1 = res1.shape + tx2, ty2 = res2.shape + + res1[res1>T] = T + res2[res2>T] = T + res1[res1<-T] = -T + res2[res2<-T] = -T + + out = [] + tab = IndexDCT(mode) + + for z in range(0,len(tab)): + x_a1 = [] + y_a1 = [] + x_a2 = [] + y_a2 = [] + + dx = tab[z][1] - tab[z][0] + dy = -dx + + for i in range (0,tx1/8): + for j in range (0,ty1/8): + x_a1.append(res1[8*i+tab[z][0],8*j+tab[z][1]]) + y_a1.append(res1[8*i+tab[z][0]+dx,8*j+tab[z][1]+dy]) + + x_a1 = np.asarray(x_a1) + T + y_a1 = np.asarray(y_a1) + T + count1 = np.bincount(x_a1 + (2*T+1)*y_a1,minlength=(2*T+1)*(2*T+1)) + + for i in range (0,tx2/8): + for j in range (0,ty2/8): + x_a2.append(res2[8*i+tab[z][1],8*j+tab[z][0]]) + y_a2.append(res2[8*i+tab[z][1]+dy,8*j+tab[z][0]+dx]) + + x_a2 = np.asarray(x_a2) + T + y_a2 = np.asarray(y_a2) + T + count2 = np.bincount(x_a2 + (2*T+1)*y_a2,minlength=(2*T+1)*(2*T+1)) + + tmp = np.reshape(count1+count2,(2*T+1,2*T+1)) + tmp = SignSym(tmp,T) + out = np.append(out,tmp) + + return out + +def InterOTH1(res1,res2,mode,dx,dy,T): #Groups 7,8,9 + + tx1, ty1 = res1.shape + tx2, ty2 = res2.shape + + res1[res1>T] = T + res2[res2>T] = T + res1[res1<-T] = -T + res2[res2<-T] = -T + + out = [] + tab = IndexDCT(mode) + + range_i1 = [] + range_j1 = [] + range_i2 = [] + range_j2 = [] + + if dx>=0: + range_i1 = range(0,tx1/8-dx) + range_j2 = range(0,ty2/8-dx) + else: + range_i1 = range(-dx,tx1/8) + range_j2 = range(-dx,ty2/8) + + if dy>=0: + range_j1 = range(0,ty1/8-dy) + range_i2 = range(0,tx2/8-dy) + else: + range_j1 = range(-dy,ty1/8) + range_i2 = range(-dy,tx2/8) + + for z in range(0,len(tab)): + x_a1 = [] + y_a1 = [] + x_a2 = [] + y_a2 = [] + + for i in range_i1: + for j in range_j1: + x_a1.append(res1[8*i+tab[z][0],8*j+tab[z][1]]) + y_a1.append(res1[8*(i+dx)+tab[z][0],8*(j+dy)+tab[z][1]]) + + x_a1 = np.asarray(x_a1) + T + y_a1 = np.asarray(y_a1) + T + count1 = np.bincount(x_a1 + (2*T+1)*y_a1,minlength=(2*T+1)*(2*T+1)) + + for i in range_i2: + for j in range_j2: + x_a2.append(res2[8*i+tab[z][1],8*j+tab[z][0]]) + y_a2.append(res2[8*(i+dy)+tab[z][1],8*(j+dx)+tab[z][0]]) + + x_a2 = np.asarray(x_a2) + T + y_a2 = np.asarray(y_a2) + T + count2 = np.bincount(x_a2 + (2*T+1)*y_a2,minlength=(2*T+1)*(2*T+1)) + + tmp = np.reshape(count1+count2,(2*T+1,2*T+1)) + tmp = SignSym(tmp,T) + out = np.append(out,tmp) + + return out + +def InterOTH2(res1,res2,mode,T): #Group 10 + + tx1, ty1 = res1.shape + tx2, ty2 = res2.shape + + res1[res1>T] = T + res2[res2>T] = T + res1[res1<-T] 
= -T + res2[res2<-T] = -T + + out = [] + tab = IndexDCT(mode) + + range_i1 = range(0,tx1/8) + range_j1 = range(0,ty1/8-1) + range_i2 = range(0,tx2/8-1) + range_j2 = range(0,ty2/8) + + for z in range(0,len(tab)): + x_a1 = [] + y_a1 = [] + x_a2 = [] + y_a2 = [] + + dx = tab[z][1] - tab[z][0] + dy = -dx + 8 + + for i in range_i1: + for j in range_j1: + x_a1.append(res1[8*i+tab[z][0],8*j+tab[z][1]]) + y_a1.append(res1[8*i+tab[z][0]+dx,8*j+tab[z][1]+dy]) + + x_a1 = np.asarray(x_a1) + T + y_a1 = np.asarray(y_a1) + T + count1 = np.bincount(x_a1 + (2*T+1)*y_a1,minlength=(2*T+1)*(2*T+1)) + + for i in range_i2: + for j in range_j2: + x_a2.append(res2[8*i+tab[z][1],8*j+tab[z][0]]) + y_a2.append(res2[8*i+tab[z][1]+dy,8*j+tab[z][0]+dx]) + + x_a2 = np.asarray(x_a2) + T + y_a2 = np.asarray(y_a2) + T + count2 = np.bincount(x_a2 + (2*T+1)*y_a2,minlength=(2*T+1)*(2*T+1)) + + tmp = np.reshape(count1+count2,(2*T+1,2*T+1)) + tmp = SignSym(tmp,T) + out = np.append(out,tmp) + + return out + +# 2. Integral components +# Same distinction as before + +# 2.1. Based on absolute values + +# 2.1.1 Intra-block (Group 1 - Partial) + +def Extract_Integral_Intra_ABS(res,dx,dy,T): + + tx, ty = res.shape + res[res>T] = T + + out = np.zeros((T+1)*(T+1)) + + for x in range (0,8): + for y in range (0,8): + x_a = [] + y_a = [] + + for i in range (0,tx/8): + for j in range (0,ty/8): + if(8*i+x+dx<tx and 8*j+y+dy<ty): + x_a.append(res[8*i+x,8*j+y]) + y_a.append(res[8*i+x+dx,8*j+y+dy]) + + x_a = np.asarray(x_a) + y_a = np.asarray(y_a) + count = np.bincount(x_a + (T+1)*y_a,minlength=(T+1)*(T+1)) + + out = out + count + + return out + +# 2.1.2. Inter-block (Group 1 - Partial) + +def Extract_Integral_Inter_ABS(res,dx,dy,T): + + tx, ty = res.shape + res[res>T] = T + + out = np.zeros((T+1)*(T+1)) + + range_i = [] + range_j = [] + + if dx>=0: + range_i = range(0,tx/8-dx) + else: + range_i = range(-dx,tx/8) + + if dy>=0: + range_j = range(0,ty/8-dy) + else: + range_j = range(-dy,ty/8) + + for x in range (0,8): + for y in range (0,8): + x_a = [] + y_a = [] + + for i in range_i: + for j in range_j: + x_a.append(res[8*i+x,8*j+y]) + y_a.append(res[8*(i+dx)+x,8*(j+dy)+y]) + + x_a = np.asarray(x_a) + y_a = np.asarray(y_a) + count = np.bincount(x_a + (T+1)*y_a,minlength=(T+1)*(T+1)) + + out = out + count + + return out + +# 2.2. Based on differences of absolute values + +# 2.2.1. Intra-block (Group 2) + +def Extract_Integral_Intra_OTH(res,dx,dy,T): + + tx, ty = res.shape + res[res>T] = T + res[res<-T] = -T + + out = np.zeros(int(0.5*(2*T+1)*(2*T+1)+0.5)) + + for x in range (0,8): + for y in range (0,8): + x_a = [] + y_a = [] + + for i in range (0,tx/8): + for j in range (0,ty/8): + if(8*i+x+dx<tx and 8*j+y+dy<ty): + x_a.append(res[8*i+x,8*j+y]) + y_a.append(res[8*i+x+dx,8*j+y+dy]) + + x_a = np.asarray(x_a) + T + y_a = np.asarray(y_a) + T + count = np.bincount(x_a + (2*T+1)*y_a,minlength=(2*T+1)*(2*T+1)) + count = np.reshape(count,(2*T+1,2*T+1)) + count_sym = SignSym(count,T) + + out = out + count_sym + + return out + +# 2.2.2. 
Inter-block (Group 3) + +def Extract_Integral_Inter_OTH(res,dx,dy,T): + + tx, ty = res.shape + res[res>T] = T + res[res<-T] = -T + + out = np.zeros(int(0.5*(2*T+1)*(2*T+1)+0.5)) + + range_i = [] + range_j = [] + + if dx>=0: + range_i = range(0,tx/8-dx) + else: + range_i = range(-dx,tx/8) + + if dy>=0: + range_j = range(0,ty/8-dy) + else: + range_j = range(-dy,ty/8) + + for x in range (0,8): + for y in range (0,8): + x_a = [] + y_a = [] + + for i in range_i: + for j in range_j: + x_a.append(res[8*i+x,8*j+y]) + y_a.append(res[8*(i+dx)+x,8*(j+dy)+y]) + + x_a = np.asarray(x_a) + T + y_a = np.asarray(y_a) + T + count = np.bincount(x_a + (2*T+1)*y_a,minlength=(2*T+1)*(2*T+1)) + count = np.reshape(count,(2*T+1,2*T+1)) + count_sym = SignSym(count,T) + + out = out + count_sym + + return out + +# 3. Extracting all submodels +# Given an image (array), extract all submodels, resulting in a feature vector +# of dimensionality 11,255 + +def ExtractTotal(X): + + M,N = np.shape(X) + + # Shapes adjusted to the closest multiple of 8 + M8 = int(np.floor(M/8)*8) + N8 = int(np.floor(N/8)*8) + + #Residuals X_abs, X_h, X_v, X_d, X_ih, X_iv + X_abs = np.abs(X) + + X_h = X_abs[:,0:N-1] - X_abs[:,1:N] + X_v = X_abs[0:M-1,:] - X_abs[1:M,:] + X_d = X_abs[0:M-1,0:N-1] - X_abs[1:M,1:N] + + X_abs = X_abs[0:M8,0:N8] #Bords + X_h = X_h[0:M8,0:N8] #Bords + X_v = X_h[0:M8,0:N8] #Bords + X_d = X_h[0:M8,0:N8] #Bords + + X_ih = X_abs[:M8,0:N8-8] - X_abs[:M8,8:N8] + X_iv = X_abs[0:M8-8,:N8] - X_abs[8:M8,:N8] + + feat = {} #Features storing + + feat['Abs.h'] = IntraABS1(X_abs,'h',0,1,3) + feat['Abs.d1'] = IntraABS1(X_abs,'d1',1,1,3) + feat['Abs.d2'] = IntraABS1(X_abs,'d2',1,-1,3) + feat['Abs.oh'] = IntraABS1(X_abs,'oh',0,2,3) + feat['Abs.x'] = IntraABS2(X_abs,'x',3) + feat['Abs.od1'] = IntraABS1(X_abs,'od1',2,2,3) + feat['Abs.od2'] = IntraABS1(X_abs,'od2',2,-2,3) + feat['Abs.km'] = IntraABS1(X_abs,'km',-1,2,3) + feat['Abs.ih'] = InterABS1(X_abs,'ih',0,1,3) + feat['Abs.id'] = InterABS1(X_abs,'id',1,1,3) + feat['Abs.im'] = InterABS1(X_abs,'im',-1,1,3) + feat['Abs.ix'] = InterABS2(X_abs,'ix',3) + + feat['Hor.h'] = IntraOTH1(X_h,X_v,'h',0,1,2) + feat['Hor.d1'] = IntraOTH1(X_h,X_v,'d1',1,1,2) + feat['Hor.d2'] = IntraOTH1(X_h,X_v,'d2',1,-1,2) + feat['Hor.oh'] = IntraOTH1(X_h,X_v,'oh',0,2,2) + feat['Hor.x'] = IntraOTH2(X_h,X_v,'x',2) + feat['Hor.od1'] = IntraOTH1(X_h,X_v,'od1',2,2,2) + feat['Hor.od2'] = IntraOTH1(X_h,X_v,'od2',2,-2,2) + feat['Hor.km'] = IntraOTH1(X_h,X_v,'km',-1,2,2) + feat['Hor.ih'] = InterOTH1(X_h,X_v,'ih',0,1,2) + feat['Hor.id'] = InterOTH1(X_h,X_v,'id',1,1,2) + feat['Hor.im'] = InterOTH1(X_h,X_v,'im',-1,1,2) + feat['Hor.ix'] = InterOTH2(X_h,X_v,'ix',2) + + feat['Diag.h'] = IntraOTH1(X_d,X_d,'h',0,1,2) + feat['Diag.d1'] = IntraOTH1(X_d,X_d,'d1',1,1,2) + feat['Diag.d2'] = IntraOTH1(X_d,X_d,'d2',1,-1,2) + feat['Diag.oh'] = IntraOTH1(X_d,X_d,'oh',0,2,2) + feat['Diag.x'] = IntraOTH2(X_d,X_d,'x',2) + feat['Diag.od1'] = IntraOTH1(X_d,X_d,'od1',2,2,2) + feat['Diag.od2'] = IntraOTH1(X_d,X_d,'od2',2,-2,2) + feat['Diag.km'] = IntraOTH1(X_d,X_d,'km',-1,2,2) + feat['Diag.ih'] = InterOTH1(X_d,X_d,'ih',0,1,2) + feat['Diag.id'] = InterOTH1(X_d,X_d,'id',1,1,2) + feat['Diag.im'] = InterOTH1(X_d,X_d,'im',-1,1,2) + feat['Diag.ix'] = InterOTH2(X_d,X_d,'ix',2) + + feat['Ihor.h'] = IntraOTH1(X_ih,X_iv,'h',0,1,2) + feat['Ihor.d1'] = IntraOTH1(X_ih,X_iv,'d1',1,1,2) + feat['Ihor.d2'] = IntraOTH1(X_ih,X_iv,'d2',1,-1,2) + feat['Ihor.oh'] = IntraOTH1(X_ih,X_iv,'oh',0,2,2) + feat['Ihor.x'] = IntraOTH2(X_ih,X_iv,'x',2) + feat['Ihor.od1'] = 
IntraOTH1(X_ih,X_iv,'od1',2,2,2) + feat['Ihor.od2'] = IntraOTH1(X_ih,X_iv,'od2',2,-2,2) + feat['Ihor.km'] = IntraOTH1(X_ih,X_iv,'km',-1,2,2) + feat['Ihor.ih'] = InterOTH1(X_ih,X_iv,'ih',0,1,2) + feat['Ihor.id'] = InterOTH1(X_ih,X_iv,'id',1,1,2) + feat['Ihor.im'] = InterOTH1(X_ih,X_iv,'im',-1,1,2) + feat['Ihor.ix'] = InterOTH2(X_ih,X_iv,'ix',2) + + feat['Int.Abs.1'] = Extract_Integral_Intra_ABS(X_abs,0,1,5) + feat['Int.Abs.2'] = Extract_Integral_Intra_ABS(X_abs,1,1,5) + feat['Int.Abs.3'] = Extract_Integral_Intra_ABS(X_abs,1,-1,5) + feat['Int.Abs.4'] = Extract_Integral_Inter_ABS(X_abs,0,1,5) + feat['Int.Abs.5'] = Extract_Integral_Inter_ABS(X_abs,1,1,5) + + feat['Int.h.1'] = Extract_Integral_Intra_OTH(X_h,0,1,5) + feat['Int.h.2'] = Extract_Integral_Intra_OTH(X_h,1,0,5) + feat['Int.h.3'] = Extract_Integral_Intra_OTH(X_h,1,1,5) + feat['Int.h.4'] = Extract_Integral_Intra_OTH(X_h,1,-1,5) + feat['Int.h.5'] = Extract_Integral_Inter_OTH(X_h,0,1,5) + feat['Int.h.6'] = Extract_Integral_Inter_OTH(X_h,1,0,5) + feat['Int.h.7'] = Extract_Integral_Inter_OTH(X_h,1,1,5) + feat['Int.h.8'] = Extract_Integral_Inter_OTH(X_h,1,-1,5) + + feat['Int.v.1'] = Extract_Integral_Intra_OTH(X_v,0,1,5) + feat['Int.v.2'] = Extract_Integral_Intra_OTH(X_v,1,0,5) + feat['Int.v.3'] = Extract_Integral_Intra_OTH(X_v,1,1,5) + feat['Int.v.4'] = Extract_Integral_Intra_OTH(X_v,1,-1,5) + feat['Int.v.5'] = Extract_Integral_Inter_OTH(X_v,0,1,5) + feat['Int.v.6'] = Extract_Integral_Inter_OTH(X_v,1,0,5) + feat['Int.v.7'] = Extract_Integral_Inter_OTH(X_v,1,1,5) + feat['Int.v.8'] = Extract_Integral_Inter_OTH(X_v,1,-1,5) + + feat['Int.d.1'] = Extract_Integral_Intra_OTH(X_d,0,1,5) + feat['Int.d.2'] = Extract_Integral_Intra_OTH(X_d,1,0,5) + feat['Int.d.3'] = Extract_Integral_Intra_OTH(X_d,1,1,5) + feat['Int.d.4'] = Extract_Integral_Intra_OTH(X_d,1,-1,5) + feat['Int.d.5'] = Extract_Integral_Inter_OTH(X_d,0,1,5) + feat['Int.d.6'] = Extract_Integral_Inter_OTH(X_d,1,0,5) + feat['Int.d.7'] = Extract_Integral_Inter_OTH(X_d,1,1,5) + feat['Int.d.8'] = Extract_Integral_Inter_OTH(X_d,1,-1,5) + + feat['Int.ih.1'] = Extract_Integral_Intra_OTH(X_ih,0,1,5) + feat['Int.ih.2'] = Extract_Integral_Intra_OTH(X_ih,1,0,5) + feat['Int.ih.3'] = Extract_Integral_Intra_OTH(X_ih,1,1,5) + feat['Int.ih.4'] = Extract_Integral_Intra_OTH(X_ih,1,-1,5) + feat['Int.ih.5'] = Extract_Integral_Inter_OTH(X_ih,0,1,5) + feat['Int.ih.6'] = Extract_Integral_Inter_OTH(X_ih,1,0,5) + feat['Int.ih.7'] = Extract_Integral_Inter_OTH(X_ih,1,1,5) + feat['Int.ih.8'] = Extract_Integral_Inter_OTH(X_ih,1,-1,5) + + feat['Int.iv.1'] = Extract_Integral_Intra_OTH(X_iv,0,1,5) + feat['Int.iv.2'] = Extract_Integral_Intra_OTH(X_iv,1,0,5) + feat['Int.iv.3'] = Extract_Integral_Intra_OTH(X_iv,1,1,5) + feat['Int.iv.4'] = Extract_Integral_Intra_OTH(X_iv,1,-1,5) + feat['Int.iv.5'] = Extract_Integral_Inter_OTH(X_iv,0,1,5) + feat['Int.iv.6'] = Extract_Integral_Inter_OTH(X_iv,1,0,5) + feat['Int.iv.7'] = Extract_Integral_Inter_OTH(X_iv,1,1,5) + feat['Int.iv.8'] = Extract_Integral_Inter_OTH(X_iv,1,-1,5) + + out = [] + for k in feat.keys(): + out = np.append(out,feat[k]) + + return out + +# 4. 
Whole process +# Given an image (provide a path), returns the CC-JRM feature vector +# Its dimensionality is 22,510 : +# The extractor is applied to the image and its calibration (2 x 11,255) + +def JRM_Process(path, QF, folder_model, color): + pardir = os.path.dirname(path) + '/' + basename = os.path.basename(path) + + c_quant = np.asarray(np.load(folder_model+'c_quant_'+str(QF)+'.npy'),dtype=np.int64) + #im1 = jpeg(path) + #X1 = im1.coef_arrays[0] + X1 = np.asarray(np.load(path),dtype=np.int64) + o1 = ExtractTotal(X1) + + print 'shape o1:', np.shape(o1) + + #im_sp = imread(path) + im_sp = np.round(compute_spatial_from_jpeg(X1, c_quant),0).astype(np.uint8) + x,y = im_sp.shape + im_sp = im_sp[4:x-4,4:y-4] + im = Image.fromarray(im_sp) + im.save(pardir + 'tmp' + basename, 'JPEG', quality=QF) + + im2 = jpeg(pardir + 'tmp' + basename) + X2 = im2.coef_arrays[0] + o2 = ExtractTotal(X2) + + print 'shape o2:', np.shape(o2) + + os.remove(pardir + 'tmp' + basename) + out = np.hstack((o1,o2)) + + print 'shape out:', np.shape(out) + + return out + +# 5. Main (for multiprocessing) + +def main_JRM(image_dir,file_out): + global QF #Quality factor for calibration + QF = 75 + + list_im = sorted(glob.glob(image_dir +'/*.jpg')) + + nbCores = multiprocessing.cpu_count() + + tic = time.time() + pool = Pool(nbCores) + results = pool.map(JRM_Process, list_im) + pool.close() + pool.join() + toc = time.time() + print "Processing time: ", (toc-tic), " seconds - ",(toc-tic)/len(list_im), "sec/im" + + feat = h5py.File(file_out,'w') + feat.create_dataset('dataset_1', data=results) + feat.close() + + return 1 + + + + +""" +Implemention of JRM extraction class. +Theses features can be generated from a greyscale jpeg image. + +""" + +from extractor import Extractor as ParentExtractor + + +class Extractor(ParentExtractor) : + """ + Extract caracteristics from an image using GFR algorithm. + + self.conf should specify : + - imgs.QF : quality factor of cover images (corresponding to jpeg library, and most open-source softwares, such as imagemagic) + """ + + file_type = 'jpg' + + def run(self, file) : + result = JRM_Process(file, self.conf.QF, self.conf.folder_model, color=False) + return np.array(result) diff --git a/blind_steganalysts/classifier.py b/blind_steganalysts/classifier.py new file mode 100644 index 0000000000000000000000000000000000000000..236720fe93d4549dd5c9d904dbf1df45df1f7d64 --- /dev/null +++ b/blind_steganalysts/classifier.py @@ -0,0 +1,65 @@ +import os, sys +import numpy as np +import pickle + +# Dirty import of parent module +#sys.path.append(os.path.dirname(__file__) + '/..') +from tools import msg + + +class Classifier() : + """ + An abstract class containing methods an classifier should have. + """ + + def __init__(self, conf) : + """ + Creation of a new classifier that needs to train. + """ + self.trained = False # becomes true if the classifier is trained + self.conf = conf + + def train(self, cover, stego) : + """ + Trains the classifier. Supposes cover and stego have the same dimention + """ + msg('Started training the classifier on %d images' % np.size(cover, axis=0)) + self.trained = True + + + def guess_stego(self, features) : + """ + Guess if given features represents a stego image. + If features represents several images, an array of answers is returned. 
+ """ + if not self.trained : + raise NameError('Using an untrained classifier.') + + # def test(self, cover, stego) : + # """ + # Tests the trained classifier on entries, return (success_cover, success_stego) + # """ + # msg('Testing the classifier', type='status') + # guessed_cover = self.guess_stego(cover) + # guessed_stego = self.guess_stego(stego) + # ok_cover = np.sum(1-guessed_cover) + # ok_stego = np.sum(guessed_stego) + # return ok_cover, ok_stego + + + # Je pense que cette fonction ne sert a rien (bas) + # def test(self, cover, stego) : + # """ + # Tests the trained classifier on entries, return (success_cover, success_stego) + # """ + # msg('Testing the classifier', type='status') + # PE_min , PE_true = self.test(cover,stego) + # return PE_min , PE_true + + + def save(self, file) : + """ + Saves the classifier in a file. + """ + with open(file, 'wb') as fout : + pickle.dump(self, fout) diff --git a/blind_steganalysts/extractor.py b/blind_steganalysts/extractor.py new file mode 100644 index 0000000000000000000000000000000000000000..b5525e2d4762f9711b14d5e0c29f84ec99d5edea --- /dev/null +++ b/blind_steganalysts/extractor.py @@ -0,0 +1,90 @@ +import os, sys +import numpy as np +from multiprocessing import Pool, cpu_count + +# Dirty import of parent module +#sys.path.append(os.path.dirname(__file__) + '/..') +from tools import msg + + +def apply(map_arg) : + """Apply arguments to object + It permits to use a mapping to external threads. + """ + engine, file = map_arg + msg('Extracting from ' + file, 'extract') + return engine.run(file) + + +class Extractor() : + """ + An abstract class, every extractor should inherit from it. + + self.conf must contain : + - extract.nb_cores to specify max number of cores to use + """ + + #file_type = None # file extension the extractor process on + + def __init__(self, conf) : + self.conf = conf + + def run(self, file) : + """ + Runs the extraction on one file. Returns the feature vector. + + :param file: path to the file that must be read + + :return: features vector + :rtype: a numpy array of dimension 1 + """ + + raise NameError('This method is abstract') + + def run_set(self, files_list) : + """ + Runs the extraction on the list of specified files. Returns corresponding features as a matrix. + + :param file: paths to the files that must be read + + :return: features matrix + :rtype: a numpy array of dimension 2 + """ + pool = Pool(self.conf.nb_cores) + # dumps exports the methods to be compatible out of the context + files_list = [(self, x) for x in files_list] + results = pool.map(apply, files_list) + #print results[0] + results = np.array(results[:]) + #print results.shape + pool.close() + pool.join() + msg('Outputed a %s features matrix' % str(results.shape), 'debug') + return np.array(results) # each file is represented with a list + + +class ConcatExtractors(Extractor) : + """ + Build an extractor from a list of extractors. + + This new extractor outputs the concatenation of each extractors results. + + Attributes of an instance : + :param sub_extractors: the extractors to be concatenated + :type sub_extractors: list of Extractor + """ + + def __init__(self, extractors) : + """ + Init with a list of extractors. + """ + self.conf = extractors[0].conf + #self.file_type = extractors[0].file_type + self.sub_extractors = extractors + + def run(self, file) : + """ + Run the extraction for each sub_extractors. 
+ """ + results = [sub.run(file) for sub in self.sub_extractors] + return np.concatenate(results) diff --git a/blind_steganalysts/job_classify.slurm b/blind_steganalysts/job_classify.slurm new file mode 100644 index 0000000000000000000000000000000000000000..05207ec61cc9ccd71ddcc9118f65cff8a96ca7bf --- /dev/null +++ b/blind_steganalysts/job_classify.slurm @@ -0,0 +1,17 @@ +#!/bin/bash +#SBATCH --nodes=1 +#SBATCH --job-name=DCTR_classify +#SBATCH --account=srp@cpu +##SBATCH --ntasks-per-node=4 +#SBATCH --cpus-per-task=4 +#SBATCH --mem=64G +#SBATCH --array=1-4 +#SBATCH --time=01:00:00 +#SBATCH --hint=nomultithread + +#module load TensorFlow/1.13.1-foss-2018b-Python-3.6.6 +module load tensorflow-gpu/py3/1.14 +#time python main_classifier.py --iteration_step=$SLURM_ARRAY_TASK_ID --features=GFR --path_folder='/home/bernasol/python3/DCTR_GFR/experience_0_2/' +#time python main_classifier.py --iteration_step=$SLURM_ARRAY_TASK_ID --features=GFR --path_folder='/home/bernasol/python3/DCTR_GFR/experience_xu_net_srnet_3/' --model='xu-net' +#time python main_classifier.py --iteration_step=$SLURM_ARRAY_TASK_ID --features=DCTR --path_folder='/gpfswork/rech/srp/commun/python3/tifs_protocol/experience_512_75_0_4_SGE/' --model='None' +time python main_classifier.py --iteration_step=$SLURM_ARRAY_TASK_ID --features=DCTR --path_folder='/gpfswork/rech/srp/commun/python3/tifs_protocol_efficientnet/experience_512_100_0_4_SGE_efnetB0s1_xunet_srnet/' --model='None' diff --git a/blind_steganalysts/job_features.slurm b/blind_steganalysts/job_features.slurm new file mode 100644 index 0000000000000000000000000000000000000000..0a9bc513a0e9369ba301a534ab95fd292ac75134 --- /dev/null +++ b/blind_steganalysts/job_features.slurm @@ -0,0 +1,39 @@ +#!/bin/bash +#SBATCH --job-name=JRM +#SBATCH --ntasks-per-node=32 +#SBATCH --ntasks=1 +#SBATCH --account=srp@cpu +#SBATCH --array=0-8 +#SBATCH --time=10:00:00 + +# module load tensorflow-gpu/py3/1.14 +# time python main_features.py --iteration_step=$SLURM_ARRAY_TASK_ID --stego_or_cover=stego --features=DCTR --path_folder_stego='/gpfswork/rech/srp/commun/python3/tifs_protocol_efficientnet/experience_512_100_0_4_SGE_efnetB0s1_xunet_srnet/' --path_folder_stego_0='/gpfswork/rech/srp/commun/JPEG_100_512/J_UNI_0_4_npy/' --path_folder_cover='/gpfswork/rech/srp/commun/JPEG_100_512/c_coeffs/' --path_save='/gpfswork/rech/srp/commun/python3/tifs_protocol_efficientnet/experience_512_100_0_4_SGE_efnetB0s1_xunet_srnet/DCTR_GFR/' --model='None' --QF=100 --permutation_files='../tifs_protocol/models/permutation_files.npy' + +# module load python/2.7.16 +# time python main_features.py --iteration_step=$SLURM_ARRAY_TASK_ID \ +# --stego_or_cover=stego \ +# --features=JRM \ +# --path_folder_stego='/gpfswork/rech/srp/commun/python3/tifs_protocol_efficientnet/experience_512_75_0_4_SGE_xunet/' \ +# --folder_model='/gpfswork/rech/srp/commun/python3/tifs_protocol_efficientnet/models/' \ +# --path_folder_stego_0='/gpfswork/rech/srp/commun/JPEG_75_512/J_UNI_0_4_npy/' \ +# --path_folder_cover='/gpfswork/rech/srp/commun/JPEG_75_512/c_coeffs/' \ +# --path_save='/gpfswork/rech/srp/commun/python3/tifs_protocol_efficientnet/experience_512_75_0_4_SGE_xunet/' \ +# --QF=75 \ +# --permutation_files='/gpfswork/rech/srp/commun/python3/tifs_protocol_efficientnet/models/permutation_files.npy' \ +# --nb_cores=32 + + +module load python/2.7.16 +time python main_features.py --iteration_step=$SLURM_ARRAY_TASK_ID \ + --stego_or_cover=stego \ + --features=JRM \ + 
--path_folder_stego='/gpfswork/rech/srp/commun/python3/tifs_protocol_efficientnet/experience_512_75_0_4_SGE_efnetB0s1_xunet_srnet/' \ + --folder_model='/gpfswork/rech/srp/commun/python3/tifs_protocol_efficientnet/models/' \ + --path_folder_stego_0='/gpfswork/rech/srp/commun/JPEG_75_512/J_UNI_0_4_npy/' \ + --path_folder_cover='/gpfswork/rech/srp/commun/JPEG_75_512/c_coeffs/' \ + --path_save='/gpfswork/rech/srp/commun/python3/tifs_protocol_efficientnet/experience_512_75_0_4_SGE_efnetB0s1_xunet_srnet/' \ + --QF=75 \ + --permutation_files='/gpfswork/rech/srp/commun/python3/tifs_protocol_efficientnet/models/permutation_files.npy' \ + --nb_cores=32 + + \ No newline at end of file diff --git a/blind_steganalysts/main_classifier.py b/blind_steganalysts/main_classifier.py new file mode 100644 index 0000000000000000000000000000000000000000..2b388d7abc5f7101084f0e4e491af472e4446321 --- /dev/null +++ b/blind_steganalysts/main_classifier.py @@ -0,0 +1,62 @@ +import numpy as np +import os, sys +import argparse + +from LCLSMR import Classifier + + +if __name__ == '__main__': + + argparser = argparse.ArgumentParser(sys.argv[0]) + + argparser.add_argument('--path_folder', type=str,default='/home/bernasol/python3/DCTR_GFR/experience_0_2/') + argparser.add_argument('--iteration_step', type=int) + + argparser.add_argument('--features',type=str) #DCTR, GFR or JRM + argparser.add_argument('--model',type=str) #xu-net or srnet + + argparser.add_argument('--train_size', type=int, default=4000) + argparser.add_argument('--valid_size', type=int, default=1000) + argparser.add_argument('--test_size', type=int, default=5000) + + #argparser.add_argument('--model',type=str,default='xu_net') + + conf = argparser.parse_args() + conf.path_proj_cover_file = conf.path_folder+'proj_cover_'+conf.features + conf.path_proj_stego_file = conf.path_folder+'proj_data_adv_'+str(conf.iteration_step)+'_'+conf.features + + path_cover = conf.path_folder+'cover_'+conf.features+'.npy' + if(conf.model=='None'): + paths_stego = [conf.path_folder+'data_adv_'+str(i)+'_'+conf.features+'.npy' for i in range(conf.iteration_step+1)] + else: + paths_stego = [conf.path_folder+'data_adv_0_'+conf.features+'.npy',conf.path_folder+'data_adv_0_'+conf.features+'.npy'] + for i in range(1,conf.iteration_step+1): + paths_stego += [conf.path_folder+'data_adv_xu_net_'+str(i)+'_'+conf.features+'.npy', conf.path_folder+'data_adv_srnet_'+str(i)+'_'+conf.features+'.npy'] + cover, stegos = np.load(path_cover), np.array([np.load(path_stego) for path_stego in paths_stego]) + if conf.iteration_step==0: + index = np.zeros(conf.train_size+conf.valid_size+conf.test_size,dtype=int) + else: + index = np.load(conf.path_folder+'data_train_'+str(conf.iteration_step)+'/index.npy') + stego = np.array([stegos[x,i] for i,x in enumerate(index)]) + + cover_tr, cover_va, cover_te = \ + cover[:conf.train_size], \ + cover[conf.train_size:conf.train_size+conf.valid_size], \ + cover[conf.train_size+conf.valid_size:conf.train_size+conf.valid_size+conf.test_size] + stego_tr, stego_va, stego_te = \ + stego[:conf.train_size], \ + stego[conf.train_size:conf.train_size+conf.valid_size], \ + stego[conf.train_size+conf.valid_size:conf.train_size+conf.valid_size+conf.test_size] + + lin_class = Classifier(conf) + PE_train , PINV , th = lin_class.train(np.concatenate((cover_tr,cover_va),axis=0), \ + np.concatenate((stego_tr,stego_va),axis=0)) + TFP_tr , TFN_tr , TTN_tr , TTP_tr , PE_min , PE_true_tr = lin_class.test(cover_tr, stego_tr, conf) + TFP_va , TFN_va , TTN_va , TTP_va , PE_min , PE_true_va 
= lin_class.test(cover_va, stego_va, conf) + TFP_te , TFN_te , TTN_te , TTP_te , PE_min , PE_true_te = lin_class.test(cover_te, stego_te, conf) + + np.save(conf.path_folder+'train_'+str(conf.iteration_step)+'_'+conf.features+'.npy', np.array([PE_true_tr,PE_true_va,PE_true_te])) + + print(PE_true_tr,PE_true_va,PE_true_te) + + diff --git a/blind_steganalysts/main_features.py b/blind_steganalysts/main_features.py new file mode 100644 index 0000000000000000000000000000000000000000..1fc42fdbddd7bf6d46de02a35032f9f61be7e7f2 --- /dev/null +++ b/blind_steganalysts/main_features.py @@ -0,0 +1,233 @@ +import os, sys +import numpy as np + +import DCTR +import GFR +import JRM + +import multiprocessing +import argparse + + +from extractor import ConcatExtractors + +# List available extractors + + +def save(file, features) : + """Saves features in a file.""" + np.save(file, features) + + +def load(file) : + """Returns features saved in a file""" + ret = np.load(file) + msg('Opened ' + file, 'debug') + return ret + + +def join(features_list) : + """ + Given a list of features representing sets of images, returns a feature matrix representing every image. + + :param features_list: list of numpy arrays + :rtype: a numpy array (its number of lines is the sum of line counts of features_list's elements) + """ + return np.concatenate(features_list, axis=0) # only has to take every lines + + +def count_features(feat_files) : + """ + Counts total number of features contained in provided files. + + :param feat_files: a list of files to open + """ + nb_features = 0 + for file in feat_files : + feats = load(file) + nb_features += np.size(feats, axis=0) + return nb_features + +def process(files, sizes, feat_dir, prefix='cover',seed=0) : + """ + Process for cover or stego : read files and separate them in groups reparted with sizes specified. + + :files: files we load features from + :sizes: list of the number of features needed per files + the first item corresponds to training, remaining to tests + :prefix: string to start the outputed filename with + """ + current_file = 0 # index of last loaded file + current_pool = load(files[0]) # features from last loaded file that were not used yet + + + print('sizes:' , sizes) + sum_size = sizes[0]+sizes[1] + print('sum:' , sum_size) + #for i in range(len(sizes)) : + # Fills this matrix with correct number of features + #print current_pool + out_feats = np.zeros((0, np.size(current_pool, axis=1))) + while np.size(out_feats, axis=0) < sum_size : + # Number of features we want to transfer from current file + out_feats = np.concatenate([out_feats, current_pool], axis=0) + + # Loads a new file if necessary + if current_file + 1 < len(files) : + current_file += 1 + current_pool = load(files[current_file]) + + #seed = 1000 #np.random.randint(10000) + im_idx = range(sum_size) + if seed!=0: + np.random.seed(seed) + np.random.shuffle(im_idx) + + save('%s/%s_train' % (feat_dir, prefix), out_feats[im_idx[:sizes[0]],:]) + save('%s/%s_test_%d' % (feat_dir, prefix, 0), out_feats[im_idx[sizes[0]:],:]) + + + +def separate_features(cover_files, stego_files, feat_dir, nb_learn=None, ratio=0.5, learn_split=1,reproducible=1) : + """ + Keeps `nb_learn` images from cover and stego for training. + The remaining is used for tests, separated in `learn_split` files. + + The goal of this function is to avoid loading all features in the ram simultaneously. 
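    For example (a sketch based on how process() above writes its output, and assuming partition()
    simply returns the remainder when learn_split=1): with cover features spread over several .npy
    files totalling 10,000 vectors and nb_learn=4000, feat_dir ends up containing cover_train.npy
    (4,000 rows) and cover_test_0.npy (the remaining 6,000 rows); the same is done for stego.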
+ + :param cover_files: list of files containing cover features + :param stego_files: list of files containing stego features + :param feat_dir: directory in which provided features will be saved + :param nb_learn: number of stego/cover images used for training + :param ratio: if `nb_learn` isn't specified, the ratio to keep + :param learn_split: number of splited tests to generate + """ + + msg('Counting features before concatenation', 'concat') + nb_tot_cover = count_features(cover_files) + nb_tot_stego = count_features(stego_files) + nb_both = min(nb_tot_cover, nb_tot_stego) + msg('Found %d cover and %d stego' % (nb_tot_cover, nb_tot_stego), 'concat') + # Auto-set of nb_learn + if nb_learn is None or nb_learn >= nb_both : + nb_learn = int(nb_both * ratio) + + # cover_files.sort() # ensure files will be used in same order + # stego_files.sort() # ensure files will be used in same order + + # seed = 1000 #np.random.randint(10000) + # np.random.seed(seed) + # np.random.shuffle(cover_files) + # np.random.seed(seed) + # np.random.shuffle(stego_files) + + if reproducible: + seed = 1 + else: + seed = np.random.randint(10000) # = 0 + # Process for cover + msg('Concatenates cover features', 'concat') + sizes = [nb_learn] + sizes += partition(nb_tot_cover - nb_learn, learn_split) + process(cover_files, sizes, feat_dir, 'cover',seed) + + # Process for stego + msg('Concatenates stego features', 'concat') + sizes = [nb_learn] + sizes += partition(nb_tot_stego - nb_learn, learn_split) + process(stego_files, sizes, feat_dir, 'stego',seed) + + msg('Concatenation is over', 'concat') + + + + +#files = np.load('../SRNet/permutation_files.npy') +#files = np.array([x[:-4]+'.npy' for x in files]) + + + +if __name__ == '__main__': + + argparser = argparse.ArgumentParser(sys.argv[0]) + + argparser.add_argument('--nb_cores', type=int) + + argparser.add_argument('--iteration_step', type=int, default=0) + argparser.add_argument('--stego_or_cover', type=str, default='stego') + argparser.add_argument('--folder_model', type=str) + argparser.add_argument('--features',type=str) #DCTR, GFR or JRM + + argparser.add_argument('--path_folder_stego', type=str) + argparser.add_argument('--path_folder_stego_0', type=str) + argparser.add_argument('--path_folder_cover', type=str) + argparser.add_argument('--path_save', type=str) + + argparser.add_argument('--train_size', type=int, default=4000) + argparser.add_argument('--valid_size', type=int, default=1000) + argparser.add_argument('--test_size', type=int, default=10000) + argparser.add_argument('--permutation_files', type=str) + + argparser.add_argument('--QF', type=int) + + conf = argparser.parse_args() + + conf.color = False + #conf.nb_cores = multiprocessing.cpu_count() + #conf.nb_cores = len(os.sched_getaffinity(0)) + #conf.nb_cores = 16 + + print(conf.nb_cores) + + files = np.load(conf.permutation_files) + files = np.array([x[:-4]+str('.npy') for x in files]) + + #files_tr = files[:conf.train_size] + #files_te = files[conf.train_size+conf.valid_size:conf.train_size+conf.valid_size+conf.test_size] + + + # extractors = [DCTR.Extractor(conf), GFR.Extractor(conf)] + # extract = ConcatExtractors(extractors) + # extract.run(files) + + if conf.features=='DCTR': + extract = DCTR.Extractor(conf) + elif conf.features=='GFR': + extract = GFR.Extractor(conf) + elif conf.features=='JRM': + extract = JRM.Extractor(conf) + + if conf.stego_or_cover=='stego': + + if(conf.iteration_step>0): + path_stego = conf.path_folder_stego + 'data_adv_'+str(conf.iteration_step) +'/adv_final/' + 
dir_log = conf.path_save + 'data_adv_' + str(conf.iteration_step) + '_'+conf.features+'.npy' + else: + path_stego = conf.path_folder_stego_0 + dir_log = conf.path_save + 'data_adv_0_'+conf.features+'.npy' + + + path_stego_0 = conf.path_folder_stego_0 + f = os.listdir(path_stego) + files_stego = np.array([path_stego+x if x in f else path_stego_0+x for x in files ]) + features = extract.run_set(files_stego) + + if not os.path.exists(conf.path_save): + os.makedirs(conf.path_save) + + np.save(dir_log, features) + + else: + path_cover = conf.path_folder_cover + files_cover = np.array([path_cover+x for x in files]) + features = extract.run_set(files_cover) + + if not os.path.exists(conf.path_save): + os.makedirs(conf.path_save) + + np.save(conf.path_save + 'cover_'+conf.features+'.npy', features) + + + + + diff --git a/blind_steganalysts/tools.py b/blind_steganalysts/tools.py new file mode 100644 index 0000000000000000000000000000000000000000..cc65110cbc200b81cbcbd9429757330351f800ab --- /dev/null +++ b/blind_steganalysts/tools.py @@ -0,0 +1,28 @@ +import time +from sys import stdout + + +def msg(msg, type='info', show_time=True, spacing=10, format_only=False) : + """ + Prompts a message in the standart output. + + :param msg: content of the message + :param type: type of the message (eg : error, info, warning) + :param show_time: whether time should be prompted or not + :param spacing: width of the flag column + :param format_only: wether the message sould be printed or not + + :return: builded message + """ + spaces = (spacing - len(type) - 2) * ' ' + head = '[%s]%s ' % (type, spaces) + if show_time : + head = time.strftime('%d/%m %H:%M:%S ') + head + + msg = head + msg # adds head to first line + msg = msg.replace('\n', '\n' + head) # adds head to other lines + + if not format_only : + print(msg) + stdout.flush() + return msg diff --git a/data_loader.py b/data_loader.py index 20ce5e48c2bad845a8a9be570aed97190fbc72a1..9dcdb2a51e1481a080ec6ea1dae2e1bcda2315bd 100644 --- a/data_loader.py +++ b/data_loader.py @@ -31,7 +31,7 @@ def get_train_transforms(): A.VerticalFlip(p=0.5), #A.ToGray(always_apply=True, p=1.0), #A.Resize(height=512, width=512, p=1.0), - ToTensorV2(p=1.0), + ToTensorV2(p=1.0), # Si le V2 n'exite pas chez toi, tu peux utiliser ToTensor également ], p=1.0) def get_valid_transforms(): @@ -48,7 +48,6 @@ def onehot(size, target): return vec - def onehot(size, target): vec = torch.zeros(size, dtype=torch.float32) vec[target] = 1. 
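# The DatasetRetriever hunks below draw a fresh stego on the fly from a cost map:
# compute_proba (defined elsewhere) turns the cost map rho into per-coefficient
# probabilities of a -1/0/+1 change for the given message length, and the change is
# then sampled with the Gumbel-max trick. A minimal self-contained sketch of that
# sampling step (toy sizes; uniform probabilities stand in for compute_proba output):
import numpy as np

im_size = 8                                            # toy block size for illustration
cover = np.zeros((im_size, im_size), dtype=np.float32) # hypothetical cover coefficients
p = np.full((3, im_size, im_size), 1.0 / 3.0)          # stand-in for compute_proba(rho, message_length)
u = np.random.uniform(0, 1, (3, im_size, im_size))
# argmax over the Gumbel-perturbed log-probabilities picks one of the three
# modifications per coefficient; subtracting 1 maps the index {0, 1, 2} to {-1, 0, +1}.
stego = (cover + np.argmax(-np.log(-np.log(u)) + np.log(p + 1e-30), axis=0) - 1).astype(np.float32)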
@@ -56,33 +55,38 @@ def onehot(size, target): class DatasetRetriever(Dataset): - def __init__(self, image_names, params, indexs_db, kinds=None, labels=None, transforms=None, pair_training=False, spatial=False): + def __init__(self, image_names, folder_model, QF, emb_rate, image_size, \ + data_dir_cover, data_dir_stego_0, cost_dir, data_dir_prot, \ + H1_filter, L1_filter, L2_filter, indexs_db, train_on_cost_map=False, \ + labels=None, transforms=None, pair_training=False, spatial=False): + super().__init__() - self.kinds = kinds self.image_names = image_names self.indexs_db = indexs_db self.labels = labels self.transforms = transforms - self.c_quant = np.load(params.folder_model + 'c_quant_'+str(params.QF)+'.npy') + self.c_quant = np.load(folder_model + 'c_quant_'+str(QF)+'.npy') self.WET_COST = 10 ** 13 - self.emb_rate = params.emb_rate - self.im_size = params.image_size - self.cover_path = params.data_dir_cover - self.cost_dir = params.cost_dir + self.emb_rate = emb_rate + self.im_size = image_size + self.cover_path = data_dir_cover + self.data_dir_stego_0 = data_dir_stego_0 + self.cost_dir = cost_dir self.pair_training=pair_training - self.data_dir_prot = params.data_dir_prot + self.data_dir_prot = data_dir_prot self.spatial=spatial + self.train_on_cost_map=train_on_cost_map if self.spatial: - if(params.H1_filter is None): + if(H1_filter is None): self.H1_filter = 4 * np.array([[-0.25, 0.5, -0.25], [0.5, -1, 0.5], [-0.25, 0.5, -0.25]]) self.L1_filter = (1.0/9.0)*np.ones((3, 3)) self.L2_filter = (1.0/225.0)*np.ones((15, 15)) else: - self.H1_filter = np.load(params.H1_filter).reshape((3,3)) - self.L1_filter = np.load(params.L1_filter).reshape((3,3)) - self.L2_filter = np.load(params.L2_filter).reshape((15,15)) + self.H1_filter = np.load(H1_filter).reshape((3,3)) + self.L1_filter = np.load(L1_filter).reshape((3,3)) + self.L2_filter = np.load(L2_filter).reshape((15,15)) def __getitem__(self, index: int): @@ -99,28 +103,37 @@ class DatasetRetriever(Dataset): message_length = nz_AC*self.emb_rate index_db = self.indexs_db[index] - if(self.spatial): - rho = HILL(cover, self.H1_filter, self.L1_filter, self.L2_filter) - else: + if self.train_on_cost_map: # Load the cost map and generate new stego + if(self.spatial): + rho = HILL(cover, self.H1_filter, self.L1_filter, self.L2_filter) + else: + try: + cost_dir = self.data_dir_prot + 'data_adv_'+ str(index_db) +'/adv_cost/' + rho = np.load(cost_dir+image_name[:-3]+'npy') + except: + cost_dir = self.cost_dir + rho = np.load(cost_dir+image_name[:-3]+'npy') + if(rho.shape==(self.im_size, self.im_size)): + rho = np.reshape(rho,(1,self.im_size, self.im_size)) + rho = np.concatenate((np.copy(rho), np.zeros_like(rho), np.copy(rho)),axis=0) + if(self.spatial): + rho[0,cover <= 0] = self.WET_COST + rho[2,cover >= 255] = self.WET_COST + if not self.spatial: + rho[0,cover < -1023] = self.WET_COST + rho[2,cover > 1023] = self.WET_COST + + p = compute_proba(rho, message_length) + u = np.random.uniform(0,1,(3, self.im_size, self.im_size)) + stego = (cover + np.argmax(-np.log(-np.log(u))+np.log(p+1e-30),axis=0) - 1).astype(np.float32) + + else: # Load the stego try: - cost_dir = self.data_dir_prot + 'data_adv_'+ str(index_db) +'/adv_cost/' - rho = np.load(cost_dir+image_name[:-3]+'npy') + dir_stego = self.data_dir_prot + 'data_adv_'+ str(index_db) +'/adv_final/' + stego = np.load(dir_stego+image_name[:-3]+'npy') except: - cost_dir = self.cost_dir - rho = np.load(cost_dir+image_name[:-3]+'npy') - if(rho.shape==(self.im_size, self.im_size)): - rho = 
np.reshape(rho,(1,self.im_size, self.im_size)) - rho = np.concatenate((np.copy(rho), np.zeros_like(rho), np.copy(rho)),axis=0) - #if(self.spatial): - # rho[0,cover <= 0] = self.WET_COST - # rho[2,cover >= 255] = self.WET_COST - if not self.spatial: - rho[0,cover < -1023] = self.WET_COST - rho[2,cover > 1023] = self.WET_COST - - p = compute_proba(rho, message_length) - u = np.random.uniform(0,1,(3, self.im_size, self.im_size)) - stego = (cover + np.argmax(-np.log(-np.log(u))+np.log(p+1e-30),axis=0) - 1).astype(np.float32) + dir_stego = self.data_dir_stego_0 + stego = np.load(dir_stego+image_name[:-3]+'npy') if not self.spatial: cover = compute_spatial_from_jpeg(cover, self.c_quant)/255 @@ -143,7 +156,7 @@ class DatasetRetriever(Dataset): return((cover,stego), (target_cover, target_stego)) else: - kind, image_name, label = self.kinds[index], self.image_names[index], self.labels[index] + image_name, label = self.image_names[index], self.labels[index] if(self.spatial): cover = np.asarray(Image.open(path.join(self.cover_path, image_name[:-3]+'pgm')),dtype=np.float32) message_length= cover.size*self.emb_rate @@ -152,32 +165,41 @@ class DatasetRetriever(Dataset): nz_AC = np.sum(cover!=0)-np.sum(cover[::8,::8]!=0) message_length = nz_AC*self.emb_rate - if kind =='Cover': + if label == 0: image = cover - elif kind == 'Stego': - + elif label == 1: index_db = self.indexs_db[index] - if(self.spatial): - rho = HILL(cover, self.H1_filter, self.L1_filter, self.L2_filter) - else: + + if self.train_on_cost_map: # Load the cost map and generate new stego + if(self.spatial): + rho = HILL(cover, self.H1_filter, self.L1_filter, self.L2_filter) + else: + try: + cost_dir = self.data_dir_prot + 'data_adv_'+ str(index_db) +'/adv_cost/' + rho = np.load(cost_dir+image_name[:-3]+'npy') + except: + cost_dir = self.cost_dir + rho = np.load(cost_dir+image_name[:-3]+'npy') + if(rho.shape==(self.im_size, self.im_size)): + rho = np.reshape(rho,(1,self.im_size, self.im_size)) + rho = np.concatenate((np.copy(rho), np.zeros_like(rho), np.copy(rho)),axis=0) + if(self.spatial): + rho[0,cover <= 0] = self.WET_COST + rho[2,cover >= 255] = self.WET_COST + if not self.spatial: + rho[0,cover < -1023] = self.WET_COST + rho[2,cover > 1023] = self.WET_COST + p = compute_proba(rho, message_length) + u = np.random.uniform(0,1,(3, self.im_size, self.im_size)) + stego = cover + np.argmax(-np.log(-np.log(u))+np.log(p+1e-30),axis=0) - 1 + else: # Load the stego try: - cost_dir = self.data_dir_prot + 'data_adv_'+ str(index_db) +'/adv_cost/' - rho = np.load(cost_dir+image_name[:-3]+'npy') + dir_stego = self.data_dir_prot + 'data_adv_'+ str(index_db) +'/adv_final/' + stego = np.load(dir_stego+image_name[:-3]+'npy') except: - cost_dir = self.cost_dir - rho = np.load(cost_dir+image_name[:-3]+'npy') - if(rho.shape==(self.im_size, self.im_size)): - rho = np.reshape(rho,(1,self.im_size, self.im_size)) - rho = np.concatenate((np.copy(rho), np.zeros_like(rho), np.copy(rho)),axis=0) - #if(self.spatial): - # rho[0,cover <= 0] = self.WET_COST - # rho[2,cover >= 255] = self.WET_COST - if not self.spatial: - rho[0,cover < -1023] = self.WET_COST - rho[2,cover > 1023] = self.WET_COST - p = compute_proba(rho, message_length) - u = np.random.uniform(0,1,(3, self.im_size, self.im_size)) - stego = cover + np.argmax(-np.log(-np.log(u))+np.log(p+1e-30),axis=0) - 1 + dir_stego = self.data_dir_stego_0 + stego = np.load(dir_stego+image_name[:-3]+'npy') + image = stego image = image.astype(np.float32) @@ -230,18 +252,19 @@ class LabelSmoothing(nn.Module): return 
torch.nn.functional.cross_entropy(x, target) -def load_dataset(params, pair_training=False): +def load_dataset(iteration_step, permutation_files, train_size, valid_size, test_size, \ + data_dir_prot, pair_training): dataset = [] - im_list = np.load(params.permutation_files) - folds = np.zeros(params.train_size + params.valid_size + params.test_size,dtype=np.int8) - folds[params.train_size:]+=1 - folds[params.train_size+params.valid_size:]+=1 + im_list = np.load(permutation_files) + folds = np.zeros(train_size + valid_size + test_size,dtype=np.int8) + folds[train_size:]+=1 + folds[train_size+valid_size:]+=1 - if(params.iteration_step>0): - indexs_db = np.load(params.data_dir_prot +'data_train_'+str(params.iteration_step)+'/index.npy') + if(iteration_step>0): + indexs_db = np.load(data_dir_prot +'data_train_'+str(iteration_step)+'/index.npy') else: - indexs_db = np.zeros(params.train_size+params.valid_size + params.test_size, dtype=np.int8) + indexs_db = np.zeros(train_size+valid_size + test_size, dtype=np.int8) if pair_training: for im,fold,ind in zip(im_list, folds, indexs_db): @@ -252,14 +275,13 @@ def load_dataset(params, pair_training=False): }) else: - for label, kind in enumerate(['Cover', 'Stego']): + for label in range(2): # 0 for cover and 1 for stego for im,fold,ind in zip(im_list, folds,indexs_db): - if(kind=='Cover'): + if(label==0): index = -1 else: index=ind dataset.append({ - 'kind': kind, 'image_name': im, 'label': label, 'fold': fold, diff --git a/main.py b/main.py index 9d56d5c18afcd3aefb18d06990bada81cbe28b65..0a6bee3b6c442d9ac81c37e7475b8ccbcfee60a9 100644 --- a/main.py +++ b/main.py @@ -9,97 +9,27 @@ import sys from shutil import copy import os import glob -from time import time, sleep -from datetime import date -from generate_train_db import generate_train_db - -def command_main(params, iteration_step, mode, model=None): - - if (mode=='gen'): - com = ' --iteration_step='+str(iteration_step) - com+= ' --data_dir_prot='+params.data_dir_prot - com+= ' --data_dir_cover='+params.data_dir_cover - com+= ' --image_size='+str(params.image_size) - com+= ' --QF=' + str(params.QF) - com+= ' --folder_model='+params.folder_model - com+= ' --permutation_files=' + params.permutation_files - com+= ' --version_eff=' + params.version_eff - com+= ' --stride=' + str(params.stride) - com+= ' --n_loops=' + str(params.n_loops) - - elif(mode=='classif'): - com = ' --batch_size_eval='+str(params.training_dictionnary[model]['batch_size_eval']) - com+= ' --data_dir_stego_0='+params.data_dir_stego_0 - com+= ' --train_size='+str(params.train_size) - com+= ' --valid_size='+str(params.valid_size) - com+= ' --test_size='+str(params.test_size) - com+= ' --model='+model - - elif (mode=='attack'): - com = ' --attack=' + str(params.attack) - com+= ' --attack_last=' + str(params.attack_last) - com+= ' --emb_rate=' + str(params.emb_rate) - com+= ' --iteration_step=' + str(iteration_step) - com+= ' --cost_dir=' + str(params.cost_dir) - com+= ' --idx_start=$SLURM_ARRAY_TASK_ID' - com+= ' --batch_adv=' + str(params.batch_adv) - com+= ' --n_iter_max_backpack=' + str(params.n_iter_max_backpack) - com+= ' --N_samples=' + str(params.N_samples) - com+= ' --tau_0=' + str(params.tau_0) - com+= ' --precision=' + str(params.precision) - com+= ' --lr=' + str(params.lr) - com+= ' --model=' + params.model - - - elif (mode=='train'): - com = ' --train_size='+str(params.train_size) - com+= ' --valid_size='+str(params.valid_size) - com+= ' --test_size='+str(params.test_size) - com+= ' --model='+model - com+= ' 
--num_of_threads='+str(params.num_of_threads) - com+= ' --emb_rate=' + str(params.emb_rate) - com+= ' --cost_dir=' + str(params.cost_dir) - com+= ' --spatial=' + str(params.spatial) - - com+= ' --batch_size_classif='+str(params.training_dictionnary[model]['batch_size_classif']) - com+= ' --batch_size_eval='+str(params.training_dictionnary[model]['batch_size_eval']) - com+= ' --epoch_num='+str(params.training_dictionnary[model]['epoch_num']) - com+= ' --CL=' + str(params.training_dictionnary[model]['CL']) - com+= ' --start_emb_rate=' + str(params.training_dictionnary[model]['start_emb_rate']) - com+= ' --pair_training=' + str(params.training_dictionnary[model]['pair_training']) - - - return(com) - -def is_finished(params, iteration): - liste = os.popen("squeue --user=ulc18or").read() - liste = liste.split('\n')[1:] - for x in liste: - if (params.label + '_' in x): - return(False) - return(True) - -def wait(params,iteration_step): - while not is_finished(params, iteration_step): - sleep(60) +from generate_train_db import generate_train_db +from write_description import describe_exp_txt +from write_jobs import create_training_dictionnary, wait, write_command, run_job -def p_error(params, iteration_f, model): - train_size, valid_size, test_size = params.train_size, params.valid_size, params.test_size +def p_error(iteration_f, model, train_size, valid_size, test_size, data_dir_prot): if(iteration_f>0): - indexs = np.load(params.data_dir_prot + 'data_train_'+str(iteration_f)+'/index.npy') + indexs = np.load(data_dir_prot + 'data_train_'+str(iteration_f)+'/index.npy') else: indexs = np.zeros(train_size+valid_size+test_size, dtype=np.int8) - probas_stego_z = np.array([np.load(params.data_dir_prot+'data_adv_'+str(i)+'/eval_'+model+'_'+str(iteration_f)+'/probas.npy') \ + probas_stego_z = np.array([np.load(data_dir_prot+'data_adv_'+str(i)+'/eval_'+model+'_'+str(iteration_f)+'/probas.npy') \ for i in range(iteration_f+1)]) - predictions_cover = np.load(params.data_dir_prot+'cover/eval_'+model+'_'+str(iteration_f)+'/probas.npy') + predictions_cover = np.load(data_dir_prot+'cover/eval_'+model+'_'+str(iteration_f)+'/probas.npy') predictions_stego = np.array([probas_stego_z[ind,i] for i,ind in enumerate(indexs)]) + # Choose best threshold thresholds = np.linspace(0,1,51)[1:-1] PE_train, PE_valid, PE_test = [], [], [] - # Choose best threshold + for t in thresholds: pred_cover, pred_stego = predictions_cover[:train_size], predictions_stego[:train_size] fa = np.sum(pred_cover>t)/train_size @@ -109,7 +39,6 @@ def p_error(params, iteration_f, model): fa = np.sum(pred_cover>t)/valid_size md = np.sum(pred_stego<=t)/valid_size pe_valid = (fa+md)/2 - #np.save(params.dir_log+'pe_valid/'+str(index_model)+'.npy',pe_valid) pred_cover, pred_stego = predictions_cover[train_size+valid_size:], predictions_stego[train_size+valid_size:] fa = np.sum(pred_cover>t)/test_size md = np.sum(pred_stego<=t)/test_size @@ -118,242 +47,139 @@ def p_error(params, iteration_f, model): PE_valid.append(pe_valid) PE_test.append(pe_test) PE_valid = np.asarray(PE_valid) - mini = np.argmin(PE_valid) - dir_log = params.data_dir_prot+'train_'+model+'_'+str(iteration_f) + mini = np.argmin(PE_valid) # choose best thresholds on validation set + dir_log = data_dir_prot+'train_'+model+'_'+str(iteration_f) return(PE_train[mini], PE_valid[mini], PE_test[mini]) -def run_job(params, mode, command, iteration, gpu=True, batch_adv=None): # Index if generation of adversarial embedding - - name = params.label + '_' + str(iteration) +'_'+ mode - job_file = 
"./%s.job" % name - - with open(job_file, 'w+') as fh: - fh.writelines("#!/bin/bash\n") - fh.writelines("#SBATCH --nodes=1\n") - fh.writelines("#SBATCH --job-name=%s\n" % name) - - if(gpu): - fh.writelines("#SBATCH --account=%s\n" % 'srp@gpu') - fh.writelines("#SBATCH --gres=gpu:1\n") - fh.writelines("#SBATCH --ntasks=1\n") - #fh.writelines("#SBATCH --partition=gpu_p1\n") - fh.writelines("#SBATCH --hint=nomultithread\n") - #fh.writelines("#SBATCH --mem=16G\n") - fh.writelines("#SBATCH --time=10:00:00 \n") - - else: - fh.writelines("#SBATCH --account=%s\n" % 'kun@cpu') - #fh.writelines("#SBATCH --partition=cpu_p1\n") - fh.writelines("#SBATCH --time=2:00:00 \n") - - if(mode=='attack'): - fh.writelines("#SBATCH -C v100-32g\n") - fh.writelines("#SBATCH --array="+str(0)+'-'+str(params.n_images//batch_adv)+" \n") - fh.writelines("module purge\n") - fh.writelines("module load python/3.7.5\n") - fh.writelines("module load pytorch-gpu/py3/1.7.1\n") - - fh.writelines("python -u " + command) - - elif('train' in mode): - fh.writelines("#SBATCH -C v100-32g\n") - fh.writelines("#SBATCH --cpus-per-task="+str(params.num_of_threads)+"\n") - fh.writelines("module load pytorch-gpu/py3/1.7.1\n") - fh.writelines("time python -u " + command) - - else: - fh.writelines("module load pytorch-gpu/py3/1.7.1\n") - fh.writelines("time python -u " + command) - - os.system("sbatch %s" %job_file) def create_folder(path, list_dir): for d in list_dir: if not os.path.exists(path+d+'/'): os.makedirs(path+d+'/') -def run_iteration(params, iteration_step): - main_command = command_main(params, iteration_step, 'gen') +def run_iteration(iteration_step, label, data_dir_prot, data_dir_cover, data_dir_stego_0, cost_dir, \ + image_size, QF, folder_model, permutation_files, version_eff, stride, n_loops, \ + model, train_size, valid_size, test_size, attack, attack_last, emb_rate, \ + batch_adv, n_iter_max_backpack, N_samples, tau_0, precision, lr, \ + num_of_threads, training_dictionnary, spatial, strategy): + + n_images = train_size+valid_size+test_size + models = model.split(',') - t1 = time() + def custom_command(mode, iteration_step, my_model): + return(write_command(mode, iteration_step, my_model, data_dir_prot, data_dir_cover, data_dir_stego_0, cost_dir, \ + image_size, QF, folder_model, permutation_files, version_eff, stride, n_loops, \ + train_size, valid_size, test_size, attack, attack_last, emb_rate, \ + batch_adv, n_iter_max_backpack, N_samples, tau_0, precision, lr, \ + num_of_threads, training_dictionnary, spatial)) if(iteration_step>0): # GENERATE ADV DATA BASE OF THE BEST LAST CLASSIFIER - directory_adv = params.data_dir_prot+'data_adv_'+str(iteration_step)+'/' + directory_adv = data_dir_prot+'data_adv_'+str(iteration_step)+'/' create_folder(directory_adv,['adv_final', 'adv_cost',]) print('Generating adv step ' + str(iteration_step)) - num_batch = params.n_images // params.batch_adv - command = 'script_attack.py ' + main_command + command_main(params, iteration_step, 'attack') - run_job(params, 'attack', command, iteration_step, gpu=True, batch_adv = params.batch_adv) - wait(params,iteration_step) - print(time()-t1) + num_batch = n_images // batch_adv + command = 'script_attack.py ' + custom_command('attack',iteration_step, model) + run_job('attack', label, command, iteration_step, gpu=True, num_batch=num_batch) + wait(label) #EVALUATION OF ALL THE CLASSIFIERS ON THE NEW ADV DATA BASE for i in range(iteration_step): - for model in params.models: + for my_model in models: if(i)==-1: - directory = 
params.data_dir_prot+'cover/eval_'+model+'_'+str(i)+'/' + directory = data_dir_prot+'cover/eval_'+my_model+'_'+str(i)+'/' else: - directory = params.data_dir_prot+'data_adv_'+str(iteration_step)+'/eval_'+model+'_'+str(i)+'/' + directory = data_dir_prot+'data_adv_'+str(iteration_step)+'/eval_'+my_model+'_'+str(i)+'/' if not os.path.exists(directory): os.makedirs(directory) - print('Evaluation of classifier '+model+ ' ' + str(i)+' on database ' + str(iteration_step)) - command = 'script_evaluate_classif.py' + main_command + command_main(params, iteration_step, 'classif', model=model)\ + print('Evaluation of classifier '+my_model+ ' ' + str(i)+' on database ' + str(iteration_step)) + command = 'script_evaluate_classif.py' + custom_command('classif', iteration_step, my_model) \ + ' --iteration_f='+str(i)+' --iteration_adv='+str(iteration_step) - run_job(params, 'eval_'+str(model), command, iteration_step, gpu=True) + run_job('eval_'+str(my_model), label, command, iteration_step, gpu=True) - wait(params,iteration_step) - print(time()-t1) + wait(label) # GENERATION OF THE TRAIN DATA BASE - generate_train_db(params, iteration_step, params.strategy) + generate_train_db(iteration_step, strategy, models, permutation_files, data_dir_prot) # TRAINING NEW CLASSIFIER FOR EACH MODEL - for model in params.models: - print('Training '+model+' at iteration ' + str(iteration_step)) - create_folder(params.data_dir_prot,['train_'+model+'_'+str(iteration_step)]) - create_folder(params.data_dir_prot+'train_'+model+'_'+str(iteration_step)+'/', ['p_error']) - command = 'script_train.py' + main_command + command_main(params, iteration_step, 'train', model=model) - run_job(params, 'train_'+model, command, iteration_step, gpu=True) + for my_model in models: + print('Training '+my_model+' at iteration ' + str(iteration_step)) + create_folder(data_dir_prot,['train_'+my_model+'_'+str(iteration_step)]) + command = 'script_train.py' + command_main('train', iteration_step, my_model) + run_job('train_'+my_model, label, command, iteration_step, gpu=True) - wait(params,iteration_step) - print(time()-t1) + wait(label) # EVALUATE NEW CLASSIFIERS ON ALL STEGO DATA BASES AND ON COVER for i in range(-1, iteration_step+1): # -1 for cover - for model in params.models: + for my_model in models: if(i)==-1: - directory = params.data_dir_prot+'cover/' + directory = data_dir_prot+'cover/' else: - directory = params.data_dir_prot+'data_adv_'+str(i)+'/' - create_folder(directory, ['eval_'+model+'_'+str(iteration_step)]) - print('Evaluation of classifier '+model+ ' ' + str(iteration_step)+' on database ' + str(i)) - command = 'script_evaluate_classif.py' + main_command + \ - command_main(params, iteration_step, 'classif', model=model) + ' --iteration_f='+str(iteration_step)+' --iteration_adv='+str(i) - run_job(params, 'eval_'+str(model), command, iteration_step, gpu=True) + directory = data_dir_prot+'data_adv_'+str(i)+'/' + create_folder(directory, ['eval_'+my_model+'_'+str(iteration_step)]) + print('Evaluation of classifier '+my_model+ ' ' + str(iteration_step)+' on database ' + str(i)) + command = 'script_evaluate_classif.py' + command_main( 'classif', iteration_step, my_model) \ + + ' --iteration_f='+str(iteration_step)+' --iteration_adv='+str(i) + run_job('eval_'+str(model), label, command, iteration_step, gpu=True) - wait(params,iteration_step) - print(time()-t1) + wait(label) - for model in params.models: - print(model, p_error(params, iteration_step, model)) + for my_model in models: + print(my_model, p_error(iteration_f, my_model, 
train_size, valid_size, test_size, data_dir_prot)) return(True) -def update_exp_txt(params, lines_to_append): - file_name = params.data_dir_prot+"description.txt" - # Open the file in append & read mode ('a+') - with open(file_name, "a+") as file_object: - appendEOL = False - # Move read cursor to the start of file. - file_object.seek(0) - # Check if file is not empty - data = file_object.read(100) - if len(data) > 0: - appendEOL = True - # Iterate over each string in the list - for line in lines_to_append: - # If file is not empty then append '\n' before first line for - # other lines always append '\n' before appending line - if appendEOL == True: - file_object.write("\n") - else: - appendEOL = True - # Append element at the end of file - file_object.write(line) - -def describe_exp_txt(params): - tab = [] - tab.append(date.today().strftime("%b-%d-%Y")) - tab.append('Launch of the protocol, starting from iteration ' + str(params.begin_step) + ' to ' + str(params.begin_step+params.number_steps) + '\n') - tab.append('Number of CPUs called : ' + str(params.num_of_threads) + '\n \n') - - tab.append('PARAMETERS \n') - - tab.append('Image characteristics \n') - tab.append('- QF = ' + str(params.QF) + '\n') - tab.append('- Image size = ' + str(params.image_size) + '\n') - tab.append('- Embedding rate = ' + str(params.emb_rate) + ' bpnzAC\n') - tab.append('- Cover images are taken in folder ' + params.data_dir_cover + '\n') - tab.append('- Stego images are taken in folder ' + params.data_dir_stego_0 + '\n') - tab.append('- Cost maps are taken in folder ' + params.cost_dir + '\n \n') - - tab.append('Protocol setup \n') - tab.append('- Strategy =' + params.strategy +' \n \n') - - tab.append('Model description \n') - if(len(params.models)==1): - tab.append('- Model architecture is ' + params.model + ' with the following setup :\n') - if params.model=='efnet': - tab.append(' - Efficient-net version is ' +str(params.version_eff) +' pretrained on image-net \n') - tab.append(' - First conv stem is with stride = ' +str(params.stride) +' \n \n') - elif params.model=='xunet': - tab.append(' - XuNet architecture is composed with '+str(params.n_loops) +' big blocks \n \n') - else: - tab.append('- The '+str(len(params.models)) +' model architectures are ' + params.model + ' with the following setup :\n') - if 'efnet' in params.models: - tab.append(' - Efficient-net version is ' +str(params.version_eff) +' pretrained on image-net \n') - tab.append(' - First conv stem is with stride = ' +str(params.stride) +' \n \n') - if 'xunet' in params.models: - tab.append(' - XuNet architecture is composed with '+str(params.n_loops) +' big blocks \n \n') - - - tab.append('Training setup \n') - tab.append('- Train size = '+str(params.train_size) +'\n') - tab.append('- Valid size = '+str(params.valid_size) +'\n') - tab.append('- Test size = '+str(params.test_size) +'\n') - tab.append('- Files permutation, which order determines train, valid and test sets is '+ params.permutation_files +'\n') + +def run_protocol(begin_step, number_steps, label, data_dir_prot, data_dir_cover, data_dir_stego_0, cost_dir, \ + image_size, QF, folder_model, permutation_files, version_eff, stride, n_loops, \ + model, train_size, valid_size, test_size, attack, attack_last, emb_rate, \ + batch_adv, n_iter_max_backpack, N_samples, tau_0, precision, lr, strategy,\ + num_of_threads, spatial, batch_size_classif_xu, batch_size_eval_xu, epoch_num_xu, \ + CL_xu, start_emb_rate_xu, pair_training_xu, batch_size_classif_sr, batch_size_eval_sr, epoch_num_sr, \ 
+ CL_sr, start_emb_rate_sr, pair_training_sr, batch_size_classif_ef, batch_size_eval_ef, epoch_num_ef, \ + CL_ef, start_emb_rate_ef, pair_training_ef, train_on_cost_map): + + training_dictionnary = create_training_dictionnary(model, batch_size_classif_xu, batch_size_eval_xu, epoch_num_xu, \ + CL_xu, start_emb_rate_xu, pair_training_xu, batch_size_classif_sr, batch_size_eval_sr, epoch_num_sr, \ + CL_sr, start_emb_rate_sr, pair_training_sr, batch_size_classif_ef, batch_size_eval_ef, epoch_num_ef, \ + CL_ef, start_emb_rate_ef, pair_training_ef, train_on_cost_map) - if(len(params.models)==1): - tab.append('- Model is trained during '+str(params.training_dictionnary[params.model]['epoch_num']) + ' epochs \n') - if(params.training_dictionnary[params.model]['pair_training']=='no'): - tab.append('- Pair training is not used \n') - tab.append('- Batch size is ' + str(params.training_dictionnary[params.model]['batch_size_classif']) + ' \n') - else: - tab.append('- Pair training is used \n') - tab.append('- Batch size is 2*' + str(params.training_dictionnary[params.model]['batch_size_classif']) + ' \n') - if(params.training_dictionnary[params.model]['CL']=='yes'): - tab.append('- Curriculum is used : the embedding rate starts from '+ str(params.training_dictionnary[params.model]['start_emb_rate']) +' and decreases every two epochs by factor 0.9 to reach target embedding rate '+str(params.emb_rate)+'\n \n') - else: - tab.append('- Curriculum is not used : the embedding rate = '+str(params.emb_rate)+' is constant during training \n \n') - else: - for model in params.models: - tab.append('- Model '+model+' is trained during '+str(params.training_dictionnary[model]['epoch_num']) + ' epochs \n') - if(params.training_dictionnary[model]['pair_training']=='no'): - tab.append('- Pair training is not used \n') - tab.append('- Batch size is ' + str(params.training_dictionnary[model]['batch_size_classif']) + ' \n') - else: - tab.append('- Pair training is used \n') - tab.append('- Batch size is 2*' + str(params.training_dictionnary[model]['batch_size_classif']) + ' \n') - if(params.training_dictionnary[model]['CL']=='yes'): - tab.append('- Curriculum is used : the embedding rate starts from '+ str(params.training_dictionnary[model]['start_emb_rate']) +' and decreases every two epochs by -0.1 to reach target embedding rate '+str(params.emb_rate)+'\n \n') - else: - tab.append('- Curriculum is not used : the embedding rate = '+str(params.emb_rate)+' is constant during training \n \n') + iteration_step=begin_step + describe_exp_txt(begin_step, number_steps, num_of_threads, \ + QF, image_size, emb_rate, data_dir_cover, data_dir_stego_0, cost_dir, \ + strategy, model, version_eff, stride, n_loops, \ + train_size, valid_size, test_size, permutation_files, training_dictionnary,\ + attack, n_iter_max_backpack, N_samples, tau_0, precision, data_dir_prot) - tab.append('Attack setup \n') - tab.append('- The smoothing function is ' +str(params.attack) +' \n') - tab.append('- Maximum number of steps is ' +str(params.n_iter_max_backpack) +' \n') - tab.append('- Number of samples is ' +str(params.N_samples) +' \n') - tab.append('- Tau is initialized with value ' +str(params.tau_0) +' and decreases by factor 0.5 when needed\n') - tab.append('- The exit condition is required to be respected with precision = '+str(params.precision)+'\n \n') + while iteration_step<begin_step+number_steps+1: + + run_iteration(iteration_step, label, data_dir_prot, data_dir_cover, data_dir_stego_0, cost_dir, \ + image_size, QF, folder_model, 
permutation_files, version_eff, stride, n_loops, \ + model, train_size, valid_size, test_size, attack, attack_last, emb_rate, \ + batch_adv, n_iter_max_backpack, N_samples, tau_0, precision, lr, \ + num_of_threads, training_dictionnary, spatial, strategy) + + iteration_step+=1 + + - file = open(params.data_dir_prot+"description.txt","w") - file.writelines(tab) - file.close() if __name__ == '__main__': argparser = argparse.ArgumentParser(sys.argv[0]) argparser.add_argument('--begin_step', type=int) argparser.add_argument('--number_steps', type=int, default=10) - argparser.add_argument('--folder_model', type=str, help='The path to the folder where the architecture of models are saved. ') + argparser.add_argument('--folder_model', type=str, help='The path to the folder where the architecture of models are saved') + argparser.add_argument('--permutation_files', type=str, help='The path to the permutation of indexes of images (to shuffle for train, valid and test set).') argparser.add_argument('--num_of_threads', type=int, default=10, help='number of cpus') # MODEL ARCHITECTURE @@ -395,72 +221,32 @@ if __name__ == '__main__': argparser.add_argument('--train_size', type=int, default=4000) argparser.add_argument('--valid_size', type=int, default=1000) argparser.add_argument('--test_size', type=int, default=5000) + argparser.add_argument('--train_on_cost_map', type=str, default='yes', help='yes or no. If yes stegos are created from the cost maps at during the training, elif no classifiers are trained directly with the stegos.') ## FOR TRAINING XU argparser.add_argument('--batch_size_classif_xu', type=int, default=16) argparser.add_argument('--batch_size_eval_xu', type=int, default=16) argparser.add_argument('--epoch_num_xu', type=int, default=30) - argparser.add_argument('--CL_xu',type=str, help='yes or no. If yes starts from params.emb_rate of start_emb_rate and decrease until reach params.emb_rate') + argparser.add_argument('--CL_xu',type=str, help='yes or no. If yes starts from emb_rate of start_emb_rate and decrease until reach emb_rate') argparser.add_argument('--start_emb_rate_xu',type=float, default=0.7,help='Is CL=yes, is the starting emb_rate') argparser.add_argument('--pair_training_xu',type=str, help='yes or no') ## FOR TRAINING SR argparser.add_argument('--batch_size_classif_sr', type=int, default=16) argparser.add_argument('--batch_size_eval_sr', type=int, default=16) argparser.add_argument('--epoch_num_sr', type=int, default=30) - argparser.add_argument('--CL_sr',type=str, help='yes or no. If yes starts from params.emb_rate of start_emb_rate and decrease until reach params.emb_rate') + argparser.add_argument('--CL_sr',type=str, help='yes or no. If yes starts from emb_rate of start_emb_rate and decrease until reach emb_rate') argparser.add_argument('--start_emb_rate_sr',type=float, default=0.7,help='Is CL=yes, is the starting emb_rate') argparser.add_argument('--pair_training_sr',type=str, help='yes or no') ## FOR TRAINING EF argparser.add_argument('--batch_size_classif_ef', type=int, default=16) argparser.add_argument('--batch_size_eval_ef', type=int, default=16) argparser.add_argument('--epoch_num_ef', type=int, default=30) - argparser.add_argument('--CL_ef',type=str, help='yes or no. If yes starts from params.emb_rate of start_emb_rate and decrease until reach params.emb_rate') + argparser.add_argument('--CL_ef',type=str, help='yes or no. 
If yes starts from emb_rate of start_emb_rate and decrease until reach emb_rate') argparser.add_argument('--start_emb_rate_ef',type=float, default=0.7,help='Is CL=yes, is the starting emb_rate') argparser.add_argument('--pair_training_ef',type=str, help='yes or no') params = argparser.parse_args() - params.n_images = params.train_size + params.valid_size + params.test_size - params.permutation_files = params.folder_model + 'permutation_files.npy' - - params.models = params.model.split(',') - params.training_dictionnary = {} - for model in params.models: - if(model=='xunet'): - params.training_dictionnary[model]={'batch_size_classif':params.batch_size_classif_xu, - 'batch_size_eval':params.batch_size_eval_xu, - 'epoch_num':params.epoch_num_xu, - 'CL':params.CL_xu, - 'start_emb_rate':params.start_emb_rate_xu, - 'pair_training':params.pair_training_xu} - elif(model=='srnet'): - params.training_dictionnary[model]={'batch_size_classif':params.batch_size_classif_sr, - 'batch_size_eval':params.batch_size_eval_sr, - 'epoch_num':params.epoch_num_sr, - 'CL':params.CL_sr, - 'start_emb_rate':params.start_emb_rate_sr, - 'pair_training':params.pair_training_sr} - elif(model=='efnet'): - params.training_dictionnary[model]={'batch_size_classif':params.batch_size_classif_ef, - 'batch_size_eval':params.batch_size_eval_ef, - 'epoch_num':params.epoch_num_ef, - 'CL':params.CL_ef, - 'start_emb_rate':params.start_emb_rate_ef, - 'pair_training':params.pair_training_ef} - - print(params.training_dictionnary) - - iteration_step=params.begin_step - - describe_exp_txt(params) - - while iteration_step<params.begin_step+params.number_steps+1: - - run_iteration(params, iteration_step) - iteration_step+=1 - #print(time()-t1) - - - + + run_protocol(**vars(params)) - diff --git a/models/efficientnet.py b/models/efficientnet.py index 0f8b41e0cde2921713d0840085afa45f1f3ea6fb..1f19803d866bb8b49db2ad6515d09dac9942e5a7 100644 --- a/models/efficientnet.py +++ b/models/efficientnet.py @@ -5,7 +5,7 @@ import torch import torch.nn as nn import torch.nn.functional as F -def get_net(version_eff, stride): +def get_net(version_eff, stride=1): n_channels_dict = {'efficientnet-b0': 1280, 'efficientnet-b1': 1280, 'efficientnet-b2': 1408, 'efficientnet-b3': 1536, 'efficientnet-b4': 1792, 'efficientnet-b5': 2048, @@ -32,10 +32,10 @@ def get_net(version_eff, stride): # CONFIG class TrainGlobalConfig(): - def __init__(self, params): - self.num_workers = params.num_of_threads - self.batch_size = params.batch_size_classif - self.n_epochs = params.epoch_num + def __init__(self, num_of_threads, batch_size_classif, epoch_num): + self.num_workers = num_of_threads + self.batch_size = batch_size_classif + self.n_epochs = epoch_num self.lr = 0.0005 self.verbose = True self.verbose_step = 1 diff --git a/models/srnet.py b/models/srnet.py index 2dd990b3ad6a9d1011ffecaac8635b3d998afafc..8a8fc6cc2e5cfb3dfab4318754846f242ab45f4a 100644 --- a/models/srnet.py +++ b/models/srnet.py @@ -8,7 +8,7 @@ class get_net(nn.Module): def __init__(self, image_size): super(get_net, self).__init__() - self.im_size = image_size + self.im_size = params.image_size def _conv2d(in_channels, out_channels, stride=1, kernel_size=3, padding=1): return nn.Conv2d(in_channels=in_channels,\ @@ -142,10 +142,10 @@ class get_net(nn.Module): # CONFIG class TrainGlobalConfig(): - def __init__(self, params): - self.num_workers = params.num_of_threads - self.batch_size = params.batch_size_classif - self.n_epochs = params.epoch_num + def __init__(self, num_of_threads, batch_size_classif, 
epoch_num): + self.num_workers = num_of_threads + self.batch_size = batch_size_classif + self.n_epochs = epoch_num self.lr = 0.001 self.verbose = True self.verbose_step = 1 diff --git a/models/xunet.py b/models/xunet.py index d43e47bae1357b074dcd829ec55f88a7393234ac..ffe1e5a0b6c8458fe1e850a4cbbc8d52603a5961 100644 --- a/models/xunet.py +++ b/models/xunet.py @@ -107,10 +107,10 @@ class get_net(nn.Module): # CONFIG class TrainGlobalConfig(): - def __init__(self, params): - self.num_workers = params.num_of_threads - self.batch_size = params.batch_size_classif - self.n_epochs = params.epoch_num + def __init__(self, num_of_threads, batch_size_classif, epoch_num): + self.num_workers = num_of_threads + self.batch_size = batch_size_classif + self.n_epochs = epoch_num self.lr = 0.001 self.verbose = True self.verbose_step = 1 diff --git a/script_attack.py b/script_attack.py index 6c2cfc57314b837c8f4278ad9eae38ccf3537a7d..aff834447fc7608b2fdb525d0de32c8b898dafab 100644 --- a/script_attack.py +++ b/script_attack.py @@ -21,21 +21,22 @@ import numpy as np from statsmodels.distributions.empirical_distribution import ECDF -def backpack_attack(params, nets, image_path, tau_0, ecdf_list): +def backpack_attack(data_dir_cover, cost_dir, image_size, QF, folder_model, emb_rate, \ + N_samples, nets, image_path, tau_0, ecdf_list, attack, lr, precision, n_iter_max_backpack): - c_coeffs = np.load(params.data_dir_cover + image_path) + c_coeffs = np.load(data_dir_cover + image_path) nz_AC = np.sum(c_coeffs!=0)-np.sum(c_coeffs[::8,::8]!=0) - rho = np.load(params.cost_dir+image_path) - if(rho.shape==(params.image_size, params.image_size)): - rho = np.reshape(rho,(1,params.image_size, params.image_size)) + rho = np.load(cost_dir+image_path) + if(rho.shape==(image_size, image_size)): + rho = np.reshape(rho,(1,image_size, image_size)) rho = np.concatenate((np.copy(rho), np.zeros_like(rho), np.copy(rho)),axis=0) rho[0,c_coeffs < -1023] = 10e30 rho[2,c_coeffs > 1023] = 10e30 - entropy = params.emb_rate*nz_AC + entropy = emb_rate*nz_AC tau = tau_0 - backpack = BackPack(c_coeffs, rho, entropy, params.N_samples, nets, params, ecdf_list) - optimizer = torch.optim.Adam([backpack.rho_vec], lr=params.lr) + backpack = BackPack(image_size, QF, folder_model, c_coeffs, rho, entropy, N_samples, nets, ecdf_list, attack) + optimizer = torch.optim.Adam([backpack.rho_vec], lr=lr) proba_cover = backpack.proba_cover best_probas_soft, mean_probas_soft, mean_probas_hard, stego_hard = backpack(tau) mean_p_soft = mean_probas_soft @@ -43,11 +44,10 @@ def backpack_attack(params, nets, image_path, tau_0, ecdf_list): i=0 print(proba_cover, mean_p_soft, mean_p_hard) - #tab = [] while True: - while((np.max(mean_p_soft-proba_cover)>params.precision) and (np.max(mean_p_hard-proba_cover)>params.precision) and (i<params.n_iter_max_backpack)): + while((np.max(mean_p_soft-proba_cover)>precision) and (np.max(mean_p_hard-proba_cover)>precision) and (i<n_iter_max_backpack)): optimizer.zero_grad() best_probas_soft, mean_probas_soft, mean_probas_hard, stego_hard = backpack(tau) @@ -62,22 +62,76 @@ def backpack_attack(params, nets, image_path, tau_0, ecdf_list): mean_p_hard = mean_probas_hard lr = optimizer.param_groups[0]['lr'] - print(np.round(mean_p_soft,2), np.round(mean_p_hard,2), np.round(tau,4), np.round(lr,4)) + print(i, np.round(mean_p_soft,2), np.round(mean_p_hard,2), np.round(tau,4), np.round(lr,4)) #tab.append([mean_p_soft, mean_p_hard, tau]) i+=1 if(torch.isnan(backpack.rho_vec).sum()>0): + print('Nan in cost map') return(None, None) - 
if((np.max(mean_p_hard-proba_cover)<=params.precision) or (i>=params.n_iter_max_backpack)): + if((np.max(mean_p_hard-proba_cover)<=precision) or (i>=n_iter_max_backpack)): return(backpack.rho_vec.cpu().detach().numpy(), stego_hard.cpu().detach().numpy()[0,0]) else: tau/=2 best_probas_soft, mean_probas_soft, mean_probas_hard, stego_hard = backpack(tau) mean_p_soft = mean_probas_soft mean_p_hard = mean_probas_hard + +def run_attack(iteration_step, folder_model, data_dir_prot, data_dir_cover, cost_dir, \ + image_size, QF, emb_rate, model, version_eff, stride, n_loops, attack_last, \ + permutation_files, attack, tau_0, lr, precision, n_iter_max_backpack, N_samples, idx_start, batch_adv): + + models = model.split(',') + attack_last = attack_last=='yes' + + nets = [] + + ecdf_list=[] # For calibration + if(attack_last): + n_classifier_min = max(iteration_step-3, 0) + else: + n_classifier_min = 0 + for i in range(n_classifier_min, iteration_step): + for model in models: + if(model=='efnet'): + net = get_net_ef(version_eff, stride).cuda() + elif(model=='xunet'): + net = get_net_xu(folder_model, n_loops, image_size).cuda() + elif(model=='srnet'): + net = get_net_sr(image_size).cuda() + + paths = os.listdir(data_dir_prot+'train_'+model+'_'+str(i)+'/') + paths = [int(x.split('-')[-1][:-9]) for x in paths if 'best-checkpoint' in x] + best_epoch = str(max(paths)) + path = data_dir_prot+'train_'+model+'_'+str(i)+'/best-checkpoint-'+'0'*(3-len(best_epoch))+best_epoch+'epoch.bin' + + checkpoint = torch.load(path) + net.load_state_dict(checkpoint['model_state_dict']) + net.eval() + for x in net.parameters(): + x.requires_grad = False + nets.append(net) + ecdf = ECDF(np.load(data_dir_prot+'cover/eval_'+model+'_'+str(i)+'/probas.npy')) + ecdf_list.append(ecdf) + + files = np.load(permutation_files) + files = files[idx_start*batch_adv:(idx_start+1)*batch_adv] + files = files[np.random.permutation(len(files))] + print(files) + + path_save = data_dir_prot + "data_adv_"+str(iteration_step) + "/" + + for image_path in files: + if not os.path.exists(path_save+'adv_cost/'+image_path[:-4]+'.npy'): + rho,stego = backpack_attack(data_dir_cover, cost_dir, image_size, QF, \ + folder_model, emb_rate, N_samples, nets, image_path[:-4]+'.npy', tau_0, ecdf_list, \ + attack, lr, precision, n_iter_max_backpack) + np.save(path_save+'adv_cost/'+image_path[:-4]+'.npy', rho) + np.save(path_save+'adv_final/'+image_path[:-4]+'.npy', stego) + if __name__ == '__main__': @@ -86,7 +140,6 @@ if __name__ == '__main__': argparser.add_argument('--data_dir_prot', type=str) argparser.add_argument('--data_dir_cover', type=str) - argparser.add_argument('--data_dir_stego_0', type=str) argparser.add_argument('--cost_dir',type=str) argparser.add_argument('--permutation_files', type=str) argparser.add_argument('--folder_model', type=str) @@ -115,52 +168,8 @@ if __name__ == '__main__': argparser.add_argument('--batch_adv',type=int) params = argparser.parse_args() - params.models = params.model.split(',') - params.attack_last = params.attack_last=='yes' - - nets = [] - ecdf_list=[] # For calibration - if(params.attack_last): - n_classifier_min = max(params.iteration_step-3, 0) - else: - n_classifier_min = 0 - for i in range(n_classifier_min, params.iteration_step): - for model in params.models: - if(model=='efnet'): - net = get_net_ef(params.version_eff, params.stride).cuda() - elif(model=='xunet'): - net = get_net_xu(params).cuda() - elif(model=='srnet'): - net = get_net_sr(params).cuda() - paths = 
os.listdir(params.data_dir_prot+'train_'+model+'_'+str(i)+'/') - paths = [int(x.split('-')[-1][:-9]) for x in paths if 'best-checkpoint' in x] - best_epoch = str(max(paths)) - path = params.data_dir_prot+'train_'+model+'_'+str(i)+'/best-checkpoint-'+'0'*(3-len(best_epoch))+best_epoch+'epoch.bin' - - checkpoint = torch.load(path) - net.load_state_dict(checkpoint['model_state_dict']) - net.eval() - for x in net.parameters(): - x.requires_grad = False - nets.append(net) - ecdf = ECDF(np.load(params.data_dir_prot+'cover/eval_'+model+'_'+str(i)+'/probas.npy')) - ecdf_list.append(ecdf) - - files = np.load(params.permutation_files) - files = files[np.random.permutation(np.arange(len(files)))] - files = files[params.idx_start*params.batch_adv:(params.idx_start+1)*params.batch_adv] - print(files) - - for image_path in files: - path_save = params.data_dir_prot + "data_adv_"+str(params.iteration_step) + "/" - if not os.path.exists(path_save+'adv_cost/'+image_path[:-4]+'.npy'): - rho,stego = backpack_attack(params, nets, image_path[:-4]+'.npy', params.tau_0, ecdf_list) - np.save(path_save+'adv_cost/'+image_path[:-4]+'.npy', rho) - np.save(path_save+'adv_final/'+image_path[:-4]+'.npy', stego) - - - + run_attack(**vars(params)) diff --git a/script_evaluate_classif.py b/script_evaluate_classif.py index 23cac4729358d5d07787303aa0ae92c190b08489..848dfe892a440dc8f6d5e622064b5364091903de 100644 --- a/script_evaluate_classif.py +++ b/script_evaluate_classif.py @@ -25,15 +25,15 @@ def softmax(array): class cover_stego_loader(object): - def __init__(self, iteration, mode, train_size, valid_size, test_size, \ + def __init__(self, iteration_step, mode, train_size, valid_size, test_size, \ batch_size_eval, QF, image_size, folder_model, \ - data_dir_prot, data_dir_cover, data_dir_stego_0): # mode = stego or cover + data_dir_prot, data_dir_cover, data_dir_stego_0, permutation_files): # mode = stego or cover n_images = train_size + valid_size + test_size - self.files = np.load(folder_model + 'permutation_files.npy')[:n_images] + self.files = np.load(permutation_files)[:n_images] self.train_counter = 0 self.train_data_size = len(self.files) self.train_num_batches = int(np.ceil(1.0 * self.train_data_size / batch_size_eval)) - self.iteration_step = iteration + self.iteration_step = iteration_step self.mode = mode self.c_quant = np.load(folder_model + 'c_quant_'+str(QF)+'.npy') self.image_size = image_size @@ -78,8 +78,8 @@ class cover_stego_loader(object): def evaluate_step_i(iteration_f, iteration_adv, model, data_dir_prot, train_size, valid_size, test_size, \ batch_size_eval, QF, image_size, folder_model, \ - data_dir_prot, data_dir_cover, data_dir_stego_0, \ - version_eff=None, stride=None, n_loops=None) # if iteration_adv == -1 : cover + data_dir_cover, data_dir_stego_0, permutation_files,\ + version_eff=None, stride=None, n_loops=None): # if iteration_adv == -1 : cover if(model=='efnet'): net = get_net_ef(version_eff, stride).cuda() @@ -108,7 +108,7 @@ def evaluate_step_i(iteration_f, iteration_adv, model, data_dir_prot, train_size dataloader = cover_stego_loader(iteration_adv, mode, train_size, valid_size, test_size, \ batch_size_eval, QF, image_size, folder_model, \ - data_dir_prot, data_dir_cover, data_dir_stego_0) + data_dir_prot, data_dir_cover, data_dir_stego_0, permutation_files) result_fi = np.empty((0,2)) dataloader.reset_counter() @@ -124,7 +124,6 @@ def evaluate_step_i(iteration_f, iteration_adv, model, data_dir_prot, train_size if __name__ == '__main__': argparser = argparse.ArgumentParser(sys.argv[0]) 
- argparser.add_argument('--iteration_step', type=int) argparser.add_argument('--data_dir_prot', type=str) argparser.add_argument('--folder_model', type=str) argparser.add_argument('--data_dir_cover', type=str) @@ -133,7 +132,6 @@ if __name__ == '__main__': argparser.add_argument('--iteration_f', type=int) argparser.add_argument('--iteration_adv', type=int) - argparser.add_argument('--stego_or_cover', type=str) argparser.add_argument('--train_size', type=int, help='Size of train set') argparser.add_argument('--valid_size', type=int, help='Size of valid set') @@ -141,7 +139,6 @@ if __name__ == '__main__': argparser.add_argument('--image_size', type=int) argparser.add_argument('--QF', type=int) - argparser.add_argument('--data_format', type=str, default='NHWC') argparser.add_argument('--batch_size_eval', type=int) # Model parameters @@ -156,14 +153,5 @@ if __name__ == '__main__': print('Evaluate model '+params.model+' at iteration ' +str(params.iteration_f)+ ' on the adv images generated at '+ str(params.iteration_adv)) - evaluate_step_i(**params) + evaluate_step_i(**vars(params)) - - - - - - - - - diff --git a/script_train.py b/script_train.py index 23c262dec1d2222ecdd66934be11a52996349477..87b7c88bc24a26a7afef5cae6c61fbd671c8abd7 100644 --- a/script_train.py +++ b/script_train.py @@ -33,9 +33,152 @@ def my_collate(batch, pair_training=False): else: return torch.utils.data.dataloader.default_collate(batch) -def train(iteration_step, folder_model, data_dir_prot, permutation_files, num_of_threads, \ - data_dir_cover, cost_dir, train_size, ) + +def run_training(iteration_step, model, net, trainGlobalConfig, data_dir_prot, start_emb_rate, emb_rate, pair_training, \ + version_eff, load_checkpoint, train_dataset, validation_dataset, test_dataset): + + device = torch.device('cuda:0') + + train_dataset.emb_rate = start_emb_rate + train_loader = torch.utils.data.DataLoader( + train_dataset, + batch_size=trainGlobalConfig.batch_size, + pin_memory=True, + drop_last=True, + shuffle=True, + num_workers=trainGlobalConfig.num_workers, + collate_fn=lambda x: my_collate(x,pair_training=pair_training) + ) + val_loader = torch.utils.data.DataLoader( + validation_dataset, + batch_size=trainGlobalConfig.batch_size, + num_workers=trainGlobalConfig.num_workers, + shuffle=False, + #sampler=SequentialSampler(validation_dataset), + pin_memory=True, + collate_fn=lambda x: my_collate(x,pair_training=pair_training) + ) + + test_loader = torch.utils.data.DataLoader( + test_dataset, + batch_size=trainGlobalConfig.batch_size, + num_workers=trainGlobalConfig.num_workers, + shuffle=False, + #sampler=SequentialSampler(test_dataset), + pin_memory=True, + collate_fn=lambda x: my_collate(x,pair_training=pair_training) + ) + + save_path = data_dir_prot + 'train_'+model+'_'+str(iteration_step) +'/' + #save_path = data_dir_prot + 'train_'+model+'_stego_'+str(iteration_step) +'/' + #save_path = data_dir_prot + 'train_'+model+'_'+str(iteration_step) +'_'+str(emb_rate)+'/' + if not os.path.exists(save_path): + os.makedirs(save_path) + + fitter = Fitter(model=net, device=device, config=trainGlobalConfig, save_path=save_path, model_str=model) + print(f'{fitter.base_dir}') + + if(model=='efnet'): + # Load the pretrained model + fitter.load(folder_model+version_eff+"-imagenet") + + if(load_checkpoint is not None): + save_path_load = save_path + #save_path_load = data_dir_prot + 'train_'+model+'_'+str(iteration_step) +'/' + if(load_checkpoint=='best'): + best_epoch = [int(x.split('-')[-1][:3]) \ + for x in os.listdir(save_path_load) \ + 
if 'best' in x] + best_epoch.sort() + best_epoch = str(best_epoch[-1]) + path = save_path_load + 'best-checkpoint-'+'0'*(3-len(best_epoch))+best_epoch+'epoch.bin' + fitter.load(path) + else: + fitter.load(save_path+load_checkpoint) + + fitter.fit(train_loader, val_loader, emb_rate) + + train_loader.dataset.emb_rate = emb_rate + + _, train = fitter.validation(train_loader) + print('Pe_err train :', train.avg) + _, val = fitter.validation(val_loader) + print('Pe_err val :', val.avg) + _, test = fitter.validation(test_loader) + print('Pe_err test :', test.avg) + + np.save(save_path+'error_rate.npy', np.array([train.avg, val.avg, test.avg])) + + return(train.avg, val.avg, test.avg) + + +def train(iteration_step, model, folder_model, data_dir_prot, permutation_files, num_of_threads, \ + data_dir_cover, data_dir_stego_0, cost_dir, train_size, valid_size, test_size, \ + image_size, batch_size_classif, batch_size_eval, epoch_num, QF, emb_rate, \ + spatial, train_on_cost_map, H1_filter, L1_filter, L2_filter, \ + version_eff, stride, n_loops, CL, start_emb_rate, pair_training, load_model): + pair_training = pair_training=='yes' + spatial = spatial=='yes' + train_on_cost_map = train_on_cost_map=='yes' + + dataset = load_dataset(iteration_step, permutation_files, train_size, valid_size, test_size, \ + data_dir_prot, pair_training=pair_training) + + + train_valid_test_names = [dataset[dataset['fold'] == i].image_name.values for i in range(3)] + train_valid_test_indexs_db = [dataset[dataset['fold'] == i].indexs_db.values for i in range(3)] + + if pair_training: + train_valid_test_labels = [None, None, None] + else: + train_valid_test_labels = [dataset[dataset['fold'] == i].labels.values for i in range(3)] + + train_valid_test_transforms = [get_train_transforms(), get_valid_transforms(), None] + + datasets = [DatasetRetriever(image_names, folder_model, QF, emb_rate, image_size, \ + data_dir_cover, data_dir_stego_0, cost_dir, data_dir_prot, \ + H1_filter, L1_filter, L2_filter, indexs_db, train_on_cost_map, \ + labels, transforms, pair_training, spatial) \ + for image_names, indexs_db, labels, transforms in \ + zip(train_valid_test_names, train_valid_test_indexs_db, \ + train_valid_test_labels, train_valid_test_transforms)] + + train_dataset, validation_dataset, test_dataset = datasets[0], datasets[1], datasets[2] + + if(model=='efnet'): + net = get_net_ef(version_eff, stride).cuda() + trainGlobalConfig = TrainGlobalConfig_ef(num_of_threads, batch_size_classif, epoch_num) + elif(model=='xunet'): + net = get_net_xu(folder_model, n_loops, image_size).cuda() + trainGlobalConfig = TrainGlobalConfig_xu(num_of_threads, batch_size_classif, epoch_num) + elif(model=='srnet'): + net = get_net_sr(image_size).cuda() + net.init() + trainGlobalConfig = TrainGlobalConfig_sr(num_of_threads, batch_size_classif, epoch_num) + + + + if(CL=='yes'): + start_emb_rate = start_emb_rate + else: + start_emb_rate = emb_rate + + + # Train first with cost map + run_training(iteration_step, model, net, trainGlobalConfig, data_dir_prot, start_emb_rate, \ + emb_rate, pair_training, version_eff, load_model, \ + train_dataset, validation_dataset, test_dataset) + + # Train then with the stegos from the best iteration obtained from the training with cost maps + # (set load_model to 'best') + train, val, test = run_training(iteration_step, model, net, trainGlobalConfig, data_dir_prot, start_emb_rate, \ + emb_rate, pair_training, version_eff, 'best', \ + train_dataset, validation_dataset, test_dataset) + + return(train, val, test) + + if 
__name__ == '__main__': @@ -48,6 +191,7 @@ if __name__ == '__main__': argparser.add_argument('--num_of_threads',type=int, help='Number of CPUs available') argparser.add_argument('--data_dir_cover', type=str, help='Where are the .npy files of cover') + argparser.add_argument('--data_dir_stego_0', type=str, help='Where are the inital stegos') argparser.add_argument('--cost_dir', type=str, help='Where are the .npy of costs') argparser.add_argument('--train_size', type=int, help='Size of train set') @@ -61,6 +205,8 @@ if __name__ == '__main__': argparser.add_argument('--QF', type=int, help='Quality factor, values accepted are 75, 95 and 100') argparser.add_argument('--emb_rate', type=float, help= 'Float between 0 and 1') argparser.add_argument('--spatial', type=str, default='no', help= 'yes if for spatial steganography, no for jpeg steganography ') + argparser.add_argument('--train_on_cost_map', type=str, default='yes', help='yes or no. If yes stegos are created from the cost maps at during the training, elif no classifiers are trained directly with the stegos.') + # FOR CUSTOM FILTERS FOR HILL argparser.add_argument('--H1_filter',type=str, default=None, help='Path to the saved H1 filter') argparser.add_argument('--L1_filter',type=str, default=None, help='Path to the saved L1 filter') @@ -75,165 +221,17 @@ if __name__ == '__main__': argparser.add_argument('--n_loops',type=int, default=5, help='Number of loops in th xunet architecture') # Training parameters - argparser.add_argument('--CL',type=str, help='yes or no. If yes starts from emb_rate of params.start_emb_rate and decrease until reach params.emb_rate') + argparser.add_argument('--CL',type=str, help='yes or no. If yes starts from emb_rate of start_emb_rate and decrease until reach emb_rate') argparser.add_argument('--start_emb_rate',type=float, default=0.7, help='Is CL=yes, is the starting emb_rate') argparser.add_argument('--pair_training',type=str, help='Yes or no') argparser.add_argument('--load_model',type=str, default=None, help='Path to the saved efficient model') - params = argparser.parse_args() - params.pair_training = params.pair_training=='yes' - params.spatial = params.spatial=='yes' - dataset = load_dataset(params, pair_training=params.pair_training) - - if params.pair_training: - - train_dataset = DatasetRetriever( - image_names=dataset[dataset['fold'] == 0].image_name.values, - params=params, - indexs_db=dataset[dataset['fold'] == 0].indexs_db.values, - transforms=get_train_transforms(), - pair_training=True, - spatial=params.spatial - ) - - validation_dataset = DatasetRetriever( - image_names=dataset[dataset['fold'] == 1].image_name.values, - params=params, - indexs_db=dataset[dataset['fold'] == 1].indexs_db.values, - transforms=get_valid_transforms(), - pair_training=True, - spatial=params.spatial - ) - - test_dataset = DatasetRetriever( - image_names=dataset[dataset['fold'] == 2].image_name.values, - params=params, - indexs_db=dataset[dataset['fold'] == 2].indexs_db.values, - transforms=None, - pair_training=True, - spatial=params.spatial - ) + params = argparser.parse_args() - else: - - train_dataset = DatasetRetriever( - image_names=dataset[dataset['fold'] == 0].image_name.values, - params=params, - indexs_db=dataset[dataset['fold'] == 0].indexs_db.values, - kinds=dataset[dataset['fold'] == 0].kind.values, - labels=dataset[dataset['fold'] == 0].label.values, - transforms=get_train_transforms(), - spatial=params.spatial - ) - - validation_dataset = DatasetRetriever( - - image_names=dataset[dataset['fold'] == 
1].image_name.values, - params=params, - indexs_db=dataset[dataset['fold'] == 1].indexs_db.values, - kinds=dataset[dataset['fold'] == 1].kind.values, - labels=dataset[dataset['fold'] == 1].label.values, - transforms=get_valid_transforms(), - spatial=params.spatial - ) - - test_dataset = DatasetRetriever( - image_names=dataset[dataset['fold'] == 2].image_name.values, - params=params, - indexs_db=dataset[dataset['fold'] == 2].indexs_db.values, - kinds=dataset[dataset['fold'] == 2].kind.values, - labels=dataset[dataset['fold'] == 2].label.values, - transforms=None, - spatial=params.spatial - ) - - if(params.model=='efnet'): - net = get_net_ef(version_eff, stride).cuda() - trainGlobalConfig = TrainGlobalConfig_ef(params) - elif(params.model=='xunet'): - net = get_net_xu(folder_model, n_loops, image_size).cuda() - trainGlobalConfig = TrainGlobalConfig_xu(params) - elif(params.model=='srnet'): - net = get_net_sr(image_size).cuda() - net.init() - trainGlobalConfig = TrainGlobalConfig_sr(params) - - if(params.CL=='yes'): - start_emb_rate = params.start_emb_rate - else: - start_emb_rate = params.emb_rate - - def run_training(load_checkpoint=None): - - device = torch.device('cuda:0') - - train_dataset.emb_rate = start_emb_rate - train_loader = torch.utils.data.DataLoader( - train_dataset, - batch_size=trainGlobalConfig.batch_size, - pin_memory=True, - drop_last=True, - shuffle=True, - num_workers=trainGlobalConfig.num_workers, - collate_fn=lambda x: my_collate(x,pair_training=params.pair_training) - ) - val_loader = torch.utils.data.DataLoader( - validation_dataset, - batch_size=trainGlobalConfig.batch_size, - num_workers=trainGlobalConfig.num_workers, - shuffle=False, - pin_memory=True, - collate_fn=lambda x: my_collate(x,pair_training=params.pair_training) - ) - - test_loader = torch.utils.data.DataLoader( - test_dataset, - batch_size=trainGlobalConfig.batch_size, - num_workers=trainGlobalConfig.num_workers, - shuffle=False, - #sampler=SequentialSampler(test_dataset), - pin_memory=True, - collate_fn=lambda x: my_collate(x,pair_training=params.pair_training) - ) - - save_path = params.data_dir_prot + 'train_'+params.model+'_'+str(params.iteration_step) +'/' - fitter = Fitter(model=net, device=device, config=trainGlobalConfig, save_path=save_path, model_str=params.model) - print(f'{fitter.base_dir}') - - if(params.model=='efnet'): - fitter.load(params.folder_model+params.version_eff+"-imagenet") - - if(load_checkpoint is not None): - if(load_checkpoint=='best'): - best_epoch = [int(x.split('-')[-1][:3]) \ - for x in os.listdir(save_path) \ - if 'best' in x] - best_epoch.sort() - best_epoch = str(best_epoch[-1]) - path = save_path + 'best-checkpoint-'+'0'*(3-len(best_epoch))+best_epoch+'epoch.bin' - fitter.load(path) - else: - fitter.load(save_path+load_checkpoint) - - fitter.fit(train_loader, val_loader, params.emb_rate) - - train_loader.dataset.emb_rate = params.emb_rate - - _, train = fitter.validation(train_loader) - print('Pe_err train :', train.avg) - _, val = fitter.validation(val_loader) - print('Pe_err val :', val.avg) - _, test = fitter.validation(test_loader) - print('Pe_err test :', test.avg) - - np.save(save_path+'error_rate.npy', np.array([train.avg, val.avg, test.avg])) - - return(train.avg, val.avg, test.avg) - - - train, val, test = run_training(params.load_model) + train, val, test = train(**vars(params)) print(train, val, test) + diff --git a/write_description.py b/write_description.py new file mode 100644 index 
0000000000000000000000000000000000000000..5fc17f1aeb2d4f4067d636c7a2e427f4c8a363d1 --- /dev/null +++ b/write_description.py @@ -0,0 +1,99 @@ +from datetime import date + +def describe_exp_txt(begin_step, number_steps, num_of_threads, \ + QF, image_size, emb_rate, data_dir_cover, data_dir_stego_0, cost_dir, \ + strategy, model, version_eff, stride, n_loops, \ + train_size, valid_size, test_size, permutation_files, training_dictionnary,\ + attack, n_iter_max_backpack, N_samples, tau_0, precision, data_dir_prot): + + n_images = train_size + valid_size + test_size + models = model.split(',') + + tab = [] + tab.append(date.today().strftime("%b-%d-%Y")) + tab.append('Launch of the protocol, starting from iteration ' + str(begin_step) + ' to ' + str(begin_step+number_steps) + '\n') + tab.append('Number of CPUs used: ' + str(num_of_threads) + '\n \n') + + tab.append('PARAMETERS \n') + + tab.append('Image characteristics \n') + tab.append('- QF = ' + str(QF) + '\n') + tab.append('- Image size = ' + str(image_size) + '\n') + tab.append('- Embedding rate = ' + str(emb_rate) + ' bpnzAC\n') + tab.append('- Cover images are taken in folder ' + data_dir_cover + '\n') + tab.append('- Stego images are taken in folder ' + data_dir_stego_0 + '\n') + tab.append('- Cost maps are taken in folder ' + cost_dir + '\n \n') + tab.append('Protocol setup \n') + tab.append('- Strategy = ' + strategy +' \n \n') + tab.append('Model description \n') + if(len(models)==1): + tab.append('- Model architecture is ' + model + ' with the following setup:\n') + if model=='efnet': + tab.append(' - Efficient-net version is ' +str(version_eff) +' pretrained on ImageNet \n') + tab.append(' - First conv stem has stride = ' +str(stride) +' \n \n') + elif model=='xunet': + tab.append(' - XuNet architecture is composed of '+str(n_loops) +' big blocks \n \n') + else: + tab.append('- The '+str(len(models)) +' model architectures are ' + model + ' with the following setup:\n') + if 'efnet' in models: + tab.append(' - Efficient-net version is ' +str(version_eff) +' pretrained on ImageNet \n') + tab.append(' - First conv stem has stride = ' +str(stride) +' \n \n') + if 'xunet' in models: + tab.append(' - XuNet architecture is composed of '+str(n_loops) +' big blocks \n \n') + + + tab.append('Training setup \n') + tab.append('- Train size = '+str(train_size) +'\n') + tab.append('- Valid size = '+str(valid_size) +'\n') + tab.append('- Test size = '+str(test_size) +'\n') + tab.append('- Files permutation, whose order determines the train, valid and test sets, is '+ permutation_files +'\n') + + for model in models: + tab.append('- Model '+model+' is trained for '+str(training_dictionnary[model]['epoch_num']) + ' epochs \n') + if(training_dictionnary[model]['pair_training']=='no'): + tab.append('- Pair training is not used \n') + tab.append('- Batch size is ' + str(training_dictionnary[model]['batch_size_classif']) + ' \n') + else: + tab.append('- Pair training is used \n') + tab.append('- Batch size is 2*' + str(training_dictionnary[model]['batch_size_classif']) + ' \n') + if(training_dictionnary[model]['CL']=='yes'): + tab.append('- Curriculum is used: the embedding rate starts from '+ str(training_dictionnary[model]['start_emb_rate']) +' and decreases every two epochs by a factor of 0.9 to reach target embedding rate '+str(emb_rate)+'\n \n') + else: + tab.append('- Curriculum is not used: the embedding rate = '+str(emb_rate)+' is constant during training \n \n') + + + tab.append('Attack setup \n') + tab.append('- The smoothing 
function is ' +str(attack) +' \n') + tab.append('- Maximum number of steps is ' +str(n_iter_max_backpack) +' \n') + tab.append('- Number of samples is ' +str(N_samples) +' \n') + tab.append('- Tau is initialized with value ' +str(tau_0) +' and decreases by a factor of 0.5 when needed\n') + tab.append('- The exit condition must be satisfied with precision = '+str(precision)+'\n \n') + + file = open(data_dir_prot+"description.txt","w") + file.writelines(tab) + file.close() + + +def update_exp_txt(params, lines_to_append): + file_name = params.data_dir_prot+"description.txt" + # Open the file in append & read mode ('a+') + with open(file_name, "a+") as file_object: + appendEOL = False + # Move read cursor to the start of file. + file_object.seek(0) + # Check if file is not empty + data = file_object.read(100) + if len(data) > 0: + appendEOL = True + # Iterate over each string in the list + for line in lines_to_append: + # If the file was not empty, prepend '\n' before the first appended line; + # every following line is also preceded by '\n' + if appendEOL: + file_object.write("\n") + else: + appendEOL = True + # Append element at the end of file + file_object.write(line) \ No newline at end of file diff --git a/write_jobs.py b/write_jobs.py new file mode 100644 index 0000000000000000000000000000000000000000..2b3b0dbeb384fd5bfe631b55afd672dd1b3bbf09 --- /dev/null +++ b/write_jobs.py @@ -0,0 +1,160 @@ +import os +from time import time, sleep + + +def is_finished(label): + liste = os.popen("squeue --user=ulc18or").read() + liste = liste.split('\n')[1:] + for x in liste: + if (label + '_' in x): + return(False) + return(True) + +def wait(label): + while not is_finished(label): + sleep(60) + + +def create_training_dictionnary(model, batch_size_classif_xu, batch_size_eval_xu, epoch_num_xu, \ + CL_xu, start_emb_rate_xu, pair_training_xu, batch_size_classif_sr, batch_size_eval_sr, epoch_num_sr, \ + CL_sr, start_emb_rate_sr, pair_training_sr, batch_size_classif_ef, batch_size_eval_ef, epoch_num_ef, \ + CL_ef, start_emb_rate_ef, pair_training_ef, train_on_cost_map): + + training_dictionnary = {} + models = model.split(',') + + for model in models: + if(model=='xunet'): + training_dictionnary[model]={'batch_size_classif':batch_size_classif_xu, + 'batch_size_eval':batch_size_eval_xu, + 'epoch_num':epoch_num_xu, + 'CL':CL_xu, + 'start_emb_rate':start_emb_rate_xu, + 'pair_training':pair_training_xu, + 'train_on_cost_map':train_on_cost_map} + elif(model=='srnet'): + training_dictionnary[model]={'batch_size_classif':batch_size_classif_sr, + 'batch_size_eval':batch_size_eval_sr, + 'epoch_num':epoch_num_sr, + 'CL':CL_sr, + 'start_emb_rate':start_emb_rate_sr, + 'pair_training':pair_training_sr, + 'train_on_cost_map':train_on_cost_map} + elif(model=='efnet'): + training_dictionnary[model]={'batch_size_classif':batch_size_classif_ef, + 'batch_size_eval':batch_size_eval_ef, + 'epoch_num':epoch_num_ef, + 'CL':CL_ef, + 'start_emb_rate':start_emb_rate_ef, + 'pair_training':pair_training_ef, + 'train_on_cost_map':train_on_cost_map} + + return(training_dictionnary) + + +def write_command(mode, iteration_step, model, data_dir_prot, data_dir_cover, data_dir_stego_0, cost_dir, \ + image_size, QF, folder_model, permutation_files, version_eff, stride, n_loops, \ + train_size, valid_size, test_size, attack, attack_last, emb_rate, \ + batch_adv, n_iter_max_backpack, N_samples, tau_0, precision, lr, \ + num_of_threads, training_dictionnary, spatial): + + com = ' --data_dir_prot='+data_dir_prot + com+= ' 
--data_dir_cover='+data_dir_cover + com+= ' --image_size='+str(image_size) + com+= ' --QF=' + str(QF) + com+= ' --folder_model='+folder_model + com+= ' --permutation_files=' + permutation_files + com+= ' --version_eff=' + version_eff + com+= ' --stride=' + str(stride) + com+= ' --n_loops=' + str(n_loops) + + if(mode=='classif'): + com+= ' --batch_size_eval='+str(training_dictionnary[model]['batch_size_eval']) + com+= ' --data_dir_stego_0='+data_dir_stego_0 + com+= ' --train_size='+str(train_size) + com+= ' --valid_size='+str(valid_size) + com+= ' --test_size='+str(test_size) + com+= ' --model='+model + + elif (mode=='attack'): + com+= ' --iteration_step='+str(iteration_step) + com+= ' --attack=' + str(attack) + com+= ' --attack_last=' + str(attack_last) + com+= ' --emb_rate=' + str(emb_rate) + com+= ' --cost_dir=' + str(cost_dir) + com+= ' --idx_start=$SLURM_ARRAY_TASK_ID' + com+= ' --batch_adv=' + str(batch_adv) + com+= ' --n_iter_max_backpack=' + str(n_iter_max_backpack) + com+= ' --N_samples=' + str(N_samples) + com+= ' --tau_0=' + str(tau_0) + com+= ' --precision=' + str(precision) + com+= ' --lr=' + str(lr) + com+= ' --model=' + model + + elif (mode=='train'): + com+= ' --iteration_step='+str(iteration_step) + com+= ' --train_size='+str(train_size) + com+= ' --valid_size='+str(valid_size) + com+= ' --test_size='+str(test_size) + com+= ' --model='+model + com+= ' --num_of_threads='+str(num_of_threads) + com+= ' --emb_rate=' + str(emb_rate) + com+= ' --cost_dir=' + str(cost_dir) + com+= ' --data_dir_stego_0=' + str(data_dir_stego_0) + com+= ' --spatial=' + str(spatial) + + com+= ' --batch_size_classif='+str(training_dictionnary[model]['batch_size_classif']) + com+= ' --batch_size_eval='+str(training_dictionnary[model]['batch_size_eval']) + com+= ' --epoch_num='+str(training_dictionnary[model]['epoch_num']) + com+= ' --CL=' + str(training_dictionnary[model]['CL']) + com+= ' --start_emb_rate=' + str(training_dictionnary[model]['start_emb_rate']) + com+= ' --pair_training=' + str(training_dictionnary[model]['pair_training']) + com+= ' --train_on_cost_map=' + str(training_dictionnary[model]['train_on_cost_map']) + + + return(com) + + + +def run_job(mode, label, command, iteration, \ + num_batch=None, num_of_threads = None, gpu=True): + + name = label + '_' + str(iteration) +'_'+ mode + job_file = "./%s.job" % name + + with open(job_file, 'w+') as fh: + fh.writelines("#!/bin/bash\n") + fh.writelines("#SBATCH --nodes=1\n") + fh.writelines("#SBATCH --job-name=%s\n" % name) + + if(gpu): + fh.writelines("#SBATCH --account=%s\n" % 'srp@gpu') + fh.writelines("#SBATCH --gres=gpu:1\n") + fh.writelines("#SBATCH --ntasks=1\n") + fh.writelines("#SBATCH --hint=nomultithread\n") + fh.writelines("#SBATCH --time=10:00:00 \n") + + else: + fh.writelines("#SBATCH --account=%s\n" % 'kun@cpu') + fh.writelines("#SBATCH --time=2:00:00 \n") + + if(mode=='attack'): + fh.writelines("#SBATCH -C v100-32g\n") + fh.writelines("#SBATCH --array="+str(0)+'-'+str(num_batch)+" \n") + fh.writelines("module purge\n") + fh.writelines("module load python/3.7.5\n") + fh.writelines("module load pytorch-gpu/py3/1.7.1\n") + + fh.writelines("python -u " + command) + + elif('train' in mode): + fh.writelines("#SBATCH -C v100-32g\n") + fh.writelines("#SBATCH --cpus-per-task="+str(num_of_threads)+"\n") + fh.writelines("module load pytorch-gpu/py3/1.7.1\n") + fh.writelines("time python -u " + command) + + else: + fh.writelines("module load pytorch-gpu/py3/1.7.1\n") + fh.writelines("time python -u " + command) + + os.system("sbatch %s" 
%job_file)
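
For reference, here is a minimal driver sketch (not part of the diff above) showing how the write_jobs.py helpers could be chained to submit one SLURM training job per classifier and block until the queue drains. The training-script name main_train.py, every path and every numeric value are placeholder assumptions, and the sketch inherits the SLURM accounts and the squeue user hard-coded in run_job and is_finished.

# driver_sketch.py -- hypothetical, for illustration only; all values below are assumptions
from write_jobs import create_training_dictionnary, write_command, run_job, wait

# One block of (batch_size_classif, batch_size_eval, epoch_num, CL, start_emb_rate, pair_training)
# per architecture (xunet, srnet, efnet), then the shared train_on_cost_map flag.
training_dictionnary = create_training_dictionnary(
    'xunet,srnet',
    16, 32, 30, 'yes', 0.7, 'yes',   # xunet
    16, 32, 40, 'yes', 0.7, 'yes',   # srnet
    16, 32, 30, 'no', 0.7, 'no',     # efnet (unused here but required by the signature)
    'yes')                           # train_on_cost_map

for model in ['xunet', 'srnet']:
    # write_command only builds the ' --arg=value' string; the training script
    # name (assumed here to be main_train.py) has to be prepended by the caller.
    command = 'main_train.py' + write_command(
        'train', 0, model, './protocol/', './covers/', './stegos_0/', './costs/',
        256, 75, './models/', 'permutation_files.npy', 'b0', 1, 5,
        4000, 1000, 5000, 'backpack', 'backpack', 0.4,
        10, 200, 2, 10.0, 0.05, 0.01,
        8, training_dictionnary, 'no')
    # Writes a .job file and submits it with sbatch on a GPU node with 8 CPUs per task.
    run_job('train', 'exp0', command, 0, num_of_threads=8, gpu=True)

wait('exp0')  # polls squeue every 60 s until no job labelled 'exp0_' is left

The same write_command helper also serves the 'classif' and 'attack' modes; only the subset of arguments it serializes into the command line changes.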