diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..9fcb829077bb9730ba09f7b9f1df5f3dd0252015 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +imgs/example/a_dcp.png filter=lfs diff=lfs merge=lfs -text +imgs/example/b_dcp.png filter=lfs diff=lfs merge=lfs -text +imgs/hand.gif filter=lfs diff=lfs merge=lfs -text diff --git a/cores/__init__.py b/cores/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..59321c09934652a995b18883b7d0406e66be1a5e --- /dev/null +++ b/cores/__init__.py @@ -0,0 +1 @@ +from .options import * \ No newline at end of file diff --git a/cores/add.py b/cores/add.py new file mode 100644 index 0000000000000000000000000000000000000000..edf8027c573ee9ddd47486d80a371b4d01a94045 --- /dev/null +++ b/cores/add.py @@ -0,0 +1,130 @@ +import os +from queue import Queue +from threading import Thread +import time +import numpy as np +import cv2 +from models import runmodel +from util import mosaic,util,ffmpeg,filt +from util import image_processing as impro +from .init import video_init + + +''' +---------------------Add Mosaic--------------------- +''' +def addmosaic_img(opt,netS): + path = opt.media_path + print('Add Mosaic:',path) + img = impro.imread(path) + mask = runmodel.get_ROI_position(img,netS,opt)[0] + img = mosaic.addmosaic(img,mask,opt) + impro.imwrite(os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_add.jpg'),img) + +def get_roi_positions(opt,netS,imagepaths,savemask=True): + # resume + continue_flag = False + if os.path.isfile(os.path.join(opt.temp_dir,'step.json')): + step = util.loadjson(os.path.join(opt.temp_dir,'step.json')) + resume_frame = int(step['frame']) + if int(step['step'])>2: + mask_index = 
np.load(os.path.join(opt.temp_dir,'mask_index.npy')) + return mask_index + if int(step['step'])>=2 and resume_frame>0: + pre_positions = np.load(os.path.join(opt.temp_dir,'roi_positions.npy')) + continue_flag = True + imagepaths = imagepaths[resume_frame:] + + positions = [] + t1 = time.time() + if not opt.no_preview: + cv2.namedWindow('mask', cv2.WINDOW_NORMAL) + print('Step:2/4 -- Find mosaic location') + + img_read_pool = Queue(4) + def loader(imagepaths): + for imagepath in imagepaths: + img_origin = impro.imread(os.path.join(opt.temp_dir+'/video2image',imagepath)) + img_read_pool.put(img_origin) + t = Thread(target=loader,args=(imagepaths,)) + t.daemon = True + t.start() + + for i,imagepath in enumerate(imagepaths,1): + img_origin = img_read_pool.get() + mask,x,y,size,area = runmodel.get_ROI_position(img_origin,netS,opt) + positions.append([x,y,area]) + if savemask: + t = Thread(target=cv2.imwrite,args=(os.path.join(opt.temp_dir+'/ROI_mask',imagepath), mask,)) + t.start() + if i%1000==0: + save_positions = np.array(positions) + if continue_flag: + save_positions = np.concatenate((pre_positions,save_positions),axis=0) + np.save(os.path.join(opt.temp_dir,'roi_positions.npy'),save_positions) + step = {'step':2,'frame':i+resume_frame} + util.savejson(os.path.join(opt.temp_dir,'step.json'),step) + + #preview result and print + if not opt.no_preview: + cv2.imshow('mask',mask) + cv2.waitKey(1) & 0xFF + t2 = time.time() + print('\r',str(i)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),util.counttime(t1,t2,i,len(imagepaths)),end='') + + if not opt.no_preview: + cv2.destroyAllWindows() + + print('\nOptimize ROI locations...') + if continue_flag: + positions = np.concatenate((pre_positions,positions),axis=0) + mask_index = filt.position_medfilt(np.array(positions), 7) + step = {'step':3,'frame':0} + util.savejson(os.path.join(opt.temp_dir,'step.json'),step) + np.save(os.path.join(opt.temp_dir,'roi_positions.npy'),positions) + 
np.save(os.path.join(opt.temp_dir,'mask_index.npy'),np.array(mask_index)) + + return mask_index + +def addmosaic_video(opt,netS): + path = opt.media_path + fps,imagepaths = video_init(opt,path)[:2] + length = len(imagepaths) + start_frame = int(imagepaths[0][7:13]) + mask_index = get_roi_positions(opt,netS,imagepaths)[(start_frame-1):] + + t1 = time.time() + if not opt.no_preview: + cv2.namedWindow('preview', cv2.WINDOW_NORMAL) + + # add mosaic + print('Step:3/4 -- Add Mosaic:') + t1 = time.time() + # print(mask_index) + for i,imagepath in enumerate(imagepaths,1): + mask = impro.imread(os.path.join(opt.temp_dir+'/ROI_mask',imagepaths[np.clip(mask_index[i-1]-start_frame,0,1000000)]),'gray') + img = impro.imread(os.path.join(opt.temp_dir+'/video2image',imagepath)) + if impro.mask_area(mask)>100: + try:#Avoid unknown errors + img = mosaic.addmosaic(img, mask, opt) + except Exception as e: + print('Warning:',e) + t = Thread(target=cv2.imwrite,args=(os.path.join(opt.temp_dir+'/addmosaic_image',imagepath),img)) + t.start() + os.remove(os.path.join(opt.temp_dir+'/video2image',imagepath)) + + #preview result and print + if not opt.no_preview: + cv2.imshow('preview',img) + cv2.waitKey(1) & 0xFF + t2 = time.time() + print('\r',str(i)+'/'+str(length),util.get_bar(100*i/length,num=35),util.counttime(t1,t2,i,length),end='') + + print() + if not opt.no_preview: + cv2.destroyAllWindows() + print('Step:4/4 -- Convert images to video') + ffmpeg.image2video( fps, + opt.temp_dir+'/addmosaic_image/output_%06d.'+opt.tempimage_type, + opt.temp_dir+'/voice_tmp.mp3', + os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_add.mp4')) \ No newline at end of file diff --git a/cores/clean.py b/cores/clean.py new file mode 100644 index 0000000000000000000000000000000000000000..285542b79179f3a2cb0c47931d6133ebfa57375d --- /dev/null +++ b/cores/clean.py @@ -0,0 +1,249 @@ +import os +import time +import numpy as np +import cv2 +import torch +from models import runmodel +from 
util import data,util,ffmpeg,filt +from util import image_processing as impro +from .init import video_init +from multiprocessing import Queue, Process +from threading import Thread + +''' +---------------------Clean Mosaic--------------------- +''' +def get_mosaic_positions(opt,netM,imagepaths,savemask=True): + # resume + continue_flag = False + if os.path.isfile(os.path.join(opt.temp_dir,'step.json')): + step = util.loadjson(os.path.join(opt.temp_dir,'step.json')) + resume_frame = int(step['frame']) + if int(step['step'])>2: + pre_positions = np.load(os.path.join(opt.temp_dir,'mosaic_positions.npy')) + return pre_positions + if int(step['step'])>=2 and resume_frame>0: + pre_positions = np.load(os.path.join(opt.temp_dir,'mosaic_positions.npy')) + continue_flag = True + imagepaths = imagepaths[resume_frame:] + + positions = [] + t1 = time.time() + if not opt.no_preview: + cv2.namedWindow('mosaic mask', cv2.WINDOW_NORMAL) + print('Step:2/4 -- Find mosaic location') + + img_read_pool = Queue(4) + def loader(imagepaths): + for imagepath in imagepaths: + img_origin = impro.imread(os.path.join(opt.temp_dir+'/video2image',imagepath)) + img_read_pool.put(img_origin) + t = Thread(target=loader,args=(imagepaths,)) + t.setDaemon(True) + t.start() + + for i,imagepath in enumerate(imagepaths,1): + img_origin = img_read_pool.get() + x,y,size,mask = runmodel.get_mosaic_position(img_origin,netM,opt) + positions.append([x,y,size]) + if savemask: + t = Thread(target=cv2.imwrite,args=(os.path.join(opt.temp_dir+'/mosaic_mask',imagepath), mask,)) + t.start() + if i%1000==0: + save_positions = np.array(positions) + if continue_flag: + save_positions = np.concatenate((pre_positions,save_positions),axis=0) + np.save(os.path.join(opt.temp_dir,'mosaic_positions.npy'),save_positions) + step = {'step':2,'frame':i+resume_frame} + util.savejson(os.path.join(opt.temp_dir,'step.json'),step) + + #preview result and print + if not opt.no_preview: + cv2.imshow('mosaic mask',mask) + cv2.waitKey(1) & 
0xFF + t2 = time.time() + print('\r',str(i)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),util.counttime(t1,t2,i,len(imagepaths)),end='') + + if not opt.no_preview: + cv2.destroyAllWindows() + print('\nOptimize mosaic locations...') + positions =np.array(positions) + if continue_flag: + positions = np.concatenate((pre_positions,positions),axis=0) + for i in range(3):positions[:,i] = filt.medfilt(positions[:,i],opt.medfilt_num) + step = {'step':3,'frame':0} + util.savejson(os.path.join(opt.temp_dir,'step.json'),step) + np.save(os.path.join(opt.temp_dir,'mosaic_positions.npy'),positions) + + return positions + +def cleanmosaic_img(opt,netG,netM): + + path = opt.media_path + print('Clean Mosaic:',path) + img_origin = impro.imread(path) + x,y,size,mask = runmodel.get_mosaic_position(img_origin,netM,opt) + #cv2.imwrite('./mask/'+os.path.basename(path), mask) + img_result = img_origin.copy() + if size > 100 : + img_mosaic = img_origin[y-size:y+size,x-size:x+size] + if opt.traditional: + img_fake = runmodel.traditional_cleaner(img_mosaic,opt) + else: + img_fake = runmodel.run_pix2pix(img_mosaic,netG,opt) + img_result = impro.replace_mosaic(img_origin,img_fake,mask,x,y,size,opt.no_feather) + else: + print('Do not find mosaic') + impro.imwrite(os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_clean.jpg'),img_result) + +def cleanmosaic_img_server(opt,img_origin,netG,netM): + x,y,size,mask = runmodel.get_mosaic_position(img_origin,netM,opt) + img_result = img_origin.copy() + if size > 100 : + img_mosaic = img_origin[y-size:y+size,x-size:x+size] + if opt.traditional: + img_fake = runmodel.traditional_cleaner(img_mosaic,opt) + else: + img_fake = runmodel.run_pix2pix(img_mosaic,netG,opt) + img_result = impro.replace_mosaic(img_origin,img_fake,mask,x,y,size,opt.no_feather) + return img_result + +def cleanmosaic_video_byframe(opt,netG,netM): + path = opt.media_path + fps,imagepaths,height,width = video_init(opt,path) + start_frame = 
int(imagepaths[0][7:13]) + positions = get_mosaic_positions(opt,netM,imagepaths,savemask=True)[(start_frame-1):] + + t1 = time.time() + if not opt.no_preview: + cv2.namedWindow('clean', cv2.WINDOW_NORMAL) + + # clean mosaic + print('Step:3/4 -- Clean Mosaic:') + length = len(imagepaths) + for i,imagepath in enumerate(imagepaths,0): + x,y,size = positions[i][0],positions[i][1],positions[i][2] + img_origin = impro.imread(os.path.join(opt.temp_dir+'/video2image',imagepath)) + img_result = img_origin.copy() + if size > 100: + try:#Avoid unknown errors + img_mosaic = img_origin[y-size:y+size,x-size:x+size] + if opt.traditional: + img_fake = runmodel.traditional_cleaner(img_mosaic,opt) + else: + img_fake = runmodel.run_pix2pix(img_mosaic,netG,opt) + mask = cv2.imread(os.path.join(opt.temp_dir+'/mosaic_mask',imagepath),0) + img_result = impro.replace_mosaic(img_origin,img_fake,mask,x,y,size,opt.no_feather) + except Exception as e: + print('Warning:',e) + t = Thread(target=cv2.imwrite,args=(os.path.join(opt.temp_dir+'/replace_mosaic',imagepath), img_result,)) + t.start() + os.remove(os.path.join(opt.temp_dir+'/video2image',imagepath)) + + #preview result and print + if not opt.no_preview: + cv2.imshow('clean',img_result) + cv2.waitKey(1) & 0xFF + t2 = time.time() + print('\r',str(i+1)+'/'+str(length),util.get_bar(100*i/length,num=35),util.counttime(t1,t2,i+1,len(imagepaths)),end='') + print() + if not opt.no_preview: + cv2.destroyAllWindows() + print('Step:4/4 -- Convert images to video') + ffmpeg.image2video( fps, + opt.temp_dir+'/replace_mosaic/output_%06d.'+opt.tempimage_type, + opt.temp_dir+'/voice_tmp.mp3', + os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_clean.mp4')) + +def cleanmosaic_video_fusion(opt,netG,netM): + path = opt.media_path + N,T,S = 2,5,3 + LEFT_FRAME = (N*S) + POOL_NUM = LEFT_FRAME*2+1 + INPUT_SIZE = 256 + FRAME_POS = np.linspace(0, (T-1)*S,T,dtype=np.int64) + img_pool = [] + previous_frame = None + init_flag = True + + 
fps,imagepaths,height,width = video_init(opt,path) + start_frame = int(imagepaths[0][7:13]) + positions = get_mosaic_positions(opt,netM,imagepaths,savemask=True)[(start_frame-1):] + t1 = time.time() + if not opt.no_preview: + cv2.namedWindow('clean', cv2.WINDOW_NORMAL) + + # clean mosaic + print('Step:3/4 -- Clean Mosaic:') + length = len(imagepaths) + write_pool = Queue(4) + show_pool = Queue(4) + def write_result(): + while True: + save_ori,imagepath,img_origin,img_fake,x,y,size = write_pool.get() + if save_ori: + img_result = img_origin + else: + mask = cv2.imread(os.path.join(opt.temp_dir+'/mosaic_mask',imagepath),0) + img_result = impro.replace_mosaic(img_origin,img_fake,mask,x,y,size,opt.no_feather) + if not opt.no_preview: + show_pool.put(img_result.copy()) + cv2.imwrite(os.path.join(opt.temp_dir+'/replace_mosaic',imagepath),img_result) + os.remove(os.path.join(opt.temp_dir+'/video2image',imagepath)) + t = Thread(target=write_result,args=()) + t.setDaemon(True) + t.start() + + for i,imagepath in enumerate(imagepaths,0): + x,y,size = positions[i][0],positions[i][1],positions[i][2] + input_stream = [] + # image read stream + if i==0 :# init + for j in range(POOL_NUM): + img_pool.append(impro.imread(os.path.join(opt.temp_dir+'/video2image',imagepaths[np.clip(i+j-LEFT_FRAME,0,len(imagepaths)-1)]))) + else: # load next frame + img_pool.pop(0) + img_pool.append(impro.imread(os.path.join(opt.temp_dir+'/video2image',imagepaths[np.clip(i+LEFT_FRAME,0,len(imagepaths)-1)]))) + img_origin = img_pool[LEFT_FRAME] + + # preview result and print + if not opt.no_preview: + if show_pool.qsize()>3: + cv2.imshow('clean',show_pool.get()) + cv2.waitKey(1) & 0xFF + + if size>50: + try:#Avoid unknown errors + for pos in FRAME_POS: + input_stream.append(impro.resize(img_pool[pos][y-size:y+size,x-size:x+size], INPUT_SIZE,interpolation=cv2.INTER_CUBIC)[:,:,::-1]) + if init_flag: + init_flag = False + previous_frame = input_stream[N] + previous_frame = 
data.im2tensor(previous_frame,bgr2rgb=True,gpu_id=opt.gpu_id) + + input_stream = np.array(input_stream).reshape(1,T,INPUT_SIZE,INPUT_SIZE,3).transpose((0,4,1,2,3)) + input_stream = data.to_tensor(data.normalize(input_stream),gpu_id=opt.gpu_id) + with torch.no_grad(): + unmosaic_pred = netG(input_stream,previous_frame) + img_fake = data.tensor2im(unmosaic_pred,rgb2bgr = True) + previous_frame = unmosaic_pred + write_pool.put([False,imagepath,img_origin.copy(),img_fake.copy(),x,y,size]) + except Exception as e: + init_flag = True + print('Error:',e) + else: + write_pool.put([True,imagepath,img_origin.copy(),-1,-1,-1,-1]) + init_flag = True + + t2 = time.time() + print('\r',str(i+1)+'/'+str(length),util.get_bar(100*i/length,num=35),util.counttime(t1,t2,i+1,len(imagepaths)),end='') + print() + write_pool.close() + show_pool.close() + if not opt.no_preview: + cv2.destroyAllWindows() + print('Step:4/4 -- Convert images to video') + ffmpeg.image2video( fps, + opt.temp_dir+'/replace_mosaic/output_%06d.'+opt.tempimage_type, + opt.temp_dir+'/voice_tmp.mp3', + os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_clean.mp4')) \ No newline at end of file diff --git a/cores/init.py b/cores/init.py new file mode 100644 index 0000000000000000000000000000000000000000..5993c584011ce876b4cd80795515acc294923cfb --- /dev/null +++ b/cores/init.py @@ -0,0 +1,31 @@ +import os +from util import util,ffmpeg + +''' +---------------------Video Init--------------------- +''' +def video_init(opt,path): + fps,endtime,height,width = ffmpeg.get_video_infos(path) + if opt.fps !=0: + fps = opt.fps + + # resume + if os.path.isfile(os.path.join(opt.temp_dir,'step.json')): + step = util.loadjson(os.path.join(opt.temp_dir,'step.json')) + if int(step['step'])>=1: + choose = input('There is an unfinished video. Continue it? 
[y/n] ') + if choose.lower() =='yes' or choose.lower() == 'y': + imagepaths = os.listdir(opt.temp_dir+'/video2image') + imagepaths.sort() + return fps,imagepaths,height,width + + print('Step:1/4 -- Convert video to images') + util.file_init(opt) + ffmpeg.video2voice(path,opt.temp_dir+'/voice_tmp.mp3',opt.start_time,opt.last_time) + ffmpeg.video2image(path,opt.temp_dir+'/video2image/output_%06d.'+opt.tempimage_type,fps,opt.start_time,opt.last_time) + imagepaths = os.listdir(opt.temp_dir+'/video2image') + imagepaths.sort() + step = {'step':2,'frame':0} + util.savejson(os.path.join(opt.temp_dir,'step.json'),step) + + return fps,imagepaths,height,width \ No newline at end of file diff --git a/cores/options.py b/cores/options.py new file mode 100644 index 0000000000000000000000000000000000000000..9574a9bee8231bae827d274e970c0ac74dbd04a8 --- /dev/null +++ b/cores/options.py @@ -0,0 +1,130 @@ +import argparse +import os +import sys + + +class Options(): + def __init__(self): + self.parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + self.initialized = False + + def initialize(self): + + #base + self.parser.add_argument('--debug', action='store_true', help='if specified, start debug mode') + self.parser.add_argument('--gpu_id', type=str,default='0', help='if -1, use cpu') + self.parser.add_argument('--media_path', type=str, default='./imgs/ruoruo.jpg',help='your videos or images path') + self.parser.add_argument('-ss', '--start_time', type=str, default='00:00:00',help='start position of video, default is the beginning of video') + self.parser.add_argument('-t', '--last_time', type=str, default='00:00:00',help='duration of the video, default is the entire video') + self.parser.add_argument('--mode', type=str, default='auto',help='Program running mode. 
auto | add | clean | style') + self.parser.add_argument('--model_path', type=str, default='./pretrained_models/mosaic/add_face.pth',help='pretrained model path') + self.parser.add_argument('--result_dir', type=str, default='./result',help='output media will be saved here') + self.parser.add_argument('--temp_dir', type=str, default='./tmp', help='Temporary files will go here') + self.parser.add_argument('--tempimage_type', type=str, default='jpg',help='type of temp image, png | jpg, png is better but occupy more storage space') + self.parser.add_argument('--netG', type=str, default='auto', + help='select model to use for netG(Clean mosaic and Transfer style) -> auto | unet_128 | unet_256 | resnet_9blocks | HD | video') + self.parser.add_argument('--fps', type=int, default=0,help='read and output fps, if 0-> origin') + self.parser.add_argument('--no_preview', action='store_true', help='if specified,do not preview images when processing video. eg.(when run it on server)') + self.parser.add_argument('--output_size', type=int, default=0,help='size of output media, if 0 -> origin') + self.parser.add_argument('--mask_threshold', type=int, default=64,help='Mosaic detection threshold (0~255). 
The smaller is it, the more likely judged as a mosaic area.') + + #AddMosaic + self.parser.add_argument('--mosaic_mod', type=str, default='squa_avg',help='type of mosaic -> squa_avg | squa_random | squa_avg_circle_edge | rect_avg | random') + self.parser.add_argument('--mosaic_size', type=int, default=0,help='mosaic size,if 0 auto size') + self.parser.add_argument('--mask_extend', type=int, default=10,help='extend mosaic area') + + #CleanMosaic + self.parser.add_argument('--mosaic_position_model_path', type=str, default='auto',help='name of model use to find mosaic position') + self.parser.add_argument('--traditional', action='store_true', help='if specified, use traditional image processing methods to clean mosaic') + self.parser.add_argument('--tr_blur', type=int, default=10, help='ksize of blur when using traditional method, it will affect final quality') + self.parser.add_argument('--tr_down', type=int, default=10, help='downsample when using traditional method,it will affect final quality') + self.parser.add_argument('--no_feather', action='store_true', help='if specified, no edge feather and color correction, but run faster') + self.parser.add_argument('--all_mosaic_area', action='store_true', help='if specified, find all mosaic area, else only find the largest area') + self.parser.add_argument('--medfilt_num', type=int, default=11,help='medfilt window of mosaic movement in the video') + self.parser.add_argument('--ex_mult', type=str, default='auto',help='mosaic area expansion') + + #StyleTransfer + self.parser.add_argument('--preprocess', type=str, default='resize', help='resize and cropping of images at load time [ resize | resize_scale_width | edges | gray] or resize,edges(use comma to split)') + self.parser.add_argument('--edges', action='store_true', help='if specified, use edges to generate pictures,(input_nc = 1)') + self.parser.add_argument('--canny', type=int, default=150,help='threshold of canny') + self.parser.add_argument('--only_edges', 
action='store_true', help='if specified, output media will be edges') + + self.initialized = True + + + def getparse(self, test_flag = False): + if not self.initialized: + self.initialize() + self.opt = self.parser.parse_args() + + model_name = os.path.basename(self.opt.model_path) + self.opt.temp_dir = os.path.join(self.opt.temp_dir, 'DeepMosaics_temp') + + if self.opt.gpu_id != '-1': + os.environ["CUDA_VISIBLE_DEVICES"] = str(self.opt.gpu_id) + import torch + if not torch.cuda.is_available(): + self.opt.gpu_id = '-1' + # else: + # self.opt.gpu_id = '-1' + + if test_flag: + if not os.path.exists(self.opt.media_path): + print('Error: Media does not exist!') + input('Please press any key to exit.\n') + sys.exit(0) + if not os.path.exists(self.opt.model_path): + print('Error: Model does not exist!') + input('Please press any key to exit.\n') + sys.exit(0) + + if self.opt.mode == 'auto': + if 'clean' in model_name or self.opt.traditional: + self.opt.mode = 'clean' + elif 'add' in model_name: + self.opt.mode = 'add' + elif 'style' in model_name or 'edges' in model_name: + self.opt.mode = 'style' + else: + print('Please check model_path!') + input('Please press any key to exit.\n') + sys.exit(0) + + if self.opt.output_size == 0 and self.opt.mode == 'style': + self.opt.output_size = 512 + + if 'edges' in model_name or 'edges' in self.opt.preprocess: + self.opt.edges = True + + if self.opt.netG == 'auto' and self.opt.mode =='clean': + if 'unet_128' in model_name: + self.opt.netG = 'unet_128' + elif 'resnet_9blocks' in model_name: + self.opt.netG = 'resnet_9blocks' + elif 'HD' in model_name and 'video' not in model_name: + self.opt.netG = 'HD' + elif 'video' in model_name: + self.opt.netG = 'video' + else: + print('Type of Generator error!') + input('Please press any key to exit.\n') + sys.exit(0) + + if self.opt.ex_mult == 'auto': + if 'face' in model_name: + self.opt.ex_mult = 1.1 + else: + self.opt.ex_mult = 1.5 + else: + self.opt.ex_mult = float(self.opt.ex_mult) + + 
if self.opt.mosaic_position_model_path == 'auto' and self.opt.mode == 'clean': + _path = os.path.join(os.path.split(self.opt.model_path)[0],'mosaic_position.pth') + if os.path.isfile(_path): + self.opt.mosaic_position_model_path = _path + else: + input('Please check mosaic_position_model_path!') + input('Please press any key to exit.\n') + sys.exit(0) + + return self.opt \ No newline at end of file diff --git a/cores/style.py b/cores/style.py new file mode 100644 index 0000000000000000000000000000000000000000..32834adc125ed2f0f08679b9958e45d71e80f3c0 --- /dev/null +++ b/cores/style.py @@ -0,0 +1,50 @@ +import os +import time +import numpy as np +import cv2 +from models import runmodel +from util import mosaic,util,ffmpeg,filt +from util import image_processing as impro +from .init import video_init + +''' +---------------------Style Transfer--------------------- +''' +def styletransfer_img(opt,netG): + print('Style Transfer_img:',opt.media_path) + img = impro.imread(opt.media_path) + img = runmodel.run_styletransfer(opt, netG, img) + suffix = os.path.basename(opt.model_path).replace('.pth','').replace('style_','') + impro.imwrite(os.path.join(opt.result_dir,os.path.splitext(os.path.basename(opt.media_path))[0]+'_'+suffix+'.jpg'),img) + +def styletransfer_video(opt,netG): + path = opt.media_path + fps,imagepaths = video_init(opt,path)[:2] + print('Step:2/4 -- Transfer') + t1 = time.time() + if not opt.no_preview: + cv2.namedWindow('preview', cv2.WINDOW_NORMAL) + length = len(imagepaths) + + for i,imagepath in enumerate(imagepaths,1): + img = impro.imread(os.path.join(opt.temp_dir+'/video2image',imagepath)) + img = runmodel.run_styletransfer(opt, netG, img) + cv2.imwrite(os.path.join(opt.temp_dir+'/style_transfer',imagepath),img) + os.remove(os.path.join(opt.temp_dir+'/video2image',imagepath)) + + #preview result and print + if not opt.no_preview: + cv2.imshow('preview',img) + cv2.waitKey(1) & 0xFF + t2 = time.time() + 
print('\r',str(i)+'/'+str(length),util.get_bar(100*i/length,num=35),util.counttime(t1,t2,i,len(imagepaths)),end='') + + print() + if not opt.no_preview: + cv2.destroyAllWindows() + suffix = os.path.basename(opt.model_path).replace('.pth','').replace('style_','') + print('Step:4/4 -- Convert images to video') + ffmpeg.image2video( fps, + opt.temp_dir+'/style_transfer/output_%06d.'+opt.tempimage_type, + opt.temp_dir+'/voice_tmp.mp3', + os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_'+suffix+'.mp4')) \ No newline at end of file diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..1b088e2a664e6dd6cce3d80357b0e30be5a78521 --- /dev/null +++ b/cpp/CMakeLists.txt @@ -0,0 +1,38 @@ +cmake_minimum_required(VERSION 3.0 FATAL_ERROR) +set(CMAKE_CXX_STANDARD 14) + +project(DeepMosaics) +set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/lib) #链接库路径 + +set(Torch_DIR /home/hypo/libtorch/share/cmake/Torch) +find_package(Torch REQUIRED) + +set(OpenCV_DIR /home/hypo/opencv-4.4.0) +find_package(OpenCV REQUIRED) + +# Add sub directories +add_subdirectory(example) +add_subdirectory(utils) + +# set_property(TARGET ${PROJECT_NAME} PROPERTY CXX_STANDARD 14) +# cmake_minimum_required(VERSION 3.0 FATAL_ERROR) +# project(main) +# set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/lib) #链接库路径 + +# set(Torch_DIR /home/hypo/libtorch/share/cmake/Torch) +# find_package(Torch REQUIRED) + +# set(OpenCV_DIR /home/hypo/opencv-4.4.0) +# find_package(OpenCV REQUIRED) + +# # 查找当前目录下的所有源文件 +# # 并将名称保存到 DIR_SRCS 变量 +# # aux_source_directory(. 
DIR_SRCS) +# add_subdirectory(utils) + +# add_executable(main main.cpp) +# # target_link_libraries(main ) +# # include_directories( "${OpenCV_INCLUDE_DIRS}" ) +# target_link_libraries( main "${TORCH_LIBRARIES}" "${OpenCV_LIBS}" utils) + +# set_property(TARGET main PROPERTY CXX_STANDARD 14) diff --git a/cpp/README.md b/cpp/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3be7e1b7ca308501c423ec5cf213a8e80558bf4c --- /dev/null +++ b/cpp/README.md @@ -0,0 +1,3 @@ +### C++ version for DeepMosaics +* I am learning c++ through this project... +* It is under development... \ No newline at end of file diff --git a/cpp/example/CMakeLists.txt b/cpp/example/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..8e27f616b29a3f42746e058b2e6783576c72302f --- /dev/null +++ b/cpp/example/CMakeLists.txt @@ -0,0 +1,17 @@ +# project(example) +# add_executable("${PROJECT_NAME}" deepmosaic.cpp) +# target_link_libraries( "${PROJECT_NAME}" +# "${TORCH_LIBRARIES}" +# "${OpenCV_LIBS}" +# utils) + +file(GLOB_RECURSE srcs RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp") +foreach(sourcefile IN LISTS srcs) + string( REPLACE ".cpp" "" binname ${sourcefile}) + add_executable( ${binname} ${sourcefile} ) + target_link_libraries( ${binname} + "${TORCH_LIBRARIES}" + "${OpenCV_LIBS}" + utils) + # set_property(TARGET ${binname} PROPERTY CXX_STANDARD 14) +endforeach() \ No newline at end of file diff --git a/cpp/example/deepmosaic.cpp b/cpp/example/deepmosaic.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dd7cde87b88912a3f7b45501d18378c64429c8b0 --- /dev/null +++ b/cpp/example/deepmosaic.cpp @@ -0,0 +1,52 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "data.hpp" +#include "util.hpp" + +int main() { + std::string path = util::current_path(); + + std::string net_path = "../res/models/mosaic_position.pth"; + std::string img_path = 
"../res/test_media/face/d.jpg"; + + cv::Mat img = cv::imread(img_path); + cv::resize(img, img, cv::Size(360, 360), 2); + // img.convertTo(img, CV_32F); + torch::Tensor img_tensor = + torch::from_blob(img.data, {1, img.rows, img.cols, 3}, torch::kByte); + img_tensor = img_tensor.permute({0, 3, 1, 2}); + img_tensor = img_tensor.toType(torch::kFloat); + img_tensor = img_tensor.div(255); + std::cout << img_tensor.sizes() << "\n"; + + // end = clock(); + // dur = (double)(end - start); + // printf("Use Time:%f\n", (dur / CLOCKS_PER_SEC)); + + // std::string net_path = "../res/models/mosaic_position.pt"; + // torch::jit::script::Module net; + // try{ + // // if (!isfile(net_path)){ + // // std::cerr<<"model does not exist\n"; + // // } + + // net = torch::jit::load(net_path); + // } + // catch(const std::exception& e){ + // std::cerr << "error loading the model\n"; + // return -1; + // } + + // torch::Tensor example = torch::ones({1,3,360,360}); + // torch::Tensor output = net.forward({example}).toTensor(); + // std::cout<<"ok"< + +namespace data { +void normalize(cv::Mat& matrix, double mean = 0.5, double std = 0.5); + +} // namespace data + +#endif \ No newline at end of file diff --git a/cpp/utils/include/util.hpp b/cpp/utils/include/util.hpp new file mode 100644 index 0000000000000000000000000000000000000000..104ebb5f7b9d09601721c88985ebdad2b8aff40b --- /dev/null +++ b/cpp/utils/include/util.hpp @@ -0,0 +1,26 @@ +#ifndef UTIL_H +#define UTIL_H +#include +#include +namespace util { + +class Timer { + private: + clock_t tstart, tend; + + public: + void start(); + void end(); +}; + +// std::string path = util::current_path(); +std::string current_path(); + +// std::string out = util::pathjoin({path, "b", "c"}); +std::string pathjoin(const std::list& strs); + +bool isfile(const std::string& name); + +} // namespace util + +#endif \ No newline at end of file diff --git a/cpp/utils/src/data.cpp b/cpp/utils/src/data.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..47b3b64f794c09cce465b970469adbb940733baa --- /dev/null +++ b/cpp/utils/src/data.cpp @@ -0,0 +1,10 @@ +#include "data.hpp" +#include + +namespace data { +void normalize(cv::Mat& matrix, double mean, double std) { + // matrix = (matrix / 255.0 - mean) / std; + matrix = matrix / (255.0 * std) - mean / std; +} + +} // namespace data \ No newline at end of file diff --git a/cpp/utils/src/util.cpp b/cpp/utils/src/util.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0929cb64aa5efd13e74d21060c6e7bfe468b8532 --- /dev/null +++ b/cpp/utils/src/util.cpp @@ -0,0 +1,49 @@ +#include "util.hpp" +#include +#include +#include +#include +#include +#include + +namespace util { + +void Timer::start() { + tstart = clock(); +} +void Timer::end() { + tend = clock(); + double dur; + dur = (double)(tend - tstart); + std::cout << "Cost Time:" << (dur / CLOCKS_PER_SEC) << "\n"; +} + +std::string current_path() { + char* buffer; + buffer = getcwd(NULL, 0); + return buffer; +} + +std::string pathjoin(const std::list& strs) { + std::string res = ""; + int cnt = 0; + for (std::string s : strs) { + if (cnt == 0) { + res += s; + } else { + if (s[0] != '/') { + res += ("/" + s); + } else { + res += s; + } + } + cnt++; + } + return res; +} + +bool isfile(const std::string& name) { + struct stat buffer; + return (stat(name.c_str(), &buffer) == 0); +} +} // namespace util \ No newline at end of file diff --git a/deepmosaic.py b/deepmosaic.py new file mode 100644 index 0000000000000000000000000000000000000000..5a59f19a71fedee46c5533a0ad65a95ff95c98ce --- /dev/null +++ b/deepmosaic.py @@ -0,0 +1,99 @@ +import os +import sys +import traceback +try: + from cores import Options,add,clean,style + from util import util + from models import loadmodel +except Exception as e: + print(e) + input('Please press any key to exit.\n') + sys.exit(0) + +opt = Options().getparse(test_flag = True) +if not os.path.isdir(opt.temp_dir): + util.file_init(opt) 
+ +def main(): + + if os.path.isdir(opt.media_path): + files = util.Traversal(opt.media_path) + else: + files = [opt.media_path] + if opt.mode == 'add': + netS = loadmodel.bisenet(opt,'roi') + for file in files: + opt.media_path = file + if util.is_img(file): + add.addmosaic_img(opt,netS) + elif util.is_video(file): + add.addmosaic_video(opt,netS) + util.clean_tempfiles(opt, tmp_init = False) + else: + print('This type of file is not supported') + util.clean_tempfiles(opt, tmp_init = False) + + elif opt.mode == 'clean': + netM = loadmodel.bisenet(opt,'mosaic') + if opt.traditional: + netG = None + elif opt.netG == 'video': + netG = loadmodel.video(opt) + else: + netG = loadmodel.pix2pix(opt) + + for file in files: + opt.media_path = file + if util.is_img(file): + clean.cleanmosaic_img(opt,netG,netM) + elif util.is_video(file): + if opt.netG == 'video' and not opt.traditional: + clean.cleanmosaic_video_fusion(opt,netG,netM) + else: + clean.cleanmosaic_video_byframe(opt,netG,netM) + util.clean_tempfiles(opt, tmp_init = False) + else: + print('This type of file is not supported') + + elif opt.mode == 'style': + netG = loadmodel.style(opt) + for file in files: + opt.media_path = file + if util.is_img(file): + style.styletransfer_img(opt,netG) + elif util.is_video(file): + style.styletransfer_video(opt,netG) + util.clean_tempfiles(opt, tmp_init = False) + else: + print('This type of file is not supported') + + util.clean_tempfiles(opt, tmp_init = False) + +if __name__ == '__main__': + if opt.debug: + main() + sys.exit(0) + try: + main() + print('Finished!') + except Exception as ex: + print('--------------------ERROR--------------------') + print('--------------Environment--------------') + print('DeepMosaics: 0.5.1') + print('Python:',sys.version) + import torch + print('Pytorch:',torch.__version__) + import cv2 + print('OpenCV:',cv2.__version__) + import platform + print('Platform:',platform.platform()) + + print('--------------BUG--------------') + ex_type, ex_val, 
ex_stack = sys.exc_info() + print('Error Type:',ex_type) + print(ex_val) + for stack in traceback.extract_tb(ex_stack): + print(stack) + input('Please press any key to exit.\n') + #util.clean_tempfiles(tmp_init = False) + sys.exit(0) \ No newline at end of file diff --git a/docs/Release_notes.txt b/docs/Release_notes.txt new file mode 100644 index 0000000000000000000000000000000000000000..c48fbb59e02f25facf5ec48491ce7661a2ee7ba2 --- /dev/null +++ b/docs/Release_notes.txt @@ -0,0 +1,43 @@ +DeepMosaics: 0.5.1 +Core building with: + Python: 3.7.3 (default, Apr 24 2019, 15:29:51) [MSC v.1915 64 bit (AMD64)] + Pytorch: 1.7.1 + OpenCV: 4.1.2 + Platform: Windows-10-10.0.19041-SP0 + Driver Version: 461.40 + CUDA:11.0 +GUI building with C# +For more detail, please view on github: https://github.com/HypoX64/DeepMosaics + +Releases History + V0.5.1 + Fix: + 1.Fix Some BUGs when restore unfinished tasks. + 2.Fix that audio and video are not synchronized when the video is too long. + New: + 1.Speed up video processing by Asynchronous. + V0.5.0 + 1.New video model (Perform better) + V0.4.1 + 1.Allow unfinished tasks to be restored. + 2.Clean cache during processing. + 3.Support CUDA 11.0. + V0.4.0 + 1.Support GPU. + 2.Preview images when processing video. + 3.Choose start position of video. + V0.3.0 + 1. Support BiSeNet(Better recognition of mosaics). + 2. New videoHD model. + 3. Better feathering method. + V0.2.0 + 1. Add video model. + 2. Now you can input chinese path + 3. Support style transfer + 4. Support fps limit + V0.1.2 + 1. Support pix2pixHD model + V0.1.1 + 1. Check path, can't input illegal path + V0.1.0 + 1. Initial release. 
\ No newline at end of file diff --git a/docs/exe_help.md b/docs/exe_help.md new file mode 100644 index 0000000000000000000000000000000000000000..d5b96aaf54b9b4b26d74350c218caef5038e908e --- /dev/null +++ b/docs/exe_help.md @@ -0,0 +1,112 @@ +## DeepMosaics.exe Instructions +**[[中文版]](./exe_help_CN.md)** +This is a GUI version compiled in Windows.
+Download this version and pre-trained model via [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
+Video tutorial => [[youtube]](https://www.youtube.com/watch?v=1kEmYawJ_vk) [[bilibili]](https://www.bilibili.com/video/BV1QK4y1a7Av)
+ +Attention:<br>
+ + - Requires 64-bit Windows; Windows 10 is recommended.<br>
+ - Different pre-trained models are suitable for different effects.
+ - Run time depends on computer performance.
+ - If the output video cannot be played, try playing it with [potplayer](https://daumpotplayer.com/download/).<br>
+ - The GUI version is updated more slowly than the source code.<br>
+ +### How to install +#### CPU version +* 1.Download and install Microsoft Visual C++ + https://aka.ms/vs/16/release/vc_redist.x64.exe +#### GPU version +Only suppport NVidia GPU above gtx1060(Driver:above 460 & CUDA:11.0) +* 1.Download and install Microsoft Visual C++ + https://aka.ms/vs/16/release/vc_redist.x64.exe +* 2.Update your gpu drive to 460(or above) + https://www.nvidia.com/en-us/geforce/drivers/ +* 3.Download and install CUDA 11.0: + https://developer.nvidia.com/cuda-toolkit-archive + +You can also download them on BaiduNetdisk +https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ +Password: 1x0a + +### How to use +* step 1: Choose image or video. +* step 2: Choose model(Different pre-trained models are suitable for different effects) +* step 3: Run program and wait. +* step 4: Cheek reult in './result'. + +### Introduction to pre-trained models +* Mosaic + +| Name | Description | +| :------------------------------: | :---------------------------------------------------------: | +| add_face.pth | Add mosaic to all faces in images/videos. | +| clean_face_HD.pth | Clean mosaic to all faces in images/video.
(RAM > 8GB). | +| add_youknow.pth | Add mosaic to ... in images/videos. | +| clean_youknow_resnet_9blocks.pth | Clean mosaic to ... in images/videos. | +| clean_youknow_video.pth | Clean mosaic to ... in videos. It is better for processing video mosaics | + + +* Style Transfer + +| Name | Description | +| :---------------------: | :-------------------------------------------------------: | +| style_apple2orange.pth | Convert apples to oranges. | +| style_orange2apple.pth | Convert oranges to apples | +| style_summer2winter.pth | Convert summer to winter. | +| style_winter2summer.pth | Convert winter to summer. | +| style_cezanne.pth | Convert photos/video to Paul Cézanne style. | +| style_monet.pth | Convert photos/video to Claude Monet style. | +| style_ukiyoe.pth | Convert photos/video to Ukiyoe style. | +| style_vangogh.pth | Convert photos/video to Van Gogh style. | +### Annotation +![image](../imgs/GUI_Instructions.jpg)
+* 1. Choose image or video. +* 2. Choose model(Different pre-trained models are suitable for different effects). +* 3. Program running mode. (auto | add | clean | style) +* 4. Use GPU to run deep learning model. (The current version does not support gpu, if you need to use gpu please run source). +* 5. Limit the fps of the output video(0->original fps). +* 6. More options. +* 7. More options can be input. +* 8. Run program. +* 9. Open help file. +* 10. Sponsor our project. +* 11. Version information. +* 12. Open the URL on github. + +### Introduction to options +If you need more effects, use '--option your-parameters' to enter what you need. +* Base + +| Option | Description | Default | +| :----------: | :----------------------------------------: | :-------------------------------------: | +| --gpu_id | if -1, do not use gpu | 0 | +| --media_path | your videos or images path | ./imgs/ruoruo.jpg | +| --mode | program running mode(auto/clean/add/style) | 'auto' | +| --model_path | pretrained model path | ./pretrained_models/mosaic/add_face.pth | +| --result_dir | output media will be saved here | ./result | +| --fps | read and output fps, if 0-> origin | 0 | + +* AddMosaic + +| Option | Description | Default | +| :--------------: | :----------------------------------------------------------: | :------: | +| --mosaic_mod | type of mosaic -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg | +| --mosaic_size | mosaic size,if 0 -> auto size | 0 | +| --mask_extend | extend mosaic area | 10 | +| --mask_threshold | threshold of recognize mosaic position 0~255 | 64 | + +* CleanMosaic + +| Option | Description | Default | +| :-----------: | :----------------------------------------------------------: | :-----: | +| --traditional | if specified, use traditional image processing methods to clean mosaic | | +| --tr_blur | ksize of blur when using traditional method, it will affect final quality | 10 | +| --tr_down | downsample when using traditional 
method,it will affect final quality | 10 | +| --medfilt_num | medfilt window of mosaic movement in the video | 11 | + +* Style Transfer + +| Option | Description | Default | +| :-----------: | :----------------------------------: | :-----: | +| --output_size | size of output media, if 0 -> origin | 512 | \ No newline at end of file diff --git a/docs/exe_help_CN.md b/docs/exe_help_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..8ec4b06e90faf8604c53e5cd2cf60f912c0e54b9 --- /dev/null +++ b/docs/exe_help_CN.md @@ -0,0 +1,114 @@ +## DeepMosaics.exe 使用说明 +下载程序以及预训练模型 [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
+[视频教程](https://www.bilibili.com/video/BV1QK4y1a7Av)
+ +注意事项:
+ + + - 程序的运行要求在64位Windows操作系统,我们仅在Windows10运行过,其他版本暂未经过测试
+ - 请根据需求选择合适的预训练模型进行测试
+ - 运行时间取决于电脑性能,对于视频文件,我们建议使用GPU运行
+ - 如果输出的视频无法播放,这边建议您尝试[potplayer](https://daumpotplayer.com/download/).
+ - 相比于源码,该版本的更新将会延后. + +### 如何安装 +#### CPU version +* 1.下载安装 Microsoft Visual C++ + https://aka.ms/vs/16/release/vc_redist.x64.exe +#### GPU version +仅支持gtx1060及以上的NVidia显卡(要求460版本以上的驱动以及11.0版本的CUDA, 注意只能是11.0) +* 1.Download and install Microsoft Visual C++ + https://aka.ms/vs/16/release/vc_redist.x64.exe +* 2.Update your gpu drive to 460(or above) + https://www.nvidia.com/en-us/geforce/drivers/ +* 3.Download and install CUDA 11.0: + https://developer.nvidia.com/cuda-toolkit-archive + +当然这些也能在百度云上下载 +https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ +提取码: 1x0a + +### 如何使用 + +* step 1: 选择需要处理的图片或视频 +* step 2: 选择预训练模型(不同的预训练模型有不同的效果) +* step 3: 运行程序并等待 +* step 4: 查看结果(储存在result文件夹下) + +## 预训练模型说明 +当前的预训练模型分为两类——添加/移除马赛克以及风格转换. + +* 马赛克 + +| 文件名 | 描述 | +| :------------------------------: | :-------------------------------------------: | +| add_face.pth | 对图片或视频中的脸部打码 | +| clean_face_HD.pth | 对图片或视频中的脸部去码
(要求内存 > 8GB). | +| add_youknow.pth | 对图片或视频中的...内容打码 | +| clean_youknow_resnet_9blocks.pth | 对图片或视频中的...内容去码 | +| clean_youknow_video.pth | 对视频中的...内容去码,推荐使用带有'video'的模型去除视频中的马赛克 | + + +* 风格转换 + +| 文件名 | 描述 | +| :---------------------: | :-------------------------------------------------------: | +| style_apple2orange.pth | 苹果变橙子 | +| style_orange2apple.pth | 橙子变苹果 | +| style_summer2winter.pth | 夏天变冬天 | +| style_winter2summer.pth | 冬天变夏天 | +| style_cezanne.pth | 转化为Paul Cézanne 的绘画风格 | +| style_monet.pth | 转化为Claude Monet的绘画风格 | +| style_ukiyoe.pth | 转化为Ukiyoe的绘画风格 | +| style_vangogh.pth | 转化为Van Gogh的绘画风格 | + +### GUI界面注释 +![image](../imgs/GUI_Instructions.jpg)
+* 1. 选择需要处理的图片或视频 +* 2. 选择预训练模型 +* 3. 程序运行模式 (auto | add | clean | style) +* 4. 使用GPU (该版本目前不支持GPU,若需要使用GPU请使用源码运行). +* 5. 限制输出的视频帧率(0->原始帧率). +* 6. 更多的选项以及参数 +* 7. 自行输入更多参数,详见下文 +* 8. 运行 +* 9. 打开帮助文件 +* 10. 支持我们 +* 11. 版本信息 +* 12. 打开项目的github页面 + +### 参数说明 +如果需要更多的效果, 请按照 '--option your-parameters' 输入所需要的参数 +* 基本 + +| 选项 | 描述 | 默认 | +| :----------: | :------------------------: | :-------------------------------------: | +| --gpu_id | if -1, do not use gpu | 0 | +| --media_path | 需要处理的视频或者照片的路径 | ./imgs/ruoruo.jpg | +| --mode | 运行模式(auto/clean/add/style) | 'auto' | +| --model_path | 预训练模型的路径 | ./pretrained_models/mosaic/add_face.pth | +| --result_dir | 保存路径 | ./result | +| --fps | 限制视频输出的fps,0则为默认 | 0 | +* 添加马赛克 + +| 选项 | 描述 | 默认 | +| :----------: | :------------------------: | :-------------------------------------: | +| --mosaic_mod | 马赛克类型 -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg | +| --mosaic_size | 马赛克大小,0则为自动 | 0 | +| --mask_extend | 拓展马赛克区域 | 10 | +| --mask_threshold | 马赛克区域识别阈值 0~255,越小越容易被判断为马赛克区域 | 64 | + +* 去除马赛克 + +| 选项 | 描述 | 默认 | +| :----------: | :------------------------: | :-------------------------------------: | +| --traditional | 如果输入这个参数则使用传统方法清除马赛克 | | +| --tr_blur | 传统方法模糊尺寸 | 10 | +| --tr_down | 传统方法下采样尺寸 | 10 | +| --medfilt_num | medfilt window of mosaic movement in the video | 11 | + +* 风格转换 + +| 选项 | 描述 | 默认 | +| :----------: | :------------------------: | :-------------------------------------: | +| --output_size | 输出媒体的尺寸,如果是0则为原始尺寸 |512| \ No newline at end of file diff --git a/docs/options_introduction.md b/docs/options_introduction.md new file mode 100644 index 0000000000000000000000000000000000000000..3888eedb8f9fcfcc28700fbc2f478eba347c4ea8 --- /dev/null +++ b/docs/options_introduction.md @@ -0,0 +1,41 @@ +## Introduction to options +If you need more effects, use '--option your-parameters' to enter what you need. 
+ +### Base + +| Option | Description | Default | +| :----------: | :------------------------: | :-------------------------------------: | +| --gpu_id | if -1, do not use gpu | 0 | +| --media_path | your videos or images path | ./imgs/ruoruo.jpg | +| --start_time | start position of video, default is the beginning of video | '00:00:00' | +| --last_time | limit the duration of the video, default is the entire video | '00:00:00' | +| --mode | program running mode(auto/clean/add/style) | 'auto' | +| --model_path | pretrained model path | ./pretrained_models/mosaic/add_face.pth | +| --result_dir | output media will be saved here| ./result | +| --temp_dir | Temporary files will go here | ./tmp | +| --fps | read and output fps, if 0-> origin | 0 | +| --no_preview | if specified,do not preview images when processing video. eg.(when run it on server) | Flase | + +### AddMosaic + +| Option | Description | Default | +| :----------: | :------------------------: | :-------------------------------------: | +| --mosaic_mod | type of mosaic -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg | +| --mosaic_size | mosaic size,if 0 -> auto size | 0 | +| --mask_extend | extend mosaic area | 10 | +| --mask_threshold | threshold of recognize mosaic position 0~255 | 64 | + +### CleanMosaic + +| Option | Description | Default | +| :----------: | :------------------------: | :-------------------------------------: | +| --traditional | if specified, use traditional image processing methods to clean mosaic | | +| --tr_blur | ksize of blur when using traditional method, it will affect final quality | 10 | +| --tr_down | downsample when using traditional method,it will affect final quality | 10 | +| --medfilt_num | medfilt window of mosaic movement in the video | 11 | + +### Style Transfer + +| Option | Description | Default | +| :----------: | :------------------------: | :-------------------------------------: | +| --output_size | size of output media, if 0 -> origin 
|512| \ No newline at end of file diff --git a/docs/options_introduction_CN.md b/docs/options_introduction_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..8557d04840f01a0e23c2b881ed496b25181771ce --- /dev/null +++ b/docs/options_introduction_CN.md @@ -0,0 +1,41 @@ +## 参数说明 +如果需要更多的效果, 请按照 '--option your-parameters' 输入所需要的参数 + +### 基本 + +| 选项 | 描述 | 默认 | +| :----------: | :------------------------: | :-------------------------------------: | +| --gpu_id | if -1, do not use gpu | 0 | +| --media_path | 需要处理的视频或者照片的路径 | ./imgs/ruoruo.jpg | +| --start_time | 视频开始处理的位置,默认从头开始 | '00:00:00' | +| --last_time | 处理的视频时长,默认是整个视频 | '00:00:00' | +| --mode | 运行模式(auto/clean/add/style) | 'auto' | +| --model_path | 预训练模型的路径 | ./pretrained_models/mosaic/add_face.pth | +| --result_dir | 保存路径 | ./result | +| --temp_dir | 临时文件存储目录 | ./tmp | +| --fps | 限制视频输出的fps,0则为默认 | 0 | +| --no_preview | 如果输入,将不会在处理视频时播放实时预览.比如当你在服务器运行的时候 | Flase | + +### 添加马赛克 + +| 选项 | 描述 | 默认 | +| :----------: | :------------------------: | :-------------------------------------: | +| --mosaic_mod | 马赛克类型 -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg | +| --mosaic_size | 马赛克大小,0则为自动 | 0 | +| --mask_extend | 拓展马赛克区域 | 10 | +| --mask_threshold | 马赛克区域识别阈值 0~255 | 64 | + +### 去除马赛克 + +| 选项 | 描述 | 默认 | +| :----------: | :------------------------: | :-------------------------------------: | +| --traditional | 如果输入这个参数则使用传统方法清除马赛克 | | +| --tr_blur | 传统方法模糊尺寸 | 10 | +| --tr_down | 传统方法下采样尺寸 | 10 | +| --medfilt_num | medfilt window of mosaic movement in the video | 11 | + +### 风格转换 + +| 选项 | 描述 | 默认 | +| :----------: | :------------------------: | :-------------------------------------: | +| --output_size | 输出媒体的尺寸,如果是0则为原始尺寸 |512| \ No newline at end of file diff --git a/docs/pre-trained_models_introduction.md b/docs/pre-trained_models_introduction.md new file mode 100644 index 0000000000000000000000000000000000000000..cc74728ab9f202ec5924fc1a958288369e89daaa --- 
/dev/null +++ b/docs/pre-trained_models_introduction.md @@ -0,0 +1,28 @@ +## Introduction to pre-trained models +The current pre-trained models are divided into two categories(Add/Clean mosaic and StyleTransfer). +Download pre-trained model via [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
+ +### Mosaic + +| Name | Description | +| :------------------------------: | :-----------------------------------------------------: | +| add_face.pth | Add mosaic to faces in images/videos. | +| clean_face_HD.pth | Clean mosaic to faces in images/video.
(RAM > 8GB). | +| add_youknow.pth | Add mosaic to ... in images/videos. | +| clean_youknow_resnet_9blocks.pth | Clean mosaic to ... in images/videos. | +| clean_youknow_video.pth | Clean mosaic to ... in videos. It is better for processing video mosaics | + + +### Style Transfer + +| Name | Description | +| :---------------------: | :-------------------------------------------------------: | +| style_apple2orange.pth | Convert apples to oranges. | +| style_orange2apple.pth | Convert oranges to apples | +| style_summer2winter.pth | Convert summer to winter. | +| style_winter2summer.pth | Convert winter to summer. | +| style_cezanne.pth | Convert photos/video to Paul Cézanne style. | +| style_monet.pth | Convert photos/video to Claude Monet style. | +| style_ukiyoe.pth | Convert photos/video to Ukiyoe style. | +| style_vangogh.pth | Convert photos/video to Van Gogh style. | + diff --git a/docs/pre-trained_models_introduction_CN.md b/docs/pre-trained_models_introduction_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..9b82e2a5ebf97ef6738a5c4670be6abf4a1f5bd8 --- /dev/null +++ b/docs/pre-trained_models_introduction_CN.md @@ -0,0 +1,28 @@ +## 预训练模型说明 +当前的预训练模型分为两类——添加/移除马赛克以及风格转换. +可以通过以下方式下载预训练模型 [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
+ +### 马赛克 + +| 文件名 | 描述 | +| :------------------------------: | :-------------------------------------------: | +| add_face.pth | 对图片或视频中的脸部打码 | +| clean_face_HD.pth | 对图片或视频中的脸部去码
(要求内存 > 8GB). | +| add_youknow.pth | 对图片或视频中的...内容打码 | +| clean_youknow_resnet_9blocks.pth | 对图片或视频中的...内容去码 | +| clean_youknow_video.pth | 对视频中的...内容去码,推荐使用带有'video'的模型去除视频中的马赛克 | + + +### 风格转换 + +| 文件名 | 描述 | +| :---------------------: | :-------------------------------------------------------: | +| style_apple2orange.pth | 苹果变橙子 | +| style_orange2apple.pth | 橙子变苹果 | +| style_summer2winter.pth | 夏天变冬天 | +| style_winter2summer.pth | 冬天变夏天 | +| style_cezanne.pth | 转化为Paul Cézanne 的绘画风格 | +| style_monet.pth | 转化为Claude Monet的绘画风格 | +| style_ukiyoe.pth | 转化为Ukiyoe的绘画风格 | +| style_vangogh.pth | 转化为Van Gogh的绘画风格 | + diff --git a/docs/training_with_your_own_dataset.md b/docs/training_with_your_own_dataset.md new file mode 100644 index 0000000000000000000000000000000000000000..bc2a30c734d0fd93b4bfe044fa45c8715701fea2 --- /dev/null +++ b/docs/training_with_your_own_dataset.md @@ -0,0 +1,77 @@ +# Training with your own dataset +Training with your own dataset requires a GPU with 6G memory (above GTX1060).
+We will make "face" as an example. If you don't have any picture, you can download [CelebA](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) or [WIDER](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/WiderFace_Results.html). + +## Getting Started +#### Prerequisites + - Linux, Mac OS, Windows + - Python 3.6+ + - [ffmpeg 3.4.6](http://ffmpeg.org/) + - [Pytorch 1.0+](https://pytorch.org/) + - NVIDIA GPU(with more than 6G memory) + CUDA CuDNN
+#### Dependencies +This code depends on opencv-python, torchvision, matplotlib, tensorboardX, scikit-image available via conda install. +```bash +# or +pip install -r requirements.txt +``` +#### Clone this repo +```bash +git clone https://github.com/HypoX64/DeepMosaics +cd DeepMosaics +``` +## Make training datasets +```bash +cd make_datasets +``` +### Add mosaic dataset +Please generate mask from images which you want to add mosaic(number of images should be above 1000). And then put the images in ```face/origin_image```, and masks in ```face/mask```.
+* You can use ```draw_mask.py```to generate them. +```bash +python draw_mask.py --datadir 'dir for your pictures' --savedir ../datasets/draw/face +#Press the left mouse button to draw the mask . Press 'S' to save mask, 'A' to reduce brush size, 'D' to increase brush size, 'W' to cancel drawing. +``` +* If you want to get images from videos, you can use ```get_image_from_video.py``` +```bash +python get_image_from_video.py --datadir 'dir for your videos' --savedir ../datasets/video2image --fps 1 +``` +### Clean mosaic dataset +We provide several methods for generating clean mosaic datasets. However, for better effect, we recommend train a addmosaic model in a small data first and use it to automatically generate datasets in a big data. (recommend: Method 2(for image) & Method 4(for video)) +* Method 1: Use drawn mask to make pix2pix(HD) datasets (Require``` origin_image``` and ```mask```) +```bash +python make_pix2pix_dataset.py --datadir ../datasets/draw/face --hd --outsize 512 --fold 1 --name face --savedir ../datasets/pix2pix/face --mod drawn --minsize 128 --square +``` +* Method 2: Use addmosaic model to make pix2pix(HD) datasets (Require addmosaic pre-trained model) +```bash +python make_pix2pix_dataset.py --datadir 'dir for your pictures' --hd --outsize 512 --fold 1 --name face --savedir ../datasets/pix2pix/face --mod network --model_path ../pretrained_models/mosaic/add_face.pth --minsize 128 --square --mask_threshold 128 +``` +* Method 3: Use Irregular Masks to make pix2pix(HD) datasets (Require [Irregular Masks](https://nv-adlr.github.io/publication/partialconv-inpainting)) +```bash +python make_pix2pix_dataset.py --datadir 'dir for your pictures' --hd --outsize 512 --fold 1 --name face --savedir ../datasets/pix2pix/face --mod irregular --irrholedir ../datasets/Irregular_Holes_mask --square +``` +* Method 4: Use addmosaic model to make video datasets (Require addmosaic pre-trained model. 
This is better for processing video mosaics) +```bash +python make_video_dataset.py --model_path ../pretrained_models/mosaic/add_face.pth --gpu_id 0 --datadir 'dir for your videos' --savedir ../datasets/video/face +``` +## Training +### Add +```bash +cd train/add +python train.py --gpu_id 0 --dataset ../../datasets/draw/face --savename face --loadsize 512 --finesize 360 --batchsize 16 +``` +### Clean +* For image datasets (generated by ```make_pix2pix_dataset.py```) +We use [pix2pix](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) or [pix2pixHD](https://github.com/NVIDIA/pix2pixHD) to train model. We just take pix2pixHD as an example. +```bash +git clone https://github.com/NVIDIA/pix2pixHD +cd pix2pixHD +pip install dominate +python train.py --name face --resize_or_crop resize_and_crop --loadSize 563 --fineSize 512 --label_nc 0 --no_instance --dataroot ../datasets/pix2pix/face +``` +* For video datasets (generated by ```make_video_dataset.py```) +```bash +cd train/clean +python train.py --dataset ../../datasets/video/face --savename face --n_blocks 4 --lambda_GAN 0.01 --loadsize 286 --finesize 256 --batchsize 16 --n_layers_D 2 --num_D 3 --n_epoch 200 --gpu_id 4,5,6,7 --load_thread 16 +``` +## Testing +Put saved network to ```./pretrained_models/mosaic/``` and rename it as ```add_face.pth``` or ```clean_face_HD.pth``` or ```clean_face_video_HD.pth```and then run ```deepmosaic.py --model_path ./pretrained_models/mosaic/your_model_name``` diff --git a/imgs/GUI.png b/imgs/GUI.png new file mode 100644 index 0000000000000000000000000000000000000000..7404e3bb52995fe04c352965f75f0ff35535c69e Binary files /dev/null and b/imgs/GUI.png differ diff --git a/imgs/GUI_Instructions.jpg b/imgs/GUI_Instructions.jpg new file mode 100644 index 0000000000000000000000000000000000000000..600815241b6a3d3e4bfe5465af0ddf43f6873744 Binary files /dev/null and b/imgs/GUI_Instructions.jpg differ diff --git a/imgs/example/SZU.jpg b/imgs/example/SZU.jpg new file mode 100644 index 
0000000000000000000000000000000000000000..12c38a3a2d476fe4e9d48357214edde3a832f6df Binary files /dev/null and b/imgs/example/SZU.jpg differ diff --git a/imgs/example/SZU_summer2winter.jpg b/imgs/example/SZU_summer2winter.jpg new file mode 100644 index 0000000000000000000000000000000000000000..49af7fcc9e3ea8445c7b3841ff1d4e246f1693d2 Binary files /dev/null and b/imgs/example/SZU_summer2winter.jpg differ diff --git a/imgs/example/SZU_vangogh.jpg b/imgs/example/SZU_vangogh.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d6d7ec0a1501704f7ef41e9275c27df1cda00826 Binary files /dev/null and b/imgs/example/SZU_vangogh.jpg differ diff --git a/imgs/example/a_dcp.png b/imgs/example/a_dcp.png new file mode 100644 index 0000000000000000000000000000000000000000..1602d543ba978a97e890f7e882b7dd9cb6a99a26 --- /dev/null +++ b/imgs/example/a_dcp.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b043d673d0e2f65d809c03facb004c9ea1dafa79db6055f596beccf0da540bdc +size 120425 diff --git a/imgs/example/b_dcp.png b/imgs/example/b_dcp.png new file mode 100644 index 0000000000000000000000000000000000000000..f610a46400f38618f1e46193f1a2ebec75c14eae --- /dev/null +++ b/imgs/example/b_dcp.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4faaa995c61eff269b8a1cf4b902f96bcb10e14f8b555b76e0f7dad2902ee0d5 +size 138430 diff --git a/imgs/example/face_a_clean.jpg b/imgs/example/face_a_clean.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f00efca8faa195b6f6fe978fccb4f9e0bdab859e Binary files /dev/null and b/imgs/example/face_a_clean.jpg differ diff --git a/imgs/example/face_a_mosaic.jpg b/imgs/example/face_a_mosaic.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a898e6a40fd0d78c9b4dad8c1b3e037735609b62 Binary files /dev/null and b/imgs/example/face_a_mosaic.jpg differ diff --git a/imgs/example/face_b_clean.jpg b/imgs/example/face_b_clean.jpg new file mode 100644 index 
0000000000000000000000000000000000000000..432285fb7f5dcfa8711b91d499d8d7d7ec008d5a Binary files /dev/null and b/imgs/example/face_b_clean.jpg differ diff --git a/imgs/example/face_b_mosaic.jpg b/imgs/example/face_b_mosaic.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8aabdacd38b79c086fccbc777d6ba9404fb6ea3c Binary files /dev/null and b/imgs/example/face_b_mosaic.jpg differ diff --git a/imgs/example/lena.jpg b/imgs/example/lena.jpg new file mode 100644 index 0000000000000000000000000000000000000000..bf55198823d616f1a8b8d9508cdfd4b97e8f31db Binary files /dev/null and b/imgs/example/lena.jpg differ diff --git a/imgs/example/lena_add.jpg b/imgs/example/lena_add.jpg new file mode 100644 index 0000000000000000000000000000000000000000..871a14c08a103549ce5b7ae4d20f2bd1b052e2bf Binary files /dev/null and b/imgs/example/lena_add.jpg differ diff --git a/imgs/example/lena_clean.jpg b/imgs/example/lena_clean.jpg new file mode 100644 index 0000000000000000000000000000000000000000..bfb97107c9214301089a0f018cb0523ba6e16e79 Binary files /dev/null and b/imgs/example/lena_clean.jpg differ diff --git a/imgs/example/youknow.png b/imgs/example/youknow.png new file mode 100644 index 0000000000000000000000000000000000000000..9b27ebb231757fb21e4f43a788630ad9fe5f154b Binary files /dev/null and b/imgs/example/youknow.png differ diff --git a/imgs/example/youknow_add.png b/imgs/example/youknow_add.png new file mode 100644 index 0000000000000000000000000000000000000000..17cb4c0ac55025b46b3d5d1a24584ca629dbcf52 Binary files /dev/null and b/imgs/example/youknow_add.png differ diff --git a/imgs/example/youknow_clean.png b/imgs/example/youknow_clean.png new file mode 100644 index 0000000000000000000000000000000000000000..d6dad64d7c68b3af20d4a2f34587929263e16545 Binary files /dev/null and b/imgs/example/youknow_clean.png differ diff --git a/imgs/hand.gif b/imgs/hand.gif new file mode 100644 index 
0000000000000000000000000000000000000000..7617ab2047febb4388e23a470db14b37e7111275 --- /dev/null +++ b/imgs/hand.gif @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66546a3c49d60d6c5a2088b005ede50b8c3595125374bf292bb50e2a028a472d +size 2809218 diff --git a/imgs/lena.jpg b/imgs/lena.jpg new file mode 100644 index 0000000000000000000000000000000000000000..59011d8541f43d2e9716f291861d7a19068302d1 Binary files /dev/null and b/imgs/lena.jpg differ diff --git a/imgs/logo.ico b/imgs/logo.ico new file mode 100644 index 0000000000000000000000000000000000000000..75edca65882ea4a5a704dc11b6a28f06cc33f54e Binary files /dev/null and b/imgs/logo.ico differ diff --git a/imgs/logo.png b/imgs/logo.png new file mode 100644 index 0000000000000000000000000000000000000000..41b8ee3473c7cd4ef58ed3f3a7b3764ff5f68afb Binary files /dev/null and b/imgs/logo.png differ diff --git a/imgs/logo_withwords.png b/imgs/logo_withwords.png new file mode 100644 index 0000000000000000000000000000000000000000..cc3290d3a766611b06cec9374943df4d61d5cad8 Binary files /dev/null and b/imgs/logo_withwords.png differ diff --git a/imgs/ruoruo.jpg b/imgs/ruoruo.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a5c43d7ac42975788963f586616bd04627837e75 Binary files /dev/null and b/imgs/ruoruo.jpg differ diff --git a/make_datasets/cut_video.py b/make_datasets/cut_video.py new file mode 100644 index 0000000000000000000000000000000000000000..6e3ad1ad4f73113adaf6bfbbe4df13dbfb0932ee --- /dev/null +++ b/make_datasets/cut_video.py @@ -0,0 +1,32 @@ +import os +import numpy as np +import cv2 +import random +import csv + +import sys +sys.path.append("..") +from util import util,ffmpeg +from util import image_processing as impro + +files = util.Traversal('/media/hypo/Media/download') +videos = util.is_videos(files) + + + +useable_videos = [] +video_dict = {} +reader = csv.reader(open('./csv/video_used_time.csv')) +for line in reader: + useable_videos.append(line[0]) + 
video_dict[line[0]]=line[1:] + +in_cnt = 0 +out_cnt = 1 +for video in videos: + if os.path.basename(video) in useable_videos: + + for i in range(len(video_dict[os.path.basename(video)])): + ffmpeg.cut_video(video, video_dict[os.path.basename(video)][i], '00:00:05', './video/'+'%04d'%out_cnt+'.mp4') + out_cnt +=1 + in_cnt += 1 diff --git a/make_datasets/draw_mask.py b/make_datasets/draw_mask.py new file mode 100644 index 0000000000000000000000000000000000000000..cb66ed9f3e1c6d2191c9a54561a24cdfbff925b5 --- /dev/null +++ b/make_datasets/draw_mask.py @@ -0,0 +1,96 @@ +import cv2 +import numpy as np +import datetime +import os +import random + +import sys +sys.path.append("..") +from cores import Options +from util import util +from util import image_processing as impro + + +opt = Options() +opt.parser.add_argument('--datadir',type=str,default=' ', help='your images dir') +opt.parser.add_argument('--savedir',type=str,default='../datasets/draw/face', help='') +opt = opt.getparse() + +mask_savedir = os.path.join(opt.savedir,'mask') +img_savedir = os.path.join(opt.savedir,'origin_image') +util.makedirs(mask_savedir) +util.makedirs(img_savedir) + +filepaths = util.Traversal(opt.datadir) +filepaths = util.is_imgs(filepaths) +random.shuffle(filepaths) +print('find image:',len(filepaths)) + +# mouse callback function +drawing = False # true if mouse is pressed +ix,iy = -1,-1 +brushsize = 20 +def draw_circle(event,x,y,flags,param): + global ix,iy,drawing,brushsize + + if event == cv2.EVENT_LBUTTONDOWN: + drawing = True + ix,iy = x,y + + elif event == cv2.EVENT_MOUSEMOVE: + if drawing == True: + cv2.circle(img_drawn,(x,y),brushsize,(0,255,0),-1) + + elif event == cv2.EVENT_LBUTTONUP: + drawing = False + cv2.circle(img_drawn,(x,y),brushsize,(0,255,0),-1) + +def makemask(img_drawn): + # starttime = datetime.datetime.now() + mask = np.zeros(img_drawn.shape, np.uint8) + for row in range(img_drawn.shape[0]): + for col in range(img_drawn.shape[1]): + # if (img_drawn[row,col,:] == 
[0,255,0]).all(): #too slow + if img_drawn[row,col,0] == 0: + if img_drawn[row,col,1] == 255: + if img_drawn[row,col,2] == 0: + mask[row,col,:] = [255,255,255] + return mask + +cnt = 0 +for file in filepaths: + try: + cnt += 1 + img = impro.imread(file,loadsize=512) + img_drawn = img.copy() + cv2.namedWindow('image') + cv2.setMouseCallback('image',draw_circle) #MouseCallback + while(1): + + cv2.imshow('image',img_drawn) + k = cv2.waitKey(1) & 0xFF + if k == ord('s'): + + img_drawn = impro.resize(img_drawn,256) + mask = makemask(img_drawn) + cv2.imwrite(os.path.join(mask_savedir,os.path.splitext(os.path.basename(file))[0]+'.png'),mask) + cv2.imwrite(os.path.join(img_savedir,os.path.basename(file)),img) + print('Saved:',os.path.join(mask_savedir,os.path.splitext(os.path.basename(file))[0]+'.png'),mask) + # cv2.destroyAllWindows() + print('remain:',len(filepaths)-cnt) + brushsize = 20 + break + elif k == ord('a'): + brushsize -= 5 + if brushsize<5: + brushsize = 5 + print('brushsize:',brushsize) + elif k == ord('d'): + brushsize += 5 + print('brushsize:',brushsize) + elif k == ord('w'): + print('remain:',len(filepaths)-cnt) + break + except Exception as e: + print(file,e) + diff --git a/make_datasets/get_edges_pix2pix_dataset.py b/make_datasets/get_edges_pix2pix_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..5def0ebd91e28b1fdd74f9359acca1aa1ba1deb8 --- /dev/null +++ b/make_datasets/get_edges_pix2pix_dataset.py @@ -0,0 +1,26 @@ +import numpy as np +import cv2 +import os +import sys +sys.path.append("..") +from util import image_processing as impro +from util import util + +img_dir = './datasets_img/pix2pix/edges2cat/images' +output_dir = './datasets_img/pix2pix/edges2cat/train' +util.makedirs(output_dir) + +img_names = os.listdir(img_dir) +for i,img_name in enumerate(img_names,2000): + try: + img = impro.imread(os.path.join(img_dir,img_name)) + img = impro.resize(img, 286) + h,w = img.shape[:2] + edges = cv2.Canny(img,150,250) + edges = 
impro.ch_one2three(edges) + out_img = np.zeros((h,w*2,3), dtype=np.uint8) + out_img[:,0:w] = edges + out_img[:,w:2*w] = img + cv2.imwrite(os.path.join(output_dir,'%05d' % i+'.jpg'), out_img) + except Exception as e: + pass diff --git a/make_datasets/get_image_from_video.py b/make_datasets/get_image_from_video.py new file mode 100644 index 0000000000000000000000000000000000000000..b8dedbc3c61fde96d153a4b9b15325d4cdf4d5cf --- /dev/null +++ b/make_datasets/get_image_from_video.py @@ -0,0 +1,17 @@ +import os +import sys +sys.path.append("..") +from cores import Options +from util import util,ffmpeg + +opt = Options() +opt.parser.add_argument('--datadir',type=str,default='', help='your video dir') +opt.parser.add_argument('--savedir',type=str,default='../datasets/video2image', help='') +opt = opt.getparse() + +files = util.Traversal(opt.datadir) +videos = util.is_videos(files) + +util.makedirs(opt.savedir) +for video in videos: + ffmpeg.continuous_screenshot(video, opt.savedir, opt.fps) \ No newline at end of file diff --git a/make_datasets/make_pix2pix_dataset.py b/make_datasets/make_pix2pix_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..c87dc305439a5a5108d557b19394d46358e26bd1 --- /dev/null +++ b/make_datasets/make_pix2pix_dataset.py @@ -0,0 +1,172 @@ +import os +import sys +sys.path.append("..") +from cores import Options +opt = Options() + +import random +import datetime +import time +import warnings +warnings.filterwarnings(action='ignore') + +import numpy as np +import cv2 +import torch + +from models import runmodel,loadmodel +import util.image_processing as impro +from util import degradater, util,mosaic,data + + +opt.parser.add_argument('--datadir',type=str,default='../datasets/draw/face', help='') +opt.parser.add_argument('--savedir',type=str,default='../datasets/pix2pix/face', help='') +opt.parser.add_argument('--name',type=str,default='', help='save name') +opt.parser.add_argument('--mod',type=str,default='drawn', help='drawn 
| network | irregular | drawn,irregular | network,irregular') +opt.parser.add_argument('--square', action='store_true', help='if specified, crop to square') +opt.parser.add_argument('--irrholedir',type=str,default='../datasets/Irregular_Holes_mask', help='') +opt.parser.add_argument('--hd', action='store_true', help='if false make dataset for pix2pix, if Ture for pix2pix_HD') +opt.parser.add_argument('--savemask', action='store_true', help='if specified,save mask') +opt.parser.add_argument('--outsize', type=int ,default= 512,help='') +opt.parser.add_argument('--fold', type=int ,default= 1,help='') +opt.parser.add_argument('--start', type=int ,default= 0,help='') +opt.parser.add_argument('--minsize', type=int ,default= 128,help='when [square], minimal roi size') +opt.parser.add_argument('--quality', type=int ,default= 40,help='when [square], minimal quality') + +opt = opt.getparse() + +util.makedirs(opt.savedir) +util.writelog(os.path.join(opt.savedir,'opt.txt'), + str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt)) +opt.mod = (opt.mod).split(',') + +#save dir +if opt.hd: + train_A_path = os.path.join(opt.savedir,'train_A') + train_B_path = os.path.join(opt.savedir,'train_B') + util.makedirs(train_A_path) + util.makedirs(train_B_path) +else: + train_path = os.path.join(opt.savedir,'train') + util.makedirs(train_path) +if opt.savemask: + mask_save_path = os.path.join(opt.savedir,'mask') + util.makedirs(mask_save_path) + +#read dir +if 'drawn' in opt.mod: + imgpaths = util.Traversal(os.path.join(opt.datadir,'origin_image')) + imgpaths.sort() + maskpaths = util.Traversal(os.path.join(opt.datadir,'mask')) + maskpaths.sort() +if 'network' in opt.mod or 'irregular' in opt.mod: + imgpaths = util.Traversal(opt.datadir) + imgpaths = util.is_imgs(imgpaths) + random.shuffle (imgpaths) +if 'irregular' in opt.mod: + irrpaths = util.Traversal(opt.irrholedir) + + +#def network +if 'network' in opt.mod: + net = loadmodel.bisenet(opt,'roi') + +print('Find 
images:',len(imgpaths)) +starttime = datetime.datetime.now() +filecnt = 0 +savecnt = opt.start +for fold in range(opt.fold): + for i in range(len(imgpaths)): + filecnt += 1 + try: + # load image and get mask + img = impro.imread(imgpaths[i]) + if 'drawn' in opt.mod: + mask_drawn = impro.imread(maskpaths[i],'gray') + mask_drawn = impro.resize_like(mask_drawn, img) + mask = mask_drawn + if 'irregular' in opt.mod: + mask_irr = impro.imread(irrpaths[random.randint(0,12000-1)],'gray') + mask_irr = data.random_transform_single_mask(mask_irr, (img.shape[0],img.shape[1])) + mask = mask_irr + if 'network' in opt.mod: + mask_net = runmodel.get_ROI_position(img,net,opt,keepsize=True)[0] + if opt.gpu_id != -1: + torch.cuda.empty_cache() + if not opt.all_mosaic_area: + mask_net = impro.find_mostlikely_ROI(mask_net) + mask = mask_net + if opt.mod == ['drawn','irregular']: + mask = cv2.bitwise_and(mask_irr, mask_drawn) + if opt.mod == ['network','irregular']: + mask = cv2.bitwise_and(mask_irr, mask_net) + + #checkandsave + # t=threading.Thread(target=checksaveimage,args=(opt,img,mask,)) + # t.start() + + saveflag = True + if opt.mod == ['drawn','irregular']: + x,y,size,area = impro.boundingSquare(mask_drawn, random.uniform(1.1,1.6)) + elif opt.mod == ['network','irregular']: + x,y,size,area = impro.boundingSquare(mask_net, random.uniform(1.1,1.6)) + else: + x,y,size,area = impro.boundingSquare(mask, random.uniform(1.1,1.6)) + + if area < 1000: + saveflag = False + else: + if opt.square: + if size < opt.minsize: + saveflag = False + else: + img = impro.resize(img[y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC) + mask = impro.resize(mask[y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC) + if impro.Q_lapulase(img)0.5: + degradate_params = degradater.get_random_degenerate_params(mod='weaker_2') + img = degradater.degradate(img,degradate_params) + img_mosaic = degradater.degradate(img_mosaic,degradate_params) + # if random.random()>0.5: + 
# Q = random.randint(1,15) + # img = impro.dctblur(img,Q) + # img_mosaic = impro.dctblur(img_mosaic,Q) + + savecnt += 1 + + if opt.hd: + cv2.imwrite(os.path.join(train_A_path,opt.name+'%06d' % savecnt+'.jpg'), img_mosaic) + cv2.imwrite(os.path.join(train_B_path,opt.name+'%06d' % savecnt+'.jpg'), img) + else: + merge_img = impro.makedataset(img_mosaic, img) + cv2.imwrite(os.path.join(train_path,opt.name+'%06d' % savecnt+'.jpg'), merge_img) + if opt.savemask: + cv2.imwrite(os.path.join(mask_save_path,opt.name+'%06d' % savecnt+'.png'), mask) + + # print("Processing:",imgpaths[i]," ","Remain:",len(imgpaths)*opt.fold-filecnt) + # cv2.namedWindow('image', cv2.WINDOW_NORMAL) + # cv2.imshow('image',img_mosaic) + # cv2.waitKey(0) + # cv2.destroyAllWindows() + except Exception as e: + print(imgpaths[i],e) + if filecnt%10==0: + endtime = datetime.datetime.now() + # used_time = (endtime-starttime).seconds + used_time = (endtime-starttime).seconds + all_length = len(imgpaths)*opt.fold + percent = round(100*filecnt/all_length,1) + all_time = used_time/filecnt*all_length + + print('\r','',str(filecnt)+'/'+str(all_length)+' ', + util.get_bar(percent,25),'', + util.second2stamp(used_time)+'/'+util.second2stamp(all_time), + 'f:'+str(savecnt),end= " ") \ No newline at end of file diff --git a/make_datasets/make_video_dataset.py b/make_datasets/make_video_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..84ef5faf8d0dc83c31ca27a076a8f9eeaf52a115 --- /dev/null +++ b/make_datasets/make_video_dataset.py @@ -0,0 +1,164 @@ +import os +import sys +sys.path.append("..") +from cores import Options +opt = Options() + +import random +import datetime +import time + +import numpy as np +import cv2 +import torch + +from models import runmodel,loadmodel +import util.image_processing as impro +from util import filt, util,mosaic,data,ffmpeg + + +opt.parser.add_argument('--datadir',type=str,default='your video dir', help='') 
+opt.parser.add_argument('--savedir',type=str,default='../datasets/video/face', help='') +opt.parser.add_argument('--interval',type=int,default=30, help='interval of split video ') +opt.parser.add_argument('--time',type=int,default=5, help='split video time') +opt.parser.add_argument('--minmaskarea',type=int,default=2000, help='') +opt.parser.add_argument('--quality', type=int ,default= 45,help='minimal quality') +opt.parser.add_argument('--outsize', type=int ,default= 286,help='') +opt.parser.add_argument('--startcnt', type=int ,default= 0,help='') +opt.parser.add_argument('--minsize', type=int ,default= 96,help='minimal roi size') +opt.parser.add_argument('--no_sclectscene', action='store_true', help='') +opt = opt.getparse() + + +util.makedirs(opt.savedir) +util.writelog(os.path.join(opt.savedir,'opt.txt'), + str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt)) + +videopaths = util.Traversal(opt.datadir) +videopaths = util.is_videos(videopaths) +random.shuffle(videopaths) + +# def network +net = loadmodel.bisenet(opt,'roi') + +result_cnt = opt.startcnt +video_cnt = 1 +starttime = datetime.datetime.now() +for videopath in videopaths: + try: + if opt.no_sclectscene: + timestamps=['00:00:00'] + else: + timestamps=[] + fps,endtime,height,width = ffmpeg.get_video_infos(videopath) + for cut_point in range(1,int((endtime-opt.time)/opt.interval)): + util.clean_tempfiles(opt) + ffmpeg.video2image(videopath, opt.temp_dir+'/video2image/%05d.'+opt.tempimage_type,fps=1, + start_time = util.second2stamp(cut_point*opt.interval),last_time = util.second2stamp(opt.time)) + imagepaths = util.Traversal(opt.temp_dir+'/video2image') + imagepaths = sorted(imagepaths) + cnt = 0 + for i in range(opt.time): + img = impro.imread(imagepaths[i]) + mask = runmodel.get_ROI_position(img,net,opt,keepsize=True)[0] + if not opt.all_mosaic_area: + mask = impro.find_mostlikely_ROI(mask) + x,y,size,area = impro.boundingSquare(mask,Ex_mul=1) + if area > opt.minmaskarea and 
size>opt.minsize and impro.Q_lapulase(img)>opt.quality: + cnt +=1 + if cnt == opt.time: + # print(second) + timestamps.append(util.second2stamp(cut_point*opt.interval)) + util.writelog(os.path.join(opt.savedir,'opt.txt'),videopath+'\n'+str(timestamps)) + #print(timestamps) + + #generate datasets + print('Generate datasets...') + for timestamp in timestamps: + savecnt = '%05d' % result_cnt + origindir = os.path.join(opt.savedir,savecnt,'origin_image') + maskdir = os.path.join(opt.savedir,savecnt,'mask') + util.makedirs(origindir) + util.makedirs(maskdir) + + util.clean_tempfiles(opt) + ffmpeg.video2image(videopath, opt.temp_dir+'/video2image/%05d.'+opt.tempimage_type, + start_time = timestamp,last_time = util.second2stamp(opt.time)) + + endtime = datetime.datetime.now() + print(str(video_cnt)+'/'+str(len(videopaths))+' ', + util.get_bar(100*video_cnt/len(videopaths),35),'', + util.second2stamp((endtime-starttime).seconds)+'/'+util.second2stamp((endtime-starttime).seconds/video_cnt*len(videopaths))) + + imagepaths = util.Traversal(opt.temp_dir+'/video2image') + imagepaths = sorted(imagepaths) + imgs=[];masks=[] + # mask_flag = False + # for imagepath in imagepaths: + # img = impro.imread(imagepath) + # mask = runmodel.get_ROI_position(img,net,opt,keepsize=True)[0] + # imgs.append(img) + # masks.append(mask) + # if not mask_flag: + # mask_avg = mask.astype(np.float64) + # mask_flag = True + # else: + # mask_avg += mask.astype(np.float64) + + # mask_avg = np.clip(mask_avg/len(imagepaths),0,255).astype('uint8') + # mask_avg = impro.mask_threshold(mask_avg,20,64) + # if not opt.all_mosaic_area: + # mask_avg = impro.find_mostlikely_ROI(mask_avg) + # x,y,size,area = impro.boundingSquare(mask_avg,Ex_mul=random.uniform(1.1,1.5)) + + # for i in range(len(imagepaths)): + # img = impro.resize(imgs[i][y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC) + # mask = impro.resize(masks[i][y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC) + # 
impro.imwrite(os.path.join(origindir,'%05d'%(i+1)+'.jpg'), img) + # impro.imwrite(os.path.join(maskdir,'%05d'%(i+1)+'.png'), mask) + ex_mul = random.uniform(1.2,1.7) + positions = [] + for imagepath in imagepaths: + img = impro.imread(imagepath) + mask = runmodel.get_ROI_position(img,net,opt,keepsize=True)[0] + imgs.append(img) + masks.append(mask) + x,y,size,area = impro.boundingSquare(mask,Ex_mul=ex_mul) + positions.append([x,y,size]) + positions =np.array(positions) + for i in range(3):positions[:,i] = filt.medfilt(positions[:,i],opt.medfilt_num) + + for i,imagepath in enumerate(imagepaths): + x,y,size = positions[i][0],positions[i][1],positions[i][2] + tmp_cnt = i + while sizeopt.minsize//4: + # if not opt.all_mosaic_area: + # mask_avg = impro.find_mostlikely_ROI(mask_avg) + # x,y,size,area = impro.boundingSquare(mask_avg,Ex_mul=ex_mul) + # img = impro.resize(imgs[i][y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC) + # mask = impro.resize(masks[i][y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC) + # impro.imwrite(os.path.join(origindir,'%05d'%(i+1)+'.jpg'), img) + # impro.imwrite(os.path.join(maskdir,'%05d'%(i+1)+'.png'), mask) + + + result_cnt+=1 + + except Exception as e: + video_cnt +=1 + util.writelog(os.path.join(opt.savedir,'opt.txt'), + videopath+'\n'+str(result_cnt)+'\n'+str(e)) + video_cnt +=1 + if opt.gpu_id != '-1': + torch.cuda.empty_cache() diff --git a/models/BVDNet.py b/models/BVDNet.py new file mode 100644 index 0000000000000000000000000000000000000000..f9ca8148d38c50a7fc03000a6ab5015c6b174b0d --- /dev/null +++ b/models/BVDNet.py @@ -0,0 +1,198 @@ +import torch +import torch.nn as nn +from .pix2pixHD_model import * +from .model_util import * +from models import model_util + +class UpBlock(nn.Module): + def __init__(self, in_channel, out_channel, kernel_size=3, padding=1): + super().__init__() + + self.convup = nn.Sequential( + nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False), + 
nn.ReflectionPad2d(padding), + # EqualConv2d(out_channel, out_channel, kernel_size, padding=padding), + SpectralNorm(nn.Conv2d(in_channel, out_channel, kernel_size)), + nn.LeakyReLU(0.2), + # Blur(out_channel), + ) + + def forward(self, input): + outup = self.convup(input) + return outup + +class Encoder2d(nn.Module): + def __init__(self, input_nc, ngf=64, n_downsampling=3, activation = nn.LeakyReLU(0.2)): + super(Encoder2d, self).__init__() + + model = [nn.ReflectionPad2d(3), SpectralNorm(nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0)), activation] + ### downsample + for i in range(n_downsampling): + mult = 2**i + model += [ nn.ReflectionPad2d(1), + SpectralNorm(nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=0)), + activation] + + self.model = nn.Sequential(*model) + + def forward(self, input): + return self.model(input) + +class Encoder3d(nn.Module): + def __init__(self, input_nc, ngf=64, n_downsampling=3, activation = nn.LeakyReLU(0.2)): + super(Encoder3d, self).__init__() + + model = [SpectralNorm(nn.Conv3d(input_nc, ngf, kernel_size=3, padding=1)), activation] + ### downsample + for i in range(n_downsampling): + mult = 2**i + model += [ SpectralNorm(nn.Conv3d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1)), + activation] + self.model = nn.Sequential(*model) + + def forward(self, input): + return self.model(input) + +class BVDNet(nn.Module): + def __init__(self, N=2, n_downsampling=3, n_blocks=4, input_nc=3, output_nc=3,activation=nn.LeakyReLU(0.2)): + super(BVDNet, self).__init__() + ngf = 64 + padding_type = 'reflect' + self.N = N + + ### encoder + self.encoder3d = Encoder3d(input_nc,64,n_downsampling,activation) + self.encoder2d = Encoder2d(input_nc,64,n_downsampling,activation) + + ### resnet blocks + self.blocks = [] + mult = 2**n_downsampling + for i in range(n_blocks): + self.blocks += [ResnetBlockSpectralNorm(ngf * mult, padding_type=padding_type, activation=activation)] + self.blocks = 
nn.Sequential(*self.blocks) + + ### decoder + self.decoder = [] + for i in range(n_downsampling): + mult = 2**(n_downsampling - i) + self.decoder += [UpBlock(ngf * mult, int(ngf * mult / 2))] + self.decoder += [nn.ReflectionPad2d(3), nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)] + self.decoder = nn.Sequential(*self.decoder) + self.limiter = nn.Tanh() + + def forward(self, stream, previous): + this_shortcut = stream[:,:,self.N] + stream = self.encoder3d(stream) + stream = stream.reshape(stream.size(0),stream.size(1),stream.size(3),stream.size(4)) + previous = self.encoder2d(previous) + x = stream + previous + x = self.blocks(x) + x = self.decoder(x) + x = x+this_shortcut + x = self.limiter(x) + return x + +def define_G(N=2, n_blocks=1, gpu_id='-1'): + netG = BVDNet(N = N, n_blocks=n_blocks) + netG = model_util.todevice(netG,gpu_id) + netG.apply(model_util.init_weights) + return netG + +################################Discriminator################################ +def define_D(input_nc=6, ndf=64, n_layers_D=1, use_sigmoid=False, num_D=3, gpu_id='-1'): + netD = MultiscaleDiscriminator(input_nc, ndf, n_layers_D, use_sigmoid, num_D) + netD = model_util.todevice(netD,gpu_id) + netD.apply(model_util.init_weights) + return netD + +class MultiscaleDiscriminator(nn.Module): + def __init__(self, input_nc, ndf=64, n_layers=3, use_sigmoid=False, num_D=3): + super(MultiscaleDiscriminator, self).__init__() + self.num_D = num_D + self.n_layers = n_layers + + for i in range(num_D): + netD = NLayerDiscriminator(input_nc, ndf, n_layers, use_sigmoid) + setattr(self, 'layer'+str(i), netD.model) + self.downsample = nn.AvgPool2d(3, stride=2, padding=[1, 1], count_include_pad=False) + + def singleD_forward(self, model, input): + return [model(input)] + + def forward(self, input): + num_D = self.num_D + result = [] + input_downsampled = input + for i in range(num_D): + model = getattr(self, 'layer'+str(num_D-1-i)) + result.append(self.singleD_forward(model, input_downsampled)) + if 
i != (num_D-1): + input_downsampled = self.downsample(input_downsampled) + return result + +# Defines the PatchGAN discriminator with the specified arguments. +class NLayerDiscriminator(nn.Module): + def __init__(self, input_nc, ndf=64, n_layers=3, use_sigmoid=False): + super(NLayerDiscriminator, self).__init__() + self.n_layers = n_layers + + kw = 4 + padw = int(np.ceil((kw-1.0)/2)) + sequence = [[nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), nn.LeakyReLU(0.2)]] + + nf = ndf + for n in range(1, n_layers): + nf_prev = nf + nf = min(nf * 2, 512) + sequence += [[ + SpectralNorm(nn.Conv2d(nf_prev, nf, kernel_size=kw, stride=2, padding=padw)), + nn.LeakyReLU(0.2) + ]] + + nf_prev = nf + nf = min(nf * 2, 512) + sequence += [[ + SpectralNorm(nn.Conv2d(nf_prev, nf, kernel_size=kw, stride=1, padding=padw)), + nn.LeakyReLU(0.2) + ]] + + sequence += [[nn.Conv2d(nf, 1, kernel_size=kw, stride=1, padding=padw)]] + + if use_sigmoid: + sequence += [[nn.Sigmoid()]] + + sequence_stream = [] + for n in range(len(sequence)): + sequence_stream += sequence[n] + self.model = nn.Sequential(*sequence_stream) + + def forward(self, input): + return self.model(input) + +class GANLoss(nn.Module): + def __init__(self, mode='D'): + super(GANLoss, self).__init__() + if mode == 'D': + self.lossf = model_util.HingeLossD() + elif mode == 'G': + self.lossf = model_util.HingeLossG() + self.mode = mode + + def forward(self, dis_fake = None, dis_real = None): + if isinstance(dis_fake, list): + if self.mode == 'D': + loss = 0 + for i in range(len(dis_fake)): + loss += self.lossf(dis_fake[i][-1],dis_real[i][-1]) + elif self.mode =='G': + loss = 0 + weight = 2**len(dis_fake) + for i in range(len(dis_fake)): + weight = weight/2 + loss += weight*self.lossf(dis_fake[i][-1]) + return loss + else: + if self.mode == 'D': + return self.lossf(dis_fake[-1],dis_real[-1]) + elif self.mode =='G': + return self.lossf(dis_fake[-1]) diff --git a/models/BiSeNet_model.py b/models/BiSeNet_model.py new 
file mode 100644 index 0000000000000000000000000000000000000000..3675141f01dd970426c2795beca91970e03c1581 --- /dev/null +++ b/models/BiSeNet_model.py @@ -0,0 +1,264 @@ +# This code clone from https://github.com/ooooverflow/BiSeNet +import torch.nn as nn +import torch +import torch.nn.functional as F +from . import model_util +import warnings +warnings.filterwarnings(action='ignore') + +def flatten(tensor): + """Flattens a given tensor such that the channel axis is first. + The shapes are transformed as follows: + (N, C, D, H, W) -> (C, N * D * H * W) + """ + C = tensor.size(1) + # new axis order + axis_order = (1, 0) + tuple(range(2, tensor.dim())) + # Transpose: (N, C, D, H, W) -> (C, N, D, H, W) + transposed = tensor.permute(axis_order) + # Flatten: (C, N, D, H, W) -> (C, N * D * H * W) + return transposed.contiguous().view(C, -1) + + +class DiceLoss(nn.Module): + def __init__(self): + super().__init__() + self.epsilon = 1e-5 + + def forward(self, output, target): + assert output.size() == target.size(), "'input' and 'target' must have the same shape" + output = F.softmax(output, dim=1) + output = flatten(output) + target = flatten(target) + # intersect = (output * target).sum(-1).sum() + self.epsilon + # denominator = ((output + target).sum(-1)).sum() + self.epsilon + + intersect = (output * target).sum(-1) + denominator = (output + target).sum(-1) + dice = intersect / denominator + dice = torch.mean(dice) + return 1 - dice + # return 1 - 2. 
* intersect / denominator + +class resnet18(torch.nn.Module): + def __init__(self, pretrained=True): + super().__init__() + self.features = model_util.resnet18(pretrained=pretrained) + self.conv1 = self.features.conv1 + self.bn1 = self.features.bn1 + self.relu = self.features.relu + self.maxpool1 = self.features.maxpool + self.layer1 = self.features.layer1 + self.layer2 = self.features.layer2 + self.layer3 = self.features.layer3 + self.layer4 = self.features.layer4 + + def forward(self, input): + x = self.conv1(input) + x = self.relu(self.bn1(x)) + x = self.maxpool1(x) + feature1 = self.layer1(x) # 1 / 4 + feature2 = self.layer2(feature1) # 1 / 8 + feature3 = self.layer3(feature2) # 1 / 16 + feature4 = self.layer4(feature3) # 1 / 32 + # global average pooling to build tail + tail = torch.mean(feature4, 3, keepdim=True) + tail = torch.mean(tail, 2, keepdim=True) + return feature3, feature4, tail + + +class resnet101(torch.nn.Module): + def __init__(self, pretrained=True): + super().__init__() + self.features = model_util.resnet101(pretrained=pretrained) + self.conv1 = self.features.conv1 + self.bn1 = self.features.bn1 + self.relu = self.features.relu + self.maxpool1 = self.features.maxpool + self.layer1 = self.features.layer1 + self.layer2 = self.features.layer2 + self.layer3 = self.features.layer3 + self.layer4 = self.features.layer4 + + def forward(self, input): + x = self.conv1(input) + x = self.relu(self.bn1(x)) + x = self.maxpool1(x) + feature1 = self.layer1(x) # 1 / 4 + feature2 = self.layer2(feature1) # 1 / 8 + feature3 = self.layer3(feature2) # 1 / 16 + feature4 = self.layer4(feature3) # 1 / 32 + # global average pooling to build tail + tail = torch.mean(feature4, 3, keepdim=True) + tail = torch.mean(tail, 2, keepdim=True) + return feature3, feature4, tail + +def build_contextpath(name,pretrained): + model = { + 'resnet18': resnet18(pretrained=pretrained), + 'resnet101': resnet101(pretrained=pretrained) + } + return model[name] + +class 
ConvBlock(torch.nn.Module): + def __init__(self, in_channels, out_channels, kernel_size=3, stride=2,padding=1): + super().__init__() + self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=False) + self.bn = nn.BatchNorm2d(out_channels) + self.relu = nn.ReLU() + + def forward(self, input): + x = self.conv1(input) + return self.relu(self.bn(x)) + +class Spatial_path(torch.nn.Module): + def __init__(self): + super().__init__() + self.convblock1 = ConvBlock(in_channels=3, out_channels=64) + self.convblock2 = ConvBlock(in_channels=64, out_channels=128) + self.convblock3 = ConvBlock(in_channels=128, out_channels=256) + + def forward(self, input): + x = self.convblock1(input) + x = self.convblock2(x) + x = self.convblock3(x) + return x + +class AttentionRefinementModule(torch.nn.Module): + def __init__(self, in_channels, out_channels): + super().__init__() + self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1) + self.bn = nn.BatchNorm2d(out_channels) + self.sigmoid = nn.Sigmoid() + self.in_channels = in_channels + self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1)) + + def forward(self, input): + # global average pooling + x = self.avgpool(input) + assert self.in_channels == x.size(1), 'in_channels and out_channels should all be {}'.format(x.size(1)) + x = self.conv(x) + # x = self.sigmoid(self.bn(x)) + x = self.sigmoid(x) + # channels of input and x should be same + x = torch.mul(input, x) + return x + +class FeatureFusionModule(torch.nn.Module): + def __init__(self, num_classes, in_channels): + super().__init__() + # self.in_channels = input_1.channels + input_2.channels + # resnet101 3328 = 256(from context path) + 1024(from spatial path) + 2048(from spatial path) + # resnet18 1024 = 256(from context path) + 256(from spatial path) + 512(from spatial path) + self.in_channels = in_channels + + self.convblock = ConvBlock(in_channels=self.in_channels, out_channels=num_classes, stride=1) + self.conv1 
= nn.Conv2d(num_classes, num_classes, kernel_size=1) + self.relu = nn.ReLU() + self.conv2 = nn.Conv2d(num_classes, num_classes, kernel_size=1) + self.sigmoid = nn.Sigmoid() + self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1)) + + + def forward(self, input_1, input_2): + x = torch.cat((input_1, input_2), dim=1) + assert self.in_channels == x.size(1), 'in_channels of ConvBlock should be {}'.format(x.size(1)) + feature = self.convblock(x) + x = self.avgpool(feature) + + x = self.relu(self.conv1(x)) + x = self.sigmoid(self.conv2(x)) + x = torch.mul(feature, x) + x = torch.add(x, feature) + return x + +class BiSeNet(torch.nn.Module): + def __init__(self, num_classes, context_path, train_flag=True): + super().__init__() + # build spatial path + self.saptial_path = Spatial_path() + self.sigmoid = nn.Sigmoid() + # build context path + if train_flag: + self.context_path = build_contextpath(name=context_path,pretrained=True) + else: + self.context_path = build_contextpath(name=context_path,pretrained=False) + + # build attention refinement module for resnet 101 + if context_path == 'resnet101': + self.attention_refinement_module1 = AttentionRefinementModule(1024, 1024) + self.attention_refinement_module2 = AttentionRefinementModule(2048, 2048) + # supervision block + self.supervision1 = nn.Conv2d(in_channels=1024, out_channels=num_classes, kernel_size=1) + self.supervision2 = nn.Conv2d(in_channels=2048, out_channels=num_classes, kernel_size=1) + # build feature fusion module + self.feature_fusion_module = FeatureFusionModule(num_classes, 3328) + + elif context_path == 'resnet18': + # build attention refinement module for resnet 18 + self.attention_refinement_module1 = AttentionRefinementModule(256, 256) + self.attention_refinement_module2 = AttentionRefinementModule(512, 512) + # supervision block + self.supervision1 = nn.Conv2d(in_channels=256, out_channels=num_classes, kernel_size=1) + self.supervision2 = nn.Conv2d(in_channels=512, out_channels=num_classes, 
kernel_size=1) + # build feature fusion module + self.feature_fusion_module = FeatureFusionModule(num_classes, 1024) + else: + print('Error: unspport context_path network \n') + + # build final convolution + self.conv = nn.Conv2d(in_channels=num_classes, out_channels=num_classes, kernel_size=1) + + self.init_weight() + + self.mul_lr = [] + self.mul_lr.append(self.saptial_path) + self.mul_lr.append(self.attention_refinement_module1) + self.mul_lr.append(self.attention_refinement_module2) + self.mul_lr.append(self.supervision1) + self.mul_lr.append(self.supervision2) + self.mul_lr.append(self.feature_fusion_module) + self.mul_lr.append(self.conv) + + def init_weight(self): + for name, m in self.named_modules(): + if 'context_path' not in name: + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu') + elif isinstance(m, nn.BatchNorm2d): + m.eps = 1e-5 + m.momentum = 0.1 + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def forward(self, input): + # output of spatial path + sx = self.saptial_path(input) + + # output of context path + cx1, cx2, tail = self.context_path(input) + cx1 = self.attention_refinement_module1(cx1) + cx2 = self.attention_refinement_module2(cx2) + cx2 = torch.mul(cx2, tail) + # upsampling + cx1 = torch.nn.functional.interpolate(cx1, size=sx.size()[-2:], mode='bilinear') + cx2 = torch.nn.functional.interpolate(cx2, size=sx.size()[-2:], mode='bilinear') + cx = torch.cat((cx1, cx2), dim=1) + + if self.training == True: + cx1_sup = self.supervision1(cx1) + cx2_sup = self.supervision2(cx2) + cx1_sup = torch.nn.functional.interpolate(cx1_sup, size=input.size()[-2:], mode='bilinear') + cx2_sup = torch.nn.functional.interpolate(cx2_sup, size=input.size()[-2:], mode='bilinear') + + # output of feature fusion module + result = self.feature_fusion_module(sx, cx) + + # upsampling + result = torch.nn.functional.interpolate(result, scale_factor=8, mode='bilinear') + result = self.conv(result) + + 
if self.training == True: + return self.sigmoid(result), self.sigmoid(cx1_sup), self.sigmoid(cx2_sup) + + return self.sigmoid(result) \ No newline at end of file diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/models/__init__.py @@ -0,0 +1 @@ + diff --git a/models/loadmodel.py b/models/loadmodel.py new file mode 100644 index 0000000000000000000000000000000000000000..7d9b391d4c4a66e48ed511e59cb4c286b2c24c4e --- /dev/null +++ b/models/loadmodel.py @@ -0,0 +1,73 @@ +import torch +from . import model_util +from .pix2pix_model import define_G as pix2pix_G +from .pix2pixHD_model import define_G as pix2pixHD_G +# from .video_model import MosaicNet +# from .videoHD_model import MosaicNet as MosaicNet_HD +from .BiSeNet_model import BiSeNet +from .BVDNet import define_G as video_G + +def show_paramsnumber(net,netname='net'): + parameters = sum(param.numel() for param in net.parameters()) + parameters = round(parameters/1e6,2) + print(netname+' parameters: '+str(parameters)+'M') + +def pix2pix(opt): + # print(opt.model_path,opt.netG) + if opt.netG == 'HD': + netG = pix2pixHD_G(3, 3, 64, 'global' ,4) + else: + netG = pix2pix_G(3, 3, 64, opt.netG, norm='batch',use_dropout=True, init_type='normal', gpu_ids=[]) + show_paramsnumber(netG,'netG') + netG.load_state_dict(torch.load(opt.model_path)) + netG = model_util.todevice(netG,opt.gpu_id) + netG.eval() + return netG + + +def style(opt): + if opt.edges: + netG = pix2pix_G(1, 3, 64, 'resnet_9blocks', norm='instance',use_dropout=True, init_type='normal', gpu_ids=[]) + else: + netG = pix2pix_G(3, 3, 64, 'resnet_9blocks', norm='instance',use_dropout=False, init_type='normal', gpu_ids=[]) + + #in other to load old pretrain model + #https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/models/base_model.py + if isinstance(netG, torch.nn.DataParallel): + netG = netG.module + # if you are using PyTorch 
def save(net, path, gpu_id):
    """Serialize *net*'s weights (as CPU tensors) to *path*.

    A DataParallel wrapper is unwrapped first so checkpoint keys have no
    'module.' prefix. The net is moved back to GPU afterwards unless
    gpu_id == '-1'.
    """
    target = net.module if isinstance(net, nn.DataParallel) else net
    torch.save(target.cpu().state_dict(), path)
    if gpu_id != '-1':
        net.cuda()
def get_norm_layer(norm_type='instance', mod='2d'):
    """Return a constructor (functools.partial) for the requested normalization layer.

    Args:
        norm_type: 'batch', 'instance' or 'none'.
        mod: '2d' or '3d' -- selects the spatial variant.

    Returns:
        A functools.partial wrapping the nn norm class, or None for 'none'.

    Raises:
        NotImplementedError: for an unknown norm_type or mod.
    """
    # bug fix: previously an unknown `mod` fell through every branch and the
    # function crashed with UnboundLocalError instead of a clear error
    if mod not in ('2d', '3d'):
        raise NotImplementedError('normalization mod [%s] is not found' % mod)
    if norm_type == 'batch':
        norm_layer = functools.partial(
            nn.BatchNorm2d if mod == '2d' else nn.BatchNorm3d, affine=True)
    elif norm_type == 'instance':
        # instance norm: no learnable affine, but running stats are tracked
        norm_layer = functools.partial(
            nn.InstanceNorm2d if mod == '2d' else nn.InstanceNorm3d,
            affine=False, track_running_stats=True)
    elif norm_type == 'none':
        norm_layer = None
    else:
        raise NotImplementedError('normalization layer [%s] is not found' % norm_type)

    return norm_layer
class ResnetBlockSpectralNorm(nn.Module):
    """Residual block whose two 3x3 convolutions are wrapped in spectral normalization."""

    def __init__(self, dim, padding_type, activation=nn.LeakyReLU(0.2), use_dropout=False):
        super(ResnetBlockSpectralNorm, self).__init__()
        self.conv_block = self.build_conv_block(dim, padding_type, activation, use_dropout)

    def build_conv_block(self, dim, padding_type, activation, use_dropout):
        """Assemble pad -> SN-conv -> activation [-> dropout] -> pad -> SN-conv."""
        explicit_pad = {'reflect': nn.ReflectionPad2d, 'replicate': nn.ReplicationPad2d}

        def pad_layers():
            # returns (prefix layers, conv padding) for one 3x3 convolution
            if padding_type in explicit_pad:
                return [explicit_pad[padding_type](1)], 0
            if padding_type == 'zero':
                return [], 1
            raise NotImplementedError('padding [%s] is not implemented' % padding_type)

        layers, p = pad_layers()
        layers += [SpectralNorm(nn.Conv2d(dim, dim, kernel_size=3, padding=p)), activation]
        if use_dropout:
            layers.append(nn.Dropout(0.5))
        tail, p = pad_layers()
        layers += tail
        layers.append(SpectralNorm(nn.Conv2d(dim, dim, kernel_size=3, padding=p)))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Residual connection: x + conv_block(x)."""
        return x + self.conv_block(x)
def conv3x3(in_planes, out_planes, stride=1):
    """3x3 bias-free convolution with 1-pixel padding (torchvision ResNet style)."""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
class ResNet(nn.Module):
    """Torchvision-style ResNet backbone built from BasicBlock/Bottleneck units.

    Args:
        block: residual unit class (must expose an `expansion` attribute).
        layers: list of four ints -- number of units per stage.
        num_classes: size of the final fc layer.
        zero_init_residual: zero-init the last BN of each residual branch.
        norm_layer: normalization constructor (defaults to nn.BatchNorm2d).
    """

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, norm_layer=None):
        super(ResNet, self).__init__()
        norm_layer = norm_layer or nn.BatchNorm2d
        self.inplanes = 64

        # stem: 7x7 stride-2 conv + BN + ReLU + 3x3 stride-2 max-pool
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = norm_layer(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # four residual stages; stages 2-4 halve the spatial resolution
        self.layer1 = self._make_layer(block, 64, layers[0], norm_layer=norm_layer)
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, norm_layer=norm_layer)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, norm_layer=norm_layer)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, norm_layer=norm_layer)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # zero-init the last BN of each residual branch so every block starts
        # as an identity mapping (https://arxiv.org/abs/1706.02677)
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, norm_layer=None):
        """Stack *blocks* residual units; the first may downsample via a 1x1 conv."""
        norm_layer = norm_layer or nn.BatchNorm2d
        downsample = None
        # a projection shortcut is needed when the shape changes
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        units = [block(self.inplanes, planes, stride, downsample, norm_layer)]
        self.inplanes = planes * block.expansion
        units.extend(block(self.inplanes, planes, norm_layer=norm_layer)
                     for _ in range(blocks - 1))
        return nn.Sequential(*units)

    def forward(self, x):
        """stem -> 4 stages -> global average pool -> fc logits."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        x = self.avgpool(x)
        return self.fc(x.view(x.size(0), -1))
class HingeLossD(nn.Module):
    """Discriminator hinge loss: mean(relu(1 - D(real))) + mean(relu(1 + D(fake)))."""

    def __init__(self):
        super(HingeLossD, self).__init__()

    def forward(self, dis_fake, dis_real):
        """Return the summed real/fake hinge terms as a scalar tensor."""
        real_term = F.relu(1. - dis_real).mean()
        fake_term = F.relu(1. + dis_fake).mean()
        return real_term + fake_term
def gaussian(window_size, sigma):
    """Return a 1-D Gaussian kernel of length *window_size*, normalized to sum to 1."""
    center = window_size // 2
    values = [exp(-((i - center) ** 2) / float(2 * sigma ** 2)) for i in range(window_size)]
    kernel = torch.Tensor(values)
    return kernel / kernel.sum()
class SSIM(torch.nn.Module):
    """SSIM metric as a module; caches the Gaussian window between calls.

    The cached window is rebuilt only when the channel count or tensor dtype
    of the input changes, and is moved to the input's device on rebuild.
    """

    def __init__(self, window_size=11, size_average=True):
        super(SSIM, self).__init__()
        self.window_size = window_size
        self.size_average = size_average
        self.channel = 1
        self.window = create_window(window_size, self.channel)

    def forward(self, img1, img2):
        """Compute SSIM between two (N, C, H, W) batches."""
        channel = img1.size(1)

        # reuse the cached window while the channel count and dtype still match
        if channel == self.channel and self.window.data.type() == img1.data.type():
            window = self.window
        else:
            window = create_window(self.window_size, channel)
            if img1.is_cuda:
                window = window.cuda(img1.get_device())
            window = window.type_as(img1)
            self.window = window
            self.channel = channel

        return _ssim(img1, img2, window, self.window_size, channel, self.size_average)
def define_G(input_nc, output_nc, ngf, netG, n_downsample_global=3, n_blocks_global=9, n_local_enhancers=1,
             n_blocks_local=3, norm='instance', gpu_ids=[]):
    """Build and weight-initialize a pix2pixHD generator.

    Args:
        input_nc, output_nc: channel counts of input and output images.
        ngf: base filter count.
        netG: 'global', 'local' or 'encoder'.
        n_downsample_global, n_blocks_global: GlobalGenerator architecture.
        n_local_enhancers, n_blocks_local: LocalEnhancer architecture.
        norm: 'batch' or 'instance'.
        gpu_ids: non-empty list moves the net to gpu_ids[0] (requires CUDA).

    Returns:
        The generator module, initialized with weights_init.

    Raises:
        NotImplementedError: for an unknown netG name.
    """
    norm_layer = get_norm_layer(norm_type=norm)
    if netG == 'global':
        netG = GlobalGenerator(input_nc, output_nc, ngf, n_downsample_global, n_blocks_global, norm_layer)
    elif netG == 'local':
        netG = LocalEnhancer(input_nc, output_nc, ngf, n_downsample_global, n_blocks_global,
                             n_local_enhancers, n_blocks_local, norm_layer)
    elif netG == 'encoder':
        netG = Encoder(input_nc, output_nc, ngf, n_downsample_global, norm_layer)
    else:
        # bug fix: `raise('generator not implemented!')` raised a TypeError in
        # Python 3 (exceptions must derive from BaseException), masking the
        # real problem; raise a proper exception instead
        raise NotImplementedError('generator not implemented!')
    # print(netG)
    if len(gpu_ids) > 0:
        assert(torch.cuda.is_available())
        netG.cuda(gpu_ids[0])
    netG.apply(weights_init)
    return netG
class GAN_Feat_loss(nn.Module):
    """Feature-matching loss: weighted L1 between fake/real discriminator features.

    Expects multi-scale discriminator outputs with intermediate features
    (lists of per-layer feature lists, one per discriminator).
    """

    def __init__(self, opt):
        super(GAN_Feat_loss, self).__init__()
        self.num_D = opt.num_D
        self.n_layers_D = opt.n_layers_D
        self.lambda_feat = opt.lambda_feat
        self.criterionFeat = nn.L1Loss()

    def forward(self, pred_fake, pred_real):
        """Return the accumulated, weighted L1 feature-matching loss."""
        feat_weights = 4.0 / (self.n_layers_D + 1)
        D_weights = 1.0 / self.num_D
        total = 0
        for d in range(self.num_D):
            # the final entry is the prediction map; match only intermediate features
            for fake_feat, real_feat in zip(pred_fake[d][:-1], pred_real[d][:-1]):
                total += D_weights * feat_weights * \
                    self.criterionFeat(fake_feat, real_feat.detach()) * self.lambda_feat
        return total
class VGGLoss(nn.Module):
    """Perceptual loss: weighted L1 distance between VGG19 feature maps.

    Args:
        gpu_ids: list of GPU ids; non-empty moves the VGG network to CUDA.
    """

    def __init__(self, gpu_ids):
        super(VGGLoss, self).__init__()
        self.vgg = Vgg19()
        # bug fix: previously `.cuda()` was called unconditionally (and gpu_ids
        # was ignored), crashing on CPU-only machines; move to GPU only when
        # requested and available
        if len(gpu_ids) > 0 and torch.cuda.is_available():
            self.vgg = self.vgg.cuda()
        self.criterion = nn.L1Loss()
        self.weights = [1.0/32, 1.0/16, 1.0/8, 1.0/4, 1.0]

    def forward(self, x, y):
        """Return sum_i weights[i] * L1(vgg_i(x), vgg_i(y).detach())."""
        x_vgg, y_vgg = self.vgg(x), self.vgg(y)
        loss = 0
        # deeper layers (larger weights) dominate the perceptual distance
        for w, xf, yf in zip(self.weights, x_vgg, y_vgg):
            loss += w * self.criterion(xf, yf.detach())
        return loss
norm_layer=norm_layer)] + + ### upsample + model_upsample += [nn.ConvTranspose2d(ngf_global * 2, ngf_global, kernel_size=3, stride=2, padding=1, output_padding=1), + norm_layer(ngf_global), nn.ReLU(True)] + + ### final convolution + if n == n_local_enhancers: + model_upsample += [nn.ReflectionPad2d(3), nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0), nn.Tanh()] + + setattr(self, 'model'+str(n)+'_1', nn.Sequential(*model_downsample)) + setattr(self, 'model'+str(n)+'_2', nn.Sequential(*model_upsample)) + + self.downsample = nn.AvgPool2d(3, stride=2, padding=[1, 1], count_include_pad=False) + + def forward(self, input): + ### create input pyramid + input_downsampled = [input] + for i in range(self.n_local_enhancers): + input_downsampled.append(self.downsample(input_downsampled[-1])) + + ### output at coarest level + output_prev = self.model(input_downsampled[-1]) + ### build up one layer at a time + for n_local_enhancers in range(1, self.n_local_enhancers+1): + model_downsample = getattr(self, 'model'+str(n_local_enhancers)+'_1') + model_upsample = getattr(self, 'model'+str(n_local_enhancers)+'_2') + input_i = input_downsampled[self.n_local_enhancers-n_local_enhancers] + output_prev = model_upsample(model_downsample(input_i) + output_prev) + return output_prev + +class GlobalGenerator(nn.Module): + def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3, n_blocks=9, norm_layer=nn.BatchNorm2d, + padding_type='reflect'): + assert(n_blocks >= 0) + super(GlobalGenerator, self).__init__() + activation = nn.ReLU(True) + + model = [nn.ReflectionPad2d(3), nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0), norm_layer(ngf), activation] + ### downsample + for i in range(n_downsampling): + mult = 2**i + model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1), + norm_layer(ngf * mult * 2), activation] + + ### resnet blocks + mult = 2**n_downsampling + for i in range(n_blocks): + model += [ResnetBlock(ngf * mult, padding_type=padding_type, 
class ResnetBlock(nn.Module):
    """pix2pixHD residual block: two padded 3x3 convs with normalization."""

    def __init__(self, dim, padding_type, norm_layer, activation=nn.ReLU(True), use_dropout=False):
        super(ResnetBlock, self).__init__()
        self.conv_block = self.build_conv_block(dim, padding_type, norm_layer, activation, use_dropout)

    def build_conv_block(self, dim, padding_type, norm_layer, activation, use_dropout):
        """Assemble pad -> conv -> norm -> activation [-> dropout] -> pad -> conv -> norm."""
        explicit_pad = {'reflect': nn.ReflectionPad2d, 'replicate': nn.ReplicationPad2d}

        def padded_conv():
            # one 3x3 convolution with its padding strategy, plus normalization
            if padding_type in explicit_pad:
                return [explicit_pad[padding_type](1),
                        nn.Conv2d(dim, dim, kernel_size=3, padding=0),
                        norm_layer(dim)]
            if padding_type == 'zero':
                return [nn.Conv2d(dim, dim, kernel_size=3, padding=1),
                        norm_layer(dim)]
            raise NotImplementedError('padding [%s] is not implemented' % padding_type)

        layers = padded_conv() + [activation]
        if use_dropout:
            layers.append(nn.Dropout(0.5))
        layers += padded_conv()
        return nn.Sequential(*layers)

    def forward(self, x):
        """Residual connection: x + conv_block(x)."""
        return x + self.conv_block(x)
[nn.ReflectionPad2d(3), nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0), + norm_layer(ngf), nn.ReLU(True)] + ### downsample + for i in range(n_downsampling): + mult = 2**i + model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1), + norm_layer(ngf * mult * 2), nn.ReLU(True)] + + ### upsample + for i in range(n_downsampling): + mult = 2**(n_downsampling - i) + model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2), kernel_size=3, stride=2, padding=1, output_padding=1), + norm_layer(int(ngf * mult / 2)), nn.ReLU(True)] + + model += [nn.ReflectionPad2d(3), nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0), nn.Tanh()] + self.model = nn.Sequential(*model) + + def forward(self, input, inst): + outputs = self.model(input) + + # instance-wise average pooling + outputs_mean = outputs.clone() + inst_list = np.unique(inst.cpu().numpy().astype(int)) + for i in inst_list: + for b in range(input.size()[0]): + indices = (inst[b:b+1] == int(i)).nonzero() # n x 4 + for j in range(self.output_nc): + output_ins = outputs[indices[:,0] + b, indices[:,1] + j, indices[:,2], indices[:,3]] + mean_feat = torch.mean(output_ins).expand_as(output_ins) + outputs_mean[indices[:,0] + b, indices[:,1] + j, indices[:,2], indices[:,3]] = mean_feat + return outputs_mean + +class MultiscaleDiscriminator(nn.Module): + def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, + use_sigmoid=False, num_D=3, getIntermFeat=False): + super(MultiscaleDiscriminator, self).__init__() + self.num_D = num_D + self.n_layers = n_layers + self.getIntermFeat = getIntermFeat + + for i in range(num_D): + netD = NLayerDiscriminator(input_nc, ndf, n_layers, norm_layer, use_sigmoid, getIntermFeat) + if getIntermFeat: + for j in range(n_layers+2): + setattr(self, 'scale'+str(i)+'_layer'+str(j), getattr(netD, 'model'+str(j))) + else: + setattr(self, 'layer'+str(i), netD.model) + + self.downsample = nn.AvgPool2d(3, stride=2, padding=[1, 1], count_include_pad=False) + + 
def singleD_forward(self, model, input): + if self.getIntermFeat: + result = [input] + for i in range(len(model)): + result.append(model[i](result[-1])) + return result[1:] + else: + return [model(input)] + + def forward(self, input): + num_D = self.num_D + result = [] + input_downsampled = input + for i in range(num_D): + if self.getIntermFeat: + model = [getattr(self, 'scale'+str(num_D-1-i)+'_layer'+str(j)) for j in range(self.n_layers+2)] + else: + model = getattr(self, 'layer'+str(num_D-1-i)) + result.append(self.singleD_forward(model, input_downsampled)) + if i != (num_D-1): + input_downsampled = self.downsample(input_downsampled) + return result + +# Defines the PatchGAN discriminator with the specified arguments. +class NLayerDiscriminator(nn.Module): + def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, use_sigmoid=False, getIntermFeat=False): + super(NLayerDiscriminator, self).__init__() + self.getIntermFeat = getIntermFeat + self.n_layers = n_layers + + kw = 4 + padw = int(np.ceil((kw-1.0)/2)) + sequence = [[nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), nn.LeakyReLU(0.2, True)]] + + nf = ndf + for n in range(1, n_layers): + nf_prev = nf + nf = min(nf * 2, 512) + sequence += [[ + nn.Conv2d(nf_prev, nf, kernel_size=kw, stride=2, padding=padw), + norm_layer(nf), nn.LeakyReLU(0.2, True) + ]] + + nf_prev = nf + nf = min(nf * 2, 512) + sequence += [[ + nn.Conv2d(nf_prev, nf, kernel_size=kw, stride=1, padding=padw), + norm_layer(nf), + nn.LeakyReLU(0.2, True) + ]] + + sequence += [[nn.Conv2d(nf, 1, kernel_size=kw, stride=1, padding=padw)]] + + if use_sigmoid: + sequence += [[nn.Sigmoid()]] + + if getIntermFeat: + for n in range(len(sequence)): + setattr(self, 'model'+str(n), nn.Sequential(*sequence[n])) + else: + sequence_stream = [] + for n in range(len(sequence)): + sequence_stream += sequence[n] + self.model = nn.Sequential(*sequence_stream) + + def forward(self, input): + if self.getIntermFeat: + res = [input] + 
for n in range(self.n_layers+2): + model = getattr(self, 'model'+str(n)) + res.append(model(res[-1])) + return res[1:] + else: + return self.model(input) + +from torchvision import models +class Vgg19(torch.nn.Module): + def __init__(self, requires_grad=False): + super(Vgg19, self).__init__() + vgg_pretrained_features = models.vgg19(pretrained=True).features + self.slice1 = torch.nn.Sequential() + self.slice2 = torch.nn.Sequential() + self.slice3 = torch.nn.Sequential() + self.slice4 = torch.nn.Sequential() + self.slice5 = torch.nn.Sequential() + for x in range(2): + self.slice1.add_module(str(x), vgg_pretrained_features[x]) + for x in range(2, 7): + self.slice2.add_module(str(x), vgg_pretrained_features[x]) + for x in range(7, 12): + self.slice3.add_module(str(x), vgg_pretrained_features[x]) + for x in range(12, 21): + self.slice4.add_module(str(x), vgg_pretrained_features[x]) + for x in range(21, 30): + self.slice5.add_module(str(x), vgg_pretrained_features[x]) + if not requires_grad: + for param in self.parameters(): + param.requires_grad = False + + def forward(self, X): + h_relu1 = self.slice1(X) + h_relu2 = self.slice2(h_relu1) + h_relu3 = self.slice3(h_relu2) + h_relu4 = self.slice4(h_relu3) + h_relu5 = self.slice5(h_relu4) + out = [h_relu1, h_relu2, h_relu3, h_relu4, h_relu5] + return out diff --git a/models/pix2pix_model.py b/models/pix2pix_model.py new file mode 100644 index 0000000000000000000000000000000000000000..b9e57b65c9ae7a941aafe6b2c35be65d33cbb7c8 --- /dev/null +++ b/models/pix2pix_model.py @@ -0,0 +1,634 @@ +# This code clone from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix +# LICENSE file : https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/LICENSE + +import torch +import torch.nn as nn +from torch.nn import init +import functools +from torch.optim import lr_scheduler + + + +def set_requires_grad(nets, requires_grad=False): + """Set requies_grad=Fasle for all the networks to avoid unnecessary computations + 
Parameters: + nets (network list) -- a list of networks + requires_grad (bool) -- whether the networks require gradients or not + """ + if not isinstance(nets, list): + nets = [nets] + for net in nets: + if net is not None: + for param in net.parameters(): + param.requires_grad = requires_grad + + +############################################################################### +# Helper Functions +############################################################################### + + +class Identity(nn.Module): + def forward(self, x): + return x + + +def get_norm_layer(norm_type='instance'): + """Return a normalization layer + + Parameters: + norm_type (str) -- the name of the normalization layer: batch | instance | none + + For BatchNorm, we use learnable affine parameters and track running statistics (mean/stddev). + For InstanceNorm, we do not use learnable affine parameters. We do not track running statistics. + """ + if norm_type == 'batch': + norm_layer = functools.partial(nn.BatchNorm2d, affine=True, track_running_stats=True) + elif norm_type == 'instance': + norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False) + elif norm_type == 'none': + norm_layer = lambda x: Identity() + else: + raise NotImplementedError('normalization layer [%s] is not found' % norm_type) + return norm_layer + + +def get_scheduler(optimizer, opt): + """Return a learning rate scheduler + + Parameters: + optimizer -- the optimizer of the network + opt (option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions.  + opt.lr_policy is the name of learning rate policy: linear | step | plateau | cosine + + For 'linear', we keep the same learning rate for the first epochs + and linearly decay the rate to zero over the next epochs. + For other schedulers (step, plateau, and cosine), we use the default PyTorch schedulers. + See https://pytorch.org/docs/stable/optim.html for more details. 
+ """ + if opt.lr_policy == 'linear': + def lambda_rule(epoch): + lr_l = 1.0 - max(0, epoch + opt.epoch_count - opt.niter) / float(opt.niter_decay + 1) + return lr_l + scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule) + elif opt.lr_policy == 'step': + scheduler = lr_scheduler.StepLR(optimizer, step_size=opt.lr_decay_iters, gamma=0.1) + elif opt.lr_policy == 'plateau': + scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.2, threshold=0.01, patience=5) + elif opt.lr_policy == 'cosine': + scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=opt.niter, eta_min=0) + else: + return NotImplementedError('learning rate policy [%s] is not implemented', opt.lr_policy) + return scheduler + + +def init_weights(net, init_type='normal', init_gain=0.02): + """Initialize network weights. + + Parameters: + net (network) -- network to be initialized + init_type (str) -- the name of an initialization method: normal | xavier | kaiming | orthogonal + init_gain (float) -- scaling factor for normal, xavier and orthogonal. + + We use 'normal' in the original pix2pix and CycleGAN paper. But xavier and kaiming might + work better for some applications. Feel free to try yourself. 
+ """ + def init_func(m): # define the initialization function + classname = m.__class__.__name__ + if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1): + if init_type == 'normal': + init.normal_(m.weight.data, 0.0, init_gain) + elif init_type == 'xavier': + init.xavier_normal_(m.weight.data, gain=init_gain) + elif init_type == 'kaiming': + init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') + elif init_type == 'orthogonal': + init.orthogonal_(m.weight.data, gain=init_gain) + else: + raise NotImplementedError('initialization method [%s] is not implemented' % init_type) + if hasattr(m, 'bias') and m.bias is not None: + init.constant_(m.bias.data, 0.0) + elif classname.find('BatchNorm2d') != -1: # BatchNorm Layer's weight is not a matrix; only normal distribution applies. + init.normal_(m.weight.data, 1.0, init_gain) + init.constant_(m.bias.data, 0.0) + + #print('initialize network with %s' % init_type) + net.apply(init_func) # apply the initialization function + + +def init_net(net, init_type='normal', init_gain=0.02, gpu_ids=[]): + """Initialize a network: 1. register CPU/GPU device (with multi-GPU support); 2. initialize the network weights + Parameters: + net (network) -- the network to be initialized + init_type (str) -- the name of an initialization method: normal | xavier | kaiming | orthogonal + gain (float) -- scaling factor for normal, xavier and orthogonal. + gpu_ids (int list) -- which GPUs the network runs on: e.g., 0,1,2 + + Return an initialized network. 
+ """ + if len(gpu_ids) > 0: + assert(torch.cuda.is_available()) + net.to(gpu_ids[0]) + net = torch.nn.DataParallel(net, gpu_ids) # multi-GPUs + init_weights(net, init_type, init_gain=init_gain) + return net + + +def define_G(input_nc, output_nc, ngf, netG, norm='batch', use_dropout=False, init_type='normal', init_gain=0.02, gpu_ids=[]): + """Create a generator + + Parameters: + input_nc (int) -- the number of channels in input images + output_nc (int) -- the number of channels in output images + ngf (int) -- the number of filters in the last conv layer + netG (str) -- the architecture's name: resnet_9blocks | resnet_6blocks | unet_256 | unet_128 + norm (str) -- the name of normalization layers used in the network: batch | instance | none + use_dropout (bool) -- if use dropout layers. + init_type (str) -- the name of our initialization method. + init_gain (float) -- scaling factor for normal, xavier and orthogonal. + gpu_ids (int list) -- which GPUs the network runs on: e.g., 0,1,2 + + Returns a generator + + Our current implementation provides two types of generators: + U-Net: [unet_128] (for 128x128 input images) and [unet_256] (for 256x256 input images) + The original U-Net paper: https://arxiv.org/abs/1505.04597 + + Resnet-based generator: [resnet_6blocks] (with 6 Resnet blocks) and [resnet_9blocks] (with 9 Resnet blocks) + Resnet-based generator consists of several Resnet blocks between a few downsampling/upsampling operations. + We adapt Torch code from Justin Johnson's neural style transfer project (https://github.com/jcjohnson/fast-neural-style). + + + The generator has been initialized by . It uses RELU for non-linearity. 
+ """ + net = None + norm_layer = get_norm_layer(norm_type=norm) + + if netG == 'resnet_9blocks': + net = ResnetGenerator(input_nc, output_nc, ngf, norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=9) + elif netG == 'resnet_6blocks': + net = ResnetGenerator(input_nc, output_nc, ngf, norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=6) + elif netG == 'unet_128': + net = UnetGenerator(input_nc, output_nc, 7, ngf, norm_layer=norm_layer, use_dropout=use_dropout) + elif netG == 'unet_256': + net = UnetGenerator(input_nc, output_nc, 8, ngf, norm_layer=norm_layer, use_dropout=use_dropout) + else: + raise NotImplementedError('Generator model name [%s] is not recognized' % netG) + return init_net(net, init_type, init_gain, gpu_ids) + + +def define_D(input_nc, ndf, netD, n_layers_D=3, norm='batch', init_type='normal', init_gain=0.02, gpu_ids=[]): + """Create a discriminator + + Parameters: + input_nc (int) -- the number of channels in input images + ndf (int) -- the number of filters in the first conv layer + netD (str) -- the architecture's name: basic | n_layers | pixel + n_layers_D (int) -- the number of conv layers in the discriminator; effective when netD=='n_layers' + norm (str) -- the type of normalization layers used in the network. + init_type (str) -- the name of the initialization method. + init_gain (float) -- scaling factor for normal, xavier and orthogonal. + gpu_ids (int list) -- which GPUs the network runs on: e.g., 0,1,2 + + Returns a discriminator + + Our current implementation provides three types of discriminators: + [basic]: 'PatchGAN' classifier described in the original pix2pix paper. + It can classify whether 70×70 overlapping patches are real or fake. + Such a patch-level discriminator architecture has fewer parameters + than a full-image discriminator and can work on arbitrarily-sized images + in a fully convolutional fashion. 
+ + [n_layers]: With this mode, you cna specify the number of conv layers in the discriminator + with the parameter (default=3 as used in [basic] (PatchGAN).) + + [pixel]: 1x1 PixelGAN discriminator can classify whether a pixel is real or not. + It encourages greater color diversity but has no effect on spatial statistics. + + The discriminator has been initialized by . It uses Leakly RELU for non-linearity. + """ + net = None + norm_layer = get_norm_layer(norm_type=norm) + + if netD == 'basic': # default PatchGAN classifier + net = NLayerDiscriminator(input_nc, ndf, n_layers=3, norm_layer=norm_layer) + elif netD == 'n_layers': # more options + net = NLayerDiscriminator(input_nc, ndf, n_layers_D, norm_layer=norm_layer) + elif netD == 'pixel': # classify if each pixel is real or fake + net = PixelDiscriminator(input_nc, ndf, norm_layer=norm_layer) + else: + raise NotImplementedError('Discriminator model name [%s] is not recognized' % net) + return init_net(net, init_type, init_gain, gpu_ids) + + +############################################################################## +# Classes +############################################################################## +class GANLoss(nn.Module): + """Define different GAN objectives. + + The GANLoss class abstracts away the need to create the target label tensor + that has the same size as the input. + """ + + def __init__(self, gan_mode, target_real_label=1.0, target_fake_label=0.0): + """ Initialize the GANLoss class. + + Parameters: + gan_mode (str) - - the type of GAN objective. It currently supports vanilla, lsgan, and wgangp. + target_real_label (bool) - - label for a real image + target_fake_label (bool) - - label of a fake image + + Note: Do not use sigmoid as the last layer of Discriminator. + LSGAN needs no sigmoid. vanilla GANs will handle it with BCEWithLogitsLoss. 
+ """ + super(GANLoss, self).__init__() + self.register_buffer('real_label', torch.tensor(target_real_label)) + self.register_buffer('fake_label', torch.tensor(target_fake_label)) + self.gan_mode = gan_mode + if gan_mode == 'lsgan': + self.loss = nn.MSELoss() + elif gan_mode == 'vanilla': + self.loss = nn.BCEWithLogitsLoss() + elif gan_mode in ['wgangp']: + self.loss = None + else: + raise NotImplementedError('gan mode %s not implemented' % gan_mode) + + def get_target_tensor(self, prediction, target_is_real): + """Create label tensors with the same size as the input. + + Parameters: + prediction (tensor) - - tpyically the prediction from a discriminator + target_is_real (bool) - - if the ground truth label is for real images or fake images + + Returns: + A label tensor filled with ground truth label, and with the size of the input + """ + + if target_is_real: + target_tensor = self.real_label + else: + target_tensor = self.fake_label + return target_tensor.expand_as(prediction) + + def __call__(self, prediction, target_is_real): + """Calculate loss given Discriminator's output and grount truth labels. + + Parameters: + prediction (tensor) - - tpyically the prediction output from a discriminator + target_is_real (bool) - - if the ground truth label is for real images or fake images + + Returns: + the calculated loss. 
+ """ + if self.gan_mode in ['lsgan', 'vanilla']: + target_tensor = self.get_target_tensor(prediction, target_is_real) + loss = self.loss(prediction, target_tensor) + elif self.gan_mode == 'wgangp': + if target_is_real: + loss = -prediction.mean() + else: + loss = prediction.mean() + return loss + + +def cal_gradient_penalty(netD, real_data, fake_data, device, type='mixed', constant=1.0, lambda_gp=10.0): + """Calculate the gradient penalty loss, used in WGAN-GP paper https://arxiv.org/abs/1704.00028 + + Arguments: + netD (network) -- discriminator network + real_data (tensor array) -- real images + fake_data (tensor array) -- generated images from the generator + device (str) -- GPU / CPU: from torch.device('cuda:{}'.format(self.gpu_ids[0])) if self.gpu_ids else torch.device('cpu') + type (str) -- if we mix real and fake data or not [real | fake | mixed]. + constant (float) -- the constant used in formula ( | |gradient||_2 - constant)^2 + lambda_gp (float) -- weight for this loss + + Returns the gradient penalty loss + """ + if lambda_gp > 0.0: + if type == 'real': # either use real images, fake images, or a linear interpolation of two. 
+ interpolatesv = real_data + elif type == 'fake': + interpolatesv = fake_data + elif type == 'mixed': + alpha = torch.rand(real_data.shape[0], 1) + alpha = alpha.expand(real_data.shape[0], real_data.nelement() // real_data.shape[0]).contiguous().view(*real_data.shape) + alpha = alpha.to(device) + interpolatesv = alpha * real_data + ((1 - alpha) * fake_data) + else: + raise NotImplementedError('{} not implemented'.format(type)) + interpolatesv.requires_grad_(True) + disc_interpolates = netD(interpolatesv) + gradients = torch.autograd.grad(outputs=disc_interpolates, inputs=interpolatesv, + grad_outputs=torch.ones(disc_interpolates.size()).to(device), + create_graph=True, retain_graph=True, only_inputs=True) + gradients = gradients[0].view(real_data.size(0), -1) # flat the data + gradient_penalty = (((gradients + 1e-16).norm(2, dim=1) - constant) ** 2).mean() * lambda_gp # added eps + return gradient_penalty, gradients + else: + return 0.0, None + + +class ResnetGenerator(nn.Module): + """Resnet-based generator that consists of Resnet blocks between a few downsampling/upsampling operations. 
+ + We adapt Torch code and idea from Justin Johnson's neural style transfer project(https://github.com/jcjohnson/fast-neural-style) + """ + + def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_blocks=6, padding_type='reflect'): + """Construct a Resnet-based generator + + Parameters: + input_nc (int) -- the number of channels in input images + output_nc (int) -- the number of channels in output images + ngf (int) -- the number of filters in the last conv layer + norm_layer -- normalization layer + use_dropout (bool) -- if use dropout layers + n_blocks (int) -- the number of ResNet blocks + padding_type (str) -- the name of padding layer in conv layers: reflect | replicate | zero + """ + assert(n_blocks >= 0) + super(ResnetGenerator, self).__init__() + if type(norm_layer) == functools.partial: + use_bias = norm_layer.func == nn.InstanceNorm2d + else: + use_bias = norm_layer == nn.InstanceNorm2d + + model = [nn.ReflectionPad2d(3), + nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, bias=use_bias), + norm_layer(ngf), + nn.ReLU(True)] + + n_downsampling = 2 + for i in range(n_downsampling): # add downsampling layers + mult = 2 ** i + model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1, bias=use_bias), + norm_layer(ngf * mult * 2), + nn.ReLU(True)] + + mult = 2 ** n_downsampling + for i in range(n_blocks): # add ResNet blocks + + model += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout, use_bias=use_bias)] + + for i in range(n_downsampling): # add upsampling layers + mult = 2 ** (n_downsampling - i) + model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2), + kernel_size=3, stride=2, + padding=1, output_padding=1, + bias=use_bias), + norm_layer(int(ngf * mult / 2)), + nn.ReLU(True)] + model += [nn.ReflectionPad2d(3)] + model += [nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)] + model += [nn.Tanh()] + + self.model = 
nn.Sequential(*model) + + def forward(self, input): + """Standard forward""" + return self.model(input) + + +class ResnetBlock(nn.Module): + """Define a Resnet block""" + + def __init__(self, dim, padding_type, norm_layer, use_dropout, use_bias): + """Initialize the Resnet block + + A resnet block is a conv block with skip connections + We construct a conv block with build_conv_block function, + and implement skip connections in function. + Original Resnet paper: https://arxiv.org/pdf/1512.03385.pdf + """ + super(ResnetBlock, self).__init__() + self.conv_block = self.build_conv_block(dim, padding_type, norm_layer, use_dropout, use_bias) + + def build_conv_block(self, dim, padding_type, norm_layer, use_dropout, use_bias): + """Construct a convolutional block. + + Parameters: + dim (int) -- the number of channels in the conv layer. + padding_type (str) -- the name of padding layer: reflect | replicate | zero + norm_layer -- normalization layer + use_dropout (bool) -- if use dropout layers. 
+ use_bias (bool) -- if the conv layer uses bias or not + + Returns a conv block (with a conv layer, a normalization layer, and a non-linearity layer (ReLU)) + """ + conv_block = [] + p = 0 + if padding_type == 'reflect': + conv_block += [nn.ReflectionPad2d(1)] + elif padding_type == 'replicate': + conv_block += [nn.ReplicationPad2d(1)] + elif padding_type == 'zero': + p = 1 + else: + raise NotImplementedError('padding [%s] is not implemented' % padding_type) + + conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias), norm_layer(dim), nn.ReLU(True)] + if use_dropout: + conv_block += [nn.Dropout(0.5)] + + p = 0 + if padding_type == 'reflect': + conv_block += [nn.ReflectionPad2d(1)] + elif padding_type == 'replicate': + conv_block += [nn.ReplicationPad2d(1)] + elif padding_type == 'zero': + p = 1 + else: + raise NotImplementedError('padding [%s] is not implemented' % padding_type) + conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias), norm_layer(dim)] + + return nn.Sequential(*conv_block) + + def forward(self, x): + """Forward function (with skip connections)""" + out = x + self.conv_block(x) # add skip connections + return out + + +class UnetGenerator(nn.Module): + """Create a Unet-based generator""" + + def __init__(self, input_nc, output_nc, num_downs, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False): + """Construct a Unet generator + Parameters: + input_nc (int) -- the number of channels in input images + output_nc (int) -- the number of channels in output images + num_downs (int) -- the number of downsamplings in UNet. For example, # if |num_downs| == 7, + image of size 128x128 will become of size 1x1 # at the bottleneck + ngf (int) -- the number of filters in the last conv layer + norm_layer -- normalization layer + + We construct the U-Net from the innermost layer to the outermost layer. + It is a recursive process. 
+ """ + super(UnetGenerator, self).__init__() + # construct unet structure + unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True) # add the innermost layer + for i in range(num_downs - 5): # add intermediate layers with ngf * 8 filters + unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout) + # gradually reduce the number of filters from ngf * 8 to ngf + unet_block = UnetSkipConnectionBlock(ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer) + unet_block = UnetSkipConnectionBlock(ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer) + unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer) + self.model = UnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer) # add the outermost layer + + def forward(self, input): + """Standard forward""" + return self.model(input) + + +class UnetSkipConnectionBlock(nn.Module): + """Defines the Unet submodule with skip connection. + X -------------------identity---------------------- + |-- downsampling -- |submodule| -- upsampling --| + """ + + def __init__(self, outer_nc, inner_nc, input_nc=None, + submodule=None, outermost=False, innermost=False, norm_layer=nn.BatchNorm2d, use_dropout=False): + """Construct a Unet submodule with skip connections. 
+ + Parameters: + outer_nc (int) -- the number of filters in the outer conv layer + inner_nc (int) -- the number of filters in the inner conv layer + input_nc (int) -- the number of channels in input images/features + submodule (UnetSkipConnectionBlock) -- previously defined submodules + outermost (bool) -- if this module is the outermost module + innermost (bool) -- if this module is the innermost module + norm_layer -- normalization layer + user_dropout (bool) -- if use dropout layers. + """ + super(UnetSkipConnectionBlock, self).__init__() + self.outermost = outermost + if type(norm_layer) == functools.partial: + use_bias = norm_layer.func == nn.InstanceNorm2d + else: + use_bias = norm_layer == nn.InstanceNorm2d + if input_nc is None: + input_nc = outer_nc + downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4, + stride=2, padding=1, bias=use_bias) + downrelu = nn.LeakyReLU(0.2, True) + downnorm = norm_layer(inner_nc) + uprelu = nn.ReLU(True) + upnorm = norm_layer(outer_nc) + + if outermost: + upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, + kernel_size=4, stride=2, + padding=1) + down = [downconv] + up = [uprelu, upconv, nn.Tanh()] + model = down + [submodule] + up + elif innermost: + upconv = nn.ConvTranspose2d(inner_nc, outer_nc, + kernel_size=4, stride=2, + padding=1, bias=use_bias) + down = [downrelu, downconv] + up = [uprelu, upconv, upnorm] + model = down + up + else: + upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, + kernel_size=4, stride=2, + padding=1, bias=use_bias) + down = [downrelu, downconv, downnorm] + up = [uprelu, upconv, upnorm] + + if use_dropout: + model = down + [submodule] + up + [nn.Dropout(0.5)] + else: + model = down + [submodule] + up + + self.model = nn.Sequential(*model) + + def forward(self, x): + if self.outermost: + return self.model(x) + else: # add skip connections + return torch.cat([x, self.model(x)], 1) + + +class NLayerDiscriminator(nn.Module): + """Defines a PatchGAN discriminator""" + + def __init__(self, 
input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d): + """Construct a PatchGAN discriminator + + Parameters: + input_nc (int) -- the number of channels in input images + ndf (int) -- the number of filters in the last conv layer + n_layers (int) -- the number of conv layers in the discriminator + norm_layer -- normalization layer + """ + super(NLayerDiscriminator, self).__init__() + if type(norm_layer) == functools.partial: # no need to use bias as BatchNorm2d has affine parameters + use_bias = norm_layer.func != nn.BatchNorm2d + else: + use_bias = norm_layer != nn.BatchNorm2d + + kw = 4 + padw = 1 + sequence = [nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), nn.LeakyReLU(0.2, True)] + nf_mult = 1 + nf_mult_prev = 1 + for n in range(1, n_layers): # gradually increase the number of filters + nf_mult_prev = nf_mult + nf_mult = min(2 ** n, 8) + sequence += [ + nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=2, padding=padw, bias=use_bias), + norm_layer(ndf * nf_mult), + nn.LeakyReLU(0.2, True) + ] + + nf_mult_prev = nf_mult + nf_mult = min(2 ** n_layers, 8) + sequence += [ + nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=1, padding=padw, bias=use_bias), + norm_layer(ndf * nf_mult), + nn.LeakyReLU(0.2, True) + ] + + sequence += [nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)] # output 1 channel prediction map + self.model = nn.Sequential(*sequence) + + def forward(self, input): + """Standard forward.""" + return self.model(input) + + +class PixelDiscriminator(nn.Module): + """Defines a 1x1 PatchGAN discriminator (pixelGAN)""" + + def __init__(self, input_nc, ndf=64, norm_layer=nn.BatchNorm2d): + """Construct a 1x1 PatchGAN discriminator + + Parameters: + input_nc (int) -- the number of channels in input images + ndf (int) -- the number of filters in the last conv layer + norm_layer -- normalization layer + """ + super(PixelDiscriminator, self).__init__() + if type(norm_layer) == 
functools.partial: # no need to use bias as BatchNorm2d has affine parameters + use_bias = norm_layer.func != nn.InstanceNorm2d + else: + use_bias = norm_layer != nn.InstanceNorm2d + + self.net = [ + nn.Conv2d(input_nc, ndf, kernel_size=1, stride=1, padding=0), + nn.LeakyReLU(0.2, True), + nn.Conv2d(ndf, ndf * 2, kernel_size=1, stride=1, padding=0, bias=use_bias), + norm_layer(ndf * 2), + nn.LeakyReLU(0.2, True), + nn.Conv2d(ndf * 2, 1, kernel_size=1, stride=1, padding=0, bias=use_bias)] + + self.net = nn.Sequential(*self.net) + + def forward(self, input): + """Standard forward.""" + return self.net(input) diff --git a/models/runmodel.py b/models/runmodel.py new file mode 100644 index 0000000000000000000000000000000000000000..3e97feec7d35177509230bb6d678b23683c70514 --- /dev/null +++ b/models/runmodel.py @@ -0,0 +1,85 @@ +import cv2 +import sys +sys.path.append("..") +import util.image_processing as impro +from util import mosaic +from util import data +import torch +import numpy as np + +def run_segment(img,net,size = 360,gpu_id = '-1'): + img = impro.resize(img,size) + img = data.im2tensor(img,gpu_id = gpu_id, bgr2rgb = False, is0_1 = True) + mask = net(img) + mask = data.tensor2im(mask, gray=True, is0_1 = True) + return mask + +def run_pix2pix(img,net,opt): + if opt.netG == 'HD': + img = impro.resize(img,512) + else: + img = impro.resize(img,128) + img = data.im2tensor(img,gpu_id=opt.gpu_id) + img_fake = net(img) + img_fake = data.tensor2im(img_fake) + return img_fake + +def traditional_cleaner(img,opt): + h,w = img.shape[:2] + img = cv2.blur(img, (opt.tr_blur,opt.tr_blur)) + img = img[::opt.tr_down,::opt.tr_down,:] + img = cv2.resize(img, (w,h),interpolation=cv2.INTER_LANCZOS4) + return img + +def run_styletransfer(opt, net, img): + + if opt.output_size != 0: + if 'resize' in opt.preprocess and 'resize_scale_width' not in opt.preprocess: + img = impro.resize(img,opt.output_size) + elif 'resize_scale_width' in opt.preprocess: + img = cv2.resize(img, 
(opt.output_size,opt.output_size)) + img = img[0:4*int(img.shape[0]/4),0:4*int(img.shape[1]/4),:] + + if 'edges' in opt.preprocess: + if opt.canny > 100: + canny_low = opt.canny-50 + canny_high = np.clip(opt.canny+50,0,255) + elif opt.canny < 50: + canny_low = np.clip(opt.canny-25,0,255) + canny_high = opt.canny+25 + else: + canny_low = opt.canny-int(opt.canny/2) + canny_high = opt.canny+int(opt.canny/2) + img = cv2.Canny(img,canny_low,canny_high) + if opt.only_edges: + return img + img = data.im2tensor(img,gpu_id=opt.gpu_id,gray=True) + else: + img = data.im2tensor(img,gpu_id=opt.gpu_id) + img = net(img) + img = data.tensor2im(img) + return img + +def get_ROI_position(img,net,opt,keepsize=True): + mask = run_segment(img,net,size=360,gpu_id = opt.gpu_id) + mask = impro.mask_threshold(mask,opt.mask_extend,opt.mask_threshold) + if keepsize: + mask = impro.resize_like(mask, img) + x,y,halfsize,area = impro.boundingSquare(mask, 1) + return mask,x,y,halfsize,area + +def get_mosaic_position(img_origin,net_mosaic_pos,opt): + h,w = img_origin.shape[:2] + mask = run_segment(img_origin,net_mosaic_pos,size=360,gpu_id = opt.gpu_id) + # mask_1 = mask.copy() + mask = impro.mask_threshold(mask,ex_mun=int(min(h,w)/20),threshold=opt.mask_threshold) + if not opt.all_mosaic_area: + mask = impro.find_mostlikely_ROI(mask) + x,y,size,area = impro.boundingSquare(mask,Ex_mul=opt.ex_mult) + #Location fix + rat = min(h,w)/360.0 + x,y,size = int(rat*x),int(rat*y),int(rat*size) + x,y = np.clip(x, 0, w),np.clip(y, 0, h) + size = np.clip(size, 0, min(w-x,h-y)) + # print(x,y,size) + return x,y,size,mask \ No newline at end of file diff --git a/models/unet_model.py b/models/unet_model.py new file mode 100644 index 0000000000000000000000000000000000000000..5dce46e0ef4ec3ee90a9472f70aac9085d9b1d48 --- /dev/null +++ b/models/unet_model.py @@ -0,0 +1,125 @@ +# This code clone from https://github.com/milesial/Pytorch-UNet +# LICENSE file : https://github.com/milesial/Pytorch-UNet/blob/master/LICENSE + 
# This code clone from https://github.com/milesial/Pytorch-UNet
# LICENSE file : https://github.com/milesial/Pytorch-UNet/blob/master/LICENSE

import torch
import torch.nn as nn
import torch.nn.functional as F


class double_conv(nn.Module):
    """Two successive (Conv3x3 -> BatchNorm -> ReLU) stages."""

    def __init__(self, in_ch, out_ch):
        super(double_conv, self).__init__()
        stages = []
        for src, dst in ((in_ch, out_ch), (out_ch, out_ch)):
            stages.extend([
                nn.Conv2d(src, dst, 3, padding=1),
                nn.BatchNorm2d(dst),
                nn.ReLU(inplace=True),
            ])
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        return self.conv(x)


class inconv(nn.Module):
    """Input stem: one double_conv without any resolution change."""

    def __init__(self, in_ch, out_ch):
        super(inconv, self).__init__()
        self.conv = double_conv(in_ch, out_ch)

    def forward(self, x):
        return self.conv(x)


class down(nn.Module):
    """Encoder stage: halve resolution with max-pool, then double_conv."""

    def __init__(self, in_ch, out_ch):
        super(down, self).__init__()
        self.mpconv = nn.Sequential(nn.MaxPool2d(2), double_conv(in_ch, out_ch))

    def forward(self, x):
        return self.mpconv(x)


class Upsample(nn.Module):
    """Bilinear upsampling wrapped as a module around F.interpolate."""

    def __init__(self, scale_factor):
        super(Upsample, self).__init__()
        self.scale_factor = scale_factor

    def forward(self, x):
        return F.interpolate(x, scale_factor=self.scale_factor,
                             mode='bilinear', align_corners=True)


class up(nn.Module):
    """Decoder stage: upsample x1, pad to the skip tensor x2, concat, conv."""

    def __init__(self, in_ch, out_ch, bilinear=True):
        super(up, self).__init__()
        # a learned (transposed-conv) upsampling is possible but costs memory;
        # bilinear interpolation is the default
        if bilinear:
            self.up = Upsample(scale_factor=2)
        else:
            self.up = nn.ConvTranspose2d(in_ch//2, in_ch//2, 2, stride=2)
        self.conv = double_conv(in_ch, out_ch)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        # input is NCHW: pad x1 so its H/W match the skip connection
        # (see https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70)
        gap_h = x2.size()[2] - x1.size()[2]
        gap_w = x2.size()[3] - x1.size()[3]
        x1 = F.pad(x1, (gap_w // 2, gap_w - gap_w // 2,
                        gap_h // 2, gap_h - gap_h // 2))
        merged = torch.cat([x2, x1], dim=1)
        return self.conv(merged)


class outconv(nn.Module):
    """1x1 conv + sigmoid head producing per-pixel probabilities."""

    def __init__(self, in_ch, out_ch):
        super(outconv, self).__init__()
        self.conv = nn.Sequential(nn.Conv2d(in_ch, out_ch, 1), nn.Sigmoid())

    def forward(self, x):
        return self.conv(x)


class UNet(nn.Module):
    """Classic 4-level U-Net with bilinear upsampling and sigmoid output."""

    def __init__(self, n_channels, n_classes):
        super(UNet, self).__init__()
        self.inc = inconv(n_channels, 64)
        self.down1 = down(64, 128)
        self.down2 = down(128, 256)
        self.down3 = down(256, 512)
        self.down4 = down(512, 512)
        self.up1 = up(1024, 256)
        self.up2 = up(512, 128)
        self.up3 = up(256, 64)
        self.up4 = up(128, 64)
        self.outc = outconv(64, n_classes)

    def forward(self, x):
        # encoder: keep every stage's output for the skip connections
        skips = [self.inc(x)]
        for stage in (self.down1, self.down2, self.down3, self.down4):
            skips.append(stage(skips[-1]))
        # decoder: walk back up, merging with the matching skip tensor
        y = self.up1(skips[4], skips[3])
        y = self.up2(y, skips[2])
        y = self.up3(y, skips[1])
        y = self.up4(y, skips[0])
        return self.outc(y)
'''
HTTP inference server: accepts a base64-encoded image via POST /handle,
runs mosaic detection + cleaning, and returns the cleaned image as base64.
'''
import os
import sys
import traceback
import cv2
import numpy as np
try:
    from cores import Options,clean
    from util import util
    from util import image_processing as impro
    from models import loadmodel
except Exception as e:
    print(e)
    input('Please press any key to exit.\n')
    sys.exit(0)

# python server.py --gpu_id 0 --model_path ./pretrained_models/mosaic/clean_face_HD.pth
opt = Options()
opt.parser.add_argument('--port',type=int,default=4000, help='')
opt = opt.getparse(True)
netM = loadmodel.bisenet(opt,'mosaic')
netG = loadmodel.pix2pix(opt)

from flask import Flask, request
import base64
import shutil

app = Flask(__name__)

@app.route("/handle", methods=["POST"])
def handle():
    """Decode the posted base64 image, clean mosaic, reply base64 jpg.

    Response dict: {'img': <base64 or echo of input>, 'info': 'ok'|'readfailed'|'procfailed'}.
    """
    result = {}
    # FIX: pre-bind imgRec so the error path below cannot raise NameError
    # when request.form['img'] itself is missing/raises.
    imgRec = ''
    # to opencv img
    try:
        imgRec = request.form['img']
        imgByte = base64.b64decode(imgRec)
        img_np_arr = np.frombuffer(imgByte, np.uint8)
        img = cv2.imdecode(img_np_arr, cv2.IMREAD_COLOR)
        if img is None:
            # FIX: cv2.imdecode signals failure by returning None, not raising
            raise ValueError('cv2.imdecode failed')
    except Exception:
        result['img'] = imgRec
        result['info'] = 'readfailed'
        return result

    # run model
    try:
        if max(img.shape)>1080:
            # keep inference cost bounded for very large uploads
            img = impro.resize(img,720,interpolation=cv2.INTER_CUBIC)
        img = clean.cleanmosaic_img_server(opt,img,netG,netM)
    except Exception:
        result['img'] = imgRec
        result['info'] = 'procfailed'
        return result

    # return
    imgbytes = cv2.imencode('.jpg', img)[1]
    imgString = base64.b64encode(imgbytes).decode('utf-8')
    result['img'] = imgString
    result['info'] = 'ok'
    return result

app.run("0.0.0.0", port= opt.port, debug=opt.debug)
'''
Export the mosaic-position model to TorchScript for the C++ runtime
(../cpp/res/models/mosaic_position.pt).
'''
import os
import sys
import traceback
sys.path.append("..")
from util import mosaic
import torch

try:
    from cores import Options,add,clean,style
    from util import util
    from models import loadmodel
except Exception as e:
    print(e)
    input('Please press any key to exit.\n')
    sys.exit(0)

opt = Options().getparse(test_flag = False)
if not os.path.isdir(opt.temp_dir):
    util.file_init(opt)

def saveScriptModel(model,example,savepath):
    """Trace `model` on CPU with `example` input and save the TorchScript file.

    The traced module is run once and its output printed as a sanity check.
    """
    model.cpu()
    traced_script_module = torch.jit.trace(model, example)
    # try ScriptModel: run once to verify the trace executes
    output = traced_script_module(example)
    print(output)
    traced_script_module.save(savepath)

savedir = '../cpp/res/models/'
util.makedirs(savedir)

opt.mosaic_position_model_path = '../pretrained_models/mosaic/mosaic_position.pth'
model = loadmodel.bisenet(opt,'mosaic')
# the position net consumes 360x360 RGB input
example = torch.ones((1,3,360,360))
saveScriptModel(model,example,os.path.join(savedir,'mosaic_position.pt'))

# FIX: removed ~60 lines of commented-out dead code (an old copy of the CLI
# main() from deepmosaic.py); it is preserved in version control history.
'''
Train a mosaic/ROI segmentation network (UNet or BiSeNet) on image+mask pairs.
Dataset layout: <dataset>/origin_image and <dataset>/mask; 80/20 train/eval split.
'''
import os
import sys
sys.path.append("..")
sys.path.append("../..")
from cores import Options
opt = Options()

import random
import datetime
import time

import numpy as np
import matplotlib
matplotlib.use('Agg')  # headless backend: loss curves are written to disk only
from matplotlib import pyplot as plt
import cv2

import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
from torch import optim

from util import mosaic,util,ffmpeg,filt,data
from util import image_processing as impro
from models import unet_model,BiSeNet_model


'''
--------------------------Get options--------------------------
'''
opt.parser.add_argument('--lr',type=float,default=0.001, help='')
opt.parser.add_argument('--finesize',type=int,default=360, help='')
opt.parser.add_argument('--loadsize',type=int,default=400, help='')
opt.parser.add_argument('--batchsize',type=int,default=8, help='')
opt.parser.add_argument('--model',type=str,default='BiSeNet', help='BiSeNet or UNet')

opt.parser.add_argument('--maxepoch',type=int,default=100, help='')
opt.parser.add_argument('--savefreq',type=int,default=5, help='')
opt.parser.add_argument('--maxload',type=int,default=1000000, help='')
opt.parser.add_argument('--continue_train', action='store_true', help='')
opt.parser.add_argument('--startepoch',type=int,default=0, help='')
opt.parser.add_argument('--dataset',type=str,default='./datasets/face/', help='')
opt.parser.add_argument('--savename',type=str,default='face', help='')


'''
--------------------------Init--------------------------
'''
opt = opt.getparse()
dir_img = os.path.join(opt.dataset,'origin_image')
dir_mask = os.path.join(opt.dataset,'mask')
dir_checkpoint = os.path.join('checkpoints/',opt.savename)
util.makedirs(dir_checkpoint)
util.writelog(os.path.join(dir_checkpoint,'loss.txt'),
              str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt))

def Totensor(img,gpu_id=True):
    # NOTE(review): the gpu_id parameter is ignored (opt.gpu_id decides);
    # kept for backward compatibility. FIX: removed an unused local.
    img = torch.from_numpy(img).float()
    if opt.gpu_id != -1:
        img = img.cuda()
    return img

def loadimage(imagepaths,maskpaths,opt,test_flag = False):
    """Load a batch of (image, mask) pairs, augment them jointly, and return
    NCHW float tensors scaled to [0,1] on the device chosen by opt.gpu_id."""
    batchsize = len(imagepaths)
    images = np.zeros((batchsize,3,opt.finesize,opt.finesize), dtype=np.float32)
    masks = np.zeros((batchsize,1,opt.finesize,opt.finesize), dtype=np.float32)
    for i in range(len(imagepaths)):
        img = impro.resize(impro.imread(imagepaths[i]),opt.loadsize)
        mask = impro.resize(impro.imread(maskpaths[i],mod = 'gray'),opt.loadsize)
        img,mask = data.random_transform_pair_image(img, mask, opt.finesize, test_flag)
        images[i] = (img.transpose((2, 0, 1))/255.0)
        masks[i] = (mask.reshape(1,1,opt.finesize,opt.finesize)/255.0)
    images = data.to_tensor(images,opt.gpu_id)
    masks = data.to_tensor(masks,opt.gpu_id)
    return images,masks


'''
--------------------------checking dataset--------------------------
'''
print('checking dataset...')
imagepaths = sorted(util.Traversal(dir_img))[:opt.maxload]
maskpaths = sorted(util.Traversal(dir_mask))[:opt.maxload]
data.shuffledata(imagepaths, maskpaths)
if len(imagepaths) != len(maskpaths) :
    print('dataset error!')
    sys.exit(1)  # FIX: exit(0) signalled success on a fatal error
img_num = len(imagepaths)
print('find images:',img_num)
imagepaths_train = (imagepaths[0:int(img_num*0.8)]).copy()
maskpaths_train = (maskpaths[0:int(img_num*0.8)]).copy()
imagepaths_eval = (imagepaths[int(img_num*0.8):]).copy()
maskpaths_eval = (maskpaths[int(img_num*0.8):]).copy()

# FIX: hoist the batch counts (previously recomputed many times) and fail fast
# on a too-small dataset instead of dividing by zero after the first epoch.
n_train_batches = int(img_num*0.8/opt.batchsize)
n_eval_batches = int(img_num*0.2/opt.batchsize)
if n_train_batches == 0 or n_eval_batches == 0:
    print('dataset is too small for batchsize',opt.batchsize)
    sys.exit(1)

'''
--------------------------def network--------------------------
'''
if opt.model =='UNet':
    net = unet_model.UNet(n_channels = 3, n_classes = 1)
elif opt.model =='BiSeNet':
    net = BiSeNet_model.BiSeNet(num_classes=1, context_path='resnet18')

if opt.continue_train:
    if not os.path.isfile(os.path.join(dir_checkpoint,'last.pth')):
        opt.continue_train = False
        print('can not load last.pth, training on init weight.')
if opt.continue_train:
    net.load_state_dict(torch.load(os.path.join(dir_checkpoint,'last.pth')))
    # resume epoch counter written at each save below
    f = open(os.path.join(dir_checkpoint,'epoch_log.txt'),'r')
    opt.startepoch = int(f.read())
    f.close()
if opt.gpu_id != -1:
    net.cuda()
    cudnn.benchmark = True

optimizer = torch.optim.Adam(net.parameters(), lr=opt.lr)

# both nets train against plain BCE; BiSeNet additionally supervises two
# auxiliary heads with the same criterion
if opt.model =='UNet':
    criterion = nn.BCELoss()
elif opt.model =='BiSeNet':
    criterion = nn.BCELoss()
    # criterion = BiSeNet_model.DiceLoss()

'''
--------------------------train--------------------------
'''
loss_plot = {'train':[],'eval':[]}
print('begin training......')
for epoch in range(opt.startepoch,opt.maxepoch):
    random_save = random.randint(0, n_train_batches)
    data.shuffledata(imagepaths_train, maskpaths_train)

    starttime = datetime.datetime.now()
    util.writelog(os.path.join(dir_checkpoint,'loss.txt'),'Epoch {}/{}.'.format(epoch + 1, opt.maxepoch),True)
    net.train()
    if opt.gpu_id != -1:
        net.cuda()   # the net is moved to cpu when saved at the end of each epoch
    epoch_loss = 0
    for i in range(n_train_batches):
        img,mask = loadimage(imagepaths_train[i*opt.batchsize:(i+1)*opt.batchsize], maskpaths_train[i*opt.batchsize:(i+1)*opt.batchsize], opt)

        if opt.model =='UNet':
            mask_pred = net(img)
            loss = criterion(mask_pred, mask)
            epoch_loss += loss.item()
        elif opt.model =='BiSeNet':
            mask_pred, mask_pred_sup1, mask_pred_sup2 = net(img)
            loss1 = criterion(mask_pred, mask)
            loss2 = criterion(mask_pred_sup1, mask)
            loss3 = criterion(mask_pred_sup2, mask)
            loss = loss1 + loss2 + loss3
            epoch_loss += loss1.item()  # log only the main-head loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i%100 == 0:
            data.showresult(img,mask,mask_pred,os.path.join(dir_checkpoint,'result.png'),True)
        if i == random_save:
            # one randomly chosen batch per epoch is kept as a snapshot
            data.showresult(img,mask,mask_pred,os.path.join(dir_checkpoint,'epoch_'+str(epoch+1)+'.png'),True)
    epoch_loss = epoch_loss/n_train_batches
    loss_plot['train'].append(epoch_loss)

    #val
    epoch_loss_eval = 0
    with torch.no_grad():
        # net.eval()  # NOTE(review): deliberately left off upstream — confirm
        for i in range(n_eval_batches):
            img,mask = loadimage(imagepaths_eval[i*opt.batchsize:(i+1)*opt.batchsize], maskpaths_eval[i*opt.batchsize:(i+1)*opt.batchsize], opt,test_flag=True)
            if opt.model =='UNet':
                mask_pred = net(img)
            elif opt.model =='BiSeNet':
                mask_pred, _, _ = net(img)
            loss= criterion(mask_pred, mask)
            epoch_loss_eval += loss.item()
    epoch_loss_eval = epoch_loss_eval/n_eval_batches
    loss_plot['eval'].append(epoch_loss_eval)
    # torch.cuda.empty_cache()

    #savelog
    endtime = datetime.datetime.now()
    util.writelog(os.path.join(dir_checkpoint,'loss.txt'),
                  '--- Epoch train_loss: {0:.6f} eval_loss: {1:.6f} Cost time: {2:} s'.format(
                  epoch_loss,
                  epoch_loss_eval,
                  (endtime - starttime).seconds),
                  True)
    #plot
    plt.plot(np.linspace(opt.startepoch+1,epoch+1,epoch+1-opt.startepoch),loss_plot['train'],label='train')
    plt.plot(np.linspace(opt.startepoch+1,epoch+1,epoch+1-opt.startepoch),loss_plot['eval'],label='eval')
    plt.xlabel('Epoch')
    plt.ylabel('BCELoss')
    plt.legend(loc=1)
    plt.savefig(os.path.join(dir_checkpoint,'loss.jpg'))
    plt.close()
    #save network
    torch.save(net.cpu().state_dict(),os.path.join(dir_checkpoint,'last.pth'))
    f = open(os.path.join(dir_checkpoint,'epoch_log.txt'),'w+')
    f.write(str(epoch+1))
    f.close()
    if (epoch+1)%opt.savefreq == 0:
        torch.save(net.cpu().state_dict(),os.path.join(dir_checkpoint,'epoch'+str(epoch+1)+'.pth'))
        print('network saved.')
'''
Train BVDNet (video mosaic removal): L2 + VGG perceptual losses with an
optional multi-scale GAN. Losses, PSNR/SSIM and previews go to tensorboardX.
'''
import os
import sys
sys.path.append("..")
sys.path.append("../..")
from cores import Options
opt = Options()

import numpy as np
import cv2
import random
import torch
import torch.nn as nn
import time

from util import util,data,dataloader
from util import image_processing as impro
from models import BVDNet,model_util
from skimage.metrics import structural_similarity
from tensorboardX import SummaryWriter

'''
--------------------------Get options--------------------------
'''
opt.parser.add_argument('--N',type=int,default=2, help='The input tensor shape is H×W×T×C, T = 2N+1')
opt.parser.add_argument('--S',type=int,default=3, help='Stride of 3 frames')
# opt.parser.add_argument('--T',type=int,default=7, help='T = 2N+1')
opt.parser.add_argument('--M',type=int,default=100, help='How many frames read from each videos')
opt.parser.add_argument('--lr',type=float,default=0.0002, help='')
opt.parser.add_argument('--beta1',type=float,default=0.9, help='')
opt.parser.add_argument('--beta2',type=float,default=0.999, help='')
opt.parser.add_argument('--finesize',type=int,default=256, help='')
opt.parser.add_argument('--loadsize',type=int,default=286, help='')
opt.parser.add_argument('--batchsize',type=int,default=1, help='')
opt.parser.add_argument('--no_gan', action='store_true', help='if specified, do not use gan')
opt.parser.add_argument('--n_blocks',type=int,default=4, help='')
opt.parser.add_argument('--n_layers_D',type=int,default=2, help='')
opt.parser.add_argument('--num_D',type=int,default=3, help='')
opt.parser.add_argument('--lambda_L2',type=float,default=100, help='')
opt.parser.add_argument('--lambda_VGG',type=float,default=1, help='')
opt.parser.add_argument('--lambda_GAN',type=float,default=0.01, help='')
opt.parser.add_argument('--lambda_D',type=float,default=1, help='')
opt.parser.add_argument('--load_thread',type=int,default=16, help='number of thread for loading data')

opt.parser.add_argument('--dataset',type=str,default='./datasets/face/', help='')
opt.parser.add_argument('--dataset_test',type=str,default='./datasets/face_test/', help='')
opt.parser.add_argument('--n_epoch',type=int,default=200, help='')
opt.parser.add_argument('--save_freq',type=int,default=10000, help='')
opt.parser.add_argument('--continue_train', action='store_true', help='')
opt.parser.add_argument('--savename',type=str,default='face', help='')
opt.parser.add_argument('--showresult_freq',type=int,default=1000, help='')
opt.parser.add_argument('--showresult_num',type=int,default=4, help='')

def ImageQualityEvaluation(tensor1,tensor2,showiter,writer,tag):
    """Mean PSNR/SSIM over the batch, logged to tensorboard. Returns (psnr, ssim)."""
    batch_len = len(tensor1)
    psnr,ssmi = 0,0
    for i in range(len(tensor1)):
        img1,img2 = data.tensor2im(tensor1,rgb2bgr=False,batch_index=i), data.tensor2im(tensor2,rgb2bgr=False,batch_index=i)
        psnr += impro.psnr(img1,img2)
        ssmi += structural_similarity(img1,img2,multichannel=True)
    writer.add_scalars('quality/psnr', {tag:psnr/batch_len}, showiter)
    writer.add_scalars('quality/ssmi', {tag:ssmi/batch_len}, showiter)
    return psnr/batch_len,ssmi/batch_len

def ShowImage(tensor1,tensor2,tensor3,showiter,max_num,writer,tag):
    """Log a (max_num x 3) grid of input / output / target images to tensorboard."""
    show_imgs = []
    for i in range(max_num):
        show_imgs += [ data.tensor2im(tensor1,rgb2bgr = False,batch_index=i),
                       data.tensor2im(tensor2,rgb2bgr = False,batch_index=i),
                       data.tensor2im(tensor3,rgb2bgr = False,batch_index=i)]
    show_img = impro.splice(show_imgs, (opt.showresult_num,3))
    writer.add_image(tag, show_img,showiter,dataformats='HWC')

'''
--------------------------Init--------------------------
'''
opt = opt.getparse()
opt.T = 2*opt.N+1
if opt.showresult_num >opt.batchsize:
    opt.showresult_num = opt.batchsize
dir_checkpoint = os.path.join('checkpoints',opt.savename)
util.makedirs(dir_checkpoint)
# start tensorboard
localtime = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
tensorboard_savedir = os.path.join('checkpoints/tensorboard',localtime+'_'+opt.savename)
TBGlobalWriter = SummaryWriter(tensorboard_savedir)
# FIX: the hint previously said 'checkpoints/tensorboardX' which is not the
# directory actually written above.
print('Please run "tensorboard --logdir checkpoints/tensorboard --host=your_server_ip" and input "'+localtime+'" to filter outputs')

'''
--------------------------Init Network--------------------------
'''
if opt.gpu_id != '-1' and len(opt.gpu_id) == 1:
    torch.backends.cudnn.benchmark = True

netG = BVDNet.define_G(opt.N,opt.n_blocks,gpu_id=opt.gpu_id)
optimizer_G = torch.optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, opt.beta2))
lossfun_L2 = nn.MSELoss()
lossfun_VGG = model_util.VGGLoss(opt.gpu_id)
if not opt.no_gan:
    netD = BVDNet.define_D(n_layers_D=opt.n_layers_D,num_D=opt.num_D,gpu_id=opt.gpu_id)
    optimizer_D = torch.optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, opt.beta2))
    lossfun_GAND = BVDNet.GANLoss('D')
    lossfun_GANG = BVDNet.GANLoss('G')

'''
--------------------------Init DataLoader--------------------------
'''
videolist_tmp = os.listdir(opt.dataset)
videolist = []
for video in videolist_tmp:
    if os.path.isdir(os.path.join(opt.dataset,video)):
        if len(os.listdir(os.path.join(opt.dataset,video,'mask')))>=opt.M:
            videolist.append(video)
videolist.sort()  # FIX: bare sorted(videolist) discarded its result
videolist_train = videolist[:int(len(videolist)*0.8)].copy()
videolist_eval = videolist[int(len(videolist)*0.8):].copy()

Videodataloader_train = dataloader.VideoDataLoader(opt, videolist_train)
Videodataloader_eval = dataloader.VideoDataLoader(opt, videolist_eval)

'''
--------------------------Train--------------------------
'''
previous_predframe_tmp = 0
psnr,ssmi = 0,0  # FIX: pre-bind so the iter print below cannot hit NameError
for train_iter in range(Videodataloader_train.n_iter):
    t_start = time.time()
    # train
    ori_stream,mosaic_stream,previous_frame = Videodataloader_train.get_data()
    ori_stream = data.to_tensor(ori_stream, opt.gpu_id)
    mosaic_stream = data.to_tensor(mosaic_stream, opt.gpu_id)
    if previous_frame is None:
        # start of a new clip: feed back the last predicted frame
        previous_frame = data.to_tensor(previous_predframe_tmp, opt.gpu_id)
    else:
        previous_frame = data.to_tensor(previous_frame, opt.gpu_id)

    ############### Forward ####################
    # Fake Generator
    out = netG(mosaic_stream,previous_frame)
    # Discriminator (conditioned on the center mosaic frame)
    if not opt.no_gan:
        dis_real = netD(torch.cat((mosaic_stream[:,:,opt.N],ori_stream[:,:,opt.N].detach()),dim=1))
        dis_fake_D = netD(torch.cat((mosaic_stream[:,:,opt.N],out.detach()),dim=1))
        loss_D = lossfun_GAND(dis_fake_D,dis_real) * opt.lambda_GAN * opt.lambda_D
    # Generator: reconstruction + perceptual (+ adversarial)
    loss_L2 = lossfun_L2(out,ori_stream[:,:,opt.N]) * opt.lambda_L2
    loss_VGG = lossfun_VGG(out,ori_stream[:,:,opt.N]) * opt.lambda_VGG
    loss_G = loss_L2+loss_VGG
    if not opt.no_gan:
        dis_fake_G = netD(torch.cat((mosaic_stream[:,:,opt.N],out),dim=1))
        loss_GANG = lossfun_GANG(dis_fake_G) * opt.lambda_GAN
        loss_G = loss_G + loss_GANG

    ############### Backward Pass ####################
    optimizer_G.zero_grad()
    loss_G.backward()
    optimizer_G.step()

    if not opt.no_gan:
        optimizer_D.zero_grad()
        loss_D.backward()
        optimizer_D.step()

    previous_predframe_tmp = out.detach().cpu().numpy()

    if not opt.no_gan:
        TBGlobalWriter.add_scalars('loss/train', {'L2':loss_L2.item(),'VGG':loss_VGG.item(),
            'loss_D':loss_D.item(),'loss_G':loss_G.item()}, train_iter)
    else:
        TBGlobalWriter.add_scalars('loss/train', {'L2':loss_L2.item(),'VGG':loss_VGG.item()}, train_iter)

    # save network
    if train_iter%opt.save_freq == 0 and train_iter != 0:
        model_util.save(netG, os.path.join('checkpoints',opt.savename,str(train_iter)+'_G.pth'), opt.gpu_id)
        if not opt.no_gan:
            model_util.save(netD, os.path.join('checkpoints',opt.savename,str(train_iter)+'_D.pth'), opt.gpu_id)

    # Image quality evaluation
    # NOTE(review): assumes showresult_freq >= 10; showresult_freq//10 of 0 divides by zero
    if train_iter%(opt.showresult_freq//10) == 0:
        ImageQualityEvaluation(out,ori_stream[:,:,opt.N],train_iter,TBGlobalWriter,'train')

    # Show result
    if train_iter % opt.showresult_freq == 0:
        ShowImage(mosaic_stream[:,:,opt.N],out,ori_stream[:,:,opt.N],train_iter,opt.showresult_num,TBGlobalWriter,'train')

    '''
    --------------------------Eval--------------------------
    '''
    if (train_iter)%5 ==0:
        ori_stream,mosaic_stream,previous_frame = Videodataloader_eval.get_data()
        ori_stream = data.to_tensor(ori_stream, opt.gpu_id)
        mosaic_stream = data.to_tensor(mosaic_stream, opt.gpu_id)
        if previous_frame is None:
            previous_frame = data.to_tensor(previous_predframe_tmp, opt.gpu_id)
        else:
            previous_frame = data.to_tensor(previous_frame, opt.gpu_id)
        with torch.no_grad():
            out = netG(mosaic_stream,previous_frame)
            loss_L2 = lossfun_L2(out,ori_stream[:,:,opt.N]) * opt.lambda_L2
            loss_VGG = lossfun_VGG(out,ori_stream[:,:,opt.N]) * opt.lambda_VGG
            #TBGlobalWriter.add_scalars('loss/eval', {'L2':loss_L2.item(),'VGG':loss_VGG.item()}, train_iter)
            previous_predframe_tmp = out.detach().cpu().numpy()

        # Image quality evaluation
        if train_iter%(opt.showresult_freq//10) == 0:
            psnr,ssmi = ImageQualityEvaluation(out,ori_stream[:,:,opt.N],train_iter,TBGlobalWriter,'eval')

        # Show result
        if train_iter % opt.showresult_freq == 0:
            ShowImage(mosaic_stream[:,:,opt.N],out,ori_stream[:,:,opt.N],train_iter,opt.showresult_num,TBGlobalWriter,'eval')
            t_end = time.time()
            print('iter:{0:d} t:{1:.2f} L2:{2:.4f} vgg:{3:.4f} psnr:{4:.2f} ssmi:{5:.3f}'.format(train_iter,t_end-t_start,
                loss_L2.item(),loss_VGG.item(),psnr,ssmi) )
            # FIX: removed dead typo line `t_strat = time.time()` (never read)

    '''
    --------------------------Test--------------------------
    '''
    if train_iter % opt.showresult_freq == 0 and os.path.isdir(opt.dataset_test):
        show_imgs = []
        videos = os.listdir(opt.dataset_test)
        videos.sort()  # FIX: sorted(videos) discarded its result
        for video in videos:
            frames = os.listdir(os.path.join(opt.dataset_test,video,'image'))
            # FIX: sorted(frames) discarded its result, leaving os.listdir's
            # arbitrary order, so the test stream was temporally scrambled.
            frames.sort()
            for step in range(5):
                mosaic_stream = []
                for i in range(opt.T):
                    _mosaic = impro.imread(os.path.join(opt.dataset_test,video,'image',frames[i*opt.S+step]),loadsize=opt.finesize,rgb=True)
                    mosaic_stream.append(_mosaic)
                if step == 0:
                    previous = impro.imread(os.path.join(opt.dataset_test,video,'image',frames[opt.N*opt.S-1]),loadsize=opt.finesize,rgb=True)
                    previous = data.im2tensor(previous,bgr2rgb = False, gpu_id = opt.gpu_id, is0_1 = False)
                mosaic_stream = (np.array(mosaic_stream).astype(np.float32)/255.0-0.5)/0.5
                mosaic_stream = mosaic_stream.reshape(1,opt.T,opt.finesize,opt.finesize,3).transpose((0,4,1,2,3))
                mosaic_stream = data.to_tensor(mosaic_stream, opt.gpu_id)
                with torch.no_grad():
                    out = netG(mosaic_stream,previous)
                previous = out
                show_imgs+= [data.tensor2im(mosaic_stream[:,:,opt.N],rgb2bgr = False),data.tensor2im(out,rgb2bgr = False)]

        show_img = impro.splice(show_imgs, (len(videos),2))
        TBGlobalWriter.add_image('test', show_img,train_iter,dataformats='HWC')
import random
import os
from util.mosaic import get_random_parameter
import numpy as np
import torch
import torchvision.transforms as transforms
import cv2
from . import image_processing as impro
from . import degradater

def to_tensor(data,gpu_id):
    """Wrap a numpy array as a torch tensor; moved to GPU unless gpu_id == '-1'.

    NOTE(review): gpu_id is compared as the string '-1' here while some callers
    treat opt.gpu_id as an int — verify against cores/options.
    """
    tensor = torch.from_numpy(data)
    if gpu_id != '-1':
        tensor = tensor.cuda()
    return tensor

def normalize(data):
    """Map uint8 [0,255] to float32 in -1 ~ 1."""
    return (data.astype(np.float32)/255.0-0.5)/0.5

def anti_normalize(data):
    """Inverse of normalize: map -1 ~ 1 back to uint8 [0,255]."""
    return np.clip((data*0.5+0.5)*255,0,255).astype(np.uint8)

def tensor2im(image_tensor, gray=False, rgb2bgr = True ,is0_1 = False, batch_index=0):
    """Convert one image of a batched NCHW tensor to a uint8 HWC (or HW) array."""
    arr = image_tensor.data[batch_index].cpu().float().numpy()

    if not is0_1:
        arr = (arr + 1)/2.0   # [-1,1] -> [0,1]
    arr = np.clip(arr * 255.0,0,255)

    # gray -> output 1ch without a channel axis
    if gray:
        h, w = arr.shape[1:]
        return arr.reshape(h,w).astype(np.uint8)

    # output 3ch: replicate a single channel if needed
    if arr.shape[0] == 1:
        arr = np.tile(arr, (3, 1, 1))
    arr = arr.transpose((1, 2, 0))
    if rgb2bgr and not gray:
        # reverse the channel order; subtracting zeros forces a contiguous copy
        arr = arr[...,::-1]-np.zeros_like(arr)
    return arr.astype(np.uint8)


def im2tensor(image_numpy, gray=False,bgr2rgb = True, reshape = True, gpu_id = '-1',is0_1 = False):
    """Convert a uint8 numpy image to a float tensor (optionally batched NCHW)."""
    if gray:
        h, w = image_numpy.shape
        image_tensor = torch.from_numpy((image_numpy/255.0-0.5)/0.5).float()
        if reshape:
            image_tensor = image_tensor.reshape(1,1,h,w)
    else:
        h, w ,ch = image_numpy.shape
        if bgr2rgb:
            # reverse channels; subtracting zeros forces a contiguous copy
            image_numpy = image_numpy[...,::-1]-np.zeros_like(image_numpy)
        if is0_1:
            scaled = image_numpy/255.0
        else:
            scaled = (image_numpy/255.0-0.5)/0.5
        image_tensor = torch.from_numpy(scaled.transpose((2, 0, 1))).float()
        if reshape:
            image_tensor = image_tensor.reshape(1,ch,h,w)
    if gpu_id != '-1':
        image_tensor = image_tensor.cuda()
    return image_tensor

def shuffledata(data,target):
    """Shuffle two parallel sequences in place with the same permutation."""
    state = np.random.get_state()
    np.random.shuffle(data)
    # replay the identical RNG stream so both get the same permutation
    np.random.set_state(state)
    np.random.shuffle(target)

def random_transform_single_mask(img,out_shape):
    """Random zoom + crop + optional flip of a mask; output size is out_shape."""
    out_h,out_w = out_shape
    zoom_w = int(out_w*random.uniform(1.1, 1.5))
    zoom_h = int(out_h*random.uniform(1.1, 1.5))
    img = cv2.resize(img,(zoom_w,zoom_h))
    h,w = img.shape[:2]
    top = int((h-out_h)*random.random())
    left = int((w-out_w)*random.random())
    img = img[top:top+out_h,left:left+out_w]
    if random.random()<0.5:
        if random.random()<0.5:
            img = img[:,::-1]
        else:
            img = img[::-1,:]
    if img.shape[0] != out_h or img.shape[1]!= out_w :
        img = cv2.resize(img,(out_w,out_h))
    return img

def get_transform_params():
    """Draw one set of augmentation flags and magnitudes (crop/rotate/color/flip/degrade)."""
    crop_flag = True
    rotat_flag = np.random.random()<0.2
    color_flag = True
    flip_flag = np.random.random()<0.2
    degradate_flag = np.random.random()<0.5
    flag_dict = {'crop':crop_flag,'rotat':rotat_flag,'color':color_flag,'flip':flip_flag,'degradate':degradate_flag}

    crop_rate = [np.random.random(),np.random.random()]
    rotat_rate = np.random.random()
    color_rate = [np.random.uniform(-0.05,0.05),np.random.uniform(-0.05,0.05),np.random.uniform(-0.05,0.05),
        np.random.uniform(-0.05,0.05),np.random.uniform(-0.05,0.05)]
    flip_rate = np.random.random()
    degradate_params = degradater.get_random_degenerate_params(mod='weaker_2')
    rate_dict = {'crop':crop_rate,'rotat':rotat_rate,'color':color_rate,'flip':flip_rate,'degradate':degradate_params}

    return {'flag':flag_dict,'rate':rate_dict}

def random_transform_single_image(img,finesize,params=None,test_flag = False):
    """Apply one sampled augmentation pipeline; with test_flag only degrade+crop."""
    if params is None:
        params = get_transform_params()

    if params['flag']['degradate']:
        img = degradater.degradate(img,params['rate']['degradate'])

    if params['flag']['crop']:
        h,w = img.shape[:2]
        top = int((h-finesize)*params['rate']['crop'][0])
        left = int((w-finesize)*params['rate']['crop'][1])
        img = img[top:top+finesize,left:left+finesize]

    if test_flag:
        return img

    if params['flag']['rotat']:
        h,w = img.shape[:2]
        # rotate by a random multiple of 90 degrees
        M = cv2.getRotationMatrix2D((w/2,h/2),90*int(4*params['rate']['rotat']),1)
        img = cv2.warpAffine(img,M,(w,h))

    if params['flag']['color']:
        img = impro.color_adjust(img,params['rate']['color'][0],params['rate']['color'][1],
            params['rate']['color'][2],params['rate']['color'][3],params['rate']['color'][4])

    if params['flag']['flip']:
        img = img[:,::-1]

    #check shape
    if img.shape[0]!= finesize or img.shape[1]!= finesize:
        img = cv2.resize(img,(finesize,finesize))
        print('warning! shape error.')
    return img

def random_transform_pair_image(img,mask,finesize,test_flag = False):
    """Augment image and mask with the SAME geometry; mask skips degrade/color."""
    params = get_transform_params()
    img = random_transform_single_image(img,finesize,params)
    params['flag']['degradate'] = False
    params['flag']['color'] = False
    mask = random_transform_single_image(mask,finesize,params)
    return img,mask

def showresult(img1,img2,img3,name,is0_1 = False):
    """Write the three tensors side by side as one preview image file."""
    size = img1.shape[3]
    canvas = np.zeros((size,size*3,3))
    for idx,tensor in enumerate((img1,img2,img3)):
        canvas[0:size,size*idx:size*(idx+1)] = tensor2im(tensor,rgb2bgr = False, is0_1 = is0_1)
    cv2.imwrite(name, canvas)
class VideoLoader(object):
    """Load a single video (converted to numbered images) for training.

    How to use:
        1. Init VideoLoader as loader
        2. Get data by loader.ori_stream
        3. loader.next() to get next stream
    """
    def __init__(self, opt, video_dir, test_flag=False):
        super(VideoLoader, self).__init__()
        self.opt = opt
        self.test_flag = test_flag
        self.video_dir = video_dir
        # t counts how many times next() has advanced the sliding window.
        self.t = 0
        # NOTE(review): opt.M/S/T appear to be frames-per-video, frame
        # stride and stream length — confirm against the options module.
        self.n_iter = self.opt.M -self.opt.S*(self.opt.T+1)
        self.transform_params = data.get_transform_params()
        self.ori_load_pool = []
        self.mosaic_load_pool = []
        self.previous_pred = None
        # Sample mosaic parameters once per video from the first frame/mask
        # so the whole clip carries a consistent mosaic style.
        feg_ori = impro.imread(os.path.join(video_dir,'origin_image','00001.jpg'),loadsize=self.opt.loadsize,rgb=True)
        feg_mask = impro.imread(os.path.join(video_dir,'mask','00001.png'),mod='gray',loadsize=self.opt.loadsize)
        self.mosaic_size,self.mod,self.rect_rat,self.feather = mosaic.get_random_parameter(feg_ori,feg_mask)
        self.startpos = [random.randint(0,self.mosaic_size),random.randint(0,self.mosaic_size)]
        self.loadsize = self.opt.loadsize
        #Init load pool: read S*T frames, mosaic them, augment, normalize.
        for i in range(self.opt.S*self.opt.T):
            _ori_img = impro.imread(os.path.join(video_dir,'origin_image','%05d' % (i+1)+'.jpg'),loadsize=self.loadsize,rgb=True)
            _mask = impro.imread(os.path.join(video_dir,'mask','%05d' % (i+1)+'.png' ),mod='gray',loadsize=self.loadsize)
            _mosaic_img = mosaic.addmosaic_base(_ori_img, _mask, self.mosaic_size,0, self.mod,self.rect_rat,self.feather,self.startpos)
            # Same transform_params for both, so image and mosaic stay aligned.
            _ori_img = data.random_transform_single_image(_ori_img,opt.finesize,self.transform_params)
            _mosaic_img = data.random_transform_single_image(_mosaic_img,opt.finesize,self.transform_params)

            self.ori_load_pool.append(self.normalize(_ori_img))
            self.mosaic_load_pool.append(self.normalize(_mosaic_img))
        self.ori_load_pool = np.array(self.ori_load_pool)
        self.mosaic_load_pool = np.array(self.mosaic_load_pool)

        #Init frist stream: every S-th frame of the pool, T frames total.
        self.ori_stream = self.ori_load_pool [np.linspace(0, (self.opt.T-1)*self.opt.S,self.opt.T,dtype=np.int64)].copy()
        self.mosaic_stream = self.mosaic_load_pool[np.linspace(0, (self.opt.T-1)*self.opt.S,self.opt.T,dtype=np.int64)].copy()
        # stream B,T,H,W,C -> B,C,T,H,W
        self.ori_stream = self.ori_stream.reshape (1,self.opt.T,opt.finesize,opt.finesize,3).transpose((0,4,1,2,3))
        self.mosaic_stream = self.mosaic_stream.reshape(1,self.opt.T,opt.finesize,opt.finesize,3).transpose((0,4,1,2,3))

        #Init frist previous frame
        # NOTE(review): index S*N-1 — presumably N marks the "current" frame
        # position inside the T-frame stream; confirm against options.
        self.previous_pred = self.ori_load_pool[self.opt.S*self.opt.N-1].copy()
        # previous B,C,H,W
        self.previous_pred = self.previous_pred.reshape(1,opt.finesize,opt.finesize,3).transpose((0,3,1,2))

    def normalize(self,data):
        '''
        normalize uint8 [0,255] to float32 -1 ~ 1
        '''
        return (data.astype(np.float32)/255.0-0.5)/0.5

    def anti_normalize(self,data):
        # Inverse of normalize(): float -1~1 back to uint8 [0,255].
        return np.clip((data*0.5+0.5)*255,0,255).astype(np.uint8)

    def next(self):
        # Occasionally re-randomize mosaic offset / crop / load size so a
        # long video does not keep one fixed augmentation.
        if np.random.random()<0.05:
            self.startpos = [random.randint(0,self.mosaic_size),random.randint(0,self.mosaic_size)]
        if np.random.random()<0.02:
            self.transform_params['rate']['crop'] = [np.random.random(),np.random.random()]
        if np.random.random()<0.02:
            self.loadsize = np.random.randint(self.opt.finesize,self.opt.loadsize)

        if self.t != 0:
            # After the first step the "previous prediction" seed is no
            # longer provided; the trainer is expected to feed its own.
            self.previous_pred = None
            # Slide the pool one frame: drop the oldest, load one new frame.
            self.ori_load_pool [:self.opt.S*self.opt.T-1] = self.ori_load_pool [1:self.opt.S*self.opt.T]
            self.mosaic_load_pool[:self.opt.S*self.opt.T-1] = self.mosaic_load_pool[1:self.opt.S*self.opt.T]
            #print(os.path.join(self.video_dir,'origin_image','%05d' % (self.opt.S*self.opt.T+self.t)+'.jpg'))
            _ori_img = impro.imread(os.path.join(self.video_dir,'origin_image','%05d' % (self.opt.S*self.opt.T+self.t)+'.jpg'),loadsize=self.loadsize,rgb=True)
            _mask = impro.imread(os.path.join(self.video_dir,'mask','%05d' % (self.opt.S*self.opt.T+self.t)+'.png' ),mod='gray',loadsize=self.loadsize)
            _mosaic_img = mosaic.addmosaic_base(_ori_img, _mask, self.mosaic_size,0, self.mod,self.rect_rat,self.feather,self.startpos)
            _ori_img = data.random_transform_single_image(_ori_img,self.opt.finesize,self.transform_params)
            _mosaic_img = data.random_transform_single_image(_mosaic_img,self.opt.finesize,self.transform_params)

            _ori_img,_mosaic_img = self.normalize(_ori_img),self.normalize(_mosaic_img)
            self.ori_load_pool [self.opt.S*self.opt.T-1] = _ori_img
            self.mosaic_load_pool[self.opt.S*self.opt.T-1] = _mosaic_img

            # Rebuild the T-frame streams from the refreshed pool.
            self.ori_stream = self.ori_load_pool [np.linspace(0, (self.opt.T-1)*self.opt.S,self.opt.T,dtype=np.int64)].copy()
            self.mosaic_stream = self.mosaic_load_pool[np.linspace(0, (self.opt.T-1)*self.opt.S,self.opt.T,dtype=np.int64)].copy()

            # stream B,T,H,W,C -> B,C,T,H,W
            self.ori_stream = self.ori_stream.reshape (1,self.opt.T,self.opt.finesize,self.opt.finesize,3).transpose((0,4,1,2,3))
            self.mosaic_stream = self.mosaic_stream.reshape(1,self.opt.T,self.opt.finesize,self.opt.finesize,3).transpose((0,4,1,2,3))

        self.t += 1
    def load(self,videolist):
        # Producer loop (runs in a worker Process): repeatedly builds a
        # batch of VideoLoaders and pushes [ori, mosaic, previous] batches
        # into the shared queue.
        for load_video_iter in range(len(videolist)//self.opt.batchsize):
            iter_videolist = videolist[load_video_iter*self.opt.batchsize:(load_video_iter+1)*self.opt.batchsize]
            videoloaders = [VideoLoader(self.opt,os.path.join(self.opt.dataset,iter_videolist[i]),self.test_flag) for i in range(self.opt.batchsize)]
            for each_video_iter in range(self.each_video_n_iter):
                for i in range(self.opt.batchsize):
                    self.ori_stream[i] = videoloaders[i].ori_stream
                    self.mosaic_stream[i] = videoloaders[i].mosaic_stream
                    if each_video_iter == 0:
                        # A "previous prediction" seed only exists for the
                        # first step of each clip (see VideoLoader.next()).
                        self.previous_pred[i] = videoloaders[i].previous_pred
                    videoloaders[i].next()
                # NOTE(review): streams are .copy()'d before queueing but
                # previous_pred is not — the multiprocessing Queue pickles
                # on put, so this looks safe; confirm if switching to
                # threads or shared memory.
                if each_video_iter == 0:
                    self.queue.put([self.ori_stream.copy(),self.mosaic_stream.copy(),self.previous_pred])
                else:
                    self.queue.put([self.ori_stream.copy(),self.mosaic_stream.copy(),None])
def jpeg_compressor(img,quality):
    """Round-trip ``img`` through an in-memory JPEG encode/decode.

    A ``quality`` of 0 means "no lossy compression": the image is
    returned untouched.
    """
    if quality <= 0: # 0 indicating no lossy compression (i.e losslessly compression)
        return img
    flags = [int(cv2.IMWRITE_JPEG_QUALITY), quality]
    encoded = cv2.imencode('.jpg', img, flags)[1]
    return cv2.imdecode(encoded, 1)
def degradate(img,params,jpeg_last = True):
    """Degrade an image with blur / down-up sampling / noise / JPEG artifacts.

    Args:
        img : input image (H,W,C uint8)
        params : dict produced by get_random_degenerate_params(); if falsy,
            a random 'strong' parameter set is drawn.
        jpeg_last : if True, JPEG compression is applied after upsampling;
            otherwise it runs on the downscaled image before upsampling.
    Returns:
        the degraded image
    """
    shape = img.shape
    if not params:
        # BUG FIX: 'original' is not a mod handled by
        # get_random_degenerate_params() (valid: strong / only_downsample /
        # only_4x / weaker_1 / weaker_2), so the old call crashed with
        # UnboundLocalError. Fall back to the 'strong' preset instead.
        params = get_random_degenerate_params('strong')

    if jpeg_last:
        img = gaussian_blur(img,params['blur_sigma'],params['blur_size'])
        img = down(img,params['updown_scale'],shape)
        img = awgn(img,params['awgn_level'])
        img = up(img,params['updown_scale'],shape)
        img = jpeg_compressor(img,params['jpeg_quality'])
    else:
        img = gaussian_blur(img,params['blur_sigma'],params['blur_size'])
        img = down(img,params['updown_scale'],shape)
        img = awgn(img,params['awgn_level'])
        img = jpeg_compressor(img,params['jpeg_quality'])
        img = up(img,params['updown_scale'],shape)

    return img
def get_video_infos(videopath):
    # Probe the container with ffprobe and parse its JSON report.
    # Returns (fps, endtime, height, width).
    args = ['ffprobe -v quiet -print_format json -show_format -show_streams', '-i', '"'+videopath+'"']
    out_string = run(args,mode=1)
    infos = json.loads(out_string)
    try:
        # Assumes stream 0 is the video stream.
        # NOTE(review): eval() executes ffprobe output (e.g. "30000/1001")
        # as Python — fractions.Fraction would be safer; confirm the input
        # path is trusted before keeping eval here.
        fps = eval(infos['streams'][0]['avg_frame_rate'])
        endtime = float(infos['format']['duration'])
        width = int(infos['streams'][0]['width'])
        height = int(infos['streams'][0]['height'])
    except Exception as e:
        # Fall back to stream 1 (e.g. when stream 0 is not the video track).
        fps = eval(infos['streams'][1]['r_frame_rate'])
        endtime = float(infos['format']['duration'])
        width = int(infos['streams'][1]['width'])
        height = int(infos['streams'][1]['height'])

    return fps,endtime,height,width
def medfilt(data,window):
    """Median-filter a 1-D sequence with a zero-padded sliding window.

    Args:
        data : non-empty 1-D indexable sequence of numbers
        window : filter width; must be a positive odd integer so the
            window has a well-defined center sample
    Returns:
        numpy array of the same length as ``data``
    """
    # BUG FIX: the guard rejects even window sizes, but the old message
    # claimed the window "must be even"; also reject non-positive sizes
    # (a negative odd window previously slipped through and crashed later).
    if window % 2 == 0 or window < 1:
        print('Error: the medfilt window must be an odd positive number')
        exit(0)
    pad = int((window-1)/2)
    # Zero-pad both ends so border samples still see a full window.
    pad_data = np.zeros(len(data)+window-1, dtype = type(data[0]))
    result = np.zeros(len(data),dtype = type(data[0]))
    pad_data[pad:pad+len(data)]=data[:]
    for i in range(len(data)):
        result[i] = np.median(pad_data[i:i+window])
    return result
def imwrite(file_path,img,use_thread=False):
    '''
    In order to save images to Chinese (non-ASCII) paths on Windows,
    cv2.imencode(...).tofile is used there instead of cv2.imwrite.
    This function is only intended for saving final output images.

    Args:
        file_path : destination path
        img : image to write (BGR, as used by cv2)
        use_thread : if True, write asynchronously on a daemon thread
    '''
    def subfun(file_path,img):
        if system_type == 'Linux':
            cv2.imwrite(file_path, img)
        else:
            # cv2.imwrite cannot handle non-ASCII paths on Windows.
            cv2.imencode('.jpg', img)[1].tofile(file_path)
    if use_thread:
        t = Thread(target=subfun,args=(file_path, img,))
        # BUG FIX: 't.daemon()' raised TypeError (Thread.daemon is a
        # property, not a method) and 't.start' without parentheses never
        # actually started the thread, so nothing was written.
        t.daemon = True
        t.start()
    else:
        subfun(file_path,img)
def color_adjust(img,alpha=0,beta=0,b=0,g=0,r=0,ran = False):
    '''
    g(x) = (1+alpha)*g(x) + 255*beta, then per-channel (b,g,r) shifts.

    Args:
        img : input image (H,W,3), BGR channel order
        alpha : contrast
        beta : brightness
        b : blue hue
        g : green hue
        r : red hue
        ran : if True, ignore the given values and draw random ones
    Returns:
        img : adjusted image as uint8
    '''
    adjusted = img.astype('float')
    if ran:
        alpha, beta = random.uniform(-0.1,0.1), random.uniform(-0.1,0.1)
        b, g, r = (random.uniform(-0.05,0.05) for _ in range(3))
    # Contrast/brightness first, then the additive per-channel offsets.
    adjusted = adjusted*(1+alpha) + beta*255.0
    for channel, shift in enumerate((b, g, r)):
        adjusted[:,:,channel] = adjusted[:,:,channel] + shift*255.0
    return (np.clip(adjusted,0,255)).astype('uint8')
def mask_area(mask):
    # Binarize the mask, then report the area of the first detected contour.
    mask = cv2.threshold(mask,127,255,0)[1]
    # contours= cv2.findContours(mask,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)[1] #for opencv 3.4
    contours= cv2.findContours(mask,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)[0]#updata to opencv 4.0
    try:
        # NOTE(review): contours[0] is not necessarily the largest contour;
        # with multi-blob masks this may under-report the area — confirm
        # whether callers expect the biggest blob instead.
        area = cv2.contourArea(contours[0])
    except:
        # No contour found (empty mask) -> area 0.
        area = 0
    return area
def psnr(img1,img2):
    """Peak signal-to-noise ratio (dB) between two uint8-range images.

    Near-identical inputs (MSE below 1e-10) are capped at 100 dB to
    avoid dividing by zero.
    """
    diff = img1/255.0 - img2/255.0
    mse = np.mean(diff**2)
    if mse < 1e-10:
        return 100
    return 20*np.log10(1/np.sqrt(mse))
def addmosaic_base(img,mask,n,out_size = 0,model = 'squa_avg',rect_rat = 1.6,feather=0,start_point=[0,0]):
    '''
    Pixelate the masked region of ``img`` with an n-pixel mosaic grid.

    img: input image
    mask: input mask
    n: mosaic size
    out_size: output size 0->original
    model : squa_avg squa_mid squa_random squa_avg_circle_edge rect_avg
    rect_rat: if model==rect_avg , mosaic w/h=rect_rat
    feather : feather size, -1->no 0->auto
    start_point : [0,0], please not input this parameter
    '''
    n = int(n)

    # Clamp the grid offset into [0, n] so the cell indexing stays in range.
    h_start = np.clip(start_point[0], 0, n)
    w_start = np.clip(start_point[1], 0, n)
    # Pixel used to sample the mask at (roughly) the center of each cell.
    pix_mid_h = n//2+h_start
    pix_mid_w = n//2+w_start
    h, w = img.shape[:2]
    h_step = (h-h_start)//n
    w_step = (w-w_start)//n
    if out_size:
        img = resize(img,out_size)
    if mask.shape[0] != h:
        mask = cv2.resize(mask,(w,h))
    img_mosaic = img.copy()

    if model=='squa_avg':
        # Each mask-covered cell becomes its own mean color.
        for i in range(h_step):
            for j in range(w_step):
                if mask[i*n+pix_mid_h,j*n+pix_mid_w]:
                    img_mosaic[i*n+h_start:(i+1)*n+h_start,j*n+w_start:(j+1)*n+w_start,:]=\
                    img[i*n+h_start:(i+1)*n+h_start,j*n+w_start:(j+1)*n+w_start,:].mean(axis=(0,1))

    elif model=='squa_mid':
        # Each mask-covered cell takes the color of its center pixel.
        for i in range(h_step):
            for j in range(w_step):
                if mask[i*n+pix_mid_h,j*n+pix_mid_w]:
                    img_mosaic[i*n+h_start:(i+1)*n+h_start,j*n+w_start:(j+1)*n+w_start,:]=\
                    img[i*n+n//2+h_start,j*n+n//2+w_start,:]

    elif model == 'squa_random':
        # Each mask-covered cell takes the color of a random nearby pixel.
        for i in range(h_step):
            for j in range(w_step):
                if mask[i*n+pix_mid_h,j*n+pix_mid_w]:
                    img_mosaic[i*n+h_start:(i+1)*n+h_start,j*n+w_start:(j+1)*n+w_start,:]=\
                    img[h_start+int(i*n-n/2+n*random.random()),w_start+int(j*n-n/2+n*random.random()),:]

    elif model == 'squa_avg_circle_edge':
        # Mosaic the entire frame, then restore the un-masked part so the
        # mosaic edge follows the (binarized) mask outline.
        for i in range(h_step):
            for j in range(w_step):
                img_mosaic[i*n+h_start:(i+1)*n+h_start,j*n+w_start:(j+1)*n+w_start,:]=\
                img[i*n+h_start:(i+1)*n+h_start,j*n+w_start:(j+1)*n+w_start,:].mean(axis=(0,1))
        mask = cv2.threshold(mask,127,255,cv2.THRESH_BINARY)[1]
        _mask = ch_one2three(mask)
        mask_inv = cv2.bitwise_not(_mask)
        imgroi1 = cv2.bitwise_and(_mask,img_mosaic)
        imgroi2 = cv2.bitwise_and(mask_inv,img)
        img_mosaic = cv2.add(imgroi1,imgroi2)

    elif model =='rect_avg':
        # Rectangular cells: n high, n*rect_rat wide, mean color each.
        n_h = n
        n_w = int(n*rect_rat)
        n_h_half = n_h//2+h_start
        n_w_half = n_w//2+w_start
        for i in range((h-h_start)//n_h):
            for j in range((w-w_start)//n_w):
                if mask[i*n_h+n_h_half,j*n_w+n_w_half]:
                    img_mosaic[i*n_h+h_start:(i+1)*n_h+h_start,j*n_w+w_start:(j+1)*n_w+w_start,:]=\
                    img[i*n_h+h_start:(i+1)*n_h+h_start,j*n_w+w_start:(j+1)*n_w+w_start,:].mean(axis=(0,1))

    if feather != -1:
        # Blend mosaic and original through a blurred mask to soften the
        # edges; feather==0 auto-sizes the blur kernel to the mosaic size.
        if feather==0:
            mask = (cv2.blur(mask, (n, n)))
        else:
            mask = (cv2.blur(mask, (feather, feather)))
        mask = mask/255.0
        for i in range(3):img_mosaic[:,:,i] = (img[:,:,i]*(1-mask)+img_mosaic[:,:,i]*mask)
        img_mosaic = img_mosaic.astype(np.uint8)

    return img_mosaic