| | import os, subprocess, glob, pandas, tqdm, cv2, numpy
|
| | from scipy.io import wavfile
|
| |
|
def init_args(args):
    """Attach all derived output/dataset paths to ``args`` and return it.

    Reads ``args.savePath``, ``args.dataPathAVA`` and ``args.evalDataType``.
    Every derived location is stored back on ``args`` as a new attribute:
    model/score outputs live under ``savePath``; csv, original media and
    clip folders live under ``dataPathAVA``.  The evaluation files use the
    ``val_*`` names when ``evalDataType == 'val'`` and the ``test_*`` names
    for any other value.

    Side effects: creates ``args.modelSavePath`` and ``args.dataPathAVA`` on
    disk if they do not exist yet.

    Returns:
        The same ``args`` object that was passed in, mutated in place.
    """
    save_root = args.savePath
    data_root = args.dataPathAVA

    # Outputs of a run.
    args.modelSavePath = os.path.join(save_root, 'model')
    args.scoreSavePath = os.path.join(save_root, 'score.txt')

    # Dataset sub-folders, all direct children of the dataset root.
    dataset_folders = (
        ('trialPathAVA', 'csv'),
        ('audioOrigPathAVA', 'orig_audios'),
        ('visualOrigPathAVA', 'orig_videos'),
        ('audioPathAVA', 'clips_audios'),
        ('visualPathAVA', 'clips_videos'),
    )
    for attribute, folder in dataset_folders:
        setattr(args, attribute, os.path.join(data_root, folder))

    csv_root = args.trialPathAVA
    args.trainTrialAVA = os.path.join(csv_root, 'train_loader.csv')

    # Anything other than 'val' is evaluated against the test files.
    if args.evalDataType == 'val':
        loader_csv, orig_csv, result_csv = 'val_loader.csv', 'val_orig.csv', 'val_res.csv'
    else:
        loader_csv, orig_csv, result_csv = 'test_loader.csv', 'test_orig.csv', 'test_res.csv'
    args.evalTrialAVA = os.path.join(csv_root, loader_csv)
    args.evalOrig = os.path.join(csv_root, orig_csv)
    args.evalCsvSave = os.path.join(save_root, result_csv)

    for folder in (args.modelSavePath, data_root):
        os.makedirs(folder, exist_ok=True)
    return args
|
| |
|
| |
|
def download_pretrain_model_AVA():
    """Fetch ``pretrain_AVA.model`` into the current directory if it is absent.

    When the file already exists nothing is done.  Otherwise the ``gdown``
    command-line tool is invoked through the shell with a fixed file id; its
    exit status is not inspected.

    Returns:
        None.
    """
    checkpoint = 'pretrain_AVA.model'
    if os.path.isfile(checkpoint):
        # Already downloaded: nothing to do.
        return
    file_id = "1NVIkksrD3zbxbDuDbPc_846bLfPSZcZm"
    command = "gdown --id %s -O %s" % (file_id, checkpoint)
    subprocess.call(command, shell=True, stdout=None)
|
| |
|
def preprocess_AVA(args):
    """Run the full AVA preparation pipeline, one stage after another.

    The stages are executed strictly in this order, each receiving the same
    ``args`` object: ``download_csv``, ``download_videos``, ``extract_audio``,
    ``extract_audio_clips``, ``extract_video_clips``.  Their return values
    are discarded.

    Returns:
        None.
    """
    stages = (
        download_csv,
        download_videos,
        extract_audio,
        extract_audio_clips,
        extract_video_clips,
    )
    for stage in stages:
        stage(args)
|
| |
|
def download_csv(args):
    """Download the csv archive with ``gdown`` and unpack it into the dataset root.

    The archive is saved as ``csv.tar.gz`` inside ``args.dataPathAVA``,
    extracted there with ``tar`` and then deleted.

    The external tools are started from argument lists rather than from an
    interpolated shell string, so a dataset path containing spaces or shell
    metacharacters is passed through unchanged.

    Args:
        args: object exposing ``dataPathAVA`` (the dataset root directory).

    Raises:
        FileNotFoundError: if ``gdown`` or ``tar`` is not installed, or if the
            archive does not exist when it is removed (e.g. the download
            failed).  Non-zero exit statuses of the tools are not inspected.
    """
    file_id = "1C1cGxPHaJAl1NQ2i7IhRgWmdvsPhBCUy"
    archive = os.path.join(args.dataPathAVA, 'csv.tar.gz')
    subprocess.call(["gdown", "--id", file_id, "-O", archive], stdout=None)
    subprocess.call(["tar", "-xzvf", archive, "-C", args.dataPathAVA], stdout=None)
    os.remove(archive)
|
| |
|
def download_videos(args):
    """Download every original video listed in the per-split file lists.

    For each of the ``trainval`` and ``test`` splits this reads
    ``<args.trialPathAVA>/<split>_file_list.txt`` (one file name per line) and
    fetches each entry with ``wget`` into ``<args.visualOrigPathAVA>/<split>``.

    Args:
        args: object exposing ``trialPathAVA`` and ``visualOrigPathAVA``.

    Raises:
        FileNotFoundError: if a file list is missing or ``wget`` is not
            installed.  Failed downloads (non-zero exit status) are ignored.
    """
    for dataType in ['trainval', 'test']:
        listPath = '%s/%s_file_list.txt' % (args.trialPathAVA, dataType)
        # Context manager so the list file is closed deterministically.
        with open(listPath) as listFile:
            fileList = listFile.read().splitlines()
        outFolder = '%s/%s' % (args.visualOrigPathAVA, dataType)
        for fileName in fileList:
            # The shell used to trim surrounding whitespace; keep doing so,
            # and skip blank entries instead of requesting the bucket root.
            fileName = fileName.strip()
            if not fileName:
                continue
            url = "https://s3.amazonaws.com/ava-dataset/%s/%s" % (dataType, fileName)
            # Argument list (no shell): names from the list file can never be
            # interpreted as shell syntax.
            subprocess.call(["wget", "-P", outFolder, url], stdout=None)
|
| |
|
def extract_audio(args):
    """Extract one wav file per original video using ``ffmpeg``.

    For each of the ``trainval`` and ``test`` splits, every file found in
    ``<args.visualOrigPathAVA>/<split>`` is converted to
    ``<args.audioOrigPathAVA>/<split>/<name>.wav`` where ``<name>`` is the
    video's base name up to its first dot.  The ffmpeg options request mono
    (``-ac 1``), 16 kHz (``-ar 16000``), 16-bit PCM (``pcm_s16le``) output
    and overwrite existing files (``-y``).

    Args:
        args: object exposing ``visualOrigPathAVA`` and ``audioOrigPathAVA``.

    Raises:
        FileNotFoundError: if ``ffmpeg`` is not installed.  Conversions that
            exit with a non-zero status are ignored.
    """
    for dataType in ['trainval', 'test']:
        inpFolder = '%s/%s' % (args.visualOrigPathAVA, dataType)
        outFolder = '%s/%s' % (args.audioOrigPathAVA, dataType)
        os.makedirs(outFolder, exist_ok=True)
        videos = glob.glob("%s/*" % (inpFolder))
        for videoPath in tqdm.tqdm(videos):
            # basename() is separator-agnostic; the key is everything before
            # the first dot, matching the '<video_id>.wav' naming downstream.
            videoKey = os.path.basename(videoPath).split('.')[0]
            audioPath = '%s/%s' % (outFolder, videoKey + '.wav')
            # Argument list (no shell) so paths with spaces or shell
            # metacharacters reach ffmpeg intact.
            cmd = [
                "ffmpeg", "-y", "-i", videoPath,
                "-async", "1", "-ac", "1", "-vn",
                "-acodec", "pcm_s16le", "-ar", "16000",
                "-threads", "8", audioPath,
                "-loglevel", "panic",
            ]
            subprocess.call(cmd, stdout=None)
|
| |
|
| |
|
def extract_audio_clips(args):
    """Cut one wav clip per entity out of the full-length audio tracks.

    For each of the ``train``, ``val`` and ``test`` splits this reads
    ``<args.trialPathAVA>/<split>_orig.csv``, keeps the rows whose
    ``label_id`` is 0, 1 or 2, and groups them by ``entity_id``.  For every
    entity the span between its smallest and largest ``frame_timestamp`` is
    sliced out of ``<args.audioOrigPathAVA>/<trainval|test>/<video_id>.wav``
    and written to ``<args.audioPathAVA>/<split>/<video_id>/<entity_id>.wav``.

    Decoded source tracks are cached per split, together with their sample
    rate, so each wav file is read at most once per split.

    Args:
        args: object exposing ``trialPathAVA``, ``audioOrigPathAVA`` and
            ``audioPathAVA``.
    """
    # 'train' and 'val' clips are both cut from the 'trainval' originals.
    dic = {'train': 'trainval', 'val': 'trainval', 'test': 'test'}
    for dataType in ['train', 'val', 'test']:
        df = pandas.read_csv(os.path.join(args.trialPathAVA, '%s_orig.csv' % (dataType)), engine='python')
        dfNeg = pandas.concat([df[df['label_id'] == 0], df[df['label_id'] == 2]])
        dfPos = df[df['label_id'] == 1]
        df = pandas.concat([dfPos, dfNeg]).reset_index(drop=True)
        # Sorting by timestamp inside each entity makes the first/last row of
        # a group the start/end of that entity's clip.
        df = df.sort_values(['entity_id', 'frame_timestamp']).reset_index(drop=True)
        entityList = df['entity_id'].unique().tolist()
        df = df.groupby('entity_id')
        audioFeatures = {}  # video_id -> (sample_rate, samples)
        outDir = os.path.join(args.audioPathAVA, dataType)
        audioDir = os.path.join(args.audioOrigPathAVA, dic[dataType])
        for entity in tqdm.tqdm(entityList, total=len(entityList)):
            insData = df.get_group(entity)
            videoKey = insData.iloc[0]['video_id']
            start = insData.iloc[0]['frame_timestamp']
            end = insData.iloc[-1]['frame_timestamp']
            entityID = insData.iloc[0]['entity_id']
            clipDir = os.path.join(outDir, videoKey)
            os.makedirs(clipDir, exist_ok=True)
            insPath = os.path.join(clipDir, entityID + '.wav')
            if videoKey not in audioFeatures:
                audioFile = os.path.join(audioDir, videoKey + '.wav')
                # Cache the sample rate with the samples: a cache hit must
                # use the rate of *this* video, not of the last file read.
                audioFeatures[videoKey] = wavfile.read(audioFile)
            sr, audio = audioFeatures[videoKey]
            audioStart = int(float(start) * sr)
            audioEnd = int(float(end) * sr)
            audioData = audio[audioStart:audioEnd]
            wavfile.write(insPath, sr, audioData)
|
| |
|
def extract_video_clips(args):
    """Crop and save one jpg per annotated frame of every entity.

    For each of the ``train``, ``val`` and ``test`` splits this reads
    ``<args.trialPathAVA>/<split>_orig.csv``, keeps the rows whose
    ``label_id`` is 0, 1 or 2, and groups them by ``entity_id``.  For every
    row the source video is positioned at ``frame_timestamp``, one frame is
    decoded, the ``entity_box_*`` rectangle is cropped out and the crop is
    written to
    ``<args.visualPathAVA>/<split>/<video_id>/<entity_id>/<timestamp>.jpg``
    (timestamp formatted with two decimals).

    Args:
        args: object exposing ``trialPathAVA``, ``visualOrigPathAVA`` and
            ``visualPathAVA``.

    Raises:
        IndexError: if no file matching ``<video_id>.*`` exists in the
            original video folder of the split.
    """
    # 'train' and 'val' clips are both cut from the 'trainval' originals.
    dic = {'train': 'trainval', 'val': 'trainval', 'test': 'test'}
    for dataType in ['train', 'val', 'test']:
        df = pandas.read_csv(os.path.join(args.trialPathAVA, '%s_orig.csv' % (dataType)))
        dfNeg = pandas.concat([df[df['label_id'] == 0], df[df['label_id'] == 2]])
        dfPos = df[df['label_id'] == 1]
        df = pandas.concat([dfPos, dfNeg]).reset_index(drop=True)
        df = df.sort_values(['entity_id', 'frame_timestamp']).reset_index(drop=True)
        entityList = df['entity_id'].unique().tolist()
        df = df.groupby('entity_id')
        outDir = os.path.join(args.visualPathAVA, dataType)
        # Loop-invariant: all videos of a split live in the same folder.
        videoDir = os.path.join(args.visualOrigPathAVA, dic[dataType])
        for entity in tqdm.tqdm(entityList, total=len(entityList)):
            insData = df.get_group(entity)
            videoKey = insData.iloc[0]['video_id']
            entityID = insData.iloc[0]['entity_id']
            # The container extension is unknown, so match '<video_id>.*'.
            videoFile = glob.glob(os.path.join(videoDir, '{}.*'.format(videoKey)))[0]
            V = cv2.VideoCapture(videoFile)
            try:
                insDir = os.path.join(outDir, videoKey, entityID)
                os.makedirs(insDir, exist_ok=True)
                for _, row in insData.iterrows():
                    imageFilename = os.path.join(insDir, str("%.2f" % row['frame_timestamp']) + '.jpg')
                    # Position is set in milliseconds; frame_timestamp is
                    # presumably in seconds -- TODO confirm against the csv.
                    V.set(cv2.CAP_PROP_POS_MSEC, row['frame_timestamp'] * 1e3)
                    # NOTE(review): the read status is not checked, so an
                    # undecodable frame fails in the size lookup below.
                    _, frame = V.read()
                    h = numpy.size(frame, 0)
                    w = numpy.size(frame, 1)
                    # Box coordinates are scaled by the frame size, so they
                    # are presumably normalised to [0, 1] -- TODO confirm.
                    x1 = int(row['entity_box_x1'] * w)
                    y1 = int(row['entity_box_y1'] * h)
                    x2 = int(row['entity_box_x2'] * w)
                    y2 = int(row['entity_box_y2'] * h)
                    face = frame[y1:y2, x1:x2, :]
                    cv2.imwrite(imageFilename, face)
            finally:
                # Release the decoder explicitly (also on errors) instead of
                # relying on garbage collection of the capture object.
                V.release()
|
| |
|