| import os.path |
| import sys |
|
|
| sys.path.append('..') |
|
|
| import options |
|
|
| import cv2 |
| import dlib |
| import numpy as np |
| import options as opt |
| import matplotlib.pyplot as plt |
|
|
| from tqdm.auto import tqdm |
| from multiprocessing import Pool |
|
|
# dlib 68-point facial-landmark model (must be downloaded into ../pretrain).
predictor_path = '../pretrain/shape_predictor_68_face_landmarks.dat'
predictor = dlib.shape_predictor(predictor_path)
detector = dlib.get_frontal_face_detector()


# NOTE(review): RUN_PARALLEL, FORCE_RATIO and BORDER — like `predictor`,
# `detector`, cv2, plt and Pool above — are never used in this script.
# They look like leftovers from the cropping variant of this tool; confirm
# before deleting.
RUN_PARALLEL = True
FORCE_RATIO = True
BORDER = 10


# Resolve dataset directories relative to the repository root ('..'),
# using the path names declared in the project-level `options` module.
base = os.path.abspath('..')
image_dir = os.path.join(base, options.images_dir)      # extracted video frames
anno_dir = os.path.join(base, options.alignments_dir)   # word-alignment files
crop_dir = os.path.join(base, options.crop_images_dir)  # cropped mouth images
|
|
|
|
def get_mouth_marks(shape):
    """Return the axis-aligned bounding box of the mouth landmarks.

    Parameters
    ----------
    shape : dlib.full_object_detection
        A 68-point landmark prediction; indices 48-67 are the mouth.

    Returns
    -------
    tuple of int
        ``(x_left, y_top, x_right, y_bottom)`` — the min/max landmark
        coordinates of the mouth region.
    """
    # dlib's 68-point model places the mouth landmarks at indices 48-67.
    xs = []
    ys = []
    for idx in range(48, 68):
        point = shape.part(idx)
        xs.append(point.x)
        ys.append(point.y)

    # The original built a (2, 20) numpy array and recomputed np.amin /
    # np.amax four times (and bound an unused local); plain min/max over
    # the collected coordinates is equivalent and cheaper.
    return int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))
|
|
|
|
translate_pairs = []

# Enumerate every (speaker, sentence) pair for which both extracted frame
# images and an alignment file exist; crop output directories for each
# speaker are created on the fly.
for speaker_no in range(1, 35):
    name = f's{speaker_no}'
    frames_root = os.path.join(image_dir, name)
    crops_root = os.path.join(crop_dir, name)
    aligns_root = os.path.join(anno_dir, name)

    if not os.path.exists(frames_root):
        continue
    if not os.path.exists(crops_root):
        os.mkdir(crops_root)

    for sentence in os.listdir(frames_root):
        # Only keep sentences that have a matching word-alignment file.
        if os.path.exists(os.path.join(aligns_root, f'{sentence}.align')):
            translate_pairs.append((speaker_no, sentence))


print('PAIRS', len(translate_pairs))
# Filled in by extract_mouth_image() below.
bad_sentences = set()
bad_filepaths = []
|
|
|
|
def extract_mouth_image(speaker_no, sentence):
    """Audit the crop output for one (speaker, sentence) pair.

    Despite the name, this does not crop anything: for every ``.jpg``
    frame of the sentence it checks whether the corresponding file
    already exists under the crop directory. Missing crops are recorded
    in the module-level ``bad_filepaths`` / ``bad_sentences`` and logged.

    Parameters
    ----------
    speaker_no : int
        Speaker index (maps to directory ``s{speaker_no}``).
    sentence : str
        Sentence directory name within the speaker's image directory.
    """
    speaker = f's{speaker_no}'
    src_dir = os.path.join(image_dir, speaker, sentence)
    dst_dir = os.path.join(crop_dir, speaker, sentence)

    if not os.path.exists(dst_dir):
        # Assumes the speaker-level crop dir already exists (created by
        # the pair-collection loop above); os.mkdir would fail otherwise.
        os.mkdir(dst_dir)

    for fname in os.listdir(src_dir):
        src_path = os.path.join(src_dir, fname)
        if not src_path.endswith('.jpg'):
            continue

        dst_path = os.path.join(dst_dir, fname)
        if os.path.exists(dst_path):
            continue
        bad_filepaths.append(dst_path)
        print('BAD FILEPATH', (speaker_no, sentence, fname))
        bad_sentences.add((speaker_no, sentence))
|
|
|
|
# Audit every collected pair; extract_mouth_image fills the bad_* globals.
for pair in tqdm(translate_pairs):
    extract_mouth_image(*pair)


print('BAD SENTENCES', list(bad_sentences))
print('>>>')
| |