from copy import copy from http.client import IM_USED import pathlib import shutil import PIL import cv2 import numpy as np import os.path as osp import os from PIL import Image, ImageColor, ImageFont, ImageDraw, ImageFilter, ImageOps import random from numpy.random import rand from trdg.utils import load_dict, load_fonts from tqdm import tqdm import pandas as pd import sys sys.path.append(os.getcwd()) from utils.io_utils import find_all_imgs, imread, imwrite from utils.imgproc_utils import * import copy ALIGN_LEFT = 0 ALIGN_CENTER = 1 ALIGN_RIGHT = 2 ORIENTATION_HOR = 0 ORIENTATION_VER = 1 def get_textlines_from_langdict(lang_dict, num_line, line_len, sampler=None): textlines = [] dict_len = len(lang_dict) for ii in range(num_line): line = '' for jj in range(line_len): line += lang_dict[random.randrange(dict_len)] + ' ' textlines.append(line[:line_len]) if sampler is None: return textlines return textlines def draw_text_polygons(img, text_polygons, color=None): if isinstance(img, PIL.Image.Image): img = np.array(img) img = np.copy(img) for poly in text_polygons: if color is None: randcolor = (random.randint(0,255), random.randint(0,255), random.randint(0,255)) else: randcolor = color cv2.polylines(img,[poly.reshape((-1, 1, 2))],True,randcolor, thickness=2) return img def draw_textblk(textlines, font, fill='black', stroke_width=0, stroke_fill='grey', spacing=0, rotation=0, orientation=ORIENTATION_HOR, alignment=ALIGN_LEFT): text_size = np.array([font.getsize(line) for line in textlines]) if orientation == ORIENTATION_HOR: line_widths, line_heights = text_size[:, 0], text_size[:, 1] textblk_w = max(text_size[:, 0]) + 3*stroke_width textblk_h = (len(textlines) - 1) * spacing + text_size[:, 1].sum() + 3*stroke_width else: line_widths, line_heights = text_size[:, 1], text_size[:, 0] textblk_w = line_widths.sum() + 3*stroke_width textblk_h = max(line_heights) + 3*stroke_width if orientation == ORIENTATION_VER: textblk_h += font.size * 3 # some fonts are not correctly aligned txtblk_img = Image.new("RGBA", (textblk_w, textblk_h), (255, 255, 255, 255)) txtblk_draw = ImageDraw.Draw(txtblk_img) txtblk_draw.fontmode = '1' # disable anti-aliasing txtblk_mask = Image.new("L", (textblk_w, textblk_h), (0)) tmp_msk = txtblk_mask.copy() tmp_msk_draw = ImageDraw.Draw(tmp_msk) tmp_msk_draw.fontmode = '1' textpolygons = [] if orientation == ORIENTATION_VER: for ii, line in enumerate(textlines): x_offset = sum(line_widths[:ii]) + stroke_width for jj, char in enumerate(line): txtblk_draw.text((x_offset, jj*font.size), char, font=font, fill=fill, stroke_width=stroke_width, stroke_fill=stroke_fill) tmp_msk_draw.text((x_offset, jj*font.size), char, font=font, fill='white', stroke_width=stroke_width, stroke_fill='white') valid_bbox = tmp_msk.getbbox() if valid_bbox is None: continue txtblk_mask.paste(tmp_msk, mask=tmp_msk) tmp_msk.paste('black', [0, 0, tmp_msk.size[0],tmp_msk.size[1]]) textpolygons.append([valid_bbox[0], valid_bbox[1], valid_bbox[2]-valid_bbox[0], valid_bbox[3]-valid_bbox[1]]) else: for ii, line in enumerate(textlines): x_offset = stroke_width y_offset = sum(line_heights[0:ii]) + stroke_width if alignment == ALIGN_CENTER: x_offset += (textblk_w - line_widths[ii]) / 2 txtblk_draw.text((x_offset, y_offset), line, font=font, fill=fill, stroke_width=stroke_width, stroke_fill=stroke_fill) tmp_msk_draw.text((x_offset, y_offset), line, font=font, fill='white', stroke_width=stroke_width, stroke_fill='white') valid_bbox = tmp_msk.getbbox() if valid_bbox is None: continue txtblk_mask.paste(tmp_msk, mask=tmp_msk) tmp_msk.paste('black', [0, 0, tmp_msk.size[0],tmp_msk.size[1]]) textpolygons.append([valid_bbox[0], valid_bbox[1], valid_bbox[2]-valid_bbox[0], valid_bbox[3]-valid_bbox[1]]) bbox = txtblk_mask.getbbox() if bbox is None: return None, None, None textpolygons = np.array(textpolygons) textpolygons = xywh2xyxypoly(textpolygons) txtblk_img, txtblk_mask = txtblk_img.crop(bbox), txtblk_mask.crop(bbox) textpolygons[:, ::2] = np.clip(textpolygons[:, ::2] - bbox[0], 0, txtblk_mask.width-1) textpolygons[:, 1::2] = np.clip(textpolygons[:, 1::2] - bbox[1], 0, txtblk_mask.height-1) if rotation != 0: center = (txtblk_img.width/2, txtblk_img.height/2) txtblk_img = txtblk_img.rotate(rotation, Image.BICUBIC, expand=1) txtblk_mask = txtblk_mask.rotate(rotation, Image.BICUBIC, expand=1) new_center = (txtblk_img.width / 2, txtblk_img.height / 2) textpolygons = rotate_polygons(center, textpolygons, rotation, new_center) # txtblk_img, txtblk_mask = txtblk_img.crop(bbox), txtblk_mask.crop(bbox) # textpolygons[:, ::2] = np.clip(textpolygons[:, ::2] - bbox[0], 0, txtblk_mask.width-1) # textpolygons[:, 1::2] = np.clip(textpolygons[:, 1::2] - bbox[1], 0, txtblk_mask.height-1) return txtblk_img, txtblk_mask, textpolygons def create_random_sampler(value, prob): if isinstance(prob, list): prob = np.array(prob).astype(np.float32) prob /= prob.sum() sampler = lambda : np.random.choice(value, replace=False, p=prob) return sampler class ScaledSampler: def __init__(self, func_args, func='default'): if func == 'default': self.sampler_func = create_random_sampler(**func_args) else: raise NotImplementedError() pass def __call__(self, scaler=None, to_int=True): value = self.sampler_func() if scaler is not None: value = scaler * value if to_int: value = int(round(value)) return value pass class RandColorSampler: def __init__(self, func_args, func='default'): if func == 'default': self.sampler_func = create_random_sampler(**func_args) else: raise NotImplementedError() pass def __call__(self, scaler=None): value = self.sampler_func() if value == 'random': return (random.randint(0,255), random.randint(0,255), random.randint(0,255), 255) return value class TextLinesSampler: def __init__(self, page_size, sampler_dict): self.page_w, self.page_h = page_size self.lang = sampler_dict['lang'] self.lang_dict = load_dict(lang=self.lang) self.orientation_sampler = ScaledSampler(sampler_dict['orientation']) self.numlines_sampler = ScaledSampler(sampler_dict['num_lines']) self.length_sampler = ScaledSampler(sampler_dict['length']) self.min_num_lines = sampler_dict['min_num_lines'] self.min_length = sampler_dict['min_length'] self.alignment_sampler = create_random_sampler(**sampler_dict['alignment']) self.rotation_sampler = create_random_sampler(**sampler_dict['rotation']) def __call__(self, page_w=None, page_h=None, font_size=1): if page_w == None: page_w = self.page_w if page_h == None: page_h = self.page_h orientation = self.orientation_sampler() rotation = self.rotation_sampler() if rotation != 0: rotation = random.randint(-rotation, rotation) num_lines = max(self.numlines_sampler(page_h/font_size), self.min_num_lines) num_lines = random.randint(self.min_num_lines, num_lines) max_length = max(self.length_sampler(page_h/font_size), self.min_length) textlines = [] dict_len = len(self.lang_dict) for ii in range(num_lines): line = '' length = random.randint(self.min_length, max_length) for jj in range(length): line += self.lang_dict[random.randrange(dict_len)] + ' ' textlines.append(line[:length]) return textlines, orientation, self.alignment_sampler(), rotation class FontSampler: def __init__(self, font_dict, page_size) -> None: font_statics = font_dict['font_statics'] font_dir = font_dict['font_dir'] self.page_size = page_size self.size_sampler = ScaledSampler(font_dict['size']) self.color_sampler = RandColorSampler(font_dict['color']) self.sw_sampler = ScaledSampler(font_dict['stroke_width']) self.font_dir = font_dir self.sampler_range = font_dict['num'] self.font_idx = 0 font_statics = pd.read_csv(font_statics) self.font_list = list() for fontname in font_statics['font']: if osp.exists(osp.join(self.font_dir, fontname)): self.font_list.append(fontname) if len(self.font_list) >= self.sampler_range: break assert len(self.font_list) > 0 def __call__(self, page_size = None): if page_size is None: page_size = self.page_size page_w, page_h = page_size fontsize = self.size_sampler(page_h) stroke_width = self.sw_sampler(fontsize) color = self.color_sampler() if color == 'black': sw_color = (255, 255, 255, 255) elif color == 'white': sw_color = (0, 0, 0, 255) else: sw_color = self.color_sampler() # while (True): # self.font_idx = random.randrange(0, self.sampler_range) # fontname = self.font_statics.iloc[self.font_idx]['font'] # font_path = osp.join(self.font_dir, fontname) # if osp.exists(font_path): # break self.font_idx = random.randrange(0, self.sampler_range) % len(self.font_list) font_path = osp.join(self.font_dir, self.font_list[self.font_idx]) font = ImageFont.truetype(font_path, fontsize) return font, color, stroke_width, sw_color class TextBlkSampler: def __init__(self, page_size, max_tries, bboxlist=[]): self.page_w, self.page_h = page_size self.bboxlist = bboxlist self.max_tries = max_tries self.max_padding = int(round(0.05 * self.page_h)) def __call__(self, bbox_w, bbox_h, padding=0, page_size=None): padding = int(round(padding)) if page_size is not None: page_w, page_h = page_size else: page_w, page_h = self.page_w, self.page_h padding = min(self.max_padding, padding) bbox_w += 2*padding bbox_h += 2*padding x_range = page_w-bbox_w-1 y_range = page_h-bbox_h-1 if x_range < 0 or y_range < 0: return None for ii in range(self.max_tries): x, y = random.randint(0, x_range), random.randint(0, y_range) bbox_padded = [x, y, x + bbox_w, y + bbox_h] collide = False for bbox_exist in self.bboxlist: if union_area(bbox_exist, bbox_padded) > 0: collide = True break if not collide: break if not collide: bbox = [bbox_padded[0]+padding, bbox_padded[1]+padding, bbox_padded[2]-padding, bbox_padded[3]-padding] # bbox = [int(bb) for bb in bbox] self.bboxlist.append(bbox) return bbox return None def initialize(self, page_w, page_h, bboxlist=None, to_xywh=False): if bboxlist is None: self.bboxlist = [] else: if to_xywh: self.bboxlist = yolo_xywh2xyxy(bboxlist, page_w, page_h) if self.bboxlist is not None: self.bboxlist = self.bboxlist.tolist() else: self.bboxlist = [] LANG_DICT = {'en': 0, 'ja': 1} def lang2cls(lang: str) -> int: return LANG_DICT[lang] def cls2lang(cls: int) -> str: return list(LANG_DICT.keys())[cls] def get_max_var_color(mean_bgcolor): color_candidate = np.clip(np.array([mean_bgcolor-127, mean_bgcolor+127]), 0, 255).astype(np.int64) max_var_color = [c[0] if abs(c[0]-mean_bgcolor[ii]) > abs(c[1]-mean_bgcolor[ii]) else c[1] for ii, c in enumerate(zip(color_candidate[0], color_candidate[1]))] max_var_color = (max_var_color[0], max_var_color[1], max_var_color[2]) return max_var_color class ComicTextSampler: def __init__(self, page_size, sampler_dict, seed=None): if seed is not None: random.seed(seed) np.random.seed(seed) self.page_size = page_size self.num_txtblk = sampler_dict['num_txtblk'] self.font_dict = sampler_dict['font'] self.text_dict = sampler_dict['text'] self.textlines_sampler = TextLinesSampler(page_size, sampler_dict['text']) self.font_sampler = FontSampler(self.font_dict, self.page_size) self.textblk_sampler = TextBlkSampler(page_size, max_tries=20) self.lang = sampler_dict['text']['lang'] def drawtext_one_page(self, page_size=None, bboxlist=None, im_in=None, adaptive_color=False): if page_size is not None: page_w, page_h = page_size else: page_w, page_h = self.page_size if im_in is None: canvas = Image.new("RGBA", (page_w, page_h), 'white') else: canvas = Image.fromarray(cv2.cvtColor(im_in, cv2.COLOR_BGR2RGB)) page_w, page_h = canvas.width, canvas.height canvas_msk = Image.new("L", (page_w, page_h), 'black') canvas_draw = ImageDraw.Draw(canvas) block_dicts = {} yolo_labels = [] textpolylines = [] self.textblk_sampler.initialize(page_w, page_h, bboxlist, True) for ii in range(self.num_txtblk): font, color, stroke_width, sw_color = self.font_sampler(page_size=self.page_size) textlines, orientation, alignment, rotation = self.textlines_sampler(font_size=font.size) txtblk_img, txtblk_mask, textpolygons = draw_textblk(textlines, font, fill=color, stroke_width=stroke_width, stroke_fill=sw_color, orientation=orientation, alignment=alignment, rotation=rotation) if txtblk_mask is None: continue bbox = self.textblk_sampler(txtblk_img.width, txtblk_img.height, font.size*1.2, page_size=(page_w, page_h)) if bbox is not None: x1, y1, x2, y2 = bbox[0], bbox[1], bbox[0] + txtblk_mask.width, bbox[1] + txtblk_mask.height re_draw = False if im_in is not None: mean_bgcolor = np.mean(im_in[y1: y2, x1: x2], axis=(0, 1)) max_var_color = get_max_var_color(mean_bgcolor) # color_candidate = np.clip(np.array([mean_bgcolor-127, mean_bgcolor+127]), 0, 255).astype(np.int64) # max_var_color = [c[0] if abs(c[0]-mean_bgcolor[ii]) > abs(c[1]-mean_bgcolor[ii]) else c[1] for ii, c in enumerate(zip(color_candidate[0], color_candidate[1]))] # max_var_color = (max_var_color[0], max_var_color[1], max_var_color[2]) if color == 'black': color_rep = np.array([0, 0, 0]) elif color == 'white': color_rep = np.array([255, 255, 255]) else: color_rep = np.array(color[:3]) color_var = np.sum(np.abs(mean_bgcolor - color_rep)) if not adaptive_color: if color_var < 127: color = max_var_color sw_color = get_max_var_color(np.array(color)) re_draw = True else: color = max_var_color sw_color = get_max_var_color(np.array(color)) re_draw = True if stroke_width != 0 and im_in is not None: # sw_color = get_max_var_color(color) re_draw = True if re_draw: txtblk_img, txtblk_mask, textpolygons = draw_textblk(textlines, font, fill=color, stroke_width=stroke_width, stroke_fill=sw_color, orientation=orientation, alignment=alignment, rotation=rotation) blk_dict = { 'lang': self.lang, 'lang_cls': lang2cls(self.lang), 'xyxy': [x1, y1, x2, y2], 'polylines': textpolygons } block_dicts[str(ii)+'-'+self.lang] = blk_dict textpolygons[:, ::2] += x1 textpolygons[:, 1::2] += y1 textpolylines += textpolygons.astype(np.int64).tolist() yolo_labels += [[x1, y1, x2, y2]] canvas.paste(txtblk_img, (bbox[0], bbox[1]), mask=txtblk_mask) canvas_msk.paste(txtblk_mask, (bbox[0], bbox[1]), mask=txtblk_mask) rst = cv2.cvtColor(np.array(canvas), cv2.COLOR_RGB2BGR) rst_msk = np.array(canvas_msk) yolo_labels = xyxy2yolo(np.array(yolo_labels), page_w, page_h) if yolo_labels is not None: cls = np.ones((yolo_labels.shape[0], 1)) * lang2cls(self.lang) yolo_labels = np.concatenate((cls, yolo_labels), axis=1) return rst, rst_msk, block_dicts, yolo_labels, np.array(textpolylines) def render_comictext(comic_sampler_list, img_dir, label_dir=None, render_num=700, save_dir=None, save_prefix=None, show=False): if osp.exists(osp.join(img_dir, 'statistics.csv')): statistics = pd.read_csv(osp.join(img_dir, 'statistics.csv')) else: statistics = None imglist = find_all_imgs(img_dir) # render_num = min(render_num, len(imglist)) num_im = len(imglist) for ii in tqdm(range(render_num)): im_idx = ii % num_im if statistics is not None: imgname = statistics.loc[im_idx]['name'] else: imgname = imglist[im_idx] img = imread(osp.join(img_dir, imgname)) cs_idx = ii % len(comic_sampler_list) bboxlist = [] labels = None if label_dir is not None: labelname = imgname.replace(pathlib.Path(imgname).suffix, '.txt') label_path = osp.join(label_dir, labelname) labels = np.loadtxt(label_path) if len(labels) != 0: if len(labels.shape) == 1: labels = np.array([labels]) clslist, bboxlist = labels[:, 0], np.copy(labels[:, 1:]) else: labels = None bboxlist = [] rst, rst_msk, block_dicts, yolo_labels, textpolylines = comic_sampler_list[cs_idx].drawtext_one_page(im_in=img, bboxlist=bboxlist, adaptive_color=True) if save_dir is not None: if save_prefix is not None: save_name = save_prefix + '{0:09d}'.format(ii) + '.jpg' else: save_name = 'syn-' + imgname yolo_save_path = osp.join(save_dir, save_name.replace(pathlib.Path(save_name).suffix, '.txt')) content = '' if yolo_labels is not None: if labels is None: content = get_yololabel_strings(yolo_labels[:, 0], yolo_labels[:, 1:]) else: yolo_labels = np.concatenate((labels, yolo_labels)) content = get_yololabel_strings(yolo_labels[:, 0], yolo_labels[:, 1:]) if content == '' and label_dir is not None: shutil.copy(label_path, yolo_save_path) else: with open(yolo_save_path, 'w', encoding='utf8') as f: f.write(content) linepoly_save_path = osp.join(save_dir, 'line-'+osp.basename(yolo_save_path)) np.savetxt(linepoly_save_path, textpolylines, fmt='%d') imwrite(osp.join(save_dir, save_name), rst, ext='.jpg') imwrite(osp.join(save_dir, 'mask-'+save_name), rst_msk) if show: for pts in textpolylines: rst = cv2.polylines(rst, [np.array(pts).reshape((-1, 1, 2))], color=(255, 0, 0), isClosed=True, thickness=2) cv2.imshow('rst', rst) cv2.waitKey(0) if __name__ == '__main__': eng_sampler_dict = { 'num_txtblk': 20, 'font': { 'font_dir': 'data/fonts', 'font_statics': 'data/font_statics_en.csv', 'num': 500, 'size': {'value': [0.02, 0.03, 0.15], 'prob': [1, 0.4, 0.15]}, 'stroke_width': {'value': [0, 0.1, 0.15], 'prob': [1, 0.2, 0.2]}, 'color': {'value': ['black', 'random'], 'prob': [1, 0.4]}, }, 'text': { 'lang': 'en', 'orientation': {'value': [1, 0], 'prob': [0, 1]}, 'rotation': {'value': [0, 30, 60], 'prob': [1, 0.3, 0.1]}, 'num_lines': {'value': [0.15], 'prob': [1]}, 'length': {'value': [1], 'prob': [1]}, 'min_num_lines': 1, 'min_length': 3, 'alignment': {'value': [ALIGN_LEFT, ALIGN_CENTER], 'prob': [0.3, 1]} } } ja_sampler_dict = { 'num_txtblk': 20, 'font': { 'font_dir': 'data/fonts', # font file directory 'font_statics': 'data/font_statics_jp.csv', # Just a font list to use, please create your own list and ignore the last two cols. 'num': 500, # first 500 of the fontlist will be used # params to 'size': {'value': [0.02, 0.03, 0.15], 'prob': [1, 0.4, 0.15]}, 'stroke_width': {'value': [0, 0.1, 0.15], 'prob': [1, 0.5, 0.2]}, 'color': {'value': ['black', 'white', 'random'], 'prob': [1, 1, 0.4]}, }, 'text': { 'lang': 'ja', # render japanese, 'en' for english 'orientation': {'value': [1, 0], # 1 is vertical text. 'prob': [1, 0.3]}, 'rotation': {'value': [0, 30, 60], 'prob': [1, 0.3, 0.1]}, 'num_lines': {'value': [0.15], 'prob': [1]}, 'length': {'value': [0.3], 'prob': [1]}, 'min_num_lines': 1, 'min_length': 3, 'alignment': {'value': [ALIGN_LEFT, ALIGN_CENTER], 'prob': [0.3, 1]} } } # random.seed(0) # cts = ComicTextSampler((845, 1280), sampler_dict, seed=0) # jp_cts = ComicTextSampler((845, 1280), ja_sampler_dict, seed=0) # img_dir = r'../../datasets/pixanimegirls' # save_dir = r'../../datasets/pixanimegirls/processed' # os.makedirs(save_dir, exist_ok=True) # img_dir = r'../../datasets/ComicErased' # label_dir = img_dir # save_dir = r'../../datasets/ComicErased/processed' # os.makedirs(save_dir, exist_ok=True) # render_comictext([jp_cts, cts], img_dir, save_dir=save_dir, save_prefix=None, render_num=4000, label_dir=None)