# NOTE: the original paste carried web-page banner artifacts here
# ("Spaces:" / "Runtime error") that are not part of the Python source.
| # ------------------------------------------ | |
| # TextDiffuser: Diffusion Models as Text Painters | |
| # Paper Link: https://arxiv.org/abs/2305.10855 | |
| # Code Link: https://github.com/microsoft/unilm/tree/master/textdiffuser | |
| # Copyright (c) Microsoft Corporation. | |
| # This file provides the inference script. | |
| # ------------------------------------------ | |
| import json | |
| import os | |
| import numpy as np | |
| import argparse | |
| from clipscore import cal_clipscore | |
| from fid_score import calculate_fid_given_paths | |
def eval_clipscore(root_eval, root_res, dataset, device="cuda:0", num_images_per_prompt=4):
    """Compute CLIPScore for every generated image of one evaluation dataset.

    Reads the prompt list from ``<root_eval>/<dataset>/<dataset>.txt`` and
    scores the images generated under ``<root_res>/<dataset>/images_<seed>/``
    for each of ``num_images_per_prompt`` seeds.

    Args:
        root_eval: root of the MARIOEval benchmark data.
        root_res: root of the generated images for one method.
        dataset: benchmark subset name (e.g. 'TMDBEval500').
        device: torch device string passed through to ``cal_clipscore``.
        num_images_per_prompt: number of seeds / images per prompt.

    Returns:
        Tuple of (mean CLIPScore over all seeds, list of per-seed score dicts
        as returned by ``cal_clipscore``).
    """
    with open(os.path.join(root_eval, dataset, dataset + '.txt'), 'r') as fr:
        text_list = [line.strip() for line in fr.readlines()]
    # Image extension depends only on the generator, not the seed — decide it
    # once here (the original recomputed this per seed and shadowed the
    # builtin `format`).
    ext = '.png' if 'stablediffusion' in root_res else '.jpg'
    clip_scores = []
    scores = []
    for seed in range(num_images_per_prompt):
        # Generated images are named "<prompt_index>_<seed><ext>".
        image_ids = [f'{idx}_{seed}{ext}' for idx in range(len(text_list))]
        image_list = [os.path.join(root_res, dataset, f'images_{seed}', image_id)
                      for image_id in image_ids]
        score = cal_clipscore(image_ids=image_ids, image_paths=image_list,
                              text_list=text_list, device=device)
        clip_score = np.mean([s['CLIPScore'] for s in score.values()])
        clip_scores.append(clip_score)
        scores.append(score)
    print("clip_score:", np.mean(clip_scores), clip_scores)
    return np.mean(clip_scores), scores
def MARIOEval_evaluate_results(root, datasets_with_images, datasets, methods, gpu,
                               eval_clipscore_flag=True, eval_fid_flag=True, num_images_per_prompt=4):
    """Evaluate ONE generation method (the one whose index equals `gpu`) on MARIOEval.

    For each dataset: computes CLIPScore over all seeds and, when ground-truth
    images exist, per-seed FID. Then aggregates a method-level CLIPScore and
    FID, writes the per-method results to <root>/generation/<method>/eval.json,
    and dumps the (single-method) summary to <root>/generation/eval.json.

    Args:
        root: evaluation data root; benchmark lives in <root>/MARIOEval.
        datasets_with_images: subsets that ship ground-truth images (FID-able).
        datasets: all subsets to evaluate.
        methods: candidate method names; only methods[gpu] is processed.
        gpu: CUDA device index AND the index of the method to evaluate.
        eval_clipscore_flag: compute CLIPScore when True.
        eval_fid_flag: compute FID when True.
        num_images_per_prompt: number of seeds generated per prompt.
    """
    root_eval = os.path.join(root, "MARIOEval")
    method_res = {}
    device = "cuda:" + str(gpu)
    for method_idx, method in enumerate(methods):
        # Each GPU/process evaluates exactly one method so several methods can
        # be scored simultaneously on different GPUs.
        if method_idx != gpu:  # running in different gpus simultaneously to save time
            continue
        print("\nmethod:", method)
        dataset_res = {}
        root_res = os.path.join(root, 'generation', method)
        for dataset in datasets:
            print("dataset:", dataset)
            dataset_res[dataset] = {}
            if eval_clipscore_flag:
                dataset_res[dataset]['clipscore'], dataset_res[dataset]['scores'] =\
                    eval_clipscore(root_eval, root_res, dataset, device, num_images_per_prompt)
            # FID needs ground-truth images, which only some subsets provide.
            if eval_fid_flag and dataset in datasets_with_images:
                gt_path = os.path.join(root_eval, dataset, 'images')
                fids = []
                for idx in range(num_images_per_prompt):
                    gen_path = os.path.join(root_res, dataset, 'images_' + str(idx))
                    fids.append(calculate_fid_given_paths(paths=[gt_path, gen_path]))
                print("fid:", np.mean(fids), fids)
                dataset_res[dataset]['fid'] = np.mean(fids)
        # Method-level CLIPScore: for each seed, average the raw per-image
        # scores pooled across all datasets, then average across seeds.
        if eval_clipscore_flag:
            method_clipscores = []
            for seed in range(num_images_per_prompt):
                clipscore_list = []
                for dataset in dataset_res.keys():
                    clipscore_list += [_['CLIPScore'] for _ in dataset_res[dataset]['scores'][seed].values()]
                method_clipscores.append(np.mean(clipscore_list))
            method_clipscore = np.mean(method_clipscores)
            # NOTE: aggregate keys ('clipscore', 'fid') live in the same dict
            # as the per-dataset entries; merge_eval_results tells them apart
            # by value type (dict vs float).
            dataset_res['clipscore'] = method_clipscore
        if eval_fid_flag:
            method_fids = []
            for idx in range(num_images_per_prompt):
                gt_paths = []
                gen_paths = []
                for dataset in dataset_res.keys():
                    # Skips the non-dataset 'clipscore' key added above, since
                    # it is never in datasets_with_images.
                    if dataset in datasets_with_images:
                        gt_paths.append(os.path.join(root_eval, dataset, 'images'))
                        gen_paths.append(os.path.join(root_res, dataset, 'images_' + str(idx)))
                if len(gt_paths):
                    # NOTE(review): here *lists* of directories are passed as the
                    # two elements of `paths`, while the per-dataset call above
                    # passes single directory strings — assumes the project's
                    # calculate_fid_given_paths accepts both forms; confirm
                    # against fid_score.py.
                    method_fids.append(calculate_fid_given_paths(paths=[gt_paths, gen_paths]))
            print("fid:", np.mean(method_fids), method_fids)
            method_fid = np.mean(method_fids)
            dataset_res['fid'] = method_fid
        method_res[method] = dataset_res
        # Persist per-method results; merge_eval_results later combines these
        # files across methods/processes.
        with open(os.path.join(root_res, 'eval.json'), 'w') as fw:
            json.dump(dataset_res, fw)
    print(method_res)
    with open(os.path.join(root, 'generation', 'eval.json'), 'w') as fw:
        json.dump(method_res, fw)
def merge_eval_results(root, methods):
    """Merge each method's eval.json into a single combined summary file.

    Reads <root>/generation/<method>/eval.json for every method, strips the
    verbose per-image 'scores' entries from each per-dataset dict, and writes
    the combined mapping {method: results} to <root>/generation/eval.json.

    Args:
        root: evaluation data root (same as MARIOEval_evaluate_results).
        methods: method names whose per-method eval.json files are merged.
    """
    method_res = {}
    for method in methods:  # index was unused, so plain iteration suffices
        root_res = os.path.join(root, 'generation', method)
        with open(os.path.join(root_res, 'eval.json'), 'r') as fr:
            dataset_res = json.load(fr)
        for v in dataset_res.values():
            # Per-dataset entries are dicts; aggregate entries ('clipscore',
            # 'fid') are plain numbers. Drop the huge raw score lists when
            # present — pop() avoids a KeyError for runs where CLIPScore
            # evaluation was skipped and no 'scores' key was written.
            if isinstance(v, dict):
                v.pop('scores', None)  # too long to keep in the summary
        method_res[method] = dataset_res
    with open(os.path.join(root, 'generation', 'eval.json'), 'w') as fw:
        json.dump(method_res, fw)
def parse_args():
    """Build the CLI parser for the evaluation script and parse sys.argv.

    Returns:
        argparse.Namespace with dataset, root, method, gpu, split and
        total_split attributes. Only --root is mandatory.
    """
    parser = argparse.ArgumentParser(description="Simple example of a training script.")
    # Benchmark subset to evaluate.
    parser.add_argument(
        "--dataset", type=str, default='TMDBEval500', required=False,
        choices=['TMDBEval500', 'OpenLibraryEval500', 'LAIONEval4000',
                 'ChineseDrawText', 'DrawBenchText', 'DrawTextCreative'],
    )
    # Root directory holding MARIOEval data and the generation outputs.
    parser.add_argument(
        "--root", type=str, default="/path/to/data/TextDiffuser/evaluation/", required=True,
    )
    # Generation method whose outputs are scored.
    parser.add_argument(
        "--method", type=str, default='controlnet', required=False,
        choices=['controlnet', 'deepfloyd', 'stablediffusion', 'textdiffuser'],
    )
    # GPU index (also selects which method this process evaluates).
    parser.add_argument("--gpu", type=int, default=0, required=False)
    # Shard selection for splitting work across processes.
    parser.add_argument("--split", type=int, default=0, required=False)
    parser.add_argument("--total_split", type=int, default=1, required=False)
    return parser.parse_args()
if __name__ == "__main__":
    args = parse_args()
    # Restrict this process to the requested GPU; "cuda:<gpu>" built later is
    # relative to this visibility mask.
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    # Only these three subsets ship ground-truth images, so FID is computed
    # for them alone; the other subsets get CLIPScore only.
    datasets_with_images = ['TMDBEval500', 'OpenLibraryEval500', 'LAIONEval4000']
    datasets = datasets_with_images + ['ChineseDrawText', 'DrawBenchText', 'DrawTextCreative']
    methods = ['textdiffuser', 'controlnet', 'deepfloyd', 'stablediffusion']
    MARIOEval_evaluate_results(args.root, datasets_with_images, datasets, methods, args.gpu,
                               eval_clipscore_flag=True, eval_fid_flag=True, num_images_per_prompt=4)
    # NOTE(review): merge_eval_results reads eval.json for *every* method, but
    # this process only evaluated methods[args.gpu] — presumably the other
    # per-method files were produced by parallel runs on other GPUs; the merge
    # fails with FileNotFoundError if any are missing. Confirm intended usage.
    merge_eval_results(args.root, methods)