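"""Evaluation utilities for generated presentations.

Covers three kinds of measurements:
  * general statistics (pages, characters, figures),
  * feature metrics (GPT-2 perplexity of slide text, FID against source slide renders),
  * LLM-judge scores along the content, vision (style), and logic (coherence)
    dimensions, driven by the prompts/ppteval_*.txt templates.
"""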
import json
import os
import random
import shutil
import tempfile
from collections import defaultdict
from glob import glob
from typing import Literal

import func_argparse
import pytorch_fid.fid_score as fid
import torch
from jinja2 import Template
from pytorch_fid.fid_score import compute_statistics_of_path
from rich import print
from tqdm import tqdm
from transformers import GPT2LMHeadModel, GPT2TokenizerFast

import llms
from presentation import Picture, Presentation, SlidePage
from utils import Config, pexists, pjoin
# silence pytorch_fid's internal progress bar
fid.tqdm = lambda x: x
# (language_model, vision_model, label) triples selectable as the judge configuration
judges = [
    (llms.gpt4o, llms.gpt4o, "gpt4o"),
    (llms.qwen2_5, llms.intern_vl, "qwen+intern"),
    (llms.qwen2_5, llms.qwen_vl, "Qwen"),
    (llms.qwen_vl, llms.qwen_vl, "qwen_vl"),
    (llms.intern_vl, llms.intern_vl, "intern_vl"),
]
DEVICES = torch.cuda.device_count()
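
# Perplexity of a slide's text under GPT-2; returns an empty list for empty slides,
# otherwise a single-element list.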
def get_ppl(slide: SlidePage, model: GPT2LMHeadModel, tokenizer: GPT2TokenizerFast):
    ppl = []
    text = slide.to_text()
    if len(text) == 0:
        return ppl
    tokenized = tokenizer(text, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model(tokenized.input_ids, labels=tokenized.input_ids)
    loss = outputs.loss
    perplexity = torch.exp(loss)
    ppl.append(perplexity.item())
    return ppl
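
# Surface statistics per presentation: page count, character count, and number of
# Picture shapes.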
def eval_general(presentations: list[Presentation], evals: dict[str, dict]):
    for prs in presentations:
        if prs.source_file in evals["pages"]:
            continue
        evals["pages"][prs.source_file] = len(prs)
        evals["characters"][prs.source_file] = sum(
            [len(slide.to_text()) for slide in prs.slides]
        )
        evals["figures"][prs.source_file] = sum(
            [len(list(slide.shape_filter(Picture))) for slide in prs.slides]
        )
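
# Feature metrics: average GPT-2 perplexity of each deck's text and, unless fid_eval
# is disabled, FID between the rendered result slides and the images in source_slides/.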
def eval_feature(
    presentations: list[Presentation],
    evals: dict,
    setting: str,
    fid_eval: bool = True,
):
    device = f"cuda:{random.randint(0, DEVICES - 1)}"
    print("start scoring ppl")
    model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
    tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
    for prs in tqdm(presentations):
        try:
            if prs.source_file in evals["ppl"]:
                continue
            # hard-coded skip for one known-problematic deck
            if (
                prs.source_file
                == "data/culture/pptx/ChemBio-in-the-HUB-public/PPTCrew_wo_SchemaInduction/SSRN-id2933553_Management of Systems Engineering and Technical Assistance of DARPA Research Programs/final.pptx"
            ):
                continue
            ppl = []
            for slide in prs.slides:
                ppl.extend(get_ppl(slide, model, tokenizer))
            if len(ppl) == 0:
                continue
            evals["ppl"][prs.source_file] = sum(ppl) / len(ppl)
        except Exception as e:
            print(e, "\n", "happened in", prs.source_file)
    if not fid_eval:
        # eval_baseline passes fid_eval=False: its outputs live under data/*/pdf/,
        # outside the glob below, so FID against source_slides is not applicable.
        return
    model = fid.InceptionV3([fid.InceptionV3.BLOCK_INDEX_BY_DIM[64]]).to(device)
    for ppt_folder in tqdm(sorted(glob("data/*/pptx/*/"))):
        if ppt_folder in evals["fid"]:
            continue
        source_folder = pjoin(ppt_folder, "source_slides")
        m1, s1 = compute_statistics_of_path(source_folder, model, 128, 64, device)
        try:
            with tempfile.TemporaryDirectory(prefix="ppteval_fid_") as temp_dir:
                # copy every rendered slide image for this setting into one flat folder
                for result_folder in glob(
                    pjoin(ppt_folder, f"final_images/{setting}/*")
                ):
                    folder_base = os.path.basename(result_folder)
                    for image_file in os.listdir(result_folder):
                        image_path = os.path.join(result_folder, image_file)
                        temp_image_path = os.path.join(
                            temp_dir, folder_base + "_" + image_file
                        ).replace(" ", "_")
                        shutil.copyfile(image_path, temp_image_path)
                if len(os.listdir(temp_dir)) < 10:
                    continue
                m2, s2 = compute_statistics_of_path(temp_dir, model, 32, 64, device)
                evals["fid"][ppt_folder] = fid.calculate_frechet_distance(
                    m1, s1, m2, s2
                )
        except Exception as e:
            print(e, "\n", "happened in", ppt_folder, "on:", setting)
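
# Merge per-folder evals.json files (content / vision / logic) into the aggregate dict.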
def merge_evals(folders: list[str], evals: dict):
    for folder in folders:
        sub_eval = json.load(open(pjoin(folder, "evals.json")))
        for dimension in ["content", "vision", "logic"]:
            evals[dimension] |= sub_eval[dimension]
    return evals
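
# Score each rendered slide image in a folder on the content and vision (style)
# dimensions, caching the LLM descriptions next to the image as <slide>.json.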
def slide_score(slide_folder: str):
    eval_file = pjoin(slide_folder, "evals.json")
    evals = defaultdict(dict)
    if pexists(eval_file):
        evals |= json.load(open(eval_file))
    text_scorer = Template(open("prompts/ppteval_content.txt", "r").read())
    vision_scorer = Template(open("prompts/ppteval_style.txt", "r").read())
    style_descriptor = open("prompts/ppteval_describe_style.txt", "r").read()
    content_descriptor = open("prompts/ppteval_describe_content.txt", "r").read()
    for slide_image in glob(pjoin(slide_folder, "slide_*.jpg")):
        slide_descr = slide_image.replace(".jpg", ".json")
        if not os.path.exists(slide_descr):
            # describe the slide once with the vision model and cache the result
            style_descr = llms.vision_model(style_descriptor, slide_image)
            content_descr = llms.vision_model(content_descriptor, slide_image)
            json.dump(
                {"content": content_descr, "style": style_descr},
                open(slide_descr, "w"),
                indent=4,
            )
        else:
            descr = json.load(open(slide_descr))
            style_descr = descr["style"]
            content_descr = descr["content"]
        if slide_image not in evals["vision"]:
            evals["vision"][slide_image] = llms.language_model(
                vision_scorer.render(descr=style_descr), return_json=True
            )
        if slide_image not in evals["content"]:
            evals["content"][slide_image] = llms.language_model(
                text_scorer.render(descr=content_descr), return_json=True
            )
    # write the scores back so merge_evals can pick them up later
    json.dump(evals, open(eval_file, "w"), indent=4)
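
# Score a whole presentation on the logic (coherence) dimension from a cached
# extraction of its structure, and write the result into the folder's evals.json.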
def pres_score(prs_source: str):
    if "/pptx/" in prs_source:  # ours
        source, setting, pdf, _ = prs_source.rsplit("/", 3)
        slide_folder = os.path.join(source, "final_images", setting, pdf)
    else:  # baseline
        slide_folder = os.path.dirname(prs_source)
    eval_file = pjoin(slide_folder, "evals.json")
    evals = defaultdict(dict)
    if pexists(eval_file):
        try:
            evals |= json.load(open(eval_file))
        except Exception:
            pass
    evals.pop("logic", None)  # force logic to be re-scored (debug leftover?)
    config = Config("/tmp")
    presentation = Presentation.from_file(prs_source, config)
    slide_descr = pjoin(slide_folder, "extracted.json")
    if not pexists(slide_descr):
        ppt_extractor = Template(open("prompts/ppteval_extract.txt", "r").read())
        extracted = llms.language_model(
            ppt_extractor.render(presentation=presentation.to_text()),
            return_json=True,
        )
        json.dump(extracted, open(slide_descr, "w"), indent=4)
    else:
        extracted = json.load(open(slide_descr))
    if presentation.source_file not in evals["logic"]:
        logic_scorer = Template(open("prompts/ppteval_coherence.txt", "r").read())
        evals["logic"][presentation.source_file] = llms.language_model(
            logic_scorer.render(
                background_information=extracted.pop("metadata"),
                logical_structure=extracted,
            ),
            return_json=True,
        )
    json.dump(evals, open(eval_file, "w"), indent=4)
# PPTEval entry point for our generated decks: collects general / feature / LLM-judge
# scores for one setting and writes them to data/evals/<setting>_<judge>.json.
def eval_experiment(
    setting: str,
    general_eval: bool = False,
    feature_eval: bool = False,
    ppt_eval: bool = False,
):
    assert setting != "*"
    llms.language_model, llms.vision_model, judge_name = judges[0]
    print(f"evaluating {setting} under {judge_name}")
    print(
        "eval config:",
        f"general_eval: {general_eval}, feature_eval: {feature_eval}, ppt_eval: {ppt_eval}",
    )
    eval_file = f"data/evals/{setting}_{judge_name}.json"
    eval_stats = defaultdict(dict)
    if pexists(eval_file):
        eval_stats |= json.load(open(eval_file))
    config = Config("/tmp")
    prs_files = glob(f"data/*/pptx/*/{setting}/*/final.pptx")
    # eval_stats layout: dimension -> filename -> score
    print("start evaluation")
    if general_eval or feature_eval:
        presentations = [Presentation.from_file(i, config) for i in prs_files]
        if general_eval:
            eval_general(presentations, eval_stats)
        if feature_eval:
            eval_feature(presentations, eval_stats, setting)
    if ppt_eval:
        slide_image_folders = glob(f"data/*/pptx/*/final_images/{setting}/*")
        for presentation in prs_files:
            pres_score(presentation)
        eval_stats = merge_evals(slide_image_folders, eval_stats)
    json.dump(eval_stats, open(eval_file, "w"), indent=4)
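
# Same pipeline for baseline decks generated by other systems; their outputs live
# under data/*/pdf/..., so FID against source_slides is skipped.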
def eval_baseline(
    setting: str,
    model: Literal["Qwen2.5", "gpt-4o"],
    general_eval: bool = False,
    feature_eval: bool = False,
    ppt_eval: bool = False,
):
    evals = defaultdict(dict)
    prs_files = glob(f"data/*/pdf/*/{setting}/{model}/final.pptx")
    slide_folders = [os.path.dirname(i) for i in prs_files]
    if general_eval or feature_eval:
        config = Config("/tmp")
        presentations = [Presentation.from_file(i, config) for i in prs_files]
        if general_eval:
            eval_general(presentations, evals)
        if feature_eval:
            eval_feature(presentations, evals, setting, fid_eval=False)
    if ppt_eval:
        for slide_folder in slide_folders:
            slide_score(slide_folder)
        for presentation in prs_files:
            pres_score(presentation)
        merge_evals(slide_folders, evals)
    json.dump(evals, open(f"data/evals/{setting}_{model}.json", "w"), indent=4)
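
# Example invocation (file name and exact flag spelling depend on how func_argparse
# exposes these functions; treat this as a sketch):
#   python evals.py eval_experiment --setting <setting> --ppt_eval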
if __name__ == "__main__":
    func_argparse.main(
        eval_experiment,
        eval_baseline,
        pres_score,
        slide_score,
    )