"""End-to-end pipeline script: paper -> Beamer slides -> poster LaTeX project.

Run as a script, it performs these steps in order, each one best-effort
(a failing step is reported on stdout and the next step still runs):

* Step 0   (optional, ``--arxiv_url``): download the paper PDF and LaTeX
  source from arXiv into ``<ROOT_DIR>/input``.
* Step 1:  generate Beamer slides with ``latex_code_gen`` and rasterize the
  result to PNG files under ``<result_dir>/slide_imgs``.
* Step 1.5: run ``PosterAgent.new_pipeline`` inside ``Paper2Poster`` and copy
  the JSON files it produces into ``posterbuilder``.
* Step 2:  call ``build_poster()``.
* Step 3:  copy ``posterbuilder/latex_proj`` to ``output/poster_latex_proj``.
"""
import time
import shutil
import os
import sys
import argparse
import subprocess
from os import path
from pdf2image import convert_from_path
from pathlib import Path

# Printed before the project imports below so the user gets feedback while
# those modules load.
print("Initializing...")

from Paper2Video.src.slide_code_gen import latex_code_gen
from Paper2Video.src.wei_utils import get_agent_config
from posterbuilder import build_poster as build_poster
from posterbuilder.build_poster import IMAGES_DIR_NAME

ROOT_DIR = Path(__file__).resolve().parent
P2V_ASSETS = ROOT_DIR / "Paper2Video" / "assets" / "demo" / "latex_proj"
P2P_ROOT = ROOT_DIR / "Paper2Poster"
PB_ROOT = ROOT_DIR / "posterbuilder"
sys.path.append(str(P2P_ROOT))

# Seconds to wait on arXiv before giving up; without a timeout
# ``requests.get`` can block indefinitely on a stalled connection.
_HTTP_TIMEOUT_S = 60


def copytree_overwrite(src: Path, dst: Path):
    """Copy directory tree *src* to *dst*, deleting *dst* first if it exists."""
    if dst.exists():
        shutil.rmtree(dst)
    shutil.copytree(src, dst)


def copy_folder(src_dir, dst_dir):
    """Replace *dst_dir* with a copy of *src_dir* and report it on stdout.

    Args:
        src_dir: Source directory (str or Path).
        dst_dir: Destination directory (str or Path); removed first if present.

    Raises:
        FileNotFoundError: If *src_dir* does not exist.
    """
    src_dir = Path(src_dir)
    dst_dir = Path(dst_dir)
    if not src_dir.exists():
        raise FileNotFoundError(f"no such dir: {src_dir}")
    copytree_overwrite(src_dir, dst_dir)
    print(f"✅ Copied folder {src_dir} → {dst_dir}")


def safe_copy(src: Path, dst: Path):
    """Copy file *src* to *dst* (with metadata), creating *dst*'s parent dirs."""
    dst.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(src, dst)


def str2list(s):
    """Convert a comma-separated string such as ``"1,2,3"`` to a list of ints.

    Raises:
        ValueError: If any comma-separated piece is not a valid integer
            (this includes the empty string).
    """
    return [int(x) for x in s.split(',')]


def run_paper2poster_content_build():
    """Run the Paper2Poster content pipeline and stage its JSON for posterbuilder.

    Copies ``<ROOT_DIR>/input/paper.pdf`` into ``Paper2Poster/input/paper``,
    runs ``python -m PosterAgent.new_pipeline`` with ``Paper2Poster`` as the
    working directory, then copies the three JSON files it is expected to have
    written into both ``posterbuilder/contents`` and ``posterbuilder`` itself
    under fixed names.

    Raises:
        subprocess.CalledProcessError: If the pipeline exits non-zero.
        FileNotFoundError: If the input PDF or an expected JSON file is missing.
    """
    print("🧩 Step 1.5: Preparing Paper2Poster inputs & generating poster contents ...")
    src_pdf = ROOT_DIR / "input" / "paper.pdf"
    dst_pdf = P2P_ROOT / "input" / "paper" / "paper.pdf"
    # safe_copy creates the destination's parent directories itself.
    safe_copy(src_pdf, dst_pdf)
    print(f" 📄 Copied paper: {src_pdf.relative_to(ROOT_DIR)} → {dst_pdf.relative_to(ROOT_DIR)}")

    cmd = [
        sys.executable, "-m", "PosterAgent.new_pipeline",
        f'--poster_path={dst_pdf.relative_to(P2P_ROOT)}',
        '--model_name_t=4o',
        '--model_name_v=4o',
        '--poster_width_inches=48',
        '--poster_height_inches=36',
    ]
    print(" ▶ Running: python -m PosterAgent.new_pipeline ...")
    subprocess.run(cmd, cwd=str(P2P_ROOT), check=True)
    print(" ✅ PosterAgent.new_pipeline finished.")

    # Output file names are derived from the part of IMAGES_DIR_NAME that
    # precedes "_images_and_tables".
    tag_prefix = IMAGES_DIR_NAME.split("_images_and_tables")[0]
    src_raw_content = P2P_ROOT / "contents" / f"{tag_prefix}_paper_raw_content.json"
    src_tree_split = P2P_ROOT / "tree_splits" / f"{tag_prefix}_paper_tree_split_0.json"
    src_images_json = P2P_ROOT / IMAGES_DIR_NAME / "paper_images.json"

    # Each source is copied twice: into posterbuilder/contents and into the
    # posterbuilder root.
    renamed = (
        (src_raw_content, "poster_content.json"),
        (src_tree_split, "arrangement.json"),
        (src_images_json, "figure_caption.json"),
    )
    for dst_dir in (PB_ROOT / "contents", PB_ROOT):
        for src_file, dst_name in renamed:
            safe_copy(src_file, dst_dir / dst_name)
    print(" 📦 JSON copied & renamed.")
    print(" ✅ Step 1.5 done.\n")


def _str2bool(value):
    """Parse a command-line boolean.

    ``argparse``'s ``type=bool`` treats every non-empty string (including
    ``"False"``) as True; this parser accepts the usual spellings instead.
    The empty string maps to False, matching what ``bool("")`` gave before.

    Raises:
        argparse.ArgumentTypeError: If *value* is not a recognised spelling.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in {"1", "true", "t", "yes", "y", "on"}:
        return True
    if text in {"", "0", "false", "f", "no", "n", "off"}:
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def _parse_args():
    """Build the command-line parser and return the parsed arguments."""
    parser = argparse.ArgumentParser(description='Paper2Video Generation Pipeline')
    parser.add_argument('--result_dir', type=str, default='./result/zeyu')
    parser.add_argument('--model_name_t', type=str, default='gpt-4.1')
    parser.add_argument('--model_name_v', type=str, default='gpt-4.1')
    parser.add_argument('--paper_latex_root', type=str, default=str(P2V_ASSETS))
    parser.add_argument('--ref_text', type=str, default=None)
    parser.add_argument('--if_tree_search', type=_str2bool, default=True)
    parser.add_argument('--beamer_templete_prompt', type=str, default=None)
    parser.add_argument('--stage', type=str, default='["0"]')
    parser.add_argument('--arxiv_url', type=str, default=None)
    parser.add_argument('--openai_key', type=str, required=True, help='Your OpenAI API key')
    parser.add_argument('--gemini_key', type=str, required=True, help='Your Gemini API key')
    return parser.parse_args()


def _reset_output_dir():
    """Delete ``<ROOT_DIR>/output`` if present and recreate its three subfolders."""
    output_dir = ROOT_DIR / "output"
    if output_dir.exists():
        print(f" 🧹 Clearing old output directory: {output_dir.relative_to(ROOT_DIR)}")
        shutil.rmtree(output_dir)
    for sub in ("latex_proj", "poster_latex_proj", "slide_imgs"):
        (output_dir / sub).mkdir(parents=True, exist_ok=True)
    print(" ✅ Created subfolders: latex_proj / poster_latex_proj / slide_imgs")


def _download_from_arxiv(arxiv_url):
    """Step 0: fetch the PDF and LaTeX source of *arxiv_url* into ``input/``.

    The existing ``<ROOT_DIR>/input`` directory is wiped first. A failed PDF
    download raises; a failed or unreadable LaTeX source only prints a warning.

    Raises:
        RuntimeError: If the PDF request does not return HTTP 200.
    """
    # Imported lazily so the dependency is only needed when --arxiv_url is used.
    import requests
    import tarfile
    from io import BytesIO

    print(f"🧩 Step 0: Downloading from arXiv: {arxiv_url}")
    # Take the last path segment as the id; tolerate a trailing slash and a
    # ".pdf" suffix (as in https://arxiv.org/pdf/<id>.pdf).
    paper_id = arxiv_url.strip().rstrip('/').split('/')[-1]
    if paper_id.endswith(".pdf"):
        paper_id = paper_id[:-len(".pdf")]

    input_dir = ROOT_DIR / "input"
    latex_proj_dir = input_dir / "latex_proj"
    if input_dir.exists():
        print(f" 🧹 Clearing old input directory: {input_dir.relative_to(ROOT_DIR)}")
        shutil.rmtree(input_dir)
    input_dir.mkdir(parents=True, exist_ok=True)
    latex_proj_dir.mkdir(parents=True, exist_ok=True)

    pdf_url = f"https://arxiv.org/pdf/{paper_id}.pdf"
    pdf_path = input_dir / "paper.pdf"
    print(f" 📄 Downloading PDF from {pdf_url} ...")
    r = requests.get(pdf_url, timeout=_HTTP_TIMEOUT_S)
    if r.status_code != 200:
        raise RuntimeError(f"❌ Failed to download PDF (status {r.status_code})")
    pdf_path.write_bytes(r.content)
    print(f" ✅ Saved PDF → {pdf_path.relative_to(ROOT_DIR)}")

    src_url = f"https://arxiv.org/e-print/{paper_id}"
    print(f" 📦 Downloading LaTeX source from {src_url} ...")
    r = requests.get(src_url, timeout=_HTTP_TIMEOUT_S)
    if r.status_code != 200:
        print(f" ⚠️ Failed to download LaTeX source.")
        return
    try:
        with tarfile.open(fileobj=BytesIO(r.content), mode="r:gz") as tar:
            # The archive comes from the network: use the "data" extraction
            # filter where available so members cannot escape latex_proj_dir.
            if hasattr(tarfile, "data_filter"):
                tar.extractall(path=latex_proj_dir, filter="data")
            else:
                # SECURITY: this Python has no extraction filters; members
                # are extracted unchecked, as before.
                tar.extractall(path=latex_proj_dir)
        print(f" ✅ Extracted LaTeX source → {latex_proj_dir.relative_to(ROOT_DIR)}")
    except tarfile.ReadError:
        print(f" ⚠️ LaTeX source invalid, skipping.")


def _generate_slides(args):
    """Step 1: generate Beamer slides and save each page as a PNG.

    Images are written to ``<args.result_dir>/slide_imgs/<n>.png``, numbered
    from 1.

    Raises:
        FileNotFoundError: If the path to rasterize does not exist.
    """
    print("🧩 Step 1: Generating Slides ...")
    slide_latex_path = path.join(args.paper_latex_root, "slides.tex")
    slide_image_dir = path.join(args.result_dir, 'slide_imgs')
    os.makedirs(slide_image_dir, exist_ok=True)
    # Relative path: resolved against the current working directory.
    prompt_path = "./Paper2Video/src/prompts/slide_beamer_prompt.txt"

    if args.if_tree_search:
        _usage, beamer_path = latex_code_gen(
            prompt_path=prompt_path,
            tex_dir=args.paper_latex_root,
            beamer_save_path=slide_latex_path,
            model_config_ll=get_agent_config(args.model_name_t),
            model_config_vl=get_agent_config(args.model_name_v),
            beamer_temp_name=args.beamer_templete_prompt,
        )
    else:
        paper_latex_path = path.join(args.paper_latex_root, "main.tex")
        _usage = latex_code_gen(
            prompt_path=prompt_path,
            tex_dir=args.paper_latex_root,
            tex_path=paper_latex_path,
            beamer_save_path=slide_latex_path,
            model_config=get_agent_config(args.model_name_t),
        )
        # NOTE(review): this is the .tex path, yet it is handed to
        # convert_from_path below, which reads a PDF — confirm intent.
        beamer_path = slide_latex_path

    if not os.path.exists(beamer_path):
        raise FileNotFoundError(f"❌ Beamer PDF not found: {beamer_path}")
    slide_imgs = convert_from_path(beamer_path, dpi=400)
    for page_no, img in enumerate(slide_imgs, start=1):
        img.save(path.join(slide_image_dir, f"{page_no}.png"))
    print("✅ Step 1 done.")


def _export_poster_latex():
    """Step 3: copy ``posterbuilder/latex_proj`` to ``output/poster_latex_proj``."""
    src_lp = PB_ROOT / "latex_proj"
    dst_lp = ROOT_DIR / "output" / "poster_latex_proj"
    copytree_overwrite(src_lp, dst_lp)
    print(f"📦 Exported LaTeX project → {dst_lp.relative_to(ROOT_DIR)}")


def main():
    """Parse arguments and run every pipeline step in order.

    Each step is wrapped in its own ``try``/``except`` so that one failing
    step is reported and the remaining steps still run.
    NOTE(review): the final "completed" message and a zero exit status are
    produced even when steps failed — confirm whether callers rely on that.
    """
    args = _parse_args()
    print("start")

    # Expose the keys given on the command line through environment variables.
    os.environ["OPENAI_API_KEY"] = args.openai_key
    os.environ["GEMINI_API_KEY"] = args.gemini_key

    # Start from an empty output directory.
    _reset_output_dir()

    # ================
    # Step 0: Download from arXiv
    # ================
    try:
        if args.arxiv_url:
            _download_from_arxiv(args.arxiv_url)
    except Exception as e:
        print(f"❌ Step 0 failed: {e}")

    # =========================
    # Step 1: Slide Generation
    # =========================
    try:
        _generate_slides(args)
    except Exception as e:
        print(f"❌ Step 1 failed: {e}")

    # =========================
    # Step 1.5: Paper2Poster content generation
    # =========================
    try:
        run_paper2poster_content_build()
    except Exception as e:
        print(f"❌ Step 1.5 failed: {e}")

    # =========================
    # Step 2: Build Poster
    # =========================
    try:
        print("🧩 Step 2: Building poster ...")
        build_poster()
        print("✅ Step 2 done.")
    except Exception as e:
        print(f"❌ Step 2 failed: {e}")

    # =========================
    # Step 3: Export latex_proj
    # =========================
    try:
        _export_poster_latex()
    except Exception as e:
        print(f"❌ Step 3 failed: {e}")

    print("✅ Pipeline completed.")


if __name__ == '__main__':
    main()