| import logging | |
| import os | |
| import glob | |
| from main import generate_paper_poster | |
| from tqdm import tqdm | |
| import concurrent.futures | |
def process_papers(input_dir, output_dir, url, model, max_workers=16):
    """Generate a poster for every paper directory found under ``input_dir``.

    Each immediate sub-directory of ``input_dir`` is treated as one paper whose
    PDF lives at ``<input_dir>/<id>/paper.pdf``. For each of them
    ``generate_paper_poster`` is asked to write
    ``<output_dir>/<id>/poster.json``. Papers are processed concurrently in a
    thread pool while a tqdm bar tracks how many have finished.

    A paper is skipped when both ``poster.json`` and ``poster.png`` already
    exist in its output directory, so an interrupted run can be resumed.
    NOTE(review): ``poster.png`` is only checked here, never passed on;
    presumably ``generate_paper_poster`` writes it next to the JSON - confirm.

    Args:
        input_dir: Directory whose sub-directories each represent one paper.
        output_dir: Root directory for the results; created if missing.
        url: Forwarded unchanged to ``generate_paper_poster`` as ``url``.
        model: Forwarded unchanged to ``generate_paper_poster`` as ``model``;
            also shown in the progress-bar description.
        max_workers: Number of worker threads. Defaults to 16, the value that
            used to be hard-coded.

    Returns:
        None. Results are written to disk; per-paper failures are reported on
        stdout and do not abort the remaining papers.

    Raises:
        OSError: If ``output_dir`` cannot be created or ``input_dir`` cannot
            be listed.
    """
    os.makedirs(output_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting makes the
    # submission order reproducible from run to run.
    pdf_files = [
        os.path.join(input_dir, entry, "paper.pdf")
        for entry in sorted(os.listdir(input_dir))
        if os.path.isdir(os.path.join(input_dir, entry))
    ]

    def process_single_pdf(pdf_file):
        """Generate the poster for one PDF, reporting failures instead of raising."""
        try:
            # The paper id is the name of the directory that holds paper.pdf.
            file_id = os.path.basename(os.path.dirname(pdf_file))
            poster_dir = os.path.join(output_dir, file_id)
            os.makedirs(poster_dir, exist_ok=True)
            output_file = os.path.join(poster_dir, "poster.json")
            output_png = os.path.join(poster_dir, "poster.png")

            if os.path.exists(output_file) and os.path.exists(output_png):
                # tqdm.write (rather than print) keeps messages coming from
                # worker threads from tearing the live progress bar.
                tqdm.write(f"跳过已存在的文件: {output_file}")
                return

            generate_paper_poster(
                url=url,
                pdf=pdf_file,
                model=model,
                output=output_file,
                text_prompt=" ",
                figures_prompt=" ",
            )
            tqdm.write(f"成功生成: {output_file}")
        except Exception as e:
            # Deliberate best-effort boundary: one failing paper must not stop
            # the rest of the batch, so the error is reported and swallowed.
            tqdm.write(f"处理文件 {pdf_file} 时出错: {e}")

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(process_single_pdf, pdf_file) for pdf_file in pdf_files
        ]
        # Drain the futures purely to advance the bar; the workers handle
        # their own errors, so there is no result to collect.
        for _ in tqdm(
            concurrent.futures.as_completed(futures),
            total=len(futures),
            desc=f"处理文件 {model}",
        ):
            pass
if __name__ == "__main__":
    # Run configuration. `models` is empty and `url` is blank as checked in,
    # so the loop below does nothing until they are filled in.
    models = []
    input_dir = "eval/data"
    url = ""

    for model in models:
        # Each model gets its own output directory; "/" in the model name is
        # replaced with "-" so the name stays a single path component.
        process_papers(
            input_dir,
            f"eval/temp-v2/{model.replace('/', '-')}",
            url,
            model,
        )