yzweak committed on
Commit
ca1833b
·
1 Parent(s): ec5f146

Limit page processing to first 20 pages

Browse files
pragent/backend/figure_table_pipeline.py CHANGED
@@ -47,6 +47,10 @@ async def run_figure_extraction(pdf_path: str, base_work_dir: str, progress: Any
47
  tqdm.write(f"[!] 错误:加载或转换PDF时失败: {e}")
48
  return None
49
 
 
 
 
 
50
  tqdm.write(f"\n--- 步骤 2/3: 分析页面布局以裁剪图和表 ---")
51
  cropped_results_dir = os.path.join(base_work_dir, "cropped_results", pdf_stem)
52
  num_pages = len(page_image_paths)
 
47
  tqdm.write(f"[!] 错误:加载或转换PDF时失败: {e}")
48
  return None
49
 
50
+ if len(page_image_paths) > 20:
51
+ tqdm.write(f"[!] Warning: PDF has {len(page_image_paths)} pages. Processing only the first 20 pages to avoid timeout.")
52
+ page_image_paths = page_image_paths[:20]
53
+
54
  tqdm.write(f"\n--- 步骤 2/3: 分析页面布局以裁剪图和表 ---")
55
  cropped_results_dir = os.path.join(base_work_dir, "cropped_results", pdf_stem)
56
  num_pages = len(page_image_paths)