Spaces: Running
Limit page processing to first 20 pages
Browse files
pragent/backend/figure_table_pipeline.py
CHANGED
|
@@ -47,6 +47,10 @@ async def run_figure_extraction(pdf_path: str, base_work_dir: str, progress: Any
|
|
| 47 |
tqdm.write(f"[!] 错误:加载或转换PDF时失败: {e}")
|
| 48 |
return None
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
tqdm.write(f"\n--- 步骤 2/3: 分析页面布局以裁剪图和表 ---")
|
| 51 |
cropped_results_dir = os.path.join(base_work_dir, "cropped_results", pdf_stem)
|
| 52 |
num_pages = len(page_image_paths)
|
|
|
|
| 47 |
tqdm.write(f"[!] 错误:加载或转换PDF时失败: {e}")
|
| 48 |
return None
|
| 49 |
|
| 50 |
+ if len(page_image_paths) > 20:
|
| 51 |
+     tqdm.write(f"[!] Warning: PDF has {len(page_image_paths)} pages. Processing only the first 20 pages to avoid timeout.")
|
| 52 |
+     page_image_paths = page_image_paths[:20]
|
| 53 |
+
|
| 54 |
tqdm.write(f"\n--- 步骤 2/3: 分析页面布局以裁剪图和表 ---")
|
| 55 |
cropped_results_dir = os.path.join(base_work_dir, "cropped_results", pdf_stem)
|
| 56 |
num_pages = len(page_image_paths)
|