Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -19,7 +19,7 @@ def get_debug_info():
|
|
| 19 |
now = datetime.now().strftime("%H:%M:%S")
|
| 20 |
files = os.listdir(PAPERS_DIR) if os.path.exists(PAPERS_DIR) else "Directory missing"
|
| 21 |
|
| 22 |
-
# 检查输出目录
|
| 23 |
output_status = "Not generated"
|
| 24 |
if os.path.exists(OUTPUT_DIR):
|
| 25 |
out_files = os.listdir(OUTPUT_DIR)
|
|
@@ -75,29 +75,35 @@ def run_mineru_parsing():
|
|
| 75 |
return "❌ 未发现已上传的 PDF 文件,请先执行步骤 2。", get_debug_info()
|
| 76 |
|
| 77 |
try:
|
| 78 |
-
# 2. 设置环境变量
|
| 79 |
env = os.environ.copy()
|
| 80 |
env["MINERU_FORMULA_ENABLE"] = "false"
|
| 81 |
env["MINERU_TABLE_ENABLE"] = "false"
|
| 82 |
-
# 强制指定为 cpu 模式,防止 mineru 内部检测 torch/cuda 时报错
|
| 83 |
env["MINERU_DEVICE_MODE"] = "cpu"
|
|
|
|
|
|
|
| 84 |
|
| 85 |
# 3. 执行 Mineru 命令
|
| 86 |
command = ["mineru", "-p", "papers", "-o", "mineru_outputs"]
|
| 87 |
|
| 88 |
-
|
|
|
|
| 89 |
|
| 90 |
-
#
|
|
|
|
| 91 |
result = subprocess.run(command, env=env, capture_output=True, text=True)
|
| 92 |
|
| 93 |
if result.returncode == 0:
|
|
|
|
| 94 |
return "✅ PDF解析完成", get_debug_info()
|
| 95 |
else:
|
| 96 |
-
#
|
|
|
|
| 97 |
return f"❌ 解析过程中出错: {result.stderr}", get_debug_info()
|
| 98 |
|
| 99 |
except Exception as e:
|
| 100 |
return f"❌ 执行命令时发生异常: {str(e)}", get_debug_info()
|
|
|
|
| 101 |
# --- 构建单页 UI ---
|
| 102 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 103 |
gr.Markdown("# 📑 PDF 助手管理后台")
|
|
@@ -126,7 +132,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 126 |
# 第三部分:解析步骤
|
| 127 |
with gr.Group():
|
| 128 |
gr.Markdown("### 3. 解析 PDF")
|
| 129 |
-
gr.Markdown("<small>
|
| 130 |
parse_btn = gr.Button("🚀 开始解析 (Run Mineru)", variant="secondary")
|
| 131 |
parse_status = gr.Textbox(label="解析进度/结果", interactive=False)
|
| 132 |
|
|
|
|
| 19 |
now = datetime.now().strftime("%H:%M:%S")
|
| 20 |
files = os.listdir(PAPERS_DIR) if os.path.exists(PAPERS_DIR) else "Directory missing"
|
| 21 |
|
| 22 |
+
# 检查输出目录状态
|
| 23 |
output_status = "Not generated"
|
| 24 |
if os.path.exists(OUTPUT_DIR):
|
| 25 |
out_files = os.listdir(OUTPUT_DIR)
|
|
|
|
| 75 |
return "❌ 未发现已上传的 PDF 文件,请先执行步骤 2。", get_debug_info()
|
| 76 |
|
| 77 |
try:
|
| 78 |
+
# 2. 设置环境变量 (包含你发现的虚拟显存设置)
|
| 79 |
env = os.environ.copy()
|
| 80 |
env["MINERU_FORMULA_ENABLE"] = "false"
|
| 81 |
env["MINERU_TABLE_ENABLE"] = "false"
|
|
|
|
| 82 |
env["MINERU_DEVICE_MODE"] = "cpu"
|
| 83 |
+
# 核心修改:添加虚拟显存大小设置,规避 torch 检测错误
|
| 84 |
+
env["MINERU_VIRTUAL_VRAM_SIZE"] = "8"
|
| 85 |
|
| 86 |
# 3. 执行 Mineru 命令
|
| 87 |
command = ["mineru", "-p", "papers", "-o", "mineru_outputs"]
|
| 88 |
|
| 89 |
+
# 将启动信息打印到 stderr 以便在 HF Log 中查看
|
| 90 |
+
print(f">>> 开始解析任务: {' '.join(command)}", file=sys.stderr, flush=True)
|
| 91 |
|
| 92 |
+
# 执行子进程
|
| 93 |
+
# capture_output=True 将捕获输出,text=True 将结果作为字符串处理
|
| 94 |
result = subprocess.run(command, env=env, capture_output=True, text=True)
|
| 95 |
|
| 96 |
if result.returncode == 0:
|
| 97 |
+
print(">>> 解析任务成功结束", file=sys.stderr, flush=True)
|
| 98 |
return "✅ PDF解析完成", get_debug_info()
|
| 99 |
else:
|
| 100 |
+
# 打印详细错误到后台日志
|
| 101 |
+
print(f">>> 解析任务失败: {result.stderr}", file=sys.stderr, flush=True)
|
| 102 |
return f"❌ 解析过程中出错: {result.stderr}", get_debug_info()
|
| 103 |
|
| 104 |
except Exception as e:
|
| 105 |
return f"❌ 执行命令时发生异常: {str(e)}", get_debug_info()
|
| 106 |
+
|
| 107 |
# --- 构建单页 UI ---
|
| 108 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 109 |
gr.Markdown("# 📑 PDF 助手管理后台")
|
|
|
|
| 132 |
# 第三部分:解析步骤
|
| 133 |
with gr.Group():
|
| 134 |
gr.Markdown("### 3. 解析 PDF")
|
| 135 |
+
gr.Markdown("<small>已应用环境变量:MINERU_VIRTUAL_VRAM_SIZE=8</small>")
|
| 136 |
parse_btn = gr.Button("🚀 开始解析 (Run Mineru)", variant="secondary")
|
| 137 |
parse_status = gr.Textbox(label="解析进度/结果", interactive=False)
|
| 138 |
|