Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -71,10 +71,12 @@ def save_api_settings(api_key, api_base_url=None):
|
|
| 71 |
|
| 72 |
|
| 73 |
def run_mineru_parsing_and_dag_gen():
|
| 74 |
-
"""执行 PDF 解析并捕获完整日志,随后执行DAG生成流程"""
|
| 75 |
if not os.path.exists(PAPERS_DIR) or not any(f.endswith('.pdf') for f in os.listdir(PAPERS_DIR)):
|
| 76 |
-
|
|
|
|
| 77 |
|
|
|
|
| 78 |
try:
|
| 79 |
# ================= 第一步:执行 Mineru 解析 =================
|
| 80 |
env = os.environ.copy()
|
|
@@ -85,49 +87,64 @@ def run_mineru_parsing_and_dag_gen():
|
|
| 85 |
|
| 86 |
command_mineru = ["mineru", "-p", PAPERS_DIR, "-o", OUTPUT_DIR]
|
| 87 |
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
command_mineru,
|
| 90 |
env=env,
|
| 91 |
-
|
|
|
|
| 92 |
text=True,
|
| 93 |
-
|
| 94 |
)
|
| 95 |
|
| 96 |
-
#
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
# ================= 第二步:执行 DAG 生成 =================
|
| 105 |
-
# 使用 sys.executable 确保使用当前虚拟环境的 Python 解释器
|
| 106 |
command_dag = [sys.executable, "gen_dag.py"]
|
| 107 |
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
| 109 |
command_dag,
|
| 110 |
-
|
|
|
|
| 111 |
text=True,
|
| 112 |
-
|
| 113 |
)
|
| 114 |
|
| 115 |
-
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
-
if
|
| 119 |
status = "✅ PDF解析与DAG生成全部完成"
|
| 120 |
else:
|
| 121 |
-
status = f"❌ DAG生成失败 (Exit Code: {
|
| 122 |
|
| 123 |
-
|
| 124 |
|
| 125 |
-
except subprocess.TimeoutExpired as e:
|
| 126 |
-
error_log = f"Execution timed out:\n{str(e)}"
|
| 127 |
-
return "❌ 运行超时", get_debug_info(), error_log
|
| 128 |
except Exception as e:
|
| 129 |
-
error_log = f"Exception occurred
|
| 130 |
-
|
| 131 |
|
| 132 |
def run_final_generation(task_type="all"):
|
| 133 |
"""
|
|
|
|
| 71 |
|
| 72 |
|
| 73 |
def _spawn_streaming_process(command, env=None):
    """Start *command* with stdout and stderr merged into one text pipe.

    Args:
        command: Argument list passed to ``subprocess.Popen``.
        env: Optional environment mapping; ``None`` inherits the parent's.

    Returns:
        The started ``subprocess.Popen`` object, whose ``stdout`` is a
        line-buffered text stream that also carries the child's stderr.
    """
    return subprocess.Popen(
        command,
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # merge stderr into the same stream
        text=True,
        bufsize=1,  # line-buffered on the parent side
    )


def _reap_process(process):
    """Close the output pipe of *process* and kill it if it is still running.

    Safe to call with ``None`` or with a process that has already exited.
    """
    if process is None:
        return
    if process.stdout is not None:
        process.stdout.close()
    if process.poll() is None:
        process.kill()
        process.wait()


def run_mineru_parsing_and_dag_gen():
    """Run Mineru PDF parsing, then DAG generation, streaming the logs.

    This is a generator. Every item it yields is a 3-tuple
    ``(status, debug_info, log)``:

    * ``status`` -- user-facing status string for the current step,
    * ``debug_info`` -- the current result of ``get_debug_info()``,
    * ``log`` -- all child-process output accumulated so far.

    One item is yielded per line of child output, so a consumer that
    renders each item shows the log as it is produced. The generator stops
    after the first failing step (non-zero exit code) or after both steps
    have finished. Any ``Exception`` raised while running the steps is
    reported as a final yielded item instead of propagating.

    If the consumer closes the generator early, the child process that is
    currently running is killed so it does not outlive the request.
    """
    # isdir (rather than exists) so that a regular file at PAPERS_DIR is
    # reported as "no PDFs" instead of making os.listdir raise.
    if not os.path.isdir(PAPERS_DIR) or not any(
        f.endswith('.pdf') for f in os.listdir(PAPERS_DIR)
    ):
        yield "❌ 未发现 PDF 文件", get_debug_info(), "No execution logs."
        return

    full_log = ""
    process = None
    try:
        # ================= Step 1: run Mineru parsing =================
        env = os.environ.copy()
        # NOTE(review): the diff view this block was recovered from elides
        # new-file lines 83-86 at this point (between the env copy and the
        # command definition). Restore any statements from the full file
        # here -- TODO confirm.

        # Ask Python children to flush stdout immediately; otherwise output
        # written to a pipe is block-buffered and arrives in bursts.
        # Presumably `mineru` is a Python entry point -- harmless if not.
        env.setdefault("PYTHONUNBUFFERED", "1")

        command_mineru = ["mineru", "-p", PAPERS_DIR, "-o", OUTPUT_DIR]

        full_log += "--- Mineru 执行中 ---\n"
        yield "⏳ 正在执行 Mineru 解析...", get_debug_info(), full_log

        process = _spawn_streaming_process(command_mineru, env=env)

        # Forward every output line to the UI as soon as it is read.
        for line in iter(process.stdout.readline, ''):
            full_log += line
            yield "⏳ 正在执行 Mineru 解析...", get_debug_info(), full_log

        process.stdout.close()
        returncode_mineru = process.wait()

        # Stop here if parsing failed; there is nothing to build a DAG from.
        if returncode_mineru != 0:
            status = f"❌ Mineru 解析失败 (Exit Code: {returncode_mineru})"
            yield status, get_debug_info(), full_log
            return

        # ================= Step 2: run DAG generation =================
        # sys.executable keeps the child on the current interpreter; "-u"
        # disables stdout buffering in the child so lines stream live.
        command_dag = [sys.executable, "-u", "gen_dag.py"]

        full_log += "\n--- DAG Gen 执行中 ---\n"
        yield "⏳ Mineru 解析完成,正在执行 DAG 生成...", get_debug_info(), full_log

        process = _spawn_streaming_process(command_dag)

        for line in iter(process.stdout.readline, ''):
            full_log += line
            yield "⏳ 正在执行 DAG 生成...", get_debug_info(), full_log

        process.stdout.close()
        returncode_dag = process.wait()

        if returncode_dag == 0:
            status = "✅ PDF解析与DAG生成全部完成"
        else:
            status = f"❌ DAG生成失败 (Exit Code: {returncode_dag})"

        yield status, get_debug_info(), full_log

    except Exception as e:
        # Keep whatever output was captured before the failure.
        error_log = full_log + f"\n[全局异常] Exception occurred:\n{str(e)}"
        yield "❌ 运行异常", get_debug_info(), error_log
    finally:
        # Runs on normal completion, on errors, and when the consumer
        # closes the generator mid-stream; never leaves a child running.
        _reap_process(process)
|
| 148 |
|
| 149 |
def run_final_generation(task_type="all"):
|
| 150 |
"""
|