snym04 committed on
Commit
1bb0075
·
verified ·
1 Parent(s): 08ff16d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -9
app.py CHANGED
@@ -11,6 +11,7 @@ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
11
  PAPERS_DIR = os.path.join(BASE_DIR, "papers")
12
  CONFIG_PATH = os.path.join(BASE_DIR, "config.yaml")
13
  OUTPUT_DIR = os.path.join(BASE_DIR, "mineru_outputs")
 
14
 
15
  os.makedirs(PAPERS_DIR, exist_ok=True)
16
 
@@ -19,7 +20,7 @@ def get_debug_info():
19
  now = datetime.now().strftime("%H:%M:%S")
20
  files = os.listdir(PAPERS_DIR) if os.path.exists(PAPERS_DIR) else "Directory missing"
21
 
22
- # 递归检查输出目录下的内容,看看到底生成了什么
23
  output_detail = "Not generated"
24
  if os.path.exists(OUTPUT_DIR):
25
  all_output_items = []
@@ -65,19 +66,16 @@ def run_mineru_parsing():
65
  env["MINERU_DEVICE_MODE"] = "cpu"
66
  env["MINERU_VIRTUAL_VRAM_SIZE"] = "8"
67
 
68
- # 尝试使用完整的 mineru 命令
69
  command = ["mineru", "-p", PAPERS_DIR, "-o", OUTPUT_DIR]
70
 
71
- # 使用 subprocess.run 捕获所有输出
72
  result = subprocess.run(
73
  command,
74
  env=env,
75
  capture_output=True,
76
  text=True,
77
- timeout=300 # 设置5分钟超时防止卡死
78
  )
79
 
80
- # 组合 stdout 和 stderr 作为完整的运行日志
81
  full_log = f"--- STDOUT ---\n{result.stdout}\n\n--- STDERR ---\n{result.stderr}"
82
 
83
  if result.returncode == 0:
@@ -91,6 +89,37 @@ def run_mineru_parsing():
91
  error_log = f"Exception occurred during execution:\n{str(e)}"
92
  return "❌ 运行异常", get_debug_info(), error_log
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  # --- UI ---
95
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
96
  gr.Markdown("# 📑 Mineru PDF 解析调试器")
@@ -110,15 +139,21 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
110
  parse_btn = gr.Button("🚀 Run Mineru (CPU Mode)", variant="primary")
111
  parse_status = gr.Textbox(label="运行状态")
112
 
 
 
 
 
 
 
113
  with gr.Column(scale=1):
114
  gr.Markdown("### 🔍 实时系统监控")
115
  debug_view = gr.Textbox(label="文件系统快照", value=get_debug_info(), lines=8, interactive=False)
116
 
117
- gr.Markdown("### 📜 Mineru 终端输出日志")
118
  cmd_logs = gr.Textbox(
119
  label="Command Output (Stdout/Stderr)",
120
- placeholder="等待解析任务开始...",
121
- lines=15,
122
  interactive=False
123
  )
124
  refresh_btn = gr.Button("🔄 刷新状态")
@@ -127,11 +162,16 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
127
  key_btn.click(save_api_key, inputs=key_input, outputs=[parse_status, debug_view])
128
  pdf_btn.click(save_pdf, inputs=pdf_input, outputs=[parse_status, debug_view])
129
 
130
- # 解析按钮会更新三个地方:状态、文件监控、详细日志
131
  parse_btn.click(
132
  fn=run_mineru_parsing,
133
  outputs=[parse_status, debug_view, cmd_logs]
134
  )
 
 
 
 
 
 
135
 
136
  refresh_btn.click(get_debug_info, outputs=debug_view)
137
 
 
11
  PAPERS_DIR = os.path.join(BASE_DIR, "papers")
12
  CONFIG_PATH = os.path.join(BASE_DIR, "config.yaml")
13
  OUTPUT_DIR = os.path.join(BASE_DIR, "mineru_outputs")
14
+ ZIP_OUTPUT_PATH = os.path.join(BASE_DIR, "mineru_results.zip") # 压缩包路径
15
 
16
  os.makedirs(PAPERS_DIR, exist_ok=True)
17
 
 
20
  now = datetime.now().strftime("%H:%M:%S")
21
  files = os.listdir(PAPERS_DIR) if os.path.exists(PAPERS_DIR) else "Directory missing"
22
 
23
+ # 递归检查输出目录下的内容
24
  output_detail = "Not generated"
25
  if os.path.exists(OUTPUT_DIR):
26
  all_output_items = []
 
66
  env["MINERU_DEVICE_MODE"] = "cpu"
67
  env["MINERU_VIRTUAL_VRAM_SIZE"] = "8"
68
 
 
69
  command = ["mineru", "-p", PAPERS_DIR, "-o", OUTPUT_DIR]
70
 
 
71
  result = subprocess.run(
72
  command,
73
  env=env,
74
  capture_output=True,
75
  text=True,
76
+ timeout=300
77
  )
78
 
 
79
  full_log = f"--- STDOUT ---\n{result.stdout}\n\n--- STDERR ---\n{result.stderr}"
80
 
81
  if result.returncode == 0:
 
89
  error_log = f"Exception occurred during execution:\n{str(e)}"
90
  return "❌ 运行异常", get_debug_info(), error_log
91
 
92
def run_final_generation():
    """Run ``main.py`` and zip the Mineru output folder for download.

    Returns:
        A 4-tuple ``(status, debug_info, log, zip_path)``:
        the status message, the snapshot from ``get_debug_info()``, the
        captured stdout/stderr (or an error description), and the path of
        the created zip archive. ``zip_path`` is ``None`` whenever the
        output folder is missing, ``main.py`` exits non-zero, or an
        exception is raised.
    """
    # Imported locally so this function does not depend on the file's
    # top-level import block already providing these stdlib modules.
    import shutil
    import sys

    if not os.path.exists(OUTPUT_DIR):
        return "❌ 请先执行第二步解析", get_debug_info(), "No output folder found.", None

    try:
        # 1. Run main.py with the same interpreter that runs this app.
        # NOTE(review): "main.py" is resolved against the process working
        # directory, not BASE_DIR — confirm the app is always launched from
        # the directory that contains main.py.
        command = [sys.executable, "main.py"]
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=600,  # seconds; subprocess.run raises TimeoutExpired past this
        )

        full_log = f"--- STDOUT ---\n{result.stdout}\n\n--- STDERR ---\n{result.stderr}"

        if result.returncode != 0:
            return f"❌ 生成失败 (Exit Code: {result.returncode})", get_debug_info(), full_log, None

        # 2. Zip the mineru_outputs folder.
        # shutil.make_archive appends ".zip" itself, so pass the path without
        # its extension. splitext strips only the trailing extension, whereas
        # str.replace(".zip", "") would also mangle any ".zip" appearing in a
        # parent directory name.
        zip_base_name, _ = os.path.splitext(ZIP_OUTPUT_PATH)
        archive_path = shutil.make_archive(zip_base_name, "zip", OUTPUT_DIR)

        # Return the path make_archive actually wrote so the download widget
        # always points at an existing file.
        return "✅ 最终生成并压缩完成", get_debug_info(), full_log, archive_path

    except Exception as e:
        # UI boundary: report any failure (including a timeout) in the log
        # panel instead of letting the exception escape the click handler.
        error_log = f"Exception occurred during final generation:\n{str(e)}"
        return "❌ 最终生成异常", get_debug_info(), error_log, None
122
+
123
  # --- UI ---
124
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
125
  gr.Markdown("# 📑 Mineru PDF 解析调试器")
 
139
  parse_btn = gr.Button("🚀 Run Mineru (CPU Mode)", variant="primary")
140
  parse_status = gr.Textbox(label="运行状态")
141
 
142
+ with gr.Group():
143
+ gr.Markdown("### 3. 最终生成")
144
+ gen_btn = gr.Button("🔨 执行 main.py 并打包", variant="primary")
145
+ gen_status = gr.Textbox(label="生成状态")
146
+ download_file = gr.File(label="下载压缩后的结果", interactive=False)
147
+
148
  with gr.Column(scale=1):
149
  gr.Markdown("### 🔍 实时系统监控")
150
  debug_view = gr.Textbox(label="文件系统快照", value=get_debug_info(), lines=8, interactive=False)
151
 
152
+ gr.Markdown("### 📜 终端输出日志")
153
  cmd_logs = gr.Textbox(
154
  label="Command Output (Stdout/Stderr)",
155
+ placeholder="等待任务开始...",
156
+ lines=20,
157
  interactive=False
158
  )
159
  refresh_btn = gr.Button("🔄 刷新状态")
 
162
  key_btn.click(save_api_key, inputs=key_input, outputs=[parse_status, debug_view])
163
  pdf_btn.click(save_pdf, inputs=pdf_input, outputs=[parse_status, debug_view])
164
 
 
165
  parse_btn.click(
166
  fn=run_mineru_parsing,
167
  outputs=[parse_status, debug_view, cmd_logs]
168
  )
169
+
170
+ # 最终生成逻辑绑定
171
+ gen_btn.click(
172
+ fn=run_final_generation,
173
+ outputs=[gen_status, debug_view, cmd_logs, download_file]
174
+ )
175
 
176
  refresh_btn.click(get_debug_info, outputs=debug_view)
177