Laramie2 committed on
Commit
d9bf1c6
·
verified ·
1 Parent(s): 69437ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +275 -314
app.py CHANGED
@@ -1,115 +1,75 @@
1
  import gradio as gr
2
  import os
 
3
  import shutil
4
  import subprocess
5
  import sys
6
  import queue
7
  import threading
8
- import uuid
9
  from datetime import datetime
10
  from concurrent.futures import ThreadPoolExecutor, as_completed
11
  from typing import Iterable
12
  from gradio.themes import Soft
13
  from gradio.themes.utils import colors, fonts, sizes
14
 
 
 
 
 
 
 
 
15
 
16
- import threading
17
- import subprocess
18
- # ==========================================
19
- # --- 🌐 异步安装 Playwright 浏览器 ---
20
- # ==========================================
21
- def setup_playwright():
22
- """在后台静默安装 Playwright,防止阻塞 Gradio 启动导致 HF 500 超时"""
23
- try:
24
- import playwright
25
- print("⏳ [System] Downloading Playwright Chromium in background...")
26
- # 增加 --with-deps 尝试安装系统级依赖 (虽然在非 root 容器可能失效,但有备无患)
27
- subprocess.run(["playwright", "install", "chromium"], check=True)
28
- print("✅ [System] Playwright browsers ready.")
29
- except Exception as e:
30
- print(f"❌ [System] Playwright setup failed: {e}")
31
-
32
- # 这一步非常关键:启动一个后台守护线程去下载,主进程直接往下走!
33
- threading.Thread(target=setup_playwright, daemon=True).start()
34
-
35
- # ==========================================
36
- # --- 📁 全局目录配置 (修改为 Session 基础目录) ---
37
- # ==========================================
38
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
39
- SESSIONS_BASE_DIR = os.path.join(BASE_DIR, "user_sessions")
40
- os.makedirs(SESSIONS_BASE_DIR, exist_ok=True)
41
-
42
- def get_user_dirs(session_id):
43
- """根据 Session ID 生成用户专属的隔离目录"""
44
- user_base = os.path.join(SESSIONS_BASE_DIR, session_id)
45
- papers_dir = os.path.join(user_base, "papers")
46
- output_dir = os.path.join(user_base, "mineru_outputs")
47
- zip_path = os.path.join(user_base, "mineru_results.zip")
48
-
49
- os.makedirs(papers_dir, exist_ok=True)
50
- os.makedirs(output_dir, exist_ok=True)
51
- return papers_dir, output_dir, zip_path
52
-
53
-
54
- import time
55
-
56
- # ==========================================
57
- # --- 🧹 垃圾回收 (后台清理过期 Session) ---
58
- # ==========================================
59
- # 设定 Session 过期时间(例如:2 小时 = 7200 秒)
60
- SESSION_MAX_AGE_SECONDS = 2 * 60 * 60
61
- # 设定清理器执行间隔(例如:每 30 分钟扫描一次 = 1800 秒)
62
- CLEANUP_INTERVAL_SECONDS = 30 * 60
63
-
64
- def cleanup_expired_sessions():
65
- """后台运行的垃圾回收任务"""
66
- while True:
67
- try:
68
- if os.path.exists(SESSIONS_BASE_DIR):
69
- current_time = time.time()
70
- for session_folder in os.listdir(SESSIONS_BASE_DIR):
71
- folder_path = os.path.join(SESSIONS_BASE_DIR, session_folder)
72
-
73
- # 确保只处理目录
74
- if os.path.isdir(folder_path):
75
- # 获取文件夹的最后修改时间
76
- folder_mtime = os.path.getmtime(folder_path)
77
-
78
- # 判断是否超过了最大存活时间
79
- if (current_time - folder_mtime) > SESSION_MAX_AGE_SECONDS:
80
- try:
81
- shutil.rmtree(folder_path)
82
- print(f"🧹 [Garbage Collector] Deleted expired session: {session_folder}")
83
- except Exception as e:
84
- print(f"⚠️ [Garbage Collector] Failed to delete {session_folder}: {e}")
85
- except Exception as e:
86
- print(f"⚠️ [Garbage Collector] Error during cleanup scan: {e}")
87
-
88
- # 休眠到下一次扫描时间
89
- time.sleep(CLEANUP_INTERVAL_SECONDS)
90
-
91
- def start_garbage_collector():
92
- """启动后台守护线程"""
93
- gc_thread = threading.Thread(target=cleanup_expired_sessions, daemon=True)
94
- gc_thread.start()
95
- print("🚀 [Garbage Collector] Background cleanup service started.")
96
 
 
 
97
 
98
  # ==========================================
99
  # --- 🎨 Custom Purple Theme Definition ---
100
  # ==========================================
101
  colors.purple = colors.Color(
102
- name="purple", c50="#FAF5FF", c100="#F3E8FF", c200="#E9D5FF",
103
- c300="#DAB2FF", c400="#C084FC", c500="#A855F7", c600="#9333EA",
104
- c700="#7E22CE", c800="#6B21A8", c900="#581C87", c950="#3B0764",
 
 
 
 
 
 
 
 
 
105
  )
106
 
107
  class PurpleTheme(Soft):
108
- def __init__(self, **kwargs):
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  super().__init__(
110
- primary_hue=colors.gray, secondary_hue=colors.purple, neutral_hue=colors.slate,
111
- font=(fonts.GoogleFont("Outfit"), "Arial", "sans-serif"),
112
- font_mono=(fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace"),
 
 
 
113
  )
114
  super().set(
115
  background_fill_primary="*primary_50",
@@ -117,53 +77,48 @@ class PurpleTheme(Soft):
117
  body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
118
  body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
119
  button_primary_text_color="white",
 
120
  button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
121
  button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
 
 
122
  button_secondary_text_color="black",
 
123
  button_secondary_background_fill="linear-gradient(90deg, *primary_300, *primary_300)",
 
 
 
124
  slider_color="*secondary_500",
 
 
125
  block_border_width="3px",
126
  block_shadow="*shadow_drop_lg",
127
  button_primary_shadow="*shadow_drop_lg",
 
 
 
128
  )
129
 
130
  purple_theme = PurpleTheme()
131
 
132
- # ==========================================
133
- # --- 🚀 HTML Progress Bar Components ---
134
- # ==========================================
135
- def empty_progress_html(text="Waiting for action..."):
136
- return f"""
137
- <div class="custom-progress-container" style="background-color: transparent; border: 2px dashed rgba(168, 85, 247, 0.4);">
138
- <div class="custom-progress-text" style="color: #A855F7;">{text}</div>
139
- </div>
140
- """
141
-
142
- def create_progress_html(percent, text, status="active"):
143
- """
144
- status: "active" (紫色滚动条纹), "success" (绿色), "error" (红色)
145
- """
146
- return f"""
147
- <div class="custom-progress-container">
148
- <div class="custom-progress-bar {status}" style="width: {percent}%;"></div>
149
- <div class="custom-progress-text">{text} ({percent}%)</div>
150
- </div>
151
- """
152
-
153
  # ==========================================
154
  # --- ⚙️ Backend Logic & Functions ---
155
  # ==========================================
156
 
157
  def get_tree_html(dir_path):
 
158
  if not os.path.exists(dir_path):
159
  return "<div style='margin-left: 15px; color: #888;'><i>Directory missing</i></div>"
160
 
161
  def build_html(current_path):
162
  html = ""
163
- try: items = sorted(os.listdir(current_path))
164
- except Exception: return ""
 
 
165
 
166
- if not items: return ""
 
167
 
168
  for item in items:
169
  item_path = os.path.join(current_path, item)
@@ -179,14 +134,13 @@ def get_tree_html(dir_path):
179
  content = build_html(dir_path)
180
  return content if content else "<div style='margin-left: 15px; color: #888;'><i>Empty directory</i></div>"
181
 
182
- def get_debug_info(session_id):
183
- papers_dir, output_dir, _ = get_user_dirs(session_id)
184
- papers_tree = get_tree_html(papers_dir)
185
- output_tree = get_tree_html(output_dir)
186
 
187
  html = f"""
188
  <div style="font-family: 'IBM Plex Mono', monospace; font-size: 13px; background-color: #1e1e1e; border: 1px solid #C084FC; border-radius: 8px; padding: 16px; max-height: 400px; overflow-y: auto;">
189
- <div style="color: #888; margin-bottom: 8px;">Session ID: {session_id[:8]}...</div>
190
  <details open style="margin-bottom: 12px; cursor: pointer;">
191
  <summary style="outline: none; font-size: 15px; color: #A855F7;">📁 <b>papers/</b></summary>
192
  {papers_tree}
@@ -199,133 +153,164 @@ def get_debug_info(session_id):
199
  """
200
  return html
201
 
202
- def save_api_settings(api_key, api_base_url, session_id):
203
  if not api_key:
204
- return "❌ Key cannot be empty", get_debug_info(session_id), False, "", ""
205
- success_msg = "✅ Key saved securely in session memory"
206
- if api_base_url: success_msg += ", Base URL updated"
207
- return success_msg, get_debug_info(session_id), True, api_key, api_base_url
208
-
209
- def save_pdf(file, session_id):
210
- if file is None:
211
- return gr.update(visible=False), get_debug_info(session_id), False
212
  try:
213
- papers_dir, _, _ = get_user_dirs(session_id)
214
- for f in os.listdir(papers_dir):
215
- file_to_del = os.path.join(papers_dir, f)
216
- if os.path.isfile(file_to_del): os.remove(file_to_del)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
 
218
- file_path = os.path.join(papers_dir, os.path.basename(file.name))
219
  shutil.copy(file.name, file_path)
220
- return gr.update(value=create_progress_html(100, f"✅ PDF Uploaded: {os.path.basename(file.name)}", "success"), visible=True), get_debug_info(session_id), True
 
221
  except Exception as e:
222
- return gr.update(value=create_progress_html(0, f"❌ Error: {str(e)}", "error"), visible=True), get_debug_info(session_id), False
223
 
224
- def clear_pdf(session_id):
 
225
  try:
226
- user_base = os.path.join(SESSIONS_BASE_DIR, session_id)
227
- if os.path.exists(user_base):
228
- shutil.rmtree(user_base)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
 
230
  disable_btn = gr.update(interactive=False)
231
- return gr.update(visible=False), gr.update(visible=False), get_debug_info(session_id), False, disable_btn, disable_btn, disable_btn, disable_btn
 
 
 
232
  except Exception as e:
233
- return gr.update(value=create_progress_html(0, f"❌ Clear Error: {str(e)}", "error"), visible=True), gr.update(), get_debug_info(session_id), False, gr.update(), gr.update(), gr.update(), gr.update()
234
-
235
- def build_user_env(api_key, api_base_url, papers_dir, output_dir):
236
- env = os.environ.copy()
237
- env["MINERU_FORMULA_ENABLE"] = "false"
238
- env["MINERU_TABLE_ENABLE"] = "false"
239
- env["MINERU_DEVICE_MODE"] = "cpu"
240
- env["MINERU_VIRTUAL_VRAM_SIZE"] = "8"
241
- if api_key: env["GEMINI_API_KEY"] = api_key
242
- if api_base_url: env["GEMINI_API_BASE_URL"] = api_base_url
243
- env["USER_PAPERS_DIR"] = papers_dir
244
- env["USER_OUTPUT_DIR"] = output_dir
245
- return env
246
-
247
- def run_mineru_parsing_and_dag_gen(session_id, api_key, api_base_url, progress=gr.Progress()):
248
  no_change = gr.update()
249
  disable_btn = gr.update(interactive=False)
250
- papers_dir, output_dir, _ = get_user_dirs(session_id)
251
 
252
- if not os.path.exists(papers_dir) or not any(f.endswith('.pdf') for f in os.listdir(papers_dir)):
253
- yield gr.update(value=create_progress_html(0, "❌ No PDF file found", "error"), visible=True), get_debug_info(session_id), "No execution logs.", no_change, no_change, no_change, no_change
254
  return
255
 
256
  full_log = ""
257
  try:
258
- env = build_user_env(api_key, api_base_url, papers_dir, output_dir)
259
- command_mineru = ["mineru", "-p", papers_dir, "-o", output_dir]
260
- full_log += f"--- Mineru Executing (Session: {session_id[:8]}) ---\n"
 
 
 
 
 
261
 
262
- # 10%
263
  progress(0.1, desc="启动 Mineru 解析...")
264
- yield gr.update(value=create_progress_html(10, "⏳ Starting Mineru parsing...", "active"), visible=True), get_debug_info(session_id), full_log, no_change, no_change, no_change, no_change
265
 
266
  process_mineru = subprocess.Popen(
267
  command_mineru, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1
268
  )
269
 
270
- # 30%
271
  progress(0.3, desc="Mineru 正在解析 PDF...")
272
  for line in iter(process_mineru.stdout.readline, ''):
273
  full_log += line
274
- # ==========================================
275
- # 🔔 双端输出 1:Mineru 阶段
276
- # ==========================================
277
- print(f"[Mineru | {session_id[:6]}] {line}", end="", flush=True)
278
- yield gr.update(value=create_progress_html(30, "⏳ Mineru parsing PDF...", "active"), visible=True), get_debug_info(session_id), full_log, no_change, no_change, no_change, no_change
279
  process_mineru.stdout.close()
280
  returncode_mineru = process_mineru.wait()
281
 
282
  if returncode_mineru != 0:
283
  progress(1.0, desc="Mineru 解析失败")
284
- yield gr.update(value=create_progress_html(30, f"❌ Mineru failed (Code {returncode_mineru})", "error"), visible=True), get_debug_info(session_id), full_log, disable_btn, disable_btn, disable_btn, disable_btn
285
  return
286
 
287
  command_dag = [sys.executable, "gen_dag.py"]
288
  full_log += "\n--- DAG Gen Executing ---\n"
289
 
290
- # 60%
291
  progress(0.6, desc="执行 DAG 生成...")
292
- yield gr.update(value=create_progress_html(60, "⏳ Executing DAG generation...", "active"), visible=True), get_debug_info(session_id), full_log, no_change, no_change, no_change, no_change
293
 
294
  process_dag = subprocess.Popen(
295
- command_dag, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1
296
  )
297
 
298
- # 80%
299
  progress(0.8, desc="构建图结构中...")
300
  for line in iter(process_dag.stdout.readline, ''):
301
  full_log += line
302
- # ==========================================
303
- # 🔔 双端输出 2:DAG 生成阶段
304
- # ==========================================
305
- print(f"[DAG | {session_id[:6]}] {line}", end="", flush=True)
306
- yield gr.update(value=create_progress_html(80, "⏳ Building DAG...", "active"), visible=True), get_debug_info(session_id), full_log, no_change, no_change, no_change, no_change
307
  process_dag.stdout.close()
308
  returncode_dag = process_dag.wait()
309
 
 
310
  if returncode_dag == 0:
311
  progress(1.0, desc="解析与构建完成!")
 
312
  enable_btn = gr.update(interactive=True)
313
- yield gr.update(value=create_progress_html(100, "✅ Fully completed", "success"), visible=True), get_debug_info(session_id), full_log, enable_btn, enable_btn, enable_btn, enable_btn
314
  else:
315
  progress(1.0, desc="DAG 生成失败")
316
- yield gr.update(value=create_progress_html(80, "❌ DAG generation failed", "error"), visible=True), get_debug_info(session_id), full_log, disable_btn, disable_btn, disable_btn, disable_btn
 
317
 
318
  except Exception as e:
319
  progress(1.0, desc="发生异常")
320
- error_log = full_log + f"\n[Global Exception]:\n{str(e)}"
321
- print(f"[Exception | {session_id[:6]}] {str(e)}", flush=True)
322
- yield gr.update(value=create_progress_html(0, "❌ Execution Exception", "error"), visible=True), get_debug_info(session_id), error_log, disable_btn, disable_btn, disable_btn, disable_btn
323
 
324
- def run_final_generation(task_type, session_id, api_key, api_base_url, progress=gr.Progress()):
325
- papers_dir, output_dir, zip_path = get_user_dirs(session_id)
326
-
327
- if not os.path.exists(output_dir):
328
- yield gr.update(value=create_progress_html(0, "❌ Please run parsing first", "error"), visible=True), get_debug_info(session_id), "No output folder found.", gr.update(visible=False)
 
 
 
329
  return
330
 
331
  scripts_to_run = []
@@ -333,16 +318,18 @@ def run_final_generation(task_type, session_id, api_key, api_base_url, progress=
333
  elif task_type == "poster": scripts_to_run = ["gen_poster.py"]
334
  elif task_type == "pr": scripts_to_run = ["gen_pr.py"]
335
  elif task_type == "all": scripts_to_run = ["gen_ppt.py", "gen_poster.py", "gen_pr.py"]
 
 
 
 
 
336
 
337
- full_log = f"🚀 Starting {len(scripts_to_run)} tasks for session {session_id[:8]}...\n"
338
- print(f"[GEN Start | {session_id[:6]}] Starting {task_type.upper()}", flush=True)
339
-
340
  progress(0.1, desc=f"启动 {task_type.upper()} 生成任务...")
341
- yield gr.update(value=create_progress_html(10, f"⏳ Starting {task_type.upper()}...", "active"), visible=True), get_debug_info(session_id), full_log, gr.update(visible=False)
342
 
343
  q = queue.Queue()
344
  processes = []
345
- env = build_user_env(api_key, api_base_url, papers_dir, output_dir)
346
 
347
  def enqueue_output(out, script_name):
348
  for line in iter(out.readline, ''):
@@ -352,7 +339,11 @@ def run_final_generation(task_type, session_id, api_key, api_base_url, progress=
352
  try:
353
  for script in scripts_to_run:
354
  p = subprocess.Popen(
355
- [sys.executable, script], env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1
 
 
 
 
356
  )
357
  processes.append((script, p))
358
  t = threading.Thread(target=enqueue_output, args=(p.stdout, script))
@@ -360,43 +351,48 @@ def run_final_generation(task_type, session_id, api_key, api_base_url, progress=
360
  t.start()
361
 
362
  active_processes = len(processes)
363
- progress(0.5, desc=f"正在并行生成 {task_type.upper()}...")
364
 
 
 
365
  while active_processes > 0 or not q.empty():
366
  try:
367
  script_name, line = q.get(timeout=0.1)
368
  full_log += f"[{script_name}] {line}"
369
- # ==========================================
370
- # 🔔 双端输出 3:生成阶段
371
- # ==========================================
372
- print(f"[{script_name.upper()} | {session_id[:6]}] {line}", end="", flush=True)
373
- yield gr.update(value=create_progress_html(50, f"⏳ Generating {task_type.upper()}...", "active"), visible=True), get_debug_info(session_id), full_log, gr.update(visible=False)
374
  except queue.Empty:
375
  active_processes = sum(1 for _, p in processes if p.poll() is None)
376
 
377
- success = all(p.returncode == 0 for _, p in processes)
 
 
 
 
378
 
379
  if not success:
380
  progress(1.0, desc="生成失败")
381
- yield gr.update(value=create_progress_html(50, "❌ Tasks failed", "error"), visible=True), get_debug_info(session_id), full_log, gr.update(visible=False)
382
  return
383
 
384
  full_log += "\n📦 Zipping output directory...\n"
 
 
385
  progress(0.9, desc="打包压缩结果...")
386
- yield gr.update(value=create_progress_html(90, "⏳ Zipping outputs...", "active"), visible=True), get_debug_info(session_id), full_log, gr.update(visible=False)
 
 
 
387
 
388
- zip_base_name = zip_path.replace(".zip", "")
389
- shutil.make_archive(zip_base_name, 'zip', output_dir)
390
 
391
- full_log += "✅ All tasks completed successfully.\n"
392
  progress(1.0, desc="全部完成!")
393
- yield gr.update(value=create_progress_html(100, f"✅ {task_type.upper()} Generated", "success"), visible=True), get_debug_info(session_id), full_log, gr.update(value=zip_path, visible=True)
394
 
395
  except Exception as e:
396
  progress(1.0, desc="发生全局异常")
397
- error_log = full_log + f"\n[Global Exception]:\n{str(e)}"
398
- print(f"[Exception | {session_id[:6]}] {str(e)}", flush=True)
399
- yield gr.update(value=create_progress_html(0, "❌ Global exception", "error"), visible=True), get_debug_info(session_id), error_log, gr.update(visible=False)
400
 
401
  # ==========================================
402
  # --- 🚀 UI Configuration & Advanced CSS ---
@@ -456,42 +452,8 @@ body, .gradio-container {
456
  border-color: rgba(168, 85, 247, 0.3) !important;
457
  }
458
 
459
- /* ================= 进度条自定义 CSS ================= */
460
- @keyframes progress-bar-stripes {
461
- from { background-position: 1rem 0; }
462
- to { background-position: 0 0; }
463
- }
464
- .custom-progress-container {
465
- width: 100%;
466
- background-color: rgba(233, 213, 255, 0.3);
467
- border-radius: 12px;
468
- overflow: hidden;
469
- position: relative;
470
- height: 40px;
471
- border: 1px solid rgba(168, 85, 247, 0.3);
472
- box-shadow: inset 0 2px 4px rgba(0,0,0,0.05);
473
- }
474
- .custom-progress-bar {
475
- height: 100%;
476
- border-radius: 12px;
477
- transition: width 0.4s ease;
478
- }
479
- .custom-progress-bar.active {
480
- background-color: #A855F7;
481
- background-image: linear-gradient(45deg, rgba(255, 255, 255, .15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, .15) 50%, rgba(255, 255, 255, .15) 75%, transparent 75%, transparent);
482
- background-size: 1rem 1rem;
483
- animation: progress-bar-stripes 1s linear infinite;
484
- box-shadow: 0 0 10px rgba(168, 85, 247, 0.5);
485
- }
486
- .custom-progress-bar.success { background-image: none; background-color: #10B981; box-shadow: 0 0 10px rgba(16, 185, 129, 0.5);}
487
- .custom-progress-bar.error { background-image: none; background-color: #EF4444; box-shadow: 0 0 10px rgba(239, 68, 68, 0.5);}
488
- .custom-progress-text {
489
- position: absolute; top: 0; left: 0; width: 100%; height: 100%;
490
- display: flex; align-items: center; justify-content: center;
491
- font-weight: 600; color: #581C87; font-size: 14px;
492
- text-shadow: 0 0 4px rgba(255,255,255,0.8);
493
- }
494
- .dark .custom-progress-text { color: #E9D5FF; text-shadow: 0 0 4px rgba(0,0,0,0.8); }
495
 
496
  #pdf-upload-box {
497
  border: 2px dashed rgba(192, 132, 252, 0.6) !important;
@@ -541,7 +503,7 @@ body, .gradio-container {
541
  margin-right: auto !important;
542
  margin-top: 10px !important;
543
  margin-bottom: 10px !important;
544
- display: block !important;
545
  transition: transform 0.3s cubic-bezier(0.175, 0.885, 0.32, 1.275), box-shadow 0.3s ease !important;
546
  box-shadow: 0 4px 15px rgba(126, 34, 206, 0.3) !important;
547
  cursor: pointer !important;
@@ -581,6 +543,7 @@ body, .gradio-container {
581
  }
582
  .action-btn:active { transform: translateY(2px) scale(0.98) !important; box-shadow: 0 2px 10px rgba(147, 51, 234, 0.2) !important; }
583
 
 
584
  .primary-action-btn:disabled, .action-btn:disabled {
585
  background: #e5e7eb !important;
586
  color: #9ca3af !important;
@@ -602,15 +565,13 @@ body, .gradio-container {
602
  ::-webkit-scrollbar-track { background: rgba(168, 85, 247, 0.05); border-radius: 4px; }
603
  ::-webkit-scrollbar-thumb { background: linear-gradient(135deg, #A855F7, #C084FC); border-radius: 4px; }
604
  ::-webkit-scrollbar-thumb:hover { background: linear-gradient(135deg, #9333EA, #A855F7); }
 
605
  details > summary { transition: color 0.2s ease; }
606
  details > summary:hover { color: #E9D5FF !important; }
607
  """
608
 
609
  with gr.Blocks(theme=purple_theme, css=custom_css) as demo:
610
- session_id_state = gr.State("")
611
- user_api_key_state = gr.State("")
612
- user_api_base_state = gr.State("")
613
-
614
  api_saved_state = gr.State(False)
615
  pdf_ready_state = gr.State(False)
616
 
@@ -619,126 +580,126 @@ with gr.Blocks(theme=purple_theme, css=custom_css) as demo:
619
  gr.Markdown("One-click parsing of academic PDFs, DAG structuring, and multi-modal asset generation.", elem_id="subtitle")
620
 
621
  with gr.Row():
 
622
  with gr.Column(scale=1):
 
623
  # 1. API Configuration
624
  with gr.Group(elem_classes="gradio-group"):
625
  gr.Markdown("### ⚙️ 1. Global API Configuration")
626
  with gr.Row():
627
- key_input = gr.Textbox(label="Gemini API Key", type="password", placeholder="sk-...", scale=1)
628
  api_base_url_input = gr.Textbox(label="Base URL (Optional)", placeholder="https://api.example.com", scale=1)
629
  key_btn = gr.Button("💾 Save API Configuration")
630
- api_status = gr.Textbox(show_label=False, interactive=False, elem_classes="status-text")
631
-
 
 
 
632
  # 2. Document Parsing
633
  with gr.Group(elem_classes="gradio-group"):
634
  gr.Markdown("### 📄 2. Document Parsing")
635
- pdf_input = gr.File(label="Upload Document", file_types=[".pdf"], elem_id="pdf-upload-box")
 
 
 
 
 
 
 
636
  parse_btn = gr.Button("🚀 Start Mineru & DAG Extraction", elem_classes="primary-action-btn", interactive=False)
637
 
638
- # 默认隐藏进度条
639
- parse_progress = gr.HTML(visible=False)
 
640
 
641
  # 3. Asset Generation
642
  with gr.Group(elem_classes="gradio-group"):
643
  gr.Markdown("### 🎯 3. Asset Generation")
 
 
644
  with gr.Row(elem_classes="action-row"):
 
645
  gen_ppt_btn = gr.Button("📊 Gen PPT", elem_classes="action-btn", interactive=False)
646
  gen_poster_btn = gr.Button("🖼️ Gen Poster", elem_classes="action-btn", interactive=False)
647
  gen_pr_btn = gr.Button("📰 Gen PR", elem_classes="action-btn", interactive=False)
648
- gen_all_btn = gr.Button("✨ Generate All Assets (ALL)", elem_classes="primary-action-btn", interactive=False)
649
 
650
- # 默认隐藏进度条
651
- gen_progress = gr.HTML(visible=False)
652
 
 
653
  with gr.Column(scale=1):
 
654
  # 4. Results & Downloads
655
  with gr.Group(elem_classes="gradio-group"):
656
  gr.Markdown("### 📦 Generation Results & Download")
657
-
658
- download_placeholder = gr.HTML(
659
- '''
660
- <div style="display: flex; flex-direction: column; align-items: center; justify-content: center; height: 160px; border: 2px dashed rgba(192, 132, 252, 0.6); border-radius: 16px; background-color: rgba(250, 245, 255, 0.5); color: #9333EA; text-align: center; transition: all 0.3s ease;">
661
- <span style="font-size: 32px; margin-bottom: 8px;">📦</span>
662
- <span style="font-weight: 600; font-size: 16px; margin-bottom: 4px;">Awaiting Generation</span>
663
- <span style="font-size: 13px; color: #A855F7; opacity: 0.8;">Generated assets will appear here as a downloadable ZIP archive.</span>
664
- </div>
665
- '''
666
  )
667
-
668
  download_file = gr.File(label="📥 Get Final Zip Archive", interactive=False, visible=False)
669
 
670
- # 5. Debugging
671
  with gr.Group(elem_classes="gradio-group"):
672
  gr.Markdown("### 🛠️ Developer Monitoring (Debug Only)")
673
  with gr.Tabs():
674
  with gr.Tab("📜 Terminal Stream"):
675
- cmd_logs = gr.Textbox(show_label=False, lines=14, interactive=False, elem_classes="log-box")
 
 
 
676
  with gr.Tab("🔍 System Snapshot"):
677
  refresh_btn = gr.Button("🔄 Refresh Directory Tree")
678
- debug_view = gr.HTML()
679
-
680
- # ================= LOGIC BINDINGS =================
681
-
682
- def init_app_for_user():
683
- new_session_id = str(uuid.uuid4())
684
- debug_html = get_debug_info(new_session_id)
685
- return new_session_id, debug_html
686
-
687
- demo.load(fn=init_app_for_user, inputs=None, outputs=[session_id_state, debug_view])
688
 
 
 
 
 
689
  key_btn.click(
690
  fn=save_api_settings,
691
- inputs=[key_input, api_base_url_input, session_id_state],
692
- outputs=[api_status, debug_view, api_saved_state, user_api_key_state, user_api_base_state]
693
  )
694
 
695
- pdf_input.upload(
696
- fn=save_pdf,
697
- inputs=[pdf_input, session_id_state],
698
- outputs=[parse_progress, debug_view, pdf_ready_state]
699
- )
700
 
 
 
701
  pdf_input.clear(
702
  fn=clear_pdf,
703
- inputs=[session_id_state],
704
- outputs=[parse_progress, gen_progress, debug_view, pdf_ready_state, gen_ppt_btn, gen_poster_btn, gen_pr_btn, gen_all_btn]
705
  )
706
 
 
707
  def check_parse_btn_ready(api_ready, pdf_ready):
708
  return gr.update(interactive=(api_ready and pdf_ready))
709
 
710
  api_saved_state.change(fn=check_parse_btn_ready, inputs=[api_saved_state, pdf_ready_state], outputs=parse_btn)
711
  pdf_ready_state.change(fn=check_parse_btn_ready, inputs=[api_saved_state, pdf_ready_state], outputs=parse_btn)
712
 
 
 
713
  parse_btn.click(
714
  fn=run_mineru_parsing_and_dag_gen,
715
- inputs=[session_id_state, user_api_key_state, user_api_base_state],
716
- outputs=[parse_progress, debug_view, cmd_logs, gen_ppt_btn, gen_poster_btn, gen_pr_btn, gen_all_btn]
717
  )
718
 
719
- def trigger_gen_ppt(sid, ak, ab, progress=gr.Progress()): yield from run_final_generation("ppt", sid, ak, ab, progress)
720
- def trigger_gen_poster(sid, ak, ab, progress=gr.Progress()): yield from run_final_generation("poster", sid, ak, ab, progress)
721
- def trigger_gen_pr(sid, ak, ab, progress=gr.Progress()): yield from run_final_generation("pr", sid, ak, ab, progress)
722
- def trigger_gen_all(sid, ak, ab, progress=gr.Progress()): yield from run_final_generation("all", sid, ak, ab, progress)
723
-
724
- gen_ppt_btn.click(fn=trigger_gen_ppt, inputs=[session_id_state, user_api_key_state, user_api_base_state], outputs=[gen_progress, debug_view, cmd_logs, download_file])
725
- gen_poster_btn.click(fn=trigger_gen_poster, inputs=[session_id_state, user_api_key_state, user_api_base_state], outputs=[gen_progress, debug_view, cmd_logs, download_file])
726
- gen_pr_btn.click(fn=trigger_gen_pr, inputs=[session_id_state, user_api_key_state, user_api_base_state], outputs=[gen_progress, debug_view, cmd_logs, download_file])
727
- gen_all_btn.click(fn=trigger_gen_all, inputs=[session_id_state, user_api_key_state, user_api_base_state], outputs=[gen_progress, debug_view, cmd_logs, download_file])
 
 
728
 
729
- refresh_btn.click(fn=get_debug_info, inputs=[session_id_state], outputs=debug_view)
730
-
731
- def toggle_empty_placeholder(file_val):
732
- return gr.update(visible=(file_val is None))
733
-
734
- download_file.change(
735
- fn=toggle_empty_placeholder,
736
- inputs=[download_file],
737
- outputs=[download_placeholder]
738
- )
739
 
740
  if __name__ == "__main__":
741
- start_garbage_collector()
742
- # 并发放宽至 5
743
- # demo.queue(default_concurrency_limit=5).launch()
744
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import gradio as gr
2
  import os
3
+ import yaml
4
  import shutil
5
  import subprocess
6
  import sys
7
  import queue
8
  import threading
 
9
  from datetime import datetime
10
  from concurrent.futures import ThreadPoolExecutor, as_completed
11
  from typing import Iterable
12
  from gradio.themes import Soft
13
  from gradio.themes.utils import colors, fonts, sizes
14
 
15
+ # 自动安装 Playwright 浏览器(如果不存在)
16
+ try:
17
+ import playwright
18
+ # 检查是否已经安装了浏览器,没有则下载
19
+ subprocess.run(["playwright", "install", "chromium"], check=True)
20
+ except Exception as e:
21
+ print(f"Playwright setup failed: {e}")
22
 
23
+ # Initialize environment paths
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
25
+ PAPERS_DIR = os.path.join(BASE_DIR, "papers")
26
+ CONFIG_PATH = os.path.join(BASE_DIR, "config.yaml")
27
+ OUTPUT_DIR = os.path.join(BASE_DIR, "mineru_outputs")
28
+ ZIP_OUTPUT_PATH = os.path.join(BASE_DIR, "mineru_results.zip")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
+ os.makedirs(PAPERS_DIR, exist_ok=True)
31
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
32
 
33
  # ==========================================
34
  # --- 🎨 Custom Purple Theme Definition ---
35
  # ==========================================
36
  colors.purple = colors.Color(
37
+ name="purple",
38
+ c50="#FAF5FF",
39
+ c100="#F3E8FF",
40
+ c200="#E9D5FF",
41
+ c300="#DAB2FF",
42
+ c400="#C084FC",
43
+ c500="#A855F7",
44
+ c600="#9333EA",
45
+ c700="#7E22CE",
46
+ c800="#6B21A8",
47
+ c900="#581C87",
48
+ c950="#3B0764",
49
  )
50
 
51
  # Gradio theme derived from the built-in Soft theme.
  # The constructor only forwards its keyword-only hue / text-size / font
  # arguments to Soft, then overrides individual theme variables (backgrounds,
  # primary and secondary buttons, slider, block chrome) through set().
  # Values such as "*primary_50" or "*secondary_500" are passed through as
  # plain strings; presumably Gradio resolves them against the selected hues.
  class PurpleTheme(Soft):
52
+ def __init__(
53
+ self,
54
+ *,
55
+ primary_hue: colors.Color | str = colors.gray,
56
+ secondary_hue: colors.Color | str = colors.purple,
57
+ neutral_hue: colors.Color | str = colors.slate,
58
+ text_size: sizes.Size | str = sizes.text_lg,
59
+ font: fonts.Font | str | Iterable[fonts.Font | str] = (
60
+ fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
61
+ ),
62
+ font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
63
+ fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
64
+ ),
65
+ ):
66
  super().__init__(
67
+ primary_hue=primary_hue,
68
+ secondary_hue=secondary_hue,
69
+ neutral_hue=neutral_hue,
70
+ text_size=text_size,
71
+ font=font,
72
+ font_mono=font_mono,
73
  )
74
  super().set(
75
  background_fill_primary="*primary_50",
 
77
  body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
78
  body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
79
  button_primary_text_color="white",
80
+ button_primary_text_color_hover="white",
81
  button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
82
  button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
83
+ button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_700)",
84
+ button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_600)",
85
  button_secondary_text_color="black",
86
+ button_secondary_text_color_hover="white",
87
  button_secondary_background_fill="linear-gradient(90deg, *primary_300, *primary_300)",
88
+ button_secondary_background_fill_hover="linear-gradient(90deg, *primary_400, *primary_400)",
89
+ button_secondary_background_fill_dark="linear-gradient(90deg, *primary_500, *primary_600)",
90
+ button_secondary_background_fill_hover_dark="linear-gradient(90deg, *primary_500, *primary_500)",
91
  slider_color="*secondary_500",
92
+ slider_color_dark="*secondary_600",
93
+ block_title_text_weight="600",
94
  block_border_width="3px",
95
  block_shadow="*shadow_drop_lg",
96
  button_primary_shadow="*shadow_drop_lg",
97
+ button_large_padding="11px",
98
+ color_accent_soft="*primary_100",
99
+ block_label_background_fill="*primary_200",
100
  )
101
 
102
  purple_theme = PurpleTheme()
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  # ==========================================
105
  # --- ⚙️ Backend Logic & Functions ---
106
  # ==========================================
107
 
108
  def get_tree_html(dir_path):
109
+ """Recursively build a collapsible HTML directory tree."""
110
  if not os.path.exists(dir_path):
111
  return "<div style='margin-left: 15px; color: #888;'><i>Directory missing</i></div>"
112
 
113
  def build_html(current_path):
114
  html = ""
115
+ try:
116
+ items = sorted(os.listdir(current_path))
117
+ except Exception:
118
+ return ""
119
 
120
+ if not items:
121
+ return ""
122
 
123
  for item in items:
124
  item_path = os.path.join(current_path, item)
 
134
  content = build_html(dir_path)
135
  return content if content else "<div style='margin-left: 15px; color: #888;'><i>Empty directory</i></div>"
136
 
137
+ def get_debug_info():
138
+ """Return the HTML container holding the rendered directory trees."""
139
+ papers_tree = get_tree_html(PAPERS_DIR)
140
+ output_tree = get_tree_html(OUTPUT_DIR)
141
 
142
  html = f"""
143
  <div style="font-family: 'IBM Plex Mono', monospace; font-size: 13px; background-color: #1e1e1e; border: 1px solid #C084FC; border-radius: 8px; padding: 16px; max-height: 400px; overflow-y: auto;">
 
144
  <details open style="margin-bottom: 12px; cursor: pointer;">
145
  <summary style="outline: none; font-size: 15px; color: #A855F7;">📁 <b>papers/</b></summary>
146
  {papers_tree}
 
153
  """
154
  return html
155
 
156
def save_api_settings(api_key, api_base_url=None):
    """Persist the API key (and optional base URL) to the YAML config file.

    Existing settings in ``CONFIG_PATH`` are loaded first and preserved; only
    ``api_keys.gemini_api_key`` and, when given, ``api_base_url`` are updated.

    Args:
        api_key: Key to store. Surrounding whitespace is ignored; an empty or
            blank key is rejected without touching the file.
        api_base_url: Optional endpoint. When empty or blank, any value
            already present in the file is left untouched.

    Returns:
        A ``(status message, debug HTML, api_ready)`` tuple. ``api_ready`` is
        True only when the config file was written successfully.
    """
    # Pasted keys frequently carry stray spaces/newlines; a blank key must not
    # be saved and reported as a valid configuration.
    api_key = (api_key or "").strip()
    api_base_url = (api_base_url or "").strip()
    if not api_key:
        return "❌ Key cannot be empty", get_debug_info(), False

    try:
        config = {}
        if os.path.exists(CONFIG_PATH):
            with open(CONFIG_PATH, "r", encoding="utf-8") as f:
                config = yaml.safe_load(f) or {}
        if not isinstance(config, dict):
            raise ValueError("config file does not contain a YAML mapping")

        api_keys = config.get("api_keys")
        if api_keys is None:
            # Covers both a missing section and an empty `api_keys:` entry,
            # which YAML loads as None.
            api_keys = config["api_keys"] = {}
        api_keys["gemini_api_key"] = api_key

        if api_base_url:
            config["api_base_url"] = api_base_url

        with open(CONFIG_PATH, "w", encoding="utf-8") as f:
            yaml.dump(config, f, allow_unicode=True)

        success_msg = "✅ Key saved"
        if api_base_url:
            success_msg += ", Base URL updated"

        # True signals to the UI that the API step is complete.
        return success_msg, get_debug_info(), True
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return f"❌ Error: {str(e)}", get_debug_info(), False
178
+
179
def save_pdf(file):
    """Replace the workspace PDF with a freshly uploaded one.

    Every regular file currently in ``PAPERS_DIR`` is removed first, so the
    directory holds only the latest upload.

    Args:
        file: The uploaded file as delivered by the upload component: either
            an object exposing the temporary path as ``.name`` or a plain
            path string. ``None`` means nothing was selected.

    Returns:
        A ``(status message, debug HTML, pdf_ready)`` tuple. ``pdf_ready`` is
        True only when the file was copied into ``PAPERS_DIR``.
    """
    if file is None:
        return "❌ Please select a PDF first", get_debug_info(), False

    try:
        # Accept both a wrapper object with `.name` and a bare path string.
        source_path = getattr(file, "name", file)
        file_name = os.path.basename(source_path)

        # The very first upload may arrive before the directory exists.
        os.makedirs(PAPERS_DIR, exist_ok=True)
        for entry in os.listdir(PAPERS_DIR):
            stale_path = os.path.join(PAPERS_DIR, entry)
            if os.path.isfile(stale_path):
                os.remove(stale_path)

        shutil.copy(source_path, os.path.join(PAPERS_DIR, file_name))
        # True signals to the UI that a PDF is ready for parsing.
        return f"✅ Saved: {file_name}", get_debug_info(), True
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return f"❌ Error: {str(e)}", get_debug_info(), False
193
 
194
def clear_pdf():
    """Empty the workspace and lock every downstream generation step.

    Removes all regular files in ``PAPERS_DIR``, the output folders in
    ``OUTPUT_DIR`` that belong to the removed PDFs (matched by the PDF's base
    name, its base name with spaces replaced by underscores, or its full file
    name) and the result archive at ``ZIP_OUTPUT_PATH``.

    Returns:
        A 7-tuple: status message, debug HTML, ``pdf_ready`` (always False),
        and four button updates. On success the buttons are disabled; on
        failure they are left unchanged.
    """
    disable_btn = gr.update(interactive=False)
    try:
        paper_entries = os.listdir(PAPERS_DIR)
        # Case-insensitive so that "X.PDF" uploads get their outputs cleaned too.
        pdf_names = [name for name in paper_entries if name.lower().endswith('.pdf')]

        removed_anything = False
        for name in paper_entries:
            path = os.path.join(PAPERS_DIR, name)
            if os.path.isfile(path):
                os.remove(path)
                removed_anything = True

        if os.path.exists(OUTPUT_DIR):
            # Collect every folder name a removed PDF may have produced, then
            # scan the output directory once.
            candidates = set()
            for pdf_name in pdf_names:
                base_name = os.path.splitext(pdf_name)[0]
                candidates.update((base_name, base_name.replace(" ", "_"), pdf_name))

            for item in os.listdir(OUTPUT_DIR):
                target = os.path.join(OUTPUT_DIR, item)
                if item in candidates and os.path.isdir(target):
                    shutil.rmtree(target)
                    removed_anything = True

        if os.path.exists(ZIP_OUTPUT_PATH):
            os.remove(ZIP_OUTPUT_PATH)

        if removed_anything:
            message = "🗑️ Workspace cleared"
        else:
            message = "ℹ️ Workspace is already empty"
        return message, get_debug_info(), False, disable_btn, disable_btn, disable_btn, disable_btn
    except Exception as e:
        # UI boundary: report the failure and leave the buttons as they are.
        return f"❌ Error deleting file: {str(e)}", get_debug_info(), False, gr.update(), gr.update(), gr.update(), gr.update()
230
+
231
def _stream_step(command, status, log_parts, env=None):
    """Run *command* and relay its output to the UI line by line.

    Each line of the merged stdout/stderr stream is appended to *log_parts*,
    and one 7-tuple is yielded per line: *status*, the debug HTML, the full
    log so far, and four no-op updates for the generation buttons.

    Args:
        command: Argument list passed to ``subprocess.Popen``.
        status: Status text shown while the command is running.
        log_parts: List of log fragments shared with the caller; mutated.
        env: Environment for the child, or None to inherit the current one.

    Returns:
        The child's exit code, delivered as the generator's return value
        (i.e. the result of ``yield from``).
    """
    no_change = gr.update()
    process = subprocess.Popen(
        command, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1
    )
    try:
        for line in iter(process.stdout.readline, ''):
            log_parts.append(line)
            yield status, get_debug_info(), "".join(log_parts), no_change, no_change, no_change, no_change
        process.stdout.close()
        return process.wait()
    finally:
        # The child is still alive here only when the stream was abandoned
        # (generator closed by a cancelled request, or an error raised
        # mid-way). Do not leave an orphaned process or an open pipe behind.
        if process.poll() is None:
            process.kill()
            process.wait()
        process.stdout.close()


def run_mineru_parsing_and_dag_gen(progress=gr.Progress()):
    """Parse the uploaded PDF with Mineru, then run the DAG generation script.

    Generator used as a streaming UI handler. Every yielded value is a
    7-tuple: status text, debug HTML, accumulated log, and one update for
    each of the four asset-generation buttons. The buttons are enabled only
    by the final yield of a fully successful run and disabled on any failure.

    Args:
        progress: Progress tracker used to report the coarse stage.
    """
    no_change = gr.update()
    disable_btn = gr.update(interactive=False)

    # Case-insensitive so that an uploaded "X.PDF" is recognised as well.
    has_pdf = os.path.exists(PAPERS_DIR) and any(
        name.lower().endswith('.pdf') for name in os.listdir(PAPERS_DIR)
    )
    if not has_pdf:
        yield "❌ No PDF file found", get_debug_info(), "No execution logs.", no_change, no_change, no_change, no_change
        return

    log_parts = []
    try:
        # Mineru settings are passed through the child's environment only.
        env = os.environ.copy()
        env["MINERU_FORMULA_ENABLE"] = "false"
        env["MINERU_TABLE_ENABLE"] = "false"
        env["MINERU_DEVICE_MODE"] = "cpu"
        env["MINERU_VIRTUAL_VRAM_SIZE"] = "8"

        command_mineru = ["mineru", "-p", PAPERS_DIR, "-o", OUTPUT_DIR]
        log_parts.append("--- Mineru Executing ---\n")

        # Stage 1: Mineru parsing (10% at launch, 30% while streaming).
        progress(0.1, desc="启动 Mineru 解析...")
        yield "⏳ Executing Mineru parsing...", get_debug_info(), "".join(log_parts), no_change, no_change, no_change, no_change

        progress(0.3, desc="Mineru 正在解析 PDF...")
        returncode_mineru = yield from _stream_step(
            command_mineru, "⏳ Executing Mineru parsing...", log_parts, env=env
        )

        if returncode_mineru != 0:
            progress(1.0, desc="Mineru 解析失败")
            yield f"❌ Mineru parsing failed (Exit Code: {returncode_mineru})", get_debug_info(), "".join(log_parts), disable_btn, disable_btn, disable_btn, disable_btn
            return

        command_dag = [sys.executable, "gen_dag.py"]
        log_parts.append("\n--- DAG Gen Executing ---\n")

        # Stage 2: DAG generation (60% at launch, 80% while streaming).
        progress(0.6, desc="执行 DAG 生成...")
        yield "⏳ Mineru parsing complete, executing DAG generation...", get_debug_info(), "".join(log_parts), no_change, no_change, no_change, no_change

        progress(0.8, desc="构建图结构中...")
        returncode_dag = yield from _stream_step(
            command_dag, "⏳ Executing DAG generation...", log_parts
        )

        if returncode_dag == 0:
            # Both stages succeeded: unlock the four generation buttons.
            progress(1.0, desc="解析与构建完成!")
            status = "✅ PDF parsing & DAG generation fully completed"
            enable_btn = gr.update(interactive=True)
            yield status, get_debug_info(), "".join(log_parts), enable_btn, enable_btn, enable_btn, enable_btn
        else:
            progress(1.0, desc="DAG 生成失败")
            status = f"❌ DAG generation failed (Exit Code: {returncode_dag})"
            yield status, get_debug_info(), "".join(log_parts), disable_btn, disable_btn, disable_btn, disable_btn

    except Exception as e:
        # UI boundary: surface the failure in the log pane and keep the
        # generation buttons locked.
        progress(1.0, desc="发生异常")
        error_log = "".join(log_parts) + f"\n[Global Exception] Exception occurred:\n{str(e)}"
        yield " Execution Exception", get_debug_info(), error_log, disable_btn, disable_btn, disable_btn, disable_btn
 
305
 
306
+
307
+ def run_final_generation(task_type="all", progress=gr.Progress()):
308
+ """
309
+ Stream the sub-processes' output in real time using a queue and worker threads, pushing it to the front end through yield.
310
+ The progress parameter is used to update the progress bar.
311
+ """
312
+ if not os.path.exists(OUTPUT_DIR):
313
+ yield "❌ Please run the parsing step first", get_debug_info(), "No output folder found.", gr.update(visible=False)
314
  return
315
 
316
  scripts_to_run = []
 
318
  elif task_type == "poster": scripts_to_run = ["gen_poster.py"]
319
  elif task_type == "pr": scripts_to_run = ["gen_pr.py"]
320
  elif task_type == "all": scripts_to_run = ["gen_ppt.py", "gen_poster.py", "gen_pr.py"]
321
+ else:
322
+ yield "❌ Unknown task type", get_debug_info(), "Invalid task_type.", gr.update(visible=False)
323
+ return
324
+
325
+ full_log = f"🚀 Preparing to start {len(scripts_to_run)} tasks...\n"
326
 
327
+ # Start-up stage (10%)
 
 
328
  progress(0.1, desc=f"启动 {task_type.upper()} 生成任务...")
329
+ yield f"⏳ Starting {task_type.upper()} generation...", get_debug_info(), full_log, gr.update(visible=False)
330
 
331
  q = queue.Queue()
332
  processes = []
 
333
 
334
  def enqueue_output(out, script_name):
335
  for line in iter(out.readline, ''):
 
339
  try:
340
  for script in scripts_to_run:
341
  p = subprocess.Popen(
342
+ [sys.executable, script],
343
+ stdout=subprocess.PIPE,
344
+ stderr=subprocess.STDOUT,
345
+ text=True,
346
+ bufsize=1
347
  )
348
  processes.append((script, p))
349
  t = threading.Thread(target=enqueue_output, args=(p.stdout, script))
 
351
  t.start()
352
 
353
  active_processes = len(processes)
 
354
 
355
+ # Generation stage, held at 50%
356
+ progress(0.5, desc=f"正在并行生成 {task_type.upper()} 资产...")
357
  while active_processes > 0 or not q.empty():
358
  try:
359
  script_name, line = q.get(timeout=0.1)
360
  full_log += f"[{script_name}] {line}"
361
+ yield f"⏳ Generating {task_type.upper()}...", get_debug_info(), full_log, gr.update(visible=False)
 
 
 
 
362
  except queue.Empty:
363
  active_processes = sum(1 for _, p in processes if p.poll() is None)
364
 
365
+ success = True
366
+ for script, p in processes:
367
+ if p.returncode != 0:
368
+ success = False
369
+ full_log += f"\n❌ [Error] {script} returned non-zero exit code (Exit Code: {p.returncode})\n"
370
 
371
  if not success:
372
  progress(1.0, desc="生成失败")
373
+ yield f"❌ {task_type.upper()} contains failed tasks, please check logs", get_debug_info(), full_log, gr.update(visible=False)
374
  return
375
 
376
  full_log += "\n📦 Zipping output directory...\n"
377
+
378
+ # Zipping stage (90%)
379
  progress(0.9, desc="打包压缩结果...")
380
+ yield f"⏳ Zipping outputs...", get_debug_info(), full_log, gr.update(visible=False)
381
+
382
+ zip_base_name = ZIP_OUTPUT_PATH.replace(".zip", "")
383
+ shutil.make_archive(zip_base_name, 'zip', OUTPUT_DIR)
384
 
385
+ full_log += "✅ All tasks and zipping completed successfully.\n"
 
386
 
387
+ # Completion stage (100%)
388
  progress(1.0, desc="全部完成!")
389
+ yield f"✅ {task_type.upper()} generated and zipped successfully", get_debug_info(), full_log, gr.update(value=ZIP_OUTPUT_PATH, visible=True)
390
 
391
  except Exception as e:
392
  progress(1.0, desc="发生全局异常")
393
+ error_log = full_log + f"\n[Global Exception] Exception occurred:\n{str(e)}"
394
+ yield " Global exception during final generation", get_debug_info(), error_log, gr.update(visible=False)
395
+
396
 
397
  # ==========================================
398
  # --- 🚀 UI Configuration & Advanced CSS ---
 
452
  border-color: rgba(168, 85, 247, 0.3) !important;
453
  }
454
 
455
+ .gradio-group > div,
456
+ .gradio-group > .form { border-radius: 12px !important; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457
 
458
  #pdf-upload-box {
459
  border: 2px dashed rgba(192, 132, 252, 0.6) !important;
 
503
  margin-right: auto !important;
504
  margin-top: 10px !important;
505
  margin-bottom: 10px !important;
506
+ display: block !important;
507
  transition: transform 0.3s cubic-bezier(0.175, 0.885, 0.32, 1.275), box-shadow 0.3s ease !important;
508
  box-shadow: 0 4px 15px rgba(126, 34, 206, 0.3) !important;
509
  cursor: pointer !important;
 
543
  }
544
  .action-btn:active { transform: translateY(2px) scale(0.98) !important; box-shadow: 0 2px 10px rgba(147, 51, 234, 0.2) !important; }
545
 
546
+ /* ======== CSS FOR DISABLED BUTTONS ======== */
547
  .primary-action-btn:disabled, .action-btn:disabled {
548
  background: #e5e7eb !important;
549
  color: #9ca3af !important;
 
565
  ::-webkit-scrollbar-track { background: rgba(168, 85, 247, 0.05); border-radius: 4px; }
566
  ::-webkit-scrollbar-thumb { background: linear-gradient(135deg, #A855F7, #C084FC); border-radius: 4px; }
567
  ::-webkit-scrollbar-thumb:hover { background: linear-gradient(135deg, #9333EA, #A855F7); }
568
+
569
  details > summary { transition: color 0.2s ease; }
570
  details > summary:hover { color: #E9D5FF !important; }
571
  """
572
 
573
  with gr.Blocks(theme=purple_theme, css=custom_css) as demo:
574
+ # Global state flags used to implement dependency-based unlocking of the steps
 
 
 
575
  api_saved_state = gr.State(False)
576
  pdf_ready_state = gr.State(False)
577
 
 
580
  gr.Markdown("One-click parsing of academic PDFs, DAG structuring, and multi-modal asset generation.", elem_id="subtitle")
581
 
582
  with gr.Row():
583
+ # ================= LEFT COLUMN: SETTINGS & ACTIONS =================
584
  with gr.Column(scale=1):
585
+
586
  # 1. API Configuration
587
  with gr.Group(elem_classes="gradio-group"):
588
  gr.Markdown("### ⚙️ 1. Global API Configuration")
589
  with gr.Row():
590
+ key_input = gr.Textbox(label="API Key", type="password", placeholder="sk-...", scale=1)
591
  api_base_url_input = gr.Textbox(label="Base URL (Optional)", placeholder="https://api.example.com", scale=1)
592
  key_btn = gr.Button("💾 Save API Configuration")
593
+
594
+ api_status = gr.Textbox(
595
+ show_label=False, placeholder="Waiting for API configuration...", lines=1, interactive=False, elem_classes="status-text"
596
+ )
597
+
598
  # 2. Document Parsing
599
  with gr.Group(elem_classes="gradio-group"):
600
  gr.Markdown("### 📄 2. Document Parsing")
601
+
602
+ pdf_input = gr.File(
603
+ label="Upload Document",
604
+ file_types=[".pdf"],
605
+ elem_id="pdf-upload-box"
606
+ )
607
+
608
+ # Locked by default
609
  parse_btn = gr.Button("🚀 Start Mineru & DAG Extraction", elem_classes="primary-action-btn", interactive=False)
610
 
611
+ parse_status = gr.Textbox(
612
+ show_label=False, placeholder="Waiting for document upload...", lines=1, interactive=False, elem_classes="status-text"
613
+ )
614
 
615
  # 3. Asset Generation
616
  with gr.Group(elem_classes="gradio-group"):
617
  gr.Markdown("### 🎯 3. Asset Generation")
618
+ gr.Markdown("Generate final formats based on DAG structure:")
619
+
620
  with gr.Row(elem_classes="action-row"):
621
+ # Locked by default
622
  gen_ppt_btn = gr.Button("📊 Gen PPT", elem_classes="action-btn", interactive=False)
623
  gen_poster_btn = gr.Button("🖼️ Gen Poster", elem_classes="action-btn", interactive=False)
624
  gen_pr_btn = gr.Button("📰 Gen PR", elem_classes="action-btn", interactive=False)
 
625
 
626
+ # Locked by default
627
+ gen_all_btn = gr.Button("✨ Generate All Assets (ALL)", elem_classes="primary-action-btn", interactive=False)
628
 
629
+ # ================= RIGHT COLUMN: OUTPUTS & LOGS =================
630
  with gr.Column(scale=1):
631
+
632
  # 4. Results & Downloads
633
  with gr.Group(elem_classes="gradio-group"):
634
  gr.Markdown("### 📦 Generation Results & Download")
635
+ gen_status = gr.Textbox(
636
+ show_label=False, placeholder="No generation task currently...", lines=2, interactive=False, elem_classes="status-text"
 
 
 
 
 
 
 
637
  )
 
638
  download_file = gr.File(label="📥 Get Final Zip Archive", interactive=False, visible=False)
639
 
640
+ # 5. Debugging & Terminal
641
  with gr.Group(elem_classes="gradio-group"):
642
  gr.Markdown("### 🛠️ Developer Monitoring (Debug Only)")
643
  with gr.Tabs():
644
  with gr.Tab("📜 Terminal Stream"):
645
+ cmd_logs = gr.Textbox(
646
+ label="Stdout / Stderr", placeholder="Waiting for task to start...", lines=14, interactive=False, elem_classes="log-box"
647
+ )
648
+
649
  with gr.Tab("🔍 System Snapshot"):
650
  refresh_btn = gr.Button("🔄 Refresh Directory Tree")
651
+ debug_view = gr.HTML(
652
+ value=get_debug_info()
653
+ )
 
 
 
 
 
 
 
654
 
655
+ # ================= LOGIC BINDINGS =================
656
+
657
+ # [Step 1] Configure the API
658
+ # On success this updates the status text and also sets api_saved_state to True
659
  key_btn.click(
660
  fn=save_api_settings,
661
+ inputs=[key_input, api_base_url_input],
662
+ outputs=[api_status, debug_view, api_saved_state]
663
  )
664
 
665
+ # [Step 2.1] Upload the PDF
666
+ # On success this updates the status text and sets pdf_ready_state to True
667
+ pdf_input.upload(fn=save_pdf, inputs=pdf_input, outputs=[parse_status, debug_view, pdf_ready_state])
 
 
668
 
669
+ # [Step 2.2] Clear the PDF
670
+ # Locks every asset-generation button and resets pdf_ready_state to False
671
  pdf_input.clear(
672
  fn=clear_pdf,
673
+ outputs=[parse_status, debug_view, pdf_ready_state, gen_ppt_btn, gen_poster_btn, gen_pr_btn, gen_all_btn]
 
674
  )
675
 
676
+ # [Dependency check] Whenever the API state or the PDF state changes, re-evaluate whether the "start parsing" button should be unlocked
677
  def check_parse_btn_ready(api_ready, pdf_ready):
678
  return gr.update(interactive=(api_ready and pdf_ready))
679
 
680
  api_saved_state.change(fn=check_parse_btn_ready, inputs=[api_saved_state, pdf_ready_state], outputs=parse_btn)
681
  pdf_ready_state.change(fn=check_parse_btn_ready, inputs=[api_saved_state, pdf_ready_state], outputs=parse_btn)
682
 
683
+ # [Step 2.3] Run the parsing
684
+ # On full success, the function's final yield also sends interactive=True to the four generation buttons
685
  parse_btn.click(
686
  fn=run_mineru_parsing_and_dag_gen,
687
+ outputs=[parse_status, debug_view, cmd_logs, gen_ppt_btn, gen_poster_btn, gen_pr_btn, gen_all_btn]
 
688
  )
689
 
690
+ # [Step 3] Asset generation
691
+ # The progress object is injected into the wrapper functions
692
+ def trigger_gen_ppt(progress=gr.Progress()): yield from run_final_generation("ppt", progress)
693
+ def trigger_gen_poster(progress=gr.Progress()): yield from run_final_generation("poster", progress)
694
+ def trigger_gen_pr(progress=gr.Progress()): yield from run_final_generation("pr", progress)
695
+ def trigger_gen_all(progress=gr.Progress()): yield from run_final_generation("all", progress)
696
+
697
+ gen_ppt_btn.click(fn=trigger_gen_ppt, outputs=[gen_status, debug_view, cmd_logs, download_file])
698
+ gen_poster_btn.click(fn=trigger_gen_poster, outputs=[gen_status, debug_view, cmd_logs, download_file])
699
+ gen_pr_btn.click(fn=trigger_gen_pr, outputs=[gen_status, debug_view, cmd_logs, download_file])
700
+ gen_all_btn.click(fn=trigger_gen_all, outputs=[gen_status, debug_view, cmd_logs, download_file])
701
 
702
+ refresh_btn.click(fn=get_debug_info, outputs=debug_view)
 
 
 
 
 
 
 
 
 
703
 
704
  if __name__ == "__main__":
705
+ demo.launch(debug=True)