Laramie2 committed on
Commit
76ebde2
·
verified ·
1 Parent(s): d9bf1c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +314 -275
app.py CHANGED
@@ -1,75 +1,115 @@
1
  import gradio as gr
2
  import os
3
- import yaml
4
  import shutil
5
  import subprocess
6
  import sys
7
  import queue
8
  import threading
 
9
  from datetime import datetime
10
  from concurrent.futures import ThreadPoolExecutor, as_completed
11
  from typing import Iterable
12
  from gradio.themes import Soft
13
  from gradio.themes.utils import colors, fonts, sizes
14
 
15
- # 自动安装 Playwright 浏览器(如果不存在)
16
- try:
17
- import playwright
18
- # 检查是否已经安装了浏览器,没有则下载
19
- subprocess.run(["playwright", "install", "chromium"], check=True)
20
- except Exception as e:
21
- print(f"Playwright setup failed: {e}")
22
 
23
- # Initialize environment paths
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
25
- PAPERS_DIR = os.path.join(BASE_DIR, "papers")
26
- CONFIG_PATH = os.path.join(BASE_DIR, "config.yaml")
27
- OUTPUT_DIR = os.path.join(BASE_DIR, "mineru_outputs")
28
- ZIP_OUTPUT_PATH = os.path.join(BASE_DIR, "mineru_results.zip")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- os.makedirs(PAPERS_DIR, exist_ok=True)
31
- os.makedirs(OUTPUT_DIR, exist_ok=True)
32
 
33
  # ==========================================
34
  # --- 🎨 Custom Purple Theme Definition ---
35
  # ==========================================
36
  colors.purple = colors.Color(
37
- name="purple",
38
- c50="#FAF5FF",
39
- c100="#F3E8FF",
40
- c200="#E9D5FF",
41
- c300="#DAB2FF",
42
- c400="#C084FC",
43
- c500="#A855F7",
44
- c600="#9333EA",
45
- c700="#7E22CE",
46
- c800="#6B21A8",
47
- c900="#581C87",
48
- c950="#3B0764",
49
  )
50
 
51
  class PurpleTheme(Soft):
52
- def __init__(
53
- self,
54
- *,
55
- primary_hue: colors.Color | str = colors.gray,
56
- secondary_hue: colors.Color | str = colors.purple,
57
- neutral_hue: colors.Color | str = colors.slate,
58
- text_size: sizes.Size | str = sizes.text_lg,
59
- font: fonts.Font | str | Iterable[fonts.Font | str] = (
60
- fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
61
- ),
62
- font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
63
- fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
64
- ),
65
- ):
66
  super().__init__(
67
- primary_hue=primary_hue,
68
- secondary_hue=secondary_hue,
69
- neutral_hue=neutral_hue,
70
- text_size=text_size,
71
- font=font,
72
- font_mono=font_mono,
73
  )
74
  super().set(
75
  background_fill_primary="*primary_50",
@@ -77,48 +117,53 @@ class PurpleTheme(Soft):
77
  body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
78
  body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
79
  button_primary_text_color="white",
80
- button_primary_text_color_hover="white",
81
  button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
82
  button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
83
- button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_700)",
84
- button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_600)",
85
  button_secondary_text_color="black",
86
- button_secondary_text_color_hover="white",
87
  button_secondary_background_fill="linear-gradient(90deg, *primary_300, *primary_300)",
88
- button_secondary_background_fill_hover="linear-gradient(90deg, *primary_400, *primary_400)",
89
- button_secondary_background_fill_dark="linear-gradient(90deg, *primary_500, *primary_600)",
90
- button_secondary_background_fill_hover_dark="linear-gradient(90deg, *primary_500, *primary_500)",
91
  slider_color="*secondary_500",
92
- slider_color_dark="*secondary_600",
93
- block_title_text_weight="600",
94
  block_border_width="3px",
95
  block_shadow="*shadow_drop_lg",
96
  button_primary_shadow="*shadow_drop_lg",
97
- button_large_padding="11px",
98
- color_accent_soft="*primary_100",
99
- block_label_background_fill="*primary_200",
100
  )
101
 
102
  purple_theme = PurpleTheme()
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  # ==========================================
105
  # --- ⚙️ Backend Logic & Functions ---
106
  # ==========================================
107
 
108
  def get_tree_html(dir_path):
109
- """递归生成可折叠的 HTML 目录树"""
110
  if not os.path.exists(dir_path):
111
  return "<div style='margin-left: 15px; color: #888;'><i>Directory missing</i></div>"
112
 
113
  def build_html(current_path):
114
  html = ""
115
- try:
116
- items = sorted(os.listdir(current_path))
117
- except Exception:
118
- return ""
119
 
120
- if not items:
121
- return ""
122
 
123
  for item in items:
124
  item_path = os.path.join(current_path, item)
@@ -134,13 +179,14 @@ def get_tree_html(dir_path):
134
  content = build_html(dir_path)
135
  return content if content else "<div style='margin-left: 15px; color: #888;'><i>Empty directory</i></div>"
136
 
137
- def get_debug_info():
138
- """返回渲染好的目录树 HTML 容器"""
139
- papers_tree = get_tree_html(PAPERS_DIR)
140
- output_tree = get_tree_html(OUTPUT_DIR)
141
 
142
  html = f"""
143
  <div style="font-family: 'IBM Plex Mono', monospace; font-size: 13px; background-color: #1e1e1e; border: 1px solid #C084FC; border-radius: 8px; padding: 16px; max-height: 400px; overflow-y: auto;">
 
144
  <details open style="margin-bottom: 12px; cursor: pointer;">
145
  <summary style="outline: none; font-size: 15px; color: #A855F7;">📁 <b>papers/</b></summary>
146
  {papers_tree}
@@ -153,164 +199,133 @@ def get_debug_info():
153
  """
154
  return html
155
 
156
- def save_api_settings(api_key, api_base_url=None):
157
  if not api_key:
158
- return "❌ Key cannot be empty", get_debug_info(), False
 
 
 
 
 
 
 
159
  try:
160
- config = {}
161
- if os.path.exists(CONFIG_PATH):
162
- with open(CONFIG_PATH, "r", encoding="utf-8") as f:
163
- config = yaml.safe_load(f) or {}
164
- config.setdefault("api_keys", {})["gemini_api_key"] = api_key
165
- if api_base_url:
166
- config["api_base_url"] = api_base_url
167
- with open(CONFIG_PATH, "w", encoding="utf-8") as f:
168
- yaml.dump(config, f, allow_unicode=True)
169
-
170
- success_msg = "✅ Key saved"
171
- if api_base_url:
172
- success_msg += ", Base URL updated"
173
-
174
- # 返回成功消息和 True (表示 API 已就绪)
175
- return success_msg, get_debug_info(), True
176
- except Exception as e:
177
- return f"❌ Error: {str(e)}", get_debug_info(), False
178
-
179
- def save_pdf(file):
180
- if file is None: return "❌ Please select a PDF first", get_debug_info(), False
181
- try:
182
- for f in os.listdir(PAPERS_DIR):
183
- file_to_del = os.path.join(PAPERS_DIR, f)
184
- if os.path.isfile(file_to_del):
185
- os.remove(file_to_del)
186
 
187
- file_path = os.path.join(PAPERS_DIR, os.path.basename(file.name))
188
  shutil.copy(file.name, file_path)
189
- # 返回成功消息和 True (表示 PDF 已就绪)
190
- return f"✅ Saved: {os.path.basename(file.name)}", get_debug_info(), True
191
  except Exception as e:
192
- return f"❌ Error: {str(e)}", get_debug_info(), False
193
 
194
- def clear_pdf():
195
- """清空 PDF 并锁定后续所有步骤"""
196
  try:
197
- deleted_files = []
198
- deleted_dirs = []
199
-
200
- pdf_names = [f for f in os.listdir(PAPERS_DIR) if f.endswith('.pdf')]
201
-
202
- for f in os.listdir(PAPERS_DIR):
203
- file_to_del = os.path.join(PAPERS_DIR, f)
204
- if os.path.isfile(file_to_del):
205
- os.remove(file_to_del)
206
- deleted_files.append(f)
207
-
208
- if os.path.exists(OUTPUT_DIR):
209
- for pdf_name in pdf_names:
210
- base_name = os.path.splitext(pdf_name)[0]
211
- base_name_us = base_name.replace(" ", "_")
212
-
213
- for item in os.listdir(OUTPUT_DIR):
214
- if item in [base_name, base_name_us, pdf_name]:
215
- dir_to_del = os.path.join(OUTPUT_DIR, item)
216
- if os.path.isdir(dir_to_del):
217
- shutil.rmtree(dir_to_del)
218
- deleted_dirs.append(item)
219
-
220
- if os.path.exists(ZIP_OUTPUT_PATH):
221
- os.remove(ZIP_OUTPUT_PATH)
222
 
223
  disable_btn = gr.update(interactive=False)
224
- # 返回:状态文本, debug视窗, pdf_ready=False, 以及锁定4个生成按钮
225
- if deleted_files or deleted_dirs:
226
- return f"����️ Workspace cleared", get_debug_info(), False, disable_btn, disable_btn, disable_btn, disable_btn
227
- return "ℹ️ Workspace is already empty", get_debug_info(), False, disable_btn, disable_btn, disable_btn, disable_btn
228
  except Exception as e:
229
- return f"❌ Error deleting file: {str(e)}", get_debug_info(), False, gr.update(), gr.update(), gr.update(), gr.update()
230
-
231
- def run_mineru_parsing_and_dag_gen(progress=gr.Progress()):
 
 
 
 
 
 
 
 
 
 
 
 
232
  no_change = gr.update()
233
  disable_btn = gr.update(interactive=False)
 
234
 
235
- if not os.path.exists(PAPERS_DIR) or not any(f.endswith('.pdf') for f in os.listdir(PAPERS_DIR)):
236
- yield "❌ No PDF file found", get_debug_info(), "No execution logs.", no_change, no_change, no_change, no_change
237
  return
238
 
239
  full_log = ""
240
  try:
241
- env = os.environ.copy()
242
- env["MINERU_FORMULA_ENABLE"] = "false"
243
- env["MINERU_TABLE_ENABLE"] = "false"
244
- env["MINERU_DEVICE_MODE"] = "cpu"
245
- env["MINERU_VIRTUAL_VRAM_SIZE"] = "8"
246
-
247
- command_mineru = ["mineru", "-p", PAPERS_DIR, "-o", OUTPUT_DIR]
248
- full_log += "--- Mineru Executing ---\n"
249
 
250
- # 初始化进度条 (10%)
251
  progress(0.1, desc="启动 Mineru 解析...")
252
- yield "⏳ Executing Mineru parsing...", get_debug_info(), full_log, no_change, no_change, no_change, no_change
253
 
254
  process_mineru = subprocess.Popen(
255
  command_mineru, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1
256
  )
257
 
258
- # 维持在 30%,显示解析中
259
  progress(0.3, desc="Mineru 正在解析 PDF...")
260
  for line in iter(process_mineru.stdout.readline, ''):
261
  full_log += line
262
- yield "⏳ Executing Mineru parsing...", get_debug_info(), full_log, no_change, no_change, no_change, no_change
 
 
 
 
263
  process_mineru.stdout.close()
264
  returncode_mineru = process_mineru.wait()
265
 
266
  if returncode_mineru != 0:
267
  progress(1.0, desc="Mineru 解析失败")
268
- yield f"❌ Mineru parsing failed (Exit Code: {returncode_mineru})", get_debug_info(), full_log, disable_btn, disable_btn, disable_btn, disable_btn
269
  return
270
 
271
  command_dag = [sys.executable, "gen_dag.py"]
272
  full_log += "\n--- DAG Gen Executing ---\n"
273
 
274
- # Mineru 完成,进入 DAG 生成 (60%)
275
  progress(0.6, desc="执行 DAG 生成...")
276
- yield "⏳ Mineru parsing complete, executing DAG generation...", get_debug_info(), full_log, no_change, no_change, no_change, no_change
277
 
278
  process_dag = subprocess.Popen(
279
- command_dag, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1
280
  )
281
 
282
- # 维持在 80%
283
  progress(0.8, desc="构建图结构中...")
284
  for line in iter(process_dag.stdout.readline, ''):
285
  full_log += line
286
- yield "⏳ Executing DAG generation...", get_debug_info(), full_log, no_change, no_change, no_change, no_change
 
 
 
 
287
  process_dag.stdout.close()
288
  returncode_dag = process_dag.wait()
289
 
290
- # 解析完全成功 (100%)
291
  if returncode_dag == 0:
292
  progress(1.0, desc="解析与构建完成!")
293
- status = "✅ PDF parsing & DAG generation fully completed"
294
  enable_btn = gr.update(interactive=True)
295
- yield status, get_debug_info(), full_log, enable_btn, enable_btn, enable_btn, enable_btn
296
  else:
297
  progress(1.0, desc="DAG 生成失败")
298
- status = f"❌ DAG generation failed (Exit Code: {returncode_dag})"
299
- yield status, get_debug_info(), full_log, disable_btn, disable_btn, disable_btn, disable_btn
300
 
301
  except Exception as e:
302
  progress(1.0, desc="发生异常")
303
- error_log = full_log + f"\n[Global Exception] Exception occurred:\n{str(e)}"
304
- yield " Execution Exception", get_debug_info(), error_log, disable_btn, disable_btn, disable_btn, disable_btn
 
305
 
306
-
307
- def run_final_generation(task_type="all", progress=gr.Progress()):
308
- """
309
- 使用队列和多线程实时获取子进程的流式输出,并通过 yield 推送到前端界面。
310
- 引入 progress 参数更新进度条。
311
- """
312
- if not os.path.exists(OUTPUT_DIR):
313
- yield "❌ Please run the parsing step first", get_debug_info(), "No output folder found.", gr.update(visible=False)
314
  return
315
 
316
  scripts_to_run = []
@@ -318,18 +333,16 @@ def run_final_generation(task_type="all", progress=gr.Progress()):
318
  elif task_type == "poster": scripts_to_run = ["gen_poster.py"]
319
  elif task_type == "pr": scripts_to_run = ["gen_pr.py"]
320
  elif task_type == "all": scripts_to_run = ["gen_ppt.py", "gen_poster.py", "gen_pr.py"]
321
- else:
322
- yield "❌ Unknown task type", get_debug_info(), "Invalid task_type.", gr.update(visible=False)
323
- return
324
-
325
- full_log = f"🚀 Preparing to start {len(scripts_to_run)} tasks...\n"
326
 
327
- # 启动阶段 (10%)
 
 
328
  progress(0.1, desc=f"启动 {task_type.upper()} 生成任务...")
329
- yield f"⏳ Starting {task_type.upper()} generation...", get_debug_info(), full_log, gr.update(visible=False)
330
 
331
  q = queue.Queue()
332
  processes = []
 
333
 
334
  def enqueue_output(out, script_name):
335
  for line in iter(out.readline, ''):
@@ -339,11 +352,7 @@ def run_final_generation(task_type="all", progress=gr.Progress()):
339
  try:
340
  for script in scripts_to_run:
341
  p = subprocess.Popen(
342
- [sys.executable, script],
343
- stdout=subprocess.PIPE,
344
- stderr=subprocess.STDOUT,
345
- text=True,
346
- bufsize=1
347
  )
348
  processes.append((script, p))
349
  t = threading.Thread(target=enqueue_output, args=(p.stdout, script))
@@ -351,48 +360,43 @@ def run_final_generation(task_type="all", progress=gr.Progress()):
351
  t.start()
352
 
353
  active_processes = len(processes)
 
354
 
355
- # 生成阶段维持在 (50%)
356
- progress(0.5, desc=f"正在并行生成 {task_type.upper()} 资产...")
357
  while active_processes > 0 or not q.empty():
358
  try:
359
  script_name, line = q.get(timeout=0.1)
360
  full_log += f"[{script_name}] {line}"
361
- yield f"⏳ Generating {task_type.upper()}...", get_debug_info(), full_log, gr.update(visible=False)
 
 
 
 
362
  except queue.Empty:
363
  active_processes = sum(1 for _, p in processes if p.poll() is None)
364
 
365
- success = True
366
- for script, p in processes:
367
- if p.returncode != 0:
368
- success = False
369
- full_log += f"\n❌ [Error] {script} returned non-zero exit code (Exit Code: {p.returncode})\n"
370
 
371
  if not success:
372
  progress(1.0, desc="生成失败")
373
- yield f"❌ {task_type.upper()} contains failed tasks, please check logs", get_debug_info(), full_log, gr.update(visible=False)
374
  return
375
 
376
  full_log += "\n📦 Zipping output directory...\n"
377
-
378
- # 压缩阶段 (90%)
379
  progress(0.9, desc="打包压缩结果...")
380
- yield f"⏳ Zipping outputs...", get_debug_info(), full_log, gr.update(visible=False)
381
-
382
- zip_base_name = ZIP_OUTPUT_PATH.replace(".zip", "")
383
- shutil.make_archive(zip_base_name, 'zip', OUTPUT_DIR)
384
 
385
- full_log += "✅ All tasks and zipping completed successfully.\n"
 
386
 
387
- # 完成阶段 (100%)
388
  progress(1.0, desc="全部完成!")
389
- yield f"✅ {task_type.upper()} generated and zipped successfully", get_debug_info(), full_log, gr.update(value=ZIP_OUTPUT_PATH, visible=True)
390
 
391
  except Exception as e:
392
  progress(1.0, desc="发生全局异常")
393
- error_log = full_log + f"\n[Global Exception] Exception occurred:\n{str(e)}"
394
- yield " Global exception during final generation", get_debug_info(), error_log, gr.update(visible=False)
395
-
396
 
397
  # ==========================================
398
  # --- 🚀 UI Configuration & Advanced CSS ---
@@ -452,8 +456,42 @@ body, .gradio-container {
452
  border-color: rgba(168, 85, 247, 0.3) !important;
453
  }
454
 
455
- .gradio-group > div,
456
- .gradio-group > .form { border-radius: 12px !important; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457
 
458
  #pdf-upload-box {
459
  border: 2px dashed rgba(192, 132, 252, 0.6) !important;
@@ -503,7 +541,7 @@ body, .gradio-container {
503
  margin-right: auto !important;
504
  margin-top: 10px !important;
505
  margin-bottom: 10px !important;
506
- display: block !important;
507
  transition: transform 0.3s cubic-bezier(0.175, 0.885, 0.32, 1.275), box-shadow 0.3s ease !important;
508
  box-shadow: 0 4px 15px rgba(126, 34, 206, 0.3) !important;
509
  cursor: pointer !important;
@@ -543,7 +581,6 @@ body, .gradio-container {
543
  }
544
  .action-btn:active { transform: translateY(2px) scale(0.98) !important; box-shadow: 0 2px 10px rgba(147, 51, 234, 0.2) !important; }
545
 
546
- /* ======== CSS FOR DISABLED BUTTONS ======== */
547
  .primary-action-btn:disabled, .action-btn:disabled {
548
  background: #e5e7eb !important;
549
  color: #9ca3af !important;
@@ -565,13 +602,15 @@ body, .gradio-container {
565
  ::-webkit-scrollbar-track { background: rgba(168, 85, 247, 0.05); border-radius: 4px; }
566
  ::-webkit-scrollbar-thumb { background: linear-gradient(135deg, #A855F7, #C084FC); border-radius: 4px; }
567
  ::-webkit-scrollbar-thumb:hover { background: linear-gradient(135deg, #9333EA, #A855F7); }
568
-
569
  details > summary { transition: color 0.2s ease; }
570
  details > summary:hover { color: #E9D5FF !important; }
571
  """
572
 
573
  with gr.Blocks(theme=purple_theme, css=custom_css) as demo:
574
- # 定义全局状态变量,用于实现依赖解锁
 
 
 
575
  api_saved_state = gr.State(False)
576
  pdf_ready_state = gr.State(False)
577
 
@@ -580,126 +619,126 @@ with gr.Blocks(theme=purple_theme, css=custom_css) as demo:
580
  gr.Markdown("One-click parsing of academic PDFs, DAG structuring, and multi-modal asset generation.", elem_id="subtitle")
581
 
582
  with gr.Row():
583
- # ================= LEFT COLUMN: SETTINGS & ACTIONS =================
584
  with gr.Column(scale=1):
585
-
586
  # 1. API Configuration
587
  with gr.Group(elem_classes="gradio-group"):
588
  gr.Markdown("### ⚙️ 1. Global API Configuration")
589
  with gr.Row():
590
- key_input = gr.Textbox(label="API Key", type="password", placeholder="sk-...", scale=1)
591
  api_base_url_input = gr.Textbox(label="Base URL (Optional)", placeholder="https://api.example.com", scale=1)
592
  key_btn = gr.Button("💾 Save API Configuration")
593
-
594
- api_status = gr.Textbox(
595
- show_label=False, placeholder="Waiting for API configuration...", lines=1, interactive=False, elem_classes="status-text"
596
- )
597
-
598
  # 2. Document Parsing
599
  with gr.Group(elem_classes="gradio-group"):
600
  gr.Markdown("### 📄 2. Document Parsing")
601
-
602
- pdf_input = gr.File(
603
- label="Upload Document",
604
- file_types=[".pdf"],
605
- elem_id="pdf-upload-box"
606
- )
607
-
608
- # 默认锁定
609
  parse_btn = gr.Button("🚀 Start Mineru & DAG Extraction", elem_classes="primary-action-btn", interactive=False)
610
 
611
- parse_status = gr.Textbox(
612
- show_label=False, placeholder="Waiting for document upload...", lines=1, interactive=False, elem_classes="status-text"
613
- )
614
 
615
  # 3. Asset Generation
616
  with gr.Group(elem_classes="gradio-group"):
617
  gr.Markdown("### 🎯 3. Asset Generation")
618
- gr.Markdown("Generate final formats based on DAG structure:")
619
-
620
  with gr.Row(elem_classes="action-row"):
621
- # 默认锁定
622
  gen_ppt_btn = gr.Button("📊 Gen PPT", elem_classes="action-btn", interactive=False)
623
  gen_poster_btn = gr.Button("🖼️ Gen Poster", elem_classes="action-btn", interactive=False)
624
  gen_pr_btn = gr.Button("📰 Gen PR", elem_classes="action-btn", interactive=False)
625
-
626
- # 默认锁定
627
  gen_all_btn = gr.Button("✨ Generate All Assets (ALL)", elem_classes="primary-action-btn", interactive=False)
 
 
 
628
 
629
- # ================= RIGHT COLUMN: OUTPUTS & LOGS =================
630
  with gr.Column(scale=1):
631
-
632
  # 4. Results & Downloads
633
  with gr.Group(elem_classes="gradio-group"):
634
  gr.Markdown("### 📦 Generation Results & Download")
635
- gen_status = gr.Textbox(
636
- show_label=False, placeholder="No generation task currently...", lines=2, interactive=False, elem_classes="status-text"
 
 
 
 
 
 
 
637
  )
 
638
  download_file = gr.File(label="📥 Get Final Zip Archive", interactive=False, visible=False)
639
 
640
- # 5. Debugging & Terminal
641
  with gr.Group(elem_classes="gradio-group"):
642
  gr.Markdown("### 🛠️ Developer Monitoring (Debug Only)")
643
  with gr.Tabs():
644
  with gr.Tab("📜 Terminal Stream"):
645
- cmd_logs = gr.Textbox(
646
- label="Stdout / Stderr", placeholder="Waiting for task to start...", lines=14, interactive=False, elem_classes="log-box"
647
- )
648
-
649
  with gr.Tab("🔍 System Snapshot"):
650
  refresh_btn = gr.Button("🔄 Refresh Directory Tree")
651
- debug_view = gr.HTML(
652
- value=get_debug_info()
653
- )
 
 
 
 
 
 
 
654
 
655
- # ================= LOGIC BINDINGS =================
656
-
657
- # [步骤 1] 配置 API
658
- # 成功后不仅更新 UI 文本,还会将 api_saved_state 设置为 True
659
  key_btn.click(
660
  fn=save_api_settings,
661
- inputs=[key_input, api_base_url_input],
662
- outputs=[api_status, debug_view, api_saved_state]
663
  )
664
 
665
- # [步骤 2.1] 上传 PDF
666
- # 成功后更新文本,并将 pdf_ready_state 设置为 True
667
- pdf_input.upload(fn=save_pdf, inputs=pdf_input, outputs=[parse_status, debug_view, pdf_ready_state])
 
 
668
 
669
- # [步骤 2.2] 清除 PDF
670
- # 会锁定第四步的所有生成按钮,同时将 pdf_ready_state 重置为 False
671
  pdf_input.clear(
672
  fn=clear_pdf,
673
- outputs=[parse_status, debug_view, pdf_ready_state, gen_ppt_btn, gen_poster_btn, gen_pr_btn, gen_all_btn]
 
674
  )
675
 
676
- # [依赖检查] 当 api 状态或 pdf 状态发生变化时,检查是否解锁"开始解析"按钮
677
  def check_parse_btn_ready(api_ready, pdf_ready):
678
  return gr.update(interactive=(api_ready and pdf_ready))
679
 
680
  api_saved_state.change(fn=check_parse_btn_ready, inputs=[api_saved_state, pdf_ready_state], outputs=parse_btn)
681
  pdf_ready_state.change(fn=check_parse_btn_ready, inputs=[api_saved_state, pdf_ready_state], outputs=parse_btn)
682
 
683
- # [步骤 2.3] 执行解析
684
- # 如果完全成功,函数最后的一个 yield 会连带输出 interactive=True 给这4个生成按钮
685
  parse_btn.click(
686
  fn=run_mineru_parsing_and_dag_gen,
687
- outputs=[parse_status, debug_view, cmd_logs, gen_ppt_btn, gen_poster_btn, gen_pr_btn, gen_all_btn]
 
688
  )
689
 
690
- # [步骤 3] 资产生成
691
- # progress 对象注入到包装函数中
692
- def trigger_gen_ppt(progress=gr.Progress()): yield from run_final_generation("ppt", progress)
693
- def trigger_gen_poster(progress=gr.Progress()): yield from run_final_generation("poster", progress)
694
- def trigger_gen_pr(progress=gr.Progress()): yield from run_final_generation("pr", progress)
695
- def trigger_gen_all(progress=gr.Progress()): yield from run_final_generation("all", progress)
696
-
697
- gen_ppt_btn.click(fn=trigger_gen_ppt, outputs=[gen_status, debug_view, cmd_logs, download_file])
698
- gen_poster_btn.click(fn=trigger_gen_poster, outputs=[gen_status, debug_view, cmd_logs, download_file])
699
- gen_pr_btn.click(fn=trigger_gen_pr, outputs=[gen_status, debug_view, cmd_logs, download_file])
700
- gen_all_btn.click(fn=trigger_gen_all, outputs=[gen_status, debug_view, cmd_logs, download_file])
701
 
702
- refresh_btn.click(fn=get_debug_info, outputs=debug_view)
 
 
 
 
 
 
 
 
 
703
 
704
  if __name__ == "__main__":
705
- demo.launch(debug=True)
 
 
 
 
1
  import gradio as gr
2
  import os
 
3
  import shutil
4
  import subprocess
5
  import sys
6
  import queue
7
  import threading
8
+ import uuid
9
  from datetime import datetime
10
  from concurrent.futures import ThreadPoolExecutor, as_completed
11
  from typing import Iterable
12
  from gradio.themes import Soft
13
  from gradio.themes.utils import colors, fonts, sizes
14
 
 
 
 
 
 
 
 
15
 
16
+ import threading
17
+ import subprocess
18
+ # ==========================================
19
+ # --- 🌐 异步安装 Playwright 浏览器 ---
20
+ # ==========================================
21
def setup_playwright():
    """Download the Playwright Chromium browser and report the result on stdout.

    Runs ``playwright install chromium`` as a subprocess. Any failure,
    including the ``playwright`` package not being importable, is printed
    instead of raised, so this is safe to use as a background-thread target.
    """
    try:
        # Imported only to fail fast when the package is not installed.
        import playwright  # noqa: F401

        print("⏳ [System] Downloading Playwright Chromium in background...")
        # NOTE: no --with-deps flag is passed; only the browser itself is fetched.
        install_cmd = ["playwright", "install", "chromium"]
        subprocess.run(install_cmd, check=True)
    except Exception as exc:
        print(f"❌ [System] Playwright setup failed: {exc}")
    else:
        print("✅ [System] Playwright browsers ready.")
31
+
32
+ # Key step: run the download on a background daemon thread so the main process continues immediately.
33
+ threading.Thread(target=setup_playwright, daemon=True).start()
34
+
35
+ # ==========================================
36
+ # --- 📁 全局目录配置 (修改为 Session 基础目录) ---
37
+ # ==========================================
38
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
39
+ SESSIONS_BASE_DIR = os.path.join(BASE_DIR, "user_sessions")
40
+ os.makedirs(SESSIONS_BASE_DIR, exist_ok=True)
41
+
42
def get_user_dirs(session_id):
    """Return (and create) the isolated working directories for one session.

    Args:
        session_id: Name of the session's folder under ``SESSIONS_BASE_DIR``.
            Must be a single, non-empty path component.

    Returns:
        A ``(papers_dir, output_dir, zip_path)`` tuple. Both directories are
        created if missing; ``zip_path`` is only computed, never created here.

    Raises:
        ValueError: If ``session_id`` is empty, is ``.``/``..`` or contains a
            path separator, i.e. would not map to its own folder directly
            under ``SESSIONS_BASE_DIR``.
    """
    session_name = os.fspath(session_id)
    # Reject ids that would alias the shared base directory or escape from it.
    if (
        not session_name
        or session_name in (os.curdir, os.pardir)
        or os.path.basename(session_name) != session_name
    ):
        raise ValueError(f"Invalid session id: {session_id!r}")

    user_base = os.path.join(SESSIONS_BASE_DIR, session_name)
    papers_dir = os.path.join(user_base, "papers")
    output_dir = os.path.join(user_base, "mineru_outputs")
    zip_path = os.path.join(user_base, "mineru_results.zip")

    os.makedirs(papers_dir, exist_ok=True)
    os.makedirs(output_dir, exist_ok=True)
    return papers_dir, output_dir, zip_path
52
+
53
+
54
+ import time
55
+
56
+ # ==========================================
57
+ # --- 🧹 垃圾回收 (后台清理过期 Session) ---
58
+ # ==========================================
59
+ # Session expiry time (e.g. 2 hours = 7200 seconds)
60
+ SESSION_MAX_AGE_SECONDS = 2 * 60 * 60
61
+ # Interval between cleanup scans (e.g. every 30 minutes = 1800 seconds)
62
+ CLEANUP_INTERVAL_SECONDS = 30 * 60
63
+
64
def _session_last_activity(folder_path):
    """Return the most recent mtime of *folder_path* or anything beneath it."""
    latest = os.path.getmtime(folder_path)
    for root, dir_names, file_names in os.walk(folder_path):
        for entry in dir_names + file_names:
            try:
                latest = max(latest, os.path.getmtime(os.path.join(root, entry)))
            except OSError:
                # Entry vanished (or is unreadable) while walking; ignore it.
                continue
    return latest


def _sweep_expired_sessions(now):
    """Delete each session folder idle for more than SESSION_MAX_AGE_SECONDS."""
    if not os.path.exists(SESSIONS_BASE_DIR):
        return

    for session_folder in os.listdir(SESSIONS_BASE_DIR):
        folder_path = os.path.join(SESSIONS_BASE_DIR, session_folder)
        # Only directories are session folders.
        if not os.path.isdir(folder_path):
            continue

        try:
            idle_seconds = now - _session_last_activity(folder_path)
        except OSError as e:
            # One unreadable folder must not abort the rest of the scan.
            print(f"⚠️ [Garbage Collector] Error during cleanup scan: {e}")
            continue

        if idle_seconds <= SESSION_MAX_AGE_SECONDS:
            continue

        try:
            shutil.rmtree(folder_path)
            print(f"🧹 [Garbage Collector] Deleted expired session: {session_folder}")
        except Exception as e:
            print(f"⚠️ [Garbage Collector] Failed to delete {session_folder}: {e}")


def cleanup_expired_sessions():
    """Run the expired-session sweep forever, sleeping between scans.

    A session's age is measured from the newest modification time found
    anywhere inside its folder. The top-level directory's own mtime is not
    enough: it does not change when files are written inside the nested
    ``papers/`` or ``mineru_outputs/`` folders, so an active session would
    otherwise look idle and be deleted.

    This function never returns; errors are printed and the loop continues.
    """
    while True:
        try:
            _sweep_expired_sessions(time.time())
        except Exception as e:
            print(f"⚠️ [Garbage Collector] Error during cleanup scan: {e}")

        # Sleep until the next scheduled scan.
        time.sleep(CLEANUP_INTERVAL_SECONDS)
90
+
91
def start_garbage_collector():
    """Launch ``cleanup_expired_sessions`` on a daemon thread and announce it."""
    threading.Thread(target=cleanup_expired_sessions, daemon=True).start()
    print("🚀 [Garbage Collector] Background cleanup service started.")
96
 
 
 
97
 
98
  # ==========================================
99
  # --- 🎨 Custom Purple Theme Definition ---
100
  # ==========================================
101
  colors.purple = colors.Color(
102
+ name="purple", c50="#FAF5FF", c100="#F3E8FF", c200="#E9D5FF",
103
+ c300="#DAB2FF", c400="#C084FC", c500="#A855F7", c600="#9333EA",
104
+ c700="#7E22CE", c800="#6B21A8", c900="#581C87", c950="#3B0764",
 
 
 
 
 
 
 
 
 
105
  )
106
 
107
  class PurpleTheme(Soft):
108
+ def __init__(self, **kwargs):
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  super().__init__(
110
+ primary_hue=colors.gray, secondary_hue=colors.purple, neutral_hue=colors.slate,
111
+ font=(fonts.GoogleFont("Outfit"), "Arial", "sans-serif"),
112
+ font_mono=(fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace"),
 
 
 
113
  )
114
  super().set(
115
  background_fill_primary="*primary_50",
 
117
  body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
118
  body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
119
  button_primary_text_color="white",
 
120
  button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
121
  button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
 
 
122
  button_secondary_text_color="black",
 
123
  button_secondary_background_fill="linear-gradient(90deg, *primary_300, *primary_300)",
 
 
 
124
  slider_color="*secondary_500",
 
 
125
  block_border_width="3px",
126
  block_shadow="*shadow_drop_lg",
127
  button_primary_shadow="*shadow_drop_lg",
 
 
 
128
  )
129
 
130
  purple_theme = PurpleTheme()
131
 
132
+ # ==========================================
133
+ # --- 🚀 HTML Progress Bar Components ---
134
+ # ==========================================
135
def empty_progress_html(text="Waiting for action..."):
    """Return the idle placeholder markup shown in place of a progress bar.

    Args:
        text: Message placed inside the dashed, transparent container. It is
            inserted into the markup as-is.
    """
    placeholder = f"""
    <div class="custom-progress-container" style="background-color: transparent; border: 2px dashed rgba(168, 85, 247, 0.4);">
        <div class="custom-progress-text" style="color: #A855F7;">{text}</div>
    </div>
    """
    return placeholder
141
+
142
def create_progress_html(percent, text, status="active"):
    """Return the markup for a progress bar with a caption.

    Args:
        percent: Fill level; used both as the bar's CSS width (``{percent}%``)
            and in the caption.
        text: Caption shown on the bar. It is HTML-escaped, because callers
            pass uploaded file names and exception messages that must not be
            interpreted as markup.
        status: CSS class added to the bar: "active" (purple animated
            stripes), "success" (green) or "error" (red).

    Returns:
        The HTML snippet as a string.
    """
    from html import escape

    safe_text = escape(str(text))
    return f"""
    <div class="custom-progress-container">
        <div class="custom-progress-bar {status}" style="width: {percent}%;"></div>
        <div class="custom-progress-text">{safe_text} ({percent}%)</div>
    </div>
    """
152
+
153
  # ==========================================
154
  # --- ⚙️ Backend Logic & Functions ---
155
  # ==========================================
156
 
157
  def get_tree_html(dir_path):
 
158
  if not os.path.exists(dir_path):
159
  return "<div style='margin-left: 15px; color: #888;'><i>Directory missing</i></div>"
160
 
161
  def build_html(current_path):
162
  html = ""
163
+ try: items = sorted(os.listdir(current_path))
164
+ except Exception: return ""
 
 
165
 
166
+ if not items: return ""
 
167
 
168
  for item in items:
169
  item_path = os.path.join(current_path, item)
 
179
  content = build_html(dir_path)
180
  return content if content else "<div style='margin-left: 15px; color: #888;'><i>Empty directory</i></div>"
181
 
182
+ def get_debug_info(session_id):
183
+ papers_dir, output_dir, _ = get_user_dirs(session_id)
184
+ papers_tree = get_tree_html(papers_dir)
185
+ output_tree = get_tree_html(output_dir)
186
 
187
  html = f"""
188
  <div style="font-family: 'IBM Plex Mono', monospace; font-size: 13px; background-color: #1e1e1e; border: 1px solid #C084FC; border-radius: 8px; padding: 16px; max-height: 400px; overflow-y: auto;">
189
+ <div style="color: #888; margin-bottom: 8px;">Session ID: {session_id[:8]}...</div>
190
  <details open style="margin-bottom: 12px; cursor: pointer;">
191
  <summary style="outline: none; font-size: 15px; color: #A855F7;">📁 <b>papers/</b></summary>
192
  {papers_tree}
 
199
  """
200
  return html
201
 
202
def save_api_settings(api_key, api_base_url, session_id):
    """Validate the API settings and hand them back for storage by the caller.

    Nothing is written to disk. Surrounding whitespace is stripped from both
    values first, so a whitespace-only key is rejected and a pasted key or
    URL does not keep a stray space or newline.

    Args:
        api_key: API key entered by the user.
        api_base_url: Optional base URL; may be empty or ``None``.
        session_id: Session whose directory tree is rendered in the debug HTML.

    Returns:
        A 5-tuple ``(status message, debug HTML, ready flag, api key,
        base url)``. On an empty key the flag is ``False`` and both returned
        strings are empty.
    """
    api_key = (api_key or "").strip()
    api_base_url = (api_base_url or "").strip()

    if not api_key:
        return "❌ Key cannot be empty", get_debug_info(session_id), False, "", ""

    success_msg = "✅ Key saved securely in session memory"
    if api_base_url:
        success_msg += ", Base URL updated"
    return success_msg, get_debug_info(session_id), True, api_key, api_base_url
208
+
209
def save_pdf(file, session_id):
    """Store an uploaded PDF as the only file in the session's papers folder.

    Args:
        file: Uploaded file object whose ``name`` attribute is the source
            path, or ``None`` when nothing is selected.
        session_id: Session whose ``papers`` directory receives the copy.

    Returns:
        A 3-tuple ``(progress HTML update, debug HTML, ready flag)``. The flag
        is ``True`` only when the copy succeeded.
    """
    if file is None:
        return gr.update(visible=False), get_debug_info(session_id), False

    try:
        papers_dir = get_user_dirs(session_id)[0]

        # Remove previously uploaded files; sub-directories are left alone.
        for entry in os.listdir(papers_dir):
            stale_path = os.path.join(papers_dir, entry)
            if os.path.isfile(stale_path):
                os.remove(stale_path)

        destination = os.path.join(papers_dir, os.path.basename(file.name))
        shutil.copy(file.name, destination)

        banner = create_progress_html(
            100, f"✅ PDF Uploaded: {os.path.basename(file.name)}", "success"
        )
        return gr.update(value=banner, visible=True), get_debug_info(session_id), True
    except Exception as exc:
        banner = create_progress_html(0, f"❌ Error: {str(exc)}", "error")
        return gr.update(value=banner, visible=True), get_debug_info(session_id), False
223
 
224
def clear_pdf(session_id):
    """Delete the session's entire working folder and reset the dependent UI.

    Args:
        session_id: Name of the session folder under ``SESSIONS_BASE_DIR``.

    Returns:
        An 8-tuple. On success: two ``visible=False`` updates, the debug HTML,
        ``False`` (PDF no longer ready) and four ``interactive=False`` updates.
        On failure: an error progress bar, a no-op update, the debug HTML,
        ``False`` and four no-op updates.
    """
    try:
        sessions_root = os.path.normpath(SESSIONS_BASE_DIR)
        user_base = os.path.normpath(os.path.join(sessions_root, session_id))
        # Only ever delete a direct child of the sessions root. An empty id
        # would resolve to the root itself (wiping every user's session) and
        # ids containing ".." or separators would resolve outside it.
        if os.path.dirname(user_base) != sessions_root:
            raise ValueError(f"Invalid session id: {session_id!r}")

        if os.path.exists(user_base):
            shutil.rmtree(user_base)

        disable_btn = gr.update(interactive=False)
        return (
            gr.update(visible=False),
            gr.update(visible=False),
            get_debug_info(session_id),
            False,
            disable_btn,
            disable_btn,
            disable_btn,
            disable_btn,
        )
    except Exception as e:
        return (
            gr.update(value=create_progress_html(0, f"❌ Clear Error: {str(e)}", "error"), visible=True),
            gr.update(),
            get_debug_info(session_id),
            False,
            gr.update(),
            gr.update(),
            gr.update(),
            gr.update(),
        )
234
+
235
def build_user_env(api_key, api_base_url, papers_dir, output_dir):
    """Return a copy of the process environment prepared for one session's subprocesses.

    The copy always carries the fixed MINERU_* settings and the session's
    papers/output directories. GEMINI_API_KEY and GEMINI_API_BASE_URL are
    only overridden when a non-empty value is supplied.
    """
    env = os.environ.copy()
    env.update({
        "MINERU_FORMULA_ENABLE": "false",
        "MINERU_TABLE_ENABLE": "false",
        "MINERU_DEVICE_MODE": "cpu",
        "MINERU_VIRTUAL_VRAM_SIZE": "8",
    })

    optional_overrides = {
        "GEMINI_API_KEY": api_key,
        "GEMINI_API_BASE_URL": api_base_url,
    }
    env.update({name: value for name, value in optional_overrides.items() if value})

    env.update({
        "USER_PAPERS_DIR": papers_dir,
        "USER_OUTPUT_DIR": output_dir,
    })
    return env
246
+
247
def _reap_process(proc):
    """Close *proc*'s output pipe and make sure the child is no longer running."""
    if proc.stdout is not None and not proc.stdout.closed:
        proc.stdout.close()
    if proc.poll() is None:
        # Only reached when streaming stopped before the normal wait(),
        # e.g. an exception mid-loop or the generator being closed early.
        proc.kill()
        proc.wait()


def run_mineru_parsing_and_dag_gen(session_id, api_key, api_base_url, progress=gr.Progress()):
    """Run Mineru on the session's PDF, then gen_dag.py, streaming progress to the UI.

    Generator yielding 7-tuples: progress-bar update, refreshed debug HTML,
    the accumulated log text, and one update for each of the four generation
    buttons. The buttons are enabled only when both stages exit with code 0;
    a failing stage or an exception disables them.

    Each child process is cleaned up in a ``finally`` block so it cannot
    outlive the stream when the generator is interrupted.
    """
    no_change = gr.update()
    disable_btn = gr.update(interactive=False)
    papers_dir, output_dir, _ = get_user_dirs(session_id)

    def frame(percent, message, status, log_text, btn):
        """Build one output tuple; *btn* is applied to all four buttons."""
        bar = gr.update(value=create_progress_html(percent, message, status), visible=True)
        return bar, get_debug_info(session_id), log_text, btn, btn, btn, btn

    if not os.path.exists(papers_dir) or not any(f.endswith('.pdf') for f in os.listdir(papers_dir)):
        yield frame(0, "❌ No PDF file found", "error", "No execution logs.", no_change)
        return

    full_log = ""
    try:
        env = build_user_env(api_key, api_base_url, papers_dir, output_dir)
        command_mineru = ["mineru", "-p", papers_dir, "-o", output_dir]
        full_log += f"--- Mineru Executing (Session: {session_id[:8]}) ---\n"

        # 10%
        progress(0.1, desc="启动 Mineru 解析...")
        yield frame(10, "⏳ Starting Mineru parsing...", "active", full_log, no_change)

        process_mineru = subprocess.Popen(
            command_mineru, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1
        )
        try:
            # 30%
            progress(0.3, desc="Mineru 正在解析 PDF...")
            for line in iter(process_mineru.stdout.readline, ''):
                full_log += line
                # Dual output 1 (Mineru stage): mirror each line to the server
                # console in addition to the UI log.
                print(f"[Mineru | {session_id[:6]}] {line}", end="", flush=True)
                yield frame(30, "⏳ Mineru parsing PDF...", "active", full_log, no_change)
            process_mineru.stdout.close()
            returncode_mineru = process_mineru.wait()
        finally:
            _reap_process(process_mineru)

        if returncode_mineru != 0:
            progress(1.0, desc="Mineru 解析失败")
            yield frame(30, f"❌ Mineru failed (Code {returncode_mineru})", "error", full_log, disable_btn)
            return

        command_dag = [sys.executable, "gen_dag.py"]
        full_log += "\n--- DAG Gen Executing ---\n"

        # 60%
        progress(0.6, desc="执行 DAG 生成...")
        yield frame(60, "⏳ Executing DAG generation...", "active", full_log, no_change)

        process_dag = subprocess.Popen(
            command_dag, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1
        )
        try:
            # 80%
            progress(0.8, desc="构建图结构中...")
            for line in iter(process_dag.stdout.readline, ''):
                full_log += line
                # Dual output 2 (DAG generation stage): mirror each line to the
                # server console in addition to the UI log.
                print(f"[DAG | {session_id[:6]}] {line}", end="", flush=True)
                yield frame(80, "⏳ Building DAG...", "active", full_log, no_change)
            process_dag.stdout.close()
            returncode_dag = process_dag.wait()
        finally:
            _reap_process(process_dag)

        if returncode_dag == 0:
            progress(1.0, desc="解析与构建完成!")
            enable_btn = gr.update(interactive=True)
            yield frame(100, "✅ Fully completed", "success", full_log, enable_btn)
        else:
            progress(1.0, desc="DAG 生成失败")
            yield frame(80, "❌ DAG generation failed", "error", full_log, disable_btn)

    except Exception as e:
        progress(1.0, desc="发生异常")
        error_log = full_log + f"\n[Global Exception]:\n{str(e)}"
        print(f"[Exception | {session_id[:6]}] {str(e)}", flush=True)
        yield frame(0, "❌ Execution Exception", "error", error_log, disable_btn)
323
 
324
+ def run_final_generation(task_type, session_id, api_key, api_base_url, progress=gr.Progress()):
325
+ papers_dir, output_dir, zip_path = get_user_dirs(session_id)
326
+
327
+ if not os.path.exists(output_dir):
328
+ yield gr.update(value=create_progress_html(0, "❌ Please run parsing first", "error"), visible=True), get_debug_info(session_id), "No output folder found.", gr.update(visible=False)
 
 
 
329
  return
330
 
331
  scripts_to_run = []
 
333
  elif task_type == "poster": scripts_to_run = ["gen_poster.py"]
334
  elif task_type == "pr": scripts_to_run = ["gen_pr.py"]
335
  elif task_type == "all": scripts_to_run = ["gen_ppt.py", "gen_poster.py", "gen_pr.py"]
 
 
 
 
 
336
 
337
+ full_log = f"🚀 Starting {len(scripts_to_run)} tasks for session {session_id[:8]}...\n"
338
+ print(f"[GEN Start | {session_id[:6]}] Starting {task_type.upper()}", flush=True)
339
+
340
  progress(0.1, desc=f"启动 {task_type.upper()} 生成任务...")
341
+ yield gr.update(value=create_progress_html(10, f"⏳ Starting {task_type.upper()}...", "active"), visible=True), get_debug_info(session_id), full_log, gr.update(visible=False)
342
 
343
  q = queue.Queue()
344
  processes = []
345
+ env = build_user_env(api_key, api_base_url, papers_dir, output_dir)
346
 
347
  def enqueue_output(out, script_name):
348
  for line in iter(out.readline, ''):
 
352
  try:
353
  for script in scripts_to_run:
354
  p = subprocess.Popen(
355
+ [sys.executable, script], env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1
 
 
 
 
356
  )
357
  processes.append((script, p))
358
  t = threading.Thread(target=enqueue_output, args=(p.stdout, script))
 
360
  t.start()
361
 
362
  active_processes = len(processes)
363
+ progress(0.5, desc=f"正在并行生成 {task_type.upper()}...")
364
 
 
 
365
  while active_processes > 0 or not q.empty():
366
  try:
367
  script_name, line = q.get(timeout=0.1)
368
  full_log += f"[{script_name}] {line}"
369
+ # ==========================================
370
+ # 🔔 双端输出 3:生成阶段
371
+ # ==========================================
372
+ print(f"[{script_name.upper()} | {session_id[:6]}] {line}", end="", flush=True)
373
+ yield gr.update(value=create_progress_html(50, f"⏳ Generating {task_type.upper()}...", "active"), visible=True), get_debug_info(session_id), full_log, gr.update(visible=False)
374
  except queue.Empty:
375
  active_processes = sum(1 for _, p in processes if p.poll() is None)
376
 
377
+ success = all(p.returncode == 0 for _, p in processes)
 
 
 
 
378
 
379
  if not success:
380
  progress(1.0, desc="生成失败")
381
+ yield gr.update(value=create_progress_html(50, "❌ Tasks failed", "error"), visible=True), get_debug_info(session_id), full_log, gr.update(visible=False)
382
  return
383
 
384
  full_log += "\n📦 Zipping output directory...\n"
 
 
385
  progress(0.9, desc="打包压缩结果...")
386
+ yield gr.update(value=create_progress_html(90, "⏳ Zipping outputs...", "active"), visible=True), get_debug_info(session_id), full_log, gr.update(visible=False)
 
 
 
387
 
388
+ zip_base_name = zip_path.replace(".zip", "")
389
+ shutil.make_archive(zip_base_name, 'zip', output_dir)
390
 
391
+ full_log += "✅ All tasks completed successfully.\n"
392
  progress(1.0, desc="全部完成!")
393
+ yield gr.update(value=create_progress_html(100, f"✅ {task_type.upper()} Generated", "success"), visible=True), get_debug_info(session_id), full_log, gr.update(value=zip_path, visible=True)
394
 
395
  except Exception as e:
396
  progress(1.0, desc="发生全局异常")
397
+ error_log = full_log + f"\n[Global Exception]:\n{str(e)}"
398
+ print(f"[Exception | {session_id[:6]}] {str(e)}", flush=True)
399
+ yield gr.update(value=create_progress_html(0, "❌ Global exception", "error"), visible=True), get_debug_info(session_id), error_log, gr.update(visible=False)
400
 
401
  # ==========================================
402
  # --- 🚀 UI Configuration & Advanced CSS ---
 
456
  border-color: rgba(168, 85, 247, 0.3) !important;
457
  }
458
 
459
+ /* ================= 进度条自定义 CSS ================= */
460
+ @keyframes progress-bar-stripes {
461
+ from { background-position: 1rem 0; }
462
+ to { background-position: 0 0; }
463
+ }
464
+ .custom-progress-container {
465
+ width: 100%;
466
+ background-color: rgba(233, 213, 255, 0.3);
467
+ border-radius: 12px;
468
+ overflow: hidden;
469
+ position: relative;
470
+ height: 40px;
471
+ border: 1px solid rgba(168, 85, 247, 0.3);
472
+ box-shadow: inset 0 2px 4px rgba(0,0,0,0.05);
473
+ }
474
+ .custom-progress-bar {
475
+ height: 100%;
476
+ border-radius: 12px;
477
+ transition: width 0.4s ease;
478
+ }
479
+ .custom-progress-bar.active {
480
+ background-color: #A855F7;
481
+ background-image: linear-gradient(45deg, rgba(255, 255, 255, .15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, .15) 50%, rgba(255, 255, 255, .15) 75%, transparent 75%, transparent);
482
+ background-size: 1rem 1rem;
483
+ animation: progress-bar-stripes 1s linear infinite;
484
+ box-shadow: 0 0 10px rgba(168, 85, 247, 0.5);
485
+ }
486
+ .custom-progress-bar.success { background-image: none; background-color: #10B981; box-shadow: 0 0 10px rgba(16, 185, 129, 0.5);}
487
+ .custom-progress-bar.error { background-image: none; background-color: #EF4444; box-shadow: 0 0 10px rgba(239, 68, 68, 0.5);}
488
+ .custom-progress-text {
489
+ position: absolute; top: 0; left: 0; width: 100%; height: 100%;
490
+ display: flex; align-items: center; justify-content: center;
491
+ font-weight: 600; color: #581C87; font-size: 14px;
492
+ text-shadow: 0 0 4px rgba(255,255,255,0.8);
493
+ }
494
+ .dark .custom-progress-text { color: #E9D5FF; text-shadow: 0 0 4px rgba(0,0,0,0.8); }
495
 
496
  #pdf-upload-box {
497
  border: 2px dashed rgba(192, 132, 252, 0.6) !important;
 
541
  margin-right: auto !important;
542
  margin-top: 10px !important;
543
  margin-bottom: 10px !important;
544
+ display: block !important;
545
  transition: transform 0.3s cubic-bezier(0.175, 0.885, 0.32, 1.275), box-shadow 0.3s ease !important;
546
  box-shadow: 0 4px 15px rgba(126, 34, 206, 0.3) !important;
547
  cursor: pointer !important;
 
581
  }
582
  .action-btn:active { transform: translateY(2px) scale(0.98) !important; box-shadow: 0 2px 10px rgba(147, 51, 234, 0.2) !important; }
583
 
 
584
  .primary-action-btn:disabled, .action-btn:disabled {
585
  background: #e5e7eb !important;
586
  color: #9ca3af !important;
 
602
  ::-webkit-scrollbar-track { background: rgba(168, 85, 247, 0.05); border-radius: 4px; }
603
  ::-webkit-scrollbar-thumb { background: linear-gradient(135deg, #A855F7, #C084FC); border-radius: 4px; }
604
  ::-webkit-scrollbar-thumb:hover { background: linear-gradient(135deg, #9333EA, #A855F7); }
 
605
  details > summary { transition: color 0.2s ease; }
606
  details > summary:hover { color: #E9D5FF !important; }
607
  """
608
 
609
  with gr.Blocks(theme=purple_theme, css=custom_css) as demo:
610
+ session_id_state = gr.State("")
611
+ user_api_key_state = gr.State("")
612
+ user_api_base_state = gr.State("")
613
+
614
  api_saved_state = gr.State(False)
615
  pdf_ready_state = gr.State(False)
616
 
 
619
  gr.Markdown("One-click parsing of academic PDFs, DAG structuring, and multi-modal asset generation.", elem_id="subtitle")
620
 
621
  with gr.Row():
 
622
  with gr.Column(scale=1):
 
623
  # 1. API Configuration
624
  with gr.Group(elem_classes="gradio-group"):
625
  gr.Markdown("### ⚙️ 1. Global API Configuration")
626
  with gr.Row():
627
+ key_input = gr.Textbox(label="Gemini API Key", type="password", placeholder="sk-...", scale=1)
628
  api_base_url_input = gr.Textbox(label="Base URL (Optional)", placeholder="https://api.example.com", scale=1)
629
  key_btn = gr.Button("💾 Save API Configuration")
630
+ api_status = gr.Textbox(show_label=False, interactive=False, elem_classes="status-text")
631
+
 
 
 
632
  # 2. Document Parsing
633
  with gr.Group(elem_classes="gradio-group"):
634
  gr.Markdown("### 📄 2. Document Parsing")
635
+ pdf_input = gr.File(label="Upload Document", file_types=[".pdf"], elem_id="pdf-upload-box")
 
 
 
 
 
 
 
636
  parse_btn = gr.Button("🚀 Start Mineru & DAG Extraction", elem_classes="primary-action-btn", interactive=False)
637
 
638
+ # 默认隐藏进度条
639
+ parse_progress = gr.HTML(visible=False)
 
640
 
641
  # 3. Asset Generation
642
  with gr.Group(elem_classes="gradio-group"):
643
  gr.Markdown("### 🎯 3. Asset Generation")
 
 
644
  with gr.Row(elem_classes="action-row"):
 
645
  gen_ppt_btn = gr.Button("📊 Gen PPT", elem_classes="action-btn", interactive=False)
646
  gen_poster_btn = gr.Button("🖼️ Gen Poster", elem_classes="action-btn", interactive=False)
647
  gen_pr_btn = gr.Button("📰 Gen PR", elem_classes="action-btn", interactive=False)
 
 
648
  gen_all_btn = gr.Button("✨ Generate All Assets (ALL)", elem_classes="primary-action-btn", interactive=False)
649
+
650
+ # 默认隐藏进度条
651
+ gen_progress = gr.HTML(visible=False)
652
 
 
653
  with gr.Column(scale=1):
 
654
  # 4. Results & Downloads
655
  with gr.Group(elem_classes="gradio-group"):
656
  gr.Markdown("### 📦 Generation Results & Download")
657
+
658
+ download_placeholder = gr.HTML(
659
+ '''
660
+ <div style="display: flex; flex-direction: column; align-items: center; justify-content: center; height: 160px; border: 2px dashed rgba(192, 132, 252, 0.6); border-radius: 16px; background-color: rgba(250, 245, 255, 0.5); color: #9333EA; text-align: center; transition: all 0.3s ease;">
661
+ <span style="font-size: 32px; margin-bottom: 8px;">📦</span>
662
+ <span style="font-weight: 600; font-size: 16px; margin-bottom: 4px;">Awaiting Generation</span>
663
+ <span style="font-size: 13px; color: #A855F7; opacity: 0.8;">Generated assets will appear here as a downloadable ZIP archive.</span>
664
+ </div>
665
+ '''
666
  )
667
+
668
  download_file = gr.File(label="📥 Get Final Zip Archive", interactive=False, visible=False)
669
 
670
+ # 5. Debugging
671
  with gr.Group(elem_classes="gradio-group"):
672
  gr.Markdown("### 🛠️ Developer Monitoring (Debug Only)")
673
  with gr.Tabs():
674
  with gr.Tab("📜 Terminal Stream"):
675
+ cmd_logs = gr.Textbox(show_label=False, lines=14, interactive=False, elem_classes="log-box")
 
 
 
676
  with gr.Tab("🔍 System Snapshot"):
677
  refresh_btn = gr.Button("🔄 Refresh Directory Tree")
678
+ debug_view = gr.HTML()
679
+
680
+ # ================= LOGIC BINDINGS =================
681
+
682
def init_app_for_user():
    """Create a fresh session id for a newly loaded page and render its debug view.

    Returns a 2-tuple: the new session id (a UUID4 string) and the debug HTML
    produced for that id.
    """
    session_token = str(uuid.uuid4())
    return session_token, get_debug_info(session_token)
686
+
687
+ demo.load(fn=init_app_for_user, inputs=None, outputs=[session_id_state, debug_view])
688
 
 
 
 
 
689
  key_btn.click(
690
  fn=save_api_settings,
691
+ inputs=[key_input, api_base_url_input, session_id_state],
692
+ outputs=[api_status, debug_view, api_saved_state, user_api_key_state, user_api_base_state]
693
  )
694
 
695
+ pdf_input.upload(
696
+ fn=save_pdf,
697
+ inputs=[pdf_input, session_id_state],
698
+ outputs=[parse_progress, debug_view, pdf_ready_state]
699
+ )
700
 
 
 
701
  pdf_input.clear(
702
  fn=clear_pdf,
703
+ inputs=[session_id_state],
704
+ outputs=[parse_progress, gen_progress, debug_view, pdf_ready_state, gen_ppt_btn, gen_poster_btn, gen_pr_btn, gen_all_btn]
705
  )
706
 
 
707
  def check_parse_btn_ready(api_ready, pdf_ready):
708
  return gr.update(interactive=(api_ready and pdf_ready))
709
 
710
  api_saved_state.change(fn=check_parse_btn_ready, inputs=[api_saved_state, pdf_ready_state], outputs=parse_btn)
711
  pdf_ready_state.change(fn=check_parse_btn_ready, inputs=[api_saved_state, pdf_ready_state], outputs=parse_btn)
712
 
 
 
713
  parse_btn.click(
714
  fn=run_mineru_parsing_and_dag_gen,
715
+ inputs=[session_id_state, user_api_key_state, user_api_base_state],
716
+ outputs=[parse_progress, debug_view, cmd_logs, gen_ppt_btn, gen_poster_btn, gen_pr_btn, gen_all_btn]
717
  )
718
 
719
def trigger_gen_ppt(sid, ak, ab, progress=gr.Progress()):
    """Stream run_final_generation for the "ppt" task."""
    yield from run_final_generation("ppt", sid, ak, ab, progress)


def trigger_gen_poster(sid, ak, ab, progress=gr.Progress()):
    """Stream run_final_generation for the "poster" task."""
    yield from run_final_generation("poster", sid, ak, ab, progress)


def trigger_gen_pr(sid, ak, ab, progress=gr.Progress()):
    """Stream run_final_generation for the "pr" task."""
    yield from run_final_generation("pr", sid, ak, ab, progress)


def trigger_gen_all(sid, ak, ab, progress=gr.Progress()):
    """Stream run_final_generation for the "all" task."""
    yield from run_final_generation("all", sid, ak, ab, progress)
723
+
724
+ gen_ppt_btn.click(fn=trigger_gen_ppt, inputs=[session_id_state, user_api_key_state, user_api_base_state], outputs=[gen_progress, debug_view, cmd_logs, download_file])
725
+ gen_poster_btn.click(fn=trigger_gen_poster, inputs=[session_id_state, user_api_key_state, user_api_base_state], outputs=[gen_progress, debug_view, cmd_logs, download_file])
726
+ gen_pr_btn.click(fn=trigger_gen_pr, inputs=[session_id_state, user_api_key_state, user_api_base_state], outputs=[gen_progress, debug_view, cmd_logs, download_file])
727
+ gen_all_btn.click(fn=trigger_gen_all, inputs=[session_id_state, user_api_key_state, user_api_base_state], outputs=[gen_progress, debug_view, cmd_logs, download_file])
 
 
728
 
729
+ refresh_btn.click(fn=get_debug_info, inputs=[session_id_state], outputs=debug_view)
730
+
731
def toggle_empty_placeholder(file_val):
    """Show the placeholder only while the download component holds no file."""
    placeholder_needed = file_val is None
    return gr.update(visible=placeholder_needed)
733
+
734
+ download_file.change(
735
+ fn=toggle_empty_placeholder,
736
+ inputs=[download_file],
737
+ outputs=[download_placeholder]
738
+ )
739
 
740
  if __name__ == "__main__":
      # NOTE(review): presumably starts background cleanup of stale session
      # data before serving — confirm against start_garbage_collector's definition.
      start_garbage_collector()
      # Concurrency relaxed to 5
      # demo.queue(default_concurrency_limit=5).launch()
      # Listen on all interfaces, port 7860.
      demo.launch(server_name="0.0.0.0", server_port=7860)