import gradio as gr
import os
import shutil
import subprocess
import sys
import queue
import threading
import uuid
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Iterable
from gradio.themes import Soft
from gradio.themes.utils import colors, fonts, sizes
import threading
import subprocess
# ==========================================
# --- 🌐 异步安装 Playwright 浏览器 ---
# ==========================================
def setup_playwright():
    """Install the Playwright Chromium browser and report the outcome on stdout.

    Meant to be run on a background thread so the download does not block
    Gradio start-up (which could otherwise end in a hosting-side timeout).
    Every failure - Playwright not installed, installer exiting non-zero,
    launcher missing - is caught and printed; nothing is raised.
    """
    try:
        # Availability probe only: bail out early when the package is absent
        # instead of spawning a subprocess that is bound to fail.
        import playwright  # noqa: F401
        print("⏳ [System] Downloading Playwright Chromium in background...")
        # Invoke the CLI through the running interpreter so the installer of
        # *this* environment is used even if the `playwright` console script
        # is not on PATH. System-level browser dependencies are NOT installed
        # here (`--with-deps` is not passed).
        subprocess.run(
            [sys.executable, "-m", "playwright", "install", "chromium"],
            check=True,
        )
        print("✅ [System] Playwright browsers ready.")
    except Exception as e:
        print(f"❌ [System] Playwright setup failed: {e}")
# Key step: run the download on a background daemon thread so the main
# process carries on immediately instead of waiting for the installer.
threading.Thread(target=setup_playwright, daemon=True).start()
# ==========================================
# --- 📁 Global directory configuration (per-session base directory) ---
# ==========================================
# Directory that contains this source file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Root folder under which every session gets its own sub-directory.
SESSIONS_BASE_DIR = os.path.join(BASE_DIR, "user_sessions")
# Created eagerly at import time so later code can assume it exists.
os.makedirs(SESSIONS_BASE_DIR, exist_ok=True)
def get_user_dirs(session_id):
    """Return the isolated working paths of a session, creating the folders.

    The result is the 3-tuple ``(papers_dir, output_dir, zip_path)``, all
    located under ``SESSIONS_BASE_DIR/<session_id>``. Both directories are
    created when missing; the zip path is only computed, never created.
    """
    session_root = os.path.join(SESSIONS_BASE_DIR, session_id)
    papers_dir, output_dir = (
        os.path.join(session_root, leaf) for leaf in ("papers", "mineru_outputs")
    )
    for directory in (papers_dir, output_dir):
        os.makedirs(directory, exist_ok=True)
    return papers_dir, output_dir, os.path.join(session_root, "mineru_results.zip")
import time
# ==========================================
# --- 🧹 Garbage collection (background clean-up of expired sessions) ---
# ==========================================
# Session expiry age (here: 2 hours = 7200 seconds).
SESSION_MAX_AGE_SECONDS = 2 * 60 * 60
# Interval between clean-up scans (here: every 30 minutes = 1800 seconds).
CLEANUP_INTERVAL_SECONDS = 30 * 60
def _latest_mtime(root):
    """Return the newest modification time of *root* or anything below it."""
    newest = os.path.getmtime(root)
    for dirpath, dirnames, filenames in os.walk(root):
        for name in dirnames + filenames:
            try:
                newest = max(newest, os.path.getmtime(os.path.join(dirpath, name)))
            except OSError:
                # Entry disappeared (or is a dangling link) mid-scan: skip it.
                continue
    return newest


def cleanup_expired_sessions():
    """Periodically delete session folders that have been idle for too long.

    Runs forever: each pass scans ``SESSIONS_BASE_DIR``, removes every
    session directory whose most recent modification is older than
    ``SESSION_MAX_AGE_SECONDS``, then sleeps ``CLEANUP_INTERVAL_SECONDS``.
    Errors are printed and never propagate, so one bad folder cannot stop
    the loop.
    """
    while True:
        try:
            if os.path.exists(SESSIONS_BASE_DIR):
                current_time = time.time()
                for session_folder in os.listdir(SESSIONS_BASE_DIR):
                    folder_path = os.path.join(SESSIONS_BASE_DIR, session_folder)
                    # Only directories are sessions.
                    if not os.path.isdir(folder_path):
                        continue
                    # A directory's own mtime only changes when entries are
                    # added to or removed from it directly, so writes inside
                    # papers/ or mineru_outputs/ would go unnoticed. Use the
                    # newest timestamp in the whole tree as the activity mark.
                    idle_seconds = current_time - _latest_mtime(folder_path)
                    if idle_seconds <= SESSION_MAX_AGE_SECONDS:
                        continue
                    try:
                        shutil.rmtree(folder_path)
                        print(f"🧹 [Garbage Collector] Deleted expired session: {session_folder}")
                    except Exception as e:
                        print(f"⚠️ [Garbage Collector] Failed to delete {session_folder}: {e}")
        except Exception as e:
            print(f"⚠️ [Garbage Collector] Error during cleanup scan: {e}")
        # Sleep until the next scan is due.
        time.sleep(CLEANUP_INTERVAL_SECONDS)
def start_garbage_collector():
    """Launch the session clean-up loop on a background daemon thread."""
    threading.Thread(target=cleanup_expired_sessions, daemon=True).start()
    print("🚀 [Garbage Collector] Background cleanup service started.")
# ==========================================
# --- 🎨 Custom Purple Theme Definition ---
# ==========================================
# Assign a custom `purple` palette (shades c50 through c950) on the gradio
# `colors` module; the theme class below uses it as its secondary hue.
colors.purple = colors.Color(
    name="purple", c50="#FAF5FF", c100="#F3E8FF", c200="#E9D5FF",
    c300="#DAB2FF", c400="#C084FC", c500="#A855F7", c600="#9333EA",
    c700="#7E22CE", c800="#6B21A8", c900="#581C87", c950="#3B0764",
)
class PurpleTheme(Soft):
    """`Soft`-derived theme: gray primary, purple secondary, slate neutral."""

    def __init__(self, **kwargs):
        # NOTE: **kwargs is accepted but not forwarded to the base class.
        base_options = dict(
            primary_hue=colors.gray,
            secondary_hue=colors.purple,
            neutral_hue=colors.slate,
            font=(fonts.GoogleFont("Outfit"), "Arial", "sans-serif"),
            font_mono=(fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace"),
        )
        super().__init__(**base_options)

        # Variable overrides applied on top of the base theme.
        style_overrides = {
            "background_fill_primary": "*primary_50",
            "background_fill_primary_dark": "*primary_900",
            "body_background_fill": "linear-gradient(135deg, *primary_200, *primary_100)",
            "body_background_fill_dark": "linear-gradient(135deg, *primary_900, *primary_800)",
            "button_primary_text_color": "white",
            "button_primary_background_fill": "linear-gradient(90deg, *secondary_500, *secondary_600)",
            "button_primary_background_fill_hover": "linear-gradient(90deg, *secondary_600, *secondary_700)",
            "button_secondary_text_color": "black",
            "button_secondary_background_fill": "linear-gradient(90deg, *primary_300, *primary_300)",
            "slider_color": "*secondary_500",
            "block_border_width": "3px",
            "block_shadow": "*shadow_drop_lg",
            "button_primary_shadow": "*shadow_drop_lg",
        }
        super().set(**style_overrides)


# Single shared theme instance handed to gr.Blocks.
purple_theme = PurpleTheme()
# ==========================================
# --- 🚀 HTML Progress Bar Components ---
# ==========================================
def empty_progress_html(text="Waiting for action..."):
    """Return the HTML shown in a progress slot while nothing is running.

    NOTE(review): the template below holds no markup and never interpolates
    `text`; as written the function returns a lone newline. Confirm whether
    markup is missing.
    """
    return f"""
"""
def create_progress_html(percent, text, status="active"):
    """Return the HTML for a progress bar.

    status: "active" (purple animated stripes), "success" (green),
    "error" (red).

    NOTE(review): the template below holds no markup and does not use
    `percent`, `text` or `status`; as written the function returns a lone
    newline. Confirm whether markup is missing.
    """
    return f"""
"""
# ==========================================
# --- ⚙️ Backend Logic & Functions ---
# ==========================================
def get_tree_html(dir_path):
    """Render the contents of *dir_path* as a nested HTML tree.

    Sub-directories become collapsible ``<details>`` elements (recursively),
    files become plain rows. Entries are listed in sorted order and their
    names are HTML-escaped, because they originate from user uploads and the
    result is injected into the page as raw HTML.

    Returns:
        An HTML string. A missing directory yields a "Directory missing"
        row, an empty (or unreadable) one yields "Empty directory".
    """
    # Imported locally under a distinct name: the module does not import
    # `html`, and that identifier is commonly used for local string buffers.
    from html import escape

    if not os.path.exists(dir_path):
        return "<div>Directory missing</div>"

    def build_html(current_path):
        """Return the markup for everything inside *current_path*."""
        try:
            items = sorted(os.listdir(current_path))
        except Exception:
            # Unreadable or vanished directory: render it as empty.
            return ""
        parts = []
        for item in items:
            item_path = os.path.join(current_path, item)
            label = escape(item)
            if os.path.isdir(item_path):
                inner_html = build_html(item_path) or "<div>Empty</div>"
                parts.append(f"<details><summary>📁 {label}</summary>")
                parts.append(f'<div style="margin-left: 1.25em;">{inner_html}</div>')
                parts.append("</details>")
            else:
                parts.append(f"<div>📄 {label}</div>")
        return "".join(parts)

    content = build_html(dir_path)
    return content if content else "<div>Empty directory</div>"
def get_debug_info(session_id):
    """Build the debug snapshot of a session's two working directories.

    Shows the first eight characters of the session id followed by the
    rendered trees of the papers and output folders.
    """
    papers_dir, output_dir, _ = get_user_dirs(session_id)
    papers_tree, output_tree = (
        get_tree_html(folder) for folder in (papers_dir, output_dir)
    )
    return f"""
Session ID: {session_id[:8]}...
📁 papers/
{papers_tree}
📂 mineru_outputs/
{output_tree}
"""
def save_api_settings(api_key, api_base_url, session_id):
    """Validate the API settings and hand them back for session storage.

    Returns the 5-tuple ``(status message, debug HTML, saved flag, api key,
    base url)``. An empty key is rejected: the flag is False and both stored
    values are reset to empty strings.
    """
    debug_html = get_debug_info(session_id)
    if api_key:
        base_url_note = ", Base URL updated" if api_base_url else ""
        status = "✅ Key saved securely in session memory" + base_url_note
        return status, debug_html, True, api_key, api_base_url
    return "❌ Key cannot be empty", debug_html, False, "", ""
def save_pdf(file, session_id):
    """Store the uploaded PDF as the only file in the session's papers folder.

    Returns the 3-tuple ``(progress update, debug HTML, pdf-ready flag)``.
    With no file the progress area is hidden and the flag is False. Any error
    while replacing the file is reported in the progress area instead of
    being raised.
    """
    if file is None:
        return gr.update(visible=False), get_debug_info(session_id), False
    try:
        papers_dir = get_user_dirs(session_id)[0]
        # Drop whatever was uploaded before; sub-directories are left alone.
        for entry in os.listdir(papers_dir):
            stale_path = os.path.join(papers_dir, entry)
            if os.path.isfile(stale_path):
                os.remove(stale_path)
        pdf_name = os.path.basename(file.name)
        shutil.copy(file.name, os.path.join(papers_dir, pdf_name))
        banner = gr.update(
            value=create_progress_html(100, f"✅ PDF Uploaded: {pdf_name}", "success"),
            visible=True,
        )
        return banner, get_debug_info(session_id), True
    except Exception as e:
        banner = gr.update(
            value=create_progress_html(0, f"❌ Error: {str(e)}", "error"),
            visible=True,
        )
        return banner, get_debug_info(session_id), False
def clear_pdf(session_id):
    """Delete everything stored for the session and reset the dependent UI.

    Returns an 8-tuple: parse-progress update, generation-progress update,
    debug HTML, pdf-ready flag, and four button updates. On success both
    progress areas are hidden and the four buttons are disabled; on failure
    the error is shown in the parse-progress area and the other components
    are left unchanged.
    """
    try:
        # Guard against an empty id: os.path.join(base, "") resolves to the
        # base directory itself, and removing it would wipe every session.
        if session_id:
            user_base = os.path.join(SESSIONS_BASE_DIR, session_id)
            if os.path.exists(user_base):
                shutil.rmtree(user_base)
        disable_btn = gr.update(interactive=False)
        return gr.update(visible=False), gr.update(visible=False), get_debug_info(session_id), False, disable_btn, disable_btn, disable_btn, disable_btn
    except Exception as e:
        return gr.update(value=create_progress_html(0, f"❌ Clear Error: {str(e)}", "error"), visible=True), gr.update(), get_debug_info(session_id), False, gr.update(), gr.update(), gr.update(), gr.update()
def build_user_env(api_key, api_base_url, papers_dir, output_dir):
    """Return a copy of the process environment prepared for child scripts.

    Adds the fixed Mineru switches and the session's papers/output folders;
    the Gemini key and base URL are only set when a non-empty value is given.
    ``os.environ`` itself is not modified.
    """
    env = dict(os.environ)
    env.update(
        MINERU_FORMULA_ENABLE="false",
        MINERU_TABLE_ENABLE="false",
        MINERU_DEVICE_MODE="cpu",
        MINERU_VIRTUAL_VRAM_SIZE="8",
    )
    optional_settings = (
        ("GEMINI_API_KEY", api_key),
        ("GEMINI_API_BASE_URL", api_base_url),
    )
    env.update({name: value for name, value in optional_settings if value})
    env.update(USER_PAPERS_DIR=papers_dir, USER_OUTPUT_DIR=output_dir)
    return env
def run_mineru_parsing_and_dag_gen(session_id, api_key, api_base_url, progress=gr.Progress()):
    """Run Mineru parsing, then DAG generation, streaming progress to the UI.

    Generator event handler. Every yield is a 7-tuple: progress-bar update,
    debug HTML, accumulated log text, and four updates for the generation
    buttons (left unchanged while running, enabled on success, disabled on
    failure).

    Args:
        session_id: Id of the session whose folders are processed.
        api_key: Gemini API key exported to the child processes (if set).
        api_base_url: Optional base URL exported to the child processes.
        progress: Gradio progress tracker.

    The child process that is still running when the generator is closed
    early or an exception occurs is killed, so no orphan keeps working in a
    session nobody is watching.
    """
    no_change = gr.update()
    disable_btn = gr.update(interactive=False)
    papers_dir, output_dir, _ = get_user_dirs(session_id)

    def frame(percent, text, status, log, btn):
        """Assemble one 7-tuple UI update (the same update for all 4 buttons)."""
        return (
            gr.update(value=create_progress_html(percent, text, status), visible=True),
            get_debug_info(session_id),
            log,
            btn, btn, btn, btn,
        )

    if not os.path.exists(papers_dir) or not any(f.endswith('.pdf') for f in os.listdir(papers_dir)):
        yield frame(0, "❌ No PDF file found", "error", "No execution logs.", no_change)
        return

    full_log = ""
    child = None  # the subprocess currently running, if any
    try:
        env = build_user_env(api_key, api_base_url, papers_dir, output_dir)
        command_mineru = ["mineru", "-p", papers_dir, "-o", output_dir]
        full_log += f"--- Mineru Executing (Session: {session_id[:8]}) ---\n"

        # 10%
        progress(0.1, desc="启动 Mineru 解析...")
        yield frame(10, "⏳ Starting Mineru parsing...", "active", full_log, no_change)

        child = subprocess.Popen(
            command_mineru, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1
        )

        # 30%
        progress(0.3, desc="Mineru 正在解析 PDF...")
        for line in iter(child.stdout.readline, ''):
            full_log += line
            # Dual output 1 (Mineru stage): mirror each line to the server
            # console in addition to the UI log.
            print(f"[Mineru | {session_id[:6]}] {line}", end="", flush=True)
            yield frame(30, "⏳ Mineru parsing PDF...", "active", full_log, no_change)

        child.stdout.close()
        returncode_mineru = child.wait()
        if returncode_mineru != 0:
            progress(1.0, desc="Mineru 解析失败")
            yield frame(30, f"❌ Mineru failed (Code {returncode_mineru})", "error", full_log, disable_btn)
            return

        command_dag = [sys.executable, "gen_dag.py"]
        full_log += "\n--- DAG Gen Executing ---\n"

        # 60%
        progress(0.6, desc="执行 DAG 生成...")
        yield frame(60, "⏳ Executing DAG generation...", "active", full_log, no_change)

        child = subprocess.Popen(
            command_dag, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1
        )

        # 80%
        progress(0.8, desc="构建图结构中...")
        for line in iter(child.stdout.readline, ''):
            full_log += line
            # Dual output 2 (DAG generation stage).
            print(f"[DAG | {session_id[:6]}] {line}", end="", flush=True)
            yield frame(80, "⏳ Building DAG...", "active", full_log, no_change)

        child.stdout.close()
        returncode_dag = child.wait()
        if returncode_dag == 0:
            progress(1.0, desc="解析与构建完成!")
            enable_btn = gr.update(interactive=True)
            yield frame(100, "✅ Fully completed", "success", full_log, enable_btn)
        else:
            progress(1.0, desc="DAG 生成失败")
            yield frame(80, "❌ DAG generation failed", "error", full_log, disable_btn)
    except Exception as e:
        progress(1.0, desc="发生异常")
        error_log = full_log + f"\n[Global Exception]:\n{str(e)}"
        print(f"[Exception | {session_id[:6]}] {str(e)}", flush=True)
        yield frame(0, "❌ Execution Exception", "error", error_log, disable_btn)
    finally:
        # Also reached on GeneratorExit (handler cancelled or closed), which
        # `except Exception` does not catch: stop a child that is still alive.
        if child is not None and child.poll() is None:
            child.kill()
            child.wait()
def run_final_generation(task_type, session_id, api_key, api_base_url, progress=gr.Progress()):
    """Run the generator script(s) for *task_type* in parallel and zip the output.

    Generator event handler. Every yield is a 4-tuple: progress-bar update,
    debug HTML, accumulated log text, and an update for the download file
    component (hidden until the archive exists).

    Args:
        task_type: "ppt", "poster", "pr" or "all"; any other value runs no
            script.
        session_id: Id of the session whose folders are used.
        api_key: Gemini API key exported to the child processes (if set).
        api_base_url: Optional base URL exported to the child processes.
        progress: Gradio progress tracker.

    Children that are still alive when the generator is closed early or an
    exception occurs are killed.
    """
    papers_dir, output_dir, zip_path = get_user_dirs(session_id)

    def frame(percent, text, status, log, download=None):
        """Assemble one 4-tuple UI update; the download stays hidden by default."""
        return (
            gr.update(value=create_progress_html(percent, text, status), visible=True),
            get_debug_info(session_id),
            log,
            gr.update(visible=False) if download is None else download,
        )

    # get_user_dirs() always creates output_dir, so an existence test alone
    # can never fail; additionally require that parsing produced something.
    if not os.path.isdir(output_dir) or not os.listdir(output_dir):
        yield frame(0, "❌ Please run parsing first", "error", "No output folder found.")
        return

    scripts_by_task = {
        "ppt": ["gen_ppt.py"],
        "poster": ["gen_poster.py"],
        "pr": ["gen_pr.py"],
        "all": ["gen_ppt.py", "gen_poster.py", "gen_pr.py"],
    }
    scripts_to_run = scripts_by_task.get(task_type, [])

    full_log = f"🚀 Starting {len(scripts_to_run)} tasks for session {session_id[:8]}...\n"
    print(f"[GEN Start | {session_id[:6]}] Starting {task_type.upper()}", flush=True)

    progress(0.1, desc=f"启动 {task_type.upper()} 生成任务...")
    yield frame(10, f"⏳ Starting {task_type.upper()}...", "active", full_log)

    q = queue.Queue()
    processes = []
    env = build_user_env(api_key, api_base_url, papers_dir, output_dir)

    def enqueue_output(out, script_name):
        """Forward every line of *out* to the queue, then post an end marker."""
        try:
            for line in iter(out.readline, ''):
                q.put((script_name, line))
        finally:
            # `None` marks end-of-stream, so the consumer knows that all
            # output of this script has been delivered.
            q.put((script_name, None))
            out.close()

    try:
        for script in scripts_to_run:
            p = subprocess.Popen(
                [sys.executable, script], env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1
            )
            processes.append((script, p))
            threading.Thread(target=enqueue_output, args=(p.stdout, script), daemon=True).start()

        progress(0.5, desc=f"正在并行生成 {task_type.upper()}...")

        # Wait for the readers' end markers instead of polling the processes:
        # a child can exit while its last lines are still in the pipe, and
        # polling would then stop before those lines were logged.
        pending_readers = len(processes)
        while pending_readers:
            script_name, line = q.get()
            if line is None:
                pending_readers -= 1
                continue
            full_log += f"[{script_name}] {line}"
            # Dual output 3 (generation stage): mirror to the server console.
            print(f"[{script_name.upper()} | {session_id[:6]}] {line}", end="", flush=True)
            yield frame(50, f"⏳ Generating {task_type.upper()}...", "active", full_log)

        exit_codes = [p.wait() for _, p in processes]
        success = all(code == 0 for code in exit_codes)
        if not success:
            progress(1.0, desc="生成失败")
            yield frame(50, "❌ Tasks failed", "error", full_log)
            return

        full_log += "\n📦 Zipping output directory...\n"
        progress(0.9, desc="打包压缩结果...")
        yield frame(90, "⏳ Zipping outputs...", "active", full_log)

        # make_archive() appends ".zip" itself. Strip only the extension;
        # str.replace would also rewrite ".zip" anywhere else in the path.
        zip_base_name = os.path.splitext(zip_path)[0]
        shutil.make_archive(zip_base_name, 'zip', output_dir)

        full_log += "✅ All tasks completed successfully.\n"
        progress(1.0, desc="全部完成!")
        yield frame(100, f"✅ {task_type.upper()} Generated", "success", full_log, gr.update(value=zip_path, visible=True))
    except Exception as e:
        progress(1.0, desc="发生全局异常")
        error_log = full_log + f"\n[Global Exception]:\n{str(e)}"
        print(f"[Exception | {session_id[:6]}] {str(e)}", flush=True)
        yield frame(0, "❌ Global exception", "error", error_log)
    finally:
        # Also reached on GeneratorExit (handler cancelled or closed): do not
        # leave generator scripts running for a request that has gone away.
        for _, p in processes:
            if p.poll() is None:
                p.kill()
                p.wait()
# ==========================================
# --- 🚀 UI Configuration & Advanced CSS ---
# ==========================================
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Outfit:wght@400;500;600;700&family=IBM+Plex+Mono:wght@400;500;600&display=swap');
body, .gradio-container {
background-color: #FAF5FF !important;
background-image: linear-gradient(#E9D5FF 1px, transparent 1px), linear-gradient(90deg, #E9D5FF 1px, transparent 1px) !important;
background-size: 40px 40px !important;
font-family: 'Outfit', sans-serif !important;
}
.dark body, .dark .gradio-container {
background-color: #1a1a1a !important;
background-image: linear-gradient(rgba(168, 85, 247, .1) 1px, transparent 1px), linear-gradient(90deg, rgba(168, 85, 247, .1) 1px, transparent 1px) !important;
}
#col-container { margin: 0 auto; max-width: 90%; padding: 20px; }
#main-title { text-align: center !important; padding: 1.5rem 0 0.5rem 0; }
#main-title h1 {
font-size: 2.6em !important; font-weight: 800 !important;
background: linear-gradient(135deg, #A855F7 0%, #C084FC 50%, #9333EA 100%);
background-size: 200% 200%;
-webkit-background-clip: text; -webkit-text-fill-color: transparent;
animation: gradient-shift 4s ease infinite; letter-spacing: -0.02em;
}
@keyframes gradient-shift { 0%, 100% { background-position: 0% 50%; } 50% { background-position: 100% 50%; } }
#subtitle { text-align: center !important; margin-bottom: 2rem; }
#subtitle p { margin: 0 auto; color: #666; font-size: 1.1rem; font-weight: 500; }
.dark #subtitle p { color: #DAB2FF; }
.gradio-group {
background: rgba(255, 255, 255, 0.9) !important;
border: 2px solid #E9D5FF !important;
border-radius: 24px !important;
box-shadow: 0 4px 24px rgba(168, 85, 247, 0.08) !important;
backdrop-filter: blur(10px);
transition: all 0.3s ease;
padding: 16px 16px 16px 16px !important;
overflow: visible !important;
margin-bottom: 10px !important;
display: flex !important;
flex-direction: column !important;
gap: 16px !important;
}
.gradio-group:hover {
box-shadow: 0 8px 32px rgba(168, 85, 247, 0.12) !important;
border-color: #C084FC !important;
}
.dark .gradio-group {
background: rgba(30, 30, 30, 0.9) !important;
border-color: rgba(168, 85, 247, 0.3) !important;
}
/* ================= 进度条自定义 CSS ================= */
@keyframes progress-bar-stripes {
from { background-position: 1rem 0; }
to { background-position: 0 0; }
}
.custom-progress-container {
width: 100%;
background-color: rgba(233, 213, 255, 0.3);
border-radius: 12px;
overflow: hidden;
position: relative;
height: 40px;
border: 1px solid rgba(168, 85, 247, 0.3);
box-shadow: inset 0 2px 4px rgba(0,0,0,0.05);
}
.custom-progress-bar {
height: 100%;
border-radius: 12px;
transition: width 0.4s ease;
}
.custom-progress-bar.active {
background-color: #A855F7;
background-image: linear-gradient(45deg, rgba(255, 255, 255, .15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, .15) 50%, rgba(255, 255, 255, .15) 75%, transparent 75%, transparent);
background-size: 1rem 1rem;
animation: progress-bar-stripes 1s linear infinite;
box-shadow: 0 0 10px rgba(168, 85, 247, 0.5);
}
.custom-progress-bar.success { background-image: none; background-color: #10B981; box-shadow: 0 0 10px rgba(16, 185, 129, 0.5);}
.custom-progress-bar.error { background-image: none; background-color: #EF4444; box-shadow: 0 0 10px rgba(239, 68, 68, 0.5);}
.custom-progress-text {
position: absolute; top: 0; left: 0; width: 100%; height: 100%;
display: flex; align-items: center; justify-content: center;
font-weight: 600; color: #581C87; font-size: 14px;
text-shadow: 0 0 4px rgba(255,255,255,0.8);
}
.dark .custom-progress-text { color: #E9D5FF; text-shadow: 0 0 4px rgba(0,0,0,0.8); }
#pdf-upload-box {
border: 2px dashed rgba(192, 132, 252, 0.6) !important;
border-radius: 16px !important;
background-color: rgba(250, 245, 255, 0.5) !important;
transition: all 0.3s ease !important;
min-height: 220px !important;
position: relative !important;
margin-top: 10px !important;
}
#pdf-upload-box:hover {
border-color: #A855F7 !important;
background-color: rgba(243, 232, 255, 0.8) !important;
box-shadow: 0 4px 15px rgba(168, 85, 247, 0.15) !important;
}
#pdf-upload-box .upload-container { background: transparent !important; }
#pdf-upload-box .upload-container > span,
#pdf-upload-box .upload-container > svg { display: none !important; }
#pdf-upload-box .upload-container::before {
content: "📤\\A Click here to select a PDF\\A or Drag & Drop the file here";
white-space: pre-wrap;
font-size: 1.2rem;
line-height: 1.8;
font-weight: 600;
color: #9333EA;
display: flex;
align-items: center;
justify-content: center;
text-align: center;
width: 100%;
height: 100%;
position: absolute;
top: 0; left: 0;
pointer-events: none;
}
.primary-action-btn {
border-radius: 25px !important;
background: linear-gradient(135deg, #9333EA, #7E22CE) !important;
color: white !important; font-weight: 700 !important; border: none !important;
height: 50px !important;
width: 80% !important;
margin-left: auto !important;
margin-right: auto !important;
margin-top: 10px !important;
margin-bottom: 10px !important;
display: block !important;
transition: transform 0.3s cubic-bezier(0.175, 0.885, 0.32, 1.275), box-shadow 0.3s ease !important;
box-shadow: 0 4px 15px rgba(126, 34, 206, 0.3) !important;
cursor: pointer !important;
font-size: 1.15rem !important;
}
.primary-action-btn:hover {
transform: translateY(-5px) scale(1.02) !important;
box-shadow: 0 10px 25px rgba(126, 34, 206, 0.5) !important;
background: linear-gradient(135deg, #A855F7, #9333EA) !important;
}
.primary-action-btn:active { transform: translateY(2px) scale(0.98) !important; box-shadow: 0 2px 10px rgba(126, 34, 206, 0.2) !important; }
.action-row { display: flex !important; justify-content: center !important; gap: 10px !important; margin-bottom: 10px !important; margin-top: 10px !important;}
.action-btn {
border-radius: 24px !important;
background: linear-gradient(135deg, #A855F7, #9333EA) !important;
color: white !important; font-weight: 600 !important; border: none !important;
height: 40px !important;
width: 120px !important;
flex: none !important;
display: flex !important;
align-items: center !important;
justify-content: center !important;
line-height: 1 !important;
padding: 0 !important;
margin-top: 10px !important;
margin-bottom: 10px !important;
transition: transform 0.3s cubic-bezier(0.175, 0.885, 0.32, 1.275), box-shadow 0.3s ease !important;
box-shadow: 0 4px 15px rgba(147, 51, 234, 0.2) !important;
cursor: pointer !important;
font-size: 1.05rem !important;
}
.action-btn:hover {
transform: translateY(-5px) scale(1.03) !important;
box-shadow: 0 10px 25px rgba(147, 51, 234, 0.4) !important;
background: linear-gradient(135deg, #C084FC, #A855F7) !important;
}
.action-btn:active { transform: translateY(2px) scale(0.98) !important; box-shadow: 0 2px 10px rgba(147, 51, 234, 0.2) !important; }
.primary-action-btn:disabled, .action-btn:disabled {
background: #e5e7eb !important;
color: #9ca3af !important;
box-shadow: none !important;
transform: none !important;
cursor: not-allowed !important;
border: 1px solid #d1d5db !important;
}
.dark .primary-action-btn:disabled, .dark .action-btn:disabled {
background: #374151 !important;
color: #6b7280 !important;
border: 1px solid #4b5563 !important;
}
.log-box textarea { font-family: 'IBM Plex Mono', monospace !important; font-size: 13px !important; background-color: #1e1e1e !important; color: #DAB2FF !important; border: 1px solid #C084FC !important; border-radius: 8px !important; }
.status-text textarea { background-color: transparent !important; border: none !important; box-shadow: none !important; font-weight: 600 !important; color: #6B21A8 !important; }
.dark .status-text textarea { color: #C084FC !important; }
::-webkit-scrollbar { width: 8px; height: 8px; }
::-webkit-scrollbar-track { background: rgba(168, 85, 247, 0.05); border-radius: 4px; }
::-webkit-scrollbar-thumb { background: linear-gradient(135deg, #A855F7, #C084FC); border-radius: 4px; }
::-webkit-scrollbar-thumb:hover { background: linear-gradient(135deg, #9333EA, #A855F7); }
details > summary { transition: color 0.2s ease; }
details > summary:hover { color: #E9D5FF !important; }
"""
# Build the Gradio UI: a two-column layout (configuration / parsing /
# generation on the left, download and debug views on the right) followed by
# the event bindings.
# NOTE(review): the nesting of components inside rows/groups below is a
# best-effort reading of the layout - confirm against the rendered page.
with gr.Blocks(theme=purple_theme, css=custom_css) as demo:
    # Per-browser-session state. The session id is filled in on page load.
    session_id_state = gr.State("")
    user_api_key_state = gr.State("")
    user_api_base_state = gr.State("")
    # Two readiness flags that together gate the parse button.
    api_saved_state = gr.State(False)
    pdf_ready_state = gr.State(False)
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# **PaperX Platform**", elem_id="main-title")
        gr.Markdown("One-click parsing of academic PDFs, DAG structuring, and multi-modal asset generation.", elem_id="subtitle")
        with gr.Row():
            with gr.Column(scale=1):
                # 1. API Configuration
                with gr.Group(elem_classes="gradio-group"):
                    gr.Markdown("### ⚙️ 1. Global API Configuration")
                    with gr.Row():
                        key_input = gr.Textbox(label="Gemini API Key", type="password", placeholder="sk-...", scale=1)
                        api_base_url_input = gr.Textbox(label="Base URL (Optional)", placeholder="https://api.example.com", scale=1)
                    key_btn = gr.Button("💾 Save API Configuration")
                    api_status = gr.Textbox(show_label=False, interactive=False, elem_classes="status-text")
                # 2. Document Parsing
                with gr.Group(elem_classes="gradio-group"):
                    gr.Markdown("### 📄 2. Document Parsing")
                    pdf_input = gr.File(label="Upload Document", file_types=[".pdf"], elem_id="pdf-upload-box")
                    parse_btn = gr.Button("🚀 Start Mineru & DAG Extraction", elem_classes="primary-action-btn", interactive=False)
                    # Progress bar is hidden by default.
                    parse_progress = gr.HTML(visible=False)
                # 3. Asset Generation
                with gr.Group(elem_classes="gradio-group"):
                    gr.Markdown("### 🎯 3. Asset Generation")
                    with gr.Row(elem_classes="action-row"):
                        gen_ppt_btn = gr.Button("📊 Gen PPT", elem_classes="action-btn", interactive=False)
                        gen_poster_btn = gr.Button("🖼️ Gen Poster", elem_classes="action-btn", interactive=False)
                        gen_pr_btn = gr.Button("📰 Gen PR", elem_classes="action-btn", interactive=False)
                    gen_all_btn = gr.Button("✨ Generate All Assets (ALL)", elem_classes="primary-action-btn", interactive=False)
                    # Progress bar is hidden by default.
                    gen_progress = gr.HTML(visible=False)
            with gr.Column(scale=1):
                # 4. Results & Downloads
                with gr.Group(elem_classes="gradio-group"):
                    gr.Markdown("### 📦 Generation Results & Download")
                    # Placeholder shown while no archive is available.
                    download_placeholder = gr.HTML(
                        '''
📦
Awaiting Generation
Generated assets will appear here as a downloadable ZIP archive.
'''
                    )
                    download_file = gr.File(label="📥 Get Final Zip Archive", interactive=False, visible=False)
                # 5. Debugging
                with gr.Group(elem_classes="gradio-group"):
                    gr.Markdown("### 🛠️ Developer Monitoring (Debug Only)")
                    with gr.Tabs():
                        with gr.Tab("📜 Terminal Stream"):
                            cmd_logs = gr.Textbox(show_label=False, lines=14, interactive=False, elem_classes="log-box")
                        with gr.Tab("🔍 System Snapshot"):
                            refresh_btn = gr.Button("🔄 Refresh Directory Tree")
                            debug_view = gr.HTML()
    # ================= LOGIC BINDINGS =================
    def init_app_for_user():
        # Give each page load a fresh random session id and render its
        # (newly created) directory tree.
        new_session_id = str(uuid.uuid4())
        debug_html = get_debug_info(new_session_id)
        return new_session_id, debug_html
    demo.load(fn=init_app_for_user, inputs=None, outputs=[session_id_state, debug_view])
    key_btn.click(
        fn=save_api_settings,
        inputs=[key_input, api_base_url_input, session_id_state],
        outputs=[api_status, debug_view, api_saved_state, user_api_key_state, user_api_base_state]
    )
    pdf_input.upload(
        fn=save_pdf,
        inputs=[pdf_input, session_id_state],
        outputs=[parse_progress, debug_view, pdf_ready_state]
    )
    pdf_input.clear(
        fn=clear_pdf,
        inputs=[session_id_state],
        outputs=[parse_progress, gen_progress, debug_view, pdf_ready_state, gen_ppt_btn, gen_poster_btn, gen_pr_btn, gen_all_btn]
    )
    def check_parse_btn_ready(api_ready, pdf_ready):
        # The parse button is clickable only when both flags are set.
        return gr.update(interactive=(api_ready and pdf_ready))
    # Re-evaluate the gate whenever either flag changes.
    api_saved_state.change(fn=check_parse_btn_ready, inputs=[api_saved_state, pdf_ready_state], outputs=parse_btn)
    pdf_ready_state.change(fn=check_parse_btn_ready, inputs=[api_saved_state, pdf_ready_state], outputs=parse_btn)
    parse_btn.click(
        fn=run_mineru_parsing_and_dag_gen,
        inputs=[session_id_state, user_api_key_state, user_api_base_state],
        outputs=[parse_progress, debug_view, cmd_logs, gen_ppt_btn, gen_poster_btn, gen_pr_btn, gen_all_btn]
    )
    # Thin generator wrappers that fix the task type for each button.
    def trigger_gen_ppt(sid, ak, ab, progress=gr.Progress()): yield from run_final_generation("ppt", sid, ak, ab, progress)
    def trigger_gen_poster(sid, ak, ab, progress=gr.Progress()): yield from run_final_generation("poster", sid, ak, ab, progress)
    def trigger_gen_pr(sid, ak, ab, progress=gr.Progress()): yield from run_final_generation("pr", sid, ak, ab, progress)
    def trigger_gen_all(sid, ak, ab, progress=gr.Progress()): yield from run_final_generation("all", sid, ak, ab, progress)
    gen_ppt_btn.click(fn=trigger_gen_ppt, inputs=[session_id_state, user_api_key_state, user_api_base_state], outputs=[gen_progress, debug_view, cmd_logs, download_file])
    gen_poster_btn.click(fn=trigger_gen_poster, inputs=[session_id_state, user_api_key_state, user_api_base_state], outputs=[gen_progress, debug_view, cmd_logs, download_file])
    gen_pr_btn.click(fn=trigger_gen_pr, inputs=[session_id_state, user_api_key_state, user_api_base_state], outputs=[gen_progress, debug_view, cmd_logs, download_file])
    gen_all_btn.click(fn=trigger_gen_all, inputs=[session_id_state, user_api_key_state, user_api_base_state], outputs=[gen_progress, debug_view, cmd_logs, download_file])
    refresh_btn.click(fn=get_debug_info, inputs=[session_id_state], outputs=debug_view)
    def toggle_empty_placeholder(file_val):
        # Show the placeholder only while the download component has no file.
        return gr.update(visible=(file_val is None))
    download_file.change(
        fn=toggle_empty_placeholder,
        inputs=[download_file],
        outputs=[download_placeholder]
    )
if __name__ == "__main__":
    # Start the background session clean-up before serving requests.
    start_garbage_collector()
    # Concurrency limit relaxed to 5.
    # demo.queue(default_concurrency_limit=5).launch()
    # Listen on all interfaces, port 7860.
    demo.queue(default_concurrency_limit=5).launch(server_name="0.0.0.0", server_port=7860)