Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -94,11 +94,8 @@ purple_theme = PurpleTheme()
|
|
| 94 |
# --- ⚙️ Backend Logic & Functions ---
|
| 95 |
# ==========================================
|
| 96 |
def get_debug_info():
|
| 97 |
-
"""Read server file system status"""
|
| 98 |
now = datetime.now().strftime("%H:%M:%S")
|
| 99 |
files = os.listdir(PAPERS_DIR) if os.path.exists(PAPERS_DIR) else "Directory missing"
|
| 100 |
-
|
| 101 |
-
# Recursively check output directory contents
|
| 102 |
output_detail = "Not generated"
|
| 103 |
if os.path.exists(OUTPUT_DIR):
|
| 104 |
all_output_items = []
|
|
@@ -106,7 +103,6 @@ def get_debug_info():
|
|
| 106 |
for name in files_in_out:
|
| 107 |
all_output_items.append(os.path.join(os.path.relpath(root, OUTPUT_DIR), name))
|
| 108 |
output_detail = f"Found {len(all_output_items)} files: {all_output_items[:5]}..." if all_output_items else "Directory exists but is EMPTY"
|
| 109 |
-
|
| 110 |
return f"[{now}] 📁 papers/ Content: {files}\n\n[{now}] 📂 mineru_outputs Status: {output_detail}"
|
| 111 |
|
| 112 |
def save_pdf(file):
|
|
@@ -125,35 +121,25 @@ def save_api_settings(api_key, api_base_url=None):
|
|
| 125 |
if os.path.exists(CONFIG_PATH):
|
| 126 |
with open(CONFIG_PATH, "r", encoding="utf-8") as f:
|
| 127 |
config = yaml.safe_load(f) or {}
|
| 128 |
-
|
| 129 |
-
# Save API Key
|
| 130 |
config.setdefault("api_keys", {})["gemini_api_key"] = api_key
|
| 131 |
-
|
| 132 |
-
# Save base URL if not empty
|
| 133 |
if api_base_url:
|
| 134 |
config["api_base_url"] = api_base_url
|
| 135 |
-
|
| 136 |
-
# Write to YAML
|
| 137 |
with open(CONFIG_PATH, "w", encoding="utf-8") as f:
|
| 138 |
yaml.dump(config, f, allow_unicode=True)
|
| 139 |
-
|
| 140 |
success_msg = "✅ Key saved"
|
| 141 |
if api_base_url:
|
| 142 |
success_msg += ", Base URL updated"
|
| 143 |
-
|
| 144 |
return success_msg, get_debug_info()
|
| 145 |
except Exception as e:
|
| 146 |
return f"❌ Error: {str(e)}", get_debug_info()
|
| 147 |
|
| 148 |
def run_mineru_parsing_and_dag_gen():
|
| 149 |
-
"""Execute PDF parsing and DAG generation (supports real-time streaming)"""
|
| 150 |
if not os.path.exists(PAPERS_DIR) or not any(f.endswith('.pdf') for f in os.listdir(PAPERS_DIR)):
|
| 151 |
yield "❌ No PDF file found", get_debug_info(), "No execution logs."
|
| 152 |
return
|
| 153 |
|
| 154 |
full_log = ""
|
| 155 |
try:
|
| 156 |
-
# ================= Step 1: Mineru Parsing =================
|
| 157 |
env = os.environ.copy()
|
| 158 |
env["MINERU_FORMULA_ENABLE"] = "false"
|
| 159 |
env["MINERU_TABLE_ENABLE"] = "false"
|
|
@@ -161,19 +147,12 @@ def run_mineru_parsing_and_dag_gen():
|
|
| 161 |
env["MINERU_VIRTUAL_VRAM_SIZE"] = "8"
|
| 162 |
|
| 163 |
command_mineru = ["mineru", "-p", PAPERS_DIR, "-o", OUTPUT_DIR]
|
| 164 |
-
|
| 165 |
full_log += "--- Mineru Executing ---\n"
|
| 166 |
yield "⏳ Executing Mineru parsing...", get_debug_info(), full_log
|
| 167 |
|
| 168 |
process_mineru = subprocess.Popen(
|
| 169 |
-
command_mineru,
|
| 170 |
-
env=env,
|
| 171 |
-
stdout=subprocess.PIPE,
|
| 172 |
-
stderr=subprocess.STDOUT,
|
| 173 |
-
text=True,
|
| 174 |
-
bufsize=1
|
| 175 |
)
|
| 176 |
-
|
| 177 |
for line in iter(process_mineru.stdout.readline, ''):
|
| 178 |
full_log += line
|
| 179 |
yield "⏳ Executing Mineru parsing...", get_debug_info(), full_log
|
|
@@ -181,35 +160,23 @@ def run_mineru_parsing_and_dag_gen():
|
|
| 181 |
returncode_mineru = process_mineru.wait()
|
| 182 |
|
| 183 |
if returncode_mineru != 0:
|
| 184 |
-
|
| 185 |
-
yield status, get_debug_info(), full_log
|
| 186 |
return
|
| 187 |
|
| 188 |
-
# ================= Step 2: DAG Generation =================
|
| 189 |
command_dag = [sys.executable, "gen_dag.py"]
|
| 190 |
-
|
| 191 |
full_log += "\n--- DAG Gen Executing ---\n"
|
| 192 |
yield "⏳ Mineru parsing complete, executing DAG generation...", get_debug_info(), full_log
|
| 193 |
|
| 194 |
process_dag = subprocess.Popen(
|
| 195 |
-
command_dag,
|
| 196 |
-
stdout=subprocess.PIPE,
|
| 197 |
-
stderr=subprocess.STDOUT,
|
| 198 |
-
text=True,
|
| 199 |
-
bufsize=1
|
| 200 |
)
|
| 201 |
-
|
| 202 |
for line in iter(process_dag.stdout.readline, ''):
|
| 203 |
full_log += line
|
| 204 |
yield "⏳ Executing DAG generation...", get_debug_info(), full_log
|
| 205 |
process_dag.stdout.close()
|
| 206 |
returncode_dag = process_dag.wait()
|
| 207 |
|
| 208 |
-
if returncode_dag == 0:
|
| 209 |
-
status = "✅ PDF parsing & DAG generation fully completed"
|
| 210 |
-
else:
|
| 211 |
-
status = f"❌ DAG generation failed (Exit Code: {returncode_dag})"
|
| 212 |
-
|
| 213 |
yield status, get_debug_info(), full_log
|
| 214 |
|
| 215 |
except Exception as e:
|
|
@@ -217,46 +184,35 @@ def run_mineru_parsing_and_dag_gen():
|
|
| 217 |
yield "❌ Execution Exception", get_debug_info(), error_log
|
| 218 |
|
| 219 |
def run_final_generation(task_type="all"):
|
| 220 |
-
"""Execute generation scripts and zip results"""
|
| 221 |
if not os.path.exists(OUTPUT_DIR):
|
| 222 |
return "❌ Please run the parsing step first", get_debug_info(), "No output folder found.", None
|
| 223 |
|
| 224 |
scripts_to_run = []
|
| 225 |
-
if task_type == "ppt":
|
| 226 |
-
|
| 227 |
-
elif task_type == "
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
scripts_to_run = ["gen_pr.py"]
|
| 231 |
-
elif task_type == "all":
|
| 232 |
-
scripts_to_run = ["gen_ppt.py", "gen_poster.py", "gen_pr.py"]
|
| 233 |
-
else:
|
| 234 |
-
return "❌ Unknown task type", get_debug_info(), "Invalid task_type.", None
|
| 235 |
|
| 236 |
full_log = f"🚀 Preparing to start {len(scripts_to_run)} tasks...\n"
|
| 237 |
success = True
|
| 238 |
|
| 239 |
def execute_script(script):
|
| 240 |
command = [sys.executable, script]
|
| 241 |
-
|
| 242 |
-
return script, result
|
| 243 |
|
| 244 |
try:
|
| 245 |
with ThreadPoolExecutor(max_workers=len(scripts_to_run)) as executor:
|
| 246 |
future_to_script = {executor.submit(execute_script, s): s for s in scripts_to_run}
|
| 247 |
-
|
| 248 |
for future in as_completed(future_to_script):
|
| 249 |
script_name = future_to_script[future]
|
| 250 |
try:
|
| 251 |
_, result = future.result()
|
| 252 |
-
|
| 253 |
full_log += f"\n================ ✅ Execution Complete: {script_name} ================\n"
|
| 254 |
full_log += f"--- STDOUT ---\n{result.stdout}\n\n--- STDERR ---\n{result.stderr}\n"
|
| 255 |
-
|
| 256 |
if result.returncode != 0:
|
| 257 |
success = False
|
| 258 |
full_log += f"❌ [Error] {script_name} returned non-zero exit code (Exit Code: {result.returncode})\n"
|
| 259 |
-
|
| 260 |
except subprocess.TimeoutExpired as e:
|
| 261 |
success = False
|
| 262 |
full_log += f"\n================ ❌ Task Timeout: {script_name} ================\n{str(e)}\n"
|
|
@@ -264,16 +220,11 @@ def run_final_generation(task_type="all"):
|
|
| 264 |
success = False
|
| 265 |
full_log += f"\n================ ❌ Task Exception: {script_name} ================\n{str(e)}\n"
|
| 266 |
|
| 267 |
-
if not success:
|
| 268 |
-
return f"❌ {task_type.upper()} contains failed tasks, please check logs", get_debug_info(), full_log, None
|
| 269 |
|
| 270 |
-
# Zip the mineru_outputs folder
|
| 271 |
zip_base_name = ZIP_OUTPUT_PATH.replace(".zip", "")
|
| 272 |
shutil.make_archive(zip_base_name, 'zip', OUTPUT_DIR)
|
| 273 |
-
|
| 274 |
-
success_msg = f"✅ {task_type.upper()} generated and zipped successfully"
|
| 275 |
-
return success_msg, get_debug_info(), full_log, ZIP_OUTPUT_PATH
|
| 276 |
-
|
| 277 |
except Exception as e:
|
| 278 |
error_log = full_log + f"\n[Global Exception] Exception occurred:\n{str(e)}"
|
| 279 |
return "❌ Global exception during final generation", get_debug_info(), error_log, None
|
|
@@ -294,101 +245,118 @@ body, .gradio-container {
|
|
| 294 |
.dark body, .dark .gradio-container {
|
| 295 |
background-color: #1a1a1a !important;
|
| 296 |
background-image: linear-gradient(rgba(168, 85, 247, .1) 1px, transparent 1px), linear-gradient(90deg, rgba(168, 85, 247, .1) 1px, transparent 1px) !important;
|
| 297 |
-
background-size: 40px 40px !important;
|
| 298 |
}
|
| 299 |
|
| 300 |
-
#col-container {
|
| 301 |
-
margin: 0 auto;
|
| 302 |
-
max-width: 1200px;
|
| 303 |
-
}
|
| 304 |
-
|
| 305 |
-
#main-title {
|
| 306 |
-
text-align: center !important;
|
| 307 |
-
padding: 1.5rem 0 0.5rem 0;
|
| 308 |
-
}
|
| 309 |
|
|
|
|
| 310 |
#main-title h1 {
|
| 311 |
-
font-size: 2.6em !important;
|
| 312 |
-
font-weight: 800 !important;
|
| 313 |
background: linear-gradient(135deg, #A855F7 0%, #C084FC 50%, #9333EA 100%);
|
| 314 |
background-size: 200% 200%;
|
| 315 |
-
-webkit-background-clip: text;
|
| 316 |
-
-
|
| 317 |
-
background-clip: text;
|
| 318 |
-
animation: gradient-shift 4s ease infinite;
|
| 319 |
-
letter-spacing: -0.02em;
|
| 320 |
-
}
|
| 321 |
-
|
| 322 |
-
@keyframes gradient-shift {
|
| 323 |
-
0%, 100% { background-position: 0% 50%; }
|
| 324 |
-
50% { background-position: 100% 50%; }
|
| 325 |
}
|
| 326 |
|
| 327 |
-
|
| 328 |
-
text-align: center !important;
|
| 329 |
-
margin-bottom: 2rem;
|
| 330 |
-
}
|
| 331 |
-
|
| 332 |
-
#subtitle p {
|
| 333 |
-
margin: 0 auto;
|
| 334 |
-
color: #666;
|
| 335 |
-
font-size: 1.1rem;
|
| 336 |
-
font-weight: 500;
|
| 337 |
-
}
|
| 338 |
|
|
|
|
|
|
|
| 339 |
.dark #subtitle p { color: #DAB2FF; }
|
| 340 |
|
| 341 |
/* Glassmorphism Cards */
|
| 342 |
.gradio-group {
|
| 343 |
background: rgba(255, 255, 255, 0.9) !important;
|
| 344 |
-
border: 2px solid #E9D5FF !important;
|
| 345 |
-
border-radius: 12px !important;
|
| 346 |
box-shadow: 0 4px 24px rgba(168, 85, 247, 0.08) !important;
|
| 347 |
-
backdrop-filter: blur(10px);
|
| 348 |
-
transition: all 0.3s ease;
|
| 349 |
-
padding: 15px !important;
|
| 350 |
}
|
|
|
|
|
|
|
| 351 |
|
| 352 |
-
.
|
| 353 |
-
|
| 354 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
}
|
|
|
|
| 356 |
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
border-color: rgba(168, 85, 247, 0.3) !important;
|
| 360 |
-
}
|
| 361 |
|
| 362 |
-
/*
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
border: 1px solid #C084FC !important;
|
| 369 |
-
border-radius: 8px !important;
|
| 370 |
}
|
| 371 |
|
| 372 |
-
/*
|
| 373 |
-
.
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
|
|
|
|
|
|
| 379 |
}
|
| 380 |
|
| 381 |
-
|
| 382 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 383 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 384 |
|
| 385 |
-
/*
|
|
|
|
|
|
|
|
|
|
| 386 |
::-webkit-scrollbar { width: 8px; height: 8px; }
|
| 387 |
::-webkit-scrollbar-track { background: rgba(168, 85, 247, 0.05); border-radius: 4px; }
|
| 388 |
::-webkit-scrollbar-thumb { background: linear-gradient(135deg, #A855F7, #C084FC); border-radius: 4px; }
|
| 389 |
::-webkit-scrollbar-thumb:hover { background: linear-gradient(135deg, #9333EA, #A855F7); }
|
| 390 |
"""
|
| 391 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 392 |
with gr.Blocks(theme=purple_theme, css=custom_css) as demo:
|
| 393 |
with gr.Column(elem_id="col-container"):
|
| 394 |
gr.Markdown("# **PaperX / Mineru Parsing Platform**", elem_id="main-title")
|
|
@@ -408,16 +376,13 @@ with gr.Blocks(theme=purple_theme, css=custom_css) as demo:
|
|
| 408 |
# 2. Document Parsing
|
| 409 |
with gr.Group(elem_classes="gradio-group"):
|
| 410 |
gr.Markdown("### 📄 2. Document Parsing")
|
| 411 |
-
pdf_input = gr.File(label="Drag and drop or click to upload PDF", file_types=[".pdf"])
|
| 412 |
|
| 413 |
-
|
|
|
|
| 414 |
|
|
|
|
| 415 |
parse_status = gr.Textbox(
|
| 416 |
-
show_label=False,
|
| 417 |
-
placeholder="Waiting for document upload...",
|
| 418 |
-
lines=1,
|
| 419 |
-
interactive=False,
|
| 420 |
-
elem_classes="status-text"
|
| 421 |
)
|
| 422 |
|
| 423 |
# 3. Asset Generation
|
|
@@ -425,10 +390,11 @@ with gr.Blocks(theme=purple_theme, css=custom_css) as demo:
|
|
| 425 |
gr.Markdown("### 🎯 3. Asset Generation")
|
| 426 |
gr.Markdown("Generate final formats based on DAG structure:")
|
| 427 |
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
|
|
|
| 432 |
|
| 433 |
gen_all_btn = gr.Button("✨ Generate All Assets (ALL)", variant="primary")
|
| 434 |
|
|
@@ -439,11 +405,7 @@ with gr.Blocks(theme=purple_theme, css=custom_css) as demo:
|
|
| 439 |
with gr.Group(elem_classes="gradio-group"):
|
| 440 |
gr.Markdown("### 📦 Generation Results & Download")
|
| 441 |
gen_status = gr.Textbox(
|
| 442 |
-
show_label=False,
|
| 443 |
-
placeholder="No generation task currently...",
|
| 444 |
-
lines=2,
|
| 445 |
-
interactive=False,
|
| 446 |
-
elem_classes="status-text"
|
| 447 |
)
|
| 448 |
download_file = gr.File(label="📥 Get Final Zip Archive", interactive=False, visible=False)
|
| 449 |
|
|
@@ -453,21 +415,13 @@ with gr.Blocks(theme=purple_theme, css=custom_css) as demo:
|
|
| 453 |
with gr.Tabs():
|
| 454 |
with gr.Tab("📜 Terminal Stream"):
|
| 455 |
cmd_logs = gr.Textbox(
|
| 456 |
-
label="Stdout / Stderr",
|
| 457 |
-
placeholder="Waiting for task to start...",
|
| 458 |
-
lines=14,
|
| 459 |
-
interactive=False,
|
| 460 |
-
elem_classes="log-box"
|
| 461 |
)
|
| 462 |
|
| 463 |
with gr.Tab("🔍 System Snapshot"):
|
| 464 |
refresh_btn = gr.Button("🔄 Refresh Directory Tree")
|
| 465 |
debug_view = gr.Textbox(
|
| 466 |
-
label="Workspace Files",
|
| 467 |
-
lines=13,
|
| 468 |
-
interactive=False,
|
| 469 |
-
value=get_debug_info(),
|
| 470 |
-
elem_classes="log-box"
|
| 471 |
)
|
| 472 |
|
| 473 |
# ================= LOGIC BINDINGS =================
|
|
@@ -476,10 +430,7 @@ with gr.Blocks(theme=purple_theme, css=custom_css) as demo:
|
|
| 476 |
pdf_input.upload(fn=save_pdf, inputs=pdf_input, outputs=[parse_status, debug_view])
|
| 477 |
pdf_input.clear(fn=lambda: ("ℹ️ File cleared", get_debug_info()), outputs=[parse_status, debug_view])
|
| 478 |
|
| 479 |
-
parse_btn.click(
|
| 480 |
-
fn=run_mineru_parsing_and_dag_gen,
|
| 481 |
-
outputs=[parse_status, debug_view, cmd_logs]
|
| 482 |
-
)
|
| 483 |
|
| 484 |
def trigger_gen(task):
|
| 485 |
status, debug, logs, file_path = run_final_generation(task)
|
|
@@ -493,5 +444,8 @@ with gr.Blocks(theme=purple_theme, css=custom_css) as demo:
|
|
| 493 |
|
| 494 |
refresh_btn.click(fn=get_debug_info, outputs=debug_view)
|
| 495 |
|
|
|
|
|
|
|
|
|
|
| 496 |
if __name__ == "__main__":
|
| 497 |
demo.launch()
|
|
|
|
| 94 |
# --- ⚙️ Backend Logic & Functions ---
|
| 95 |
# ==========================================
|
| 96 |
def get_debug_info():
|
|
|
|
| 97 |
now = datetime.now().strftime("%H:%M:%S")
|
| 98 |
files = os.listdir(PAPERS_DIR) if os.path.exists(PAPERS_DIR) else "Directory missing"
|
|
|
|
|
|
|
| 99 |
output_detail = "Not generated"
|
| 100 |
if os.path.exists(OUTPUT_DIR):
|
| 101 |
all_output_items = []
|
|
|
|
| 103 |
for name in files_in_out:
|
| 104 |
all_output_items.append(os.path.join(os.path.relpath(root, OUTPUT_DIR), name))
|
| 105 |
output_detail = f"Found {len(all_output_items)} files: {all_output_items[:5]}..." if all_output_items else "Directory exists but is EMPTY"
|
|
|
|
| 106 |
return f"[{now}] 📁 papers/ Content: {files}\n\n[{now}] 📂 mineru_outputs Status: {output_detail}"
|
| 107 |
|
| 108 |
def save_pdf(file):
|
|
|
|
| 121 |
if os.path.exists(CONFIG_PATH):
|
| 122 |
with open(CONFIG_PATH, "r", encoding="utf-8") as f:
|
| 123 |
config = yaml.safe_load(f) or {}
|
|
|
|
|
|
|
| 124 |
config.setdefault("api_keys", {})["gemini_api_key"] = api_key
|
|
|
|
|
|
|
| 125 |
if api_base_url:
|
| 126 |
config["api_base_url"] = api_base_url
|
|
|
|
|
|
|
| 127 |
with open(CONFIG_PATH, "w", encoding="utf-8") as f:
|
| 128 |
yaml.dump(config, f, allow_unicode=True)
|
|
|
|
| 129 |
success_msg = "✅ Key saved"
|
| 130 |
if api_base_url:
|
| 131 |
success_msg += ", Base URL updated"
|
|
|
|
| 132 |
return success_msg, get_debug_info()
|
| 133 |
except Exception as e:
|
| 134 |
return f"❌ Error: {str(e)}", get_debug_info()
|
| 135 |
|
| 136 |
def run_mineru_parsing_and_dag_gen():
|
|
|
|
| 137 |
if not os.path.exists(PAPERS_DIR) or not any(f.endswith('.pdf') for f in os.listdir(PAPERS_DIR)):
|
| 138 |
yield "❌ No PDF file found", get_debug_info(), "No execution logs."
|
| 139 |
return
|
| 140 |
|
| 141 |
full_log = ""
|
| 142 |
try:
|
|
|
|
| 143 |
env = os.environ.copy()
|
| 144 |
env["MINERU_FORMULA_ENABLE"] = "false"
|
| 145 |
env["MINERU_TABLE_ENABLE"] = "false"
|
|
|
|
| 147 |
env["MINERU_VIRTUAL_VRAM_SIZE"] = "8"
|
| 148 |
|
| 149 |
command_mineru = ["mineru", "-p", PAPERS_DIR, "-o", OUTPUT_DIR]
|
|
|
|
| 150 |
full_log += "--- Mineru Executing ---\n"
|
| 151 |
yield "⏳ Executing Mineru parsing...", get_debug_info(), full_log
|
| 152 |
|
| 153 |
process_mineru = subprocess.Popen(
|
| 154 |
+
command_mineru, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
)
|
|
|
|
| 156 |
for line in iter(process_mineru.stdout.readline, ''):
|
| 157 |
full_log += line
|
| 158 |
yield "⏳ Executing Mineru parsing...", get_debug_info(), full_log
|
|
|
|
| 160 |
returncode_mineru = process_mineru.wait()
|
| 161 |
|
| 162 |
if returncode_mineru != 0:
|
| 163 |
+
yield f"❌ Mineru parsing failed (Exit Code: {returncode_mineru})", get_debug_info(), full_log
|
|
|
|
| 164 |
return
|
| 165 |
|
|
|
|
| 166 |
command_dag = [sys.executable, "gen_dag.py"]
|
|
|
|
| 167 |
full_log += "\n--- DAG Gen Executing ---\n"
|
| 168 |
yield "⏳ Mineru parsing complete, executing DAG generation...", get_debug_info(), full_log
|
| 169 |
|
| 170 |
process_dag = subprocess.Popen(
|
| 171 |
+
command_dag, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
)
|
|
|
|
| 173 |
for line in iter(process_dag.stdout.readline, ''):
|
| 174 |
full_log += line
|
| 175 |
yield "⏳ Executing DAG generation...", get_debug_info(), full_log
|
| 176 |
process_dag.stdout.close()
|
| 177 |
returncode_dag = process_dag.wait()
|
| 178 |
|
| 179 |
+
status = "✅ PDF parsing & DAG generation fully completed" if returncode_dag == 0 else f"❌ DAG generation failed (Exit Code: {returncode_dag})"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
yield status, get_debug_info(), full_log
|
| 181 |
|
| 182 |
except Exception as e:
|
|
|
|
| 184 |
yield "❌ Execution Exception", get_debug_info(), error_log
|
| 185 |
|
| 186 |
def run_final_generation(task_type="all"):
|
|
|
|
| 187 |
if not os.path.exists(OUTPUT_DIR):
|
| 188 |
return "❌ Please run the parsing step first", get_debug_info(), "No output folder found.", None
|
| 189 |
|
| 190 |
scripts_to_run = []
|
| 191 |
+
if task_type == "ppt": scripts_to_run = ["gen_ppt.py"]
|
| 192 |
+
elif task_type == "poster": scripts_to_run = ["gen_poster.py"]
|
| 193 |
+
elif task_type == "pr": scripts_to_run = ["gen_pr.py"]
|
| 194 |
+
elif task_type == "all": scripts_to_run = ["gen_ppt.py", "gen_poster.py", "gen_pr.py"]
|
| 195 |
+
else: return "❌ Unknown task type", get_debug_info(), "Invalid task_type.", None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
|
| 197 |
full_log = f"🚀 Preparing to start {len(scripts_to_run)} tasks...\n"
|
| 198 |
success = True
|
| 199 |
|
| 200 |
def execute_script(script):
|
| 201 |
command = [sys.executable, script]
|
| 202 |
+
return script, subprocess.run(command, capture_output=True, text=True, timeout=600)
|
|
|
|
| 203 |
|
| 204 |
try:
|
| 205 |
with ThreadPoolExecutor(max_workers=len(scripts_to_run)) as executor:
|
| 206 |
future_to_script = {executor.submit(execute_script, s): s for s in scripts_to_run}
|
|
|
|
| 207 |
for future in as_completed(future_to_script):
|
| 208 |
script_name = future_to_script[future]
|
| 209 |
try:
|
| 210 |
_, result = future.result()
|
|
|
|
| 211 |
full_log += f"\n================ ✅ Execution Complete: {script_name} ================\n"
|
| 212 |
full_log += f"--- STDOUT ---\n{result.stdout}\n\n--- STDERR ---\n{result.stderr}\n"
|
|
|
|
| 213 |
if result.returncode != 0:
|
| 214 |
success = False
|
| 215 |
full_log += f"❌ [Error] {script_name} returned non-zero exit code (Exit Code: {result.returncode})\n"
|
|
|
|
| 216 |
except subprocess.TimeoutExpired as e:
|
| 217 |
success = False
|
| 218 |
full_log += f"\n================ ❌ Task Timeout: {script_name} ================\n{str(e)}\n"
|
|
|
|
| 220 |
success = False
|
| 221 |
full_log += f"\n================ ❌ Task Exception: {script_name} ================\n{str(e)}\n"
|
| 222 |
|
| 223 |
+
if not success: return f"❌ {task_type.upper()} contains failed tasks, please check logs", get_debug_info(), full_log, None
|
|
|
|
| 224 |
|
|
|
|
| 225 |
zip_base_name = ZIP_OUTPUT_PATH.replace(".zip", "")
|
| 226 |
shutil.make_archive(zip_base_name, 'zip', OUTPUT_DIR)
|
| 227 |
+
return f"✅ {task_type.upper()} generated and zipped successfully", get_debug_info(), full_log, ZIP_OUTPUT_PATH
|
|
|
|
|
|
|
|
|
|
| 228 |
except Exception as e:
|
| 229 |
error_log = full_log + f"\n[Global Exception] Exception occurred:\n{str(e)}"
|
| 230 |
return "❌ Global exception during final generation", get_debug_info(), error_log, None
|
|
|
|
| 245 |
.dark body, .dark .gradio-container {
|
| 246 |
background-color: #1a1a1a !important;
|
| 247 |
background-image: linear-gradient(rgba(168, 85, 247, .1) 1px, transparent 1px), linear-gradient(90deg, rgba(168, 85, 247, .1) 1px, transparent 1px) !important;
|
|
|
|
| 248 |
}
|
| 249 |
|
| 250 |
+
#col-container { margin: 0 auto; max-width: 1200px; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
|
| 252 |
+
#main-title { text-align: center !important; padding: 1.5rem 0 0.5rem 0; }
|
| 253 |
#main-title h1 {
|
| 254 |
+
font-size: 2.6em !important; font-weight: 800 !important;
|
|
|
|
| 255 |
background: linear-gradient(135deg, #A855F7 0%, #C084FC 50%, #9333EA 100%);
|
| 256 |
background-size: 200% 200%;
|
| 257 |
+
-webkit-background-clip: text; -webkit-text-fill-color: transparent;
|
| 258 |
+
animation: gradient-shift 4s ease infinite; letter-spacing: -0.02em;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
}
|
| 260 |
|
| 261 |
+
@keyframes gradient-shift { 0%, 100% { background-position: 0% 50%; } 50% { background-position: 100% 50%; } }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
|
| 263 |
+
#subtitle { text-align: center !important; margin-bottom: 2rem; }
|
| 264 |
+
#subtitle p { margin: 0 auto; color: #666; font-size: 1.1rem; font-weight: 500; }
|
| 265 |
.dark #subtitle p { color: #DAB2FF; }
|
| 266 |
|
| 267 |
/* Glassmorphism Cards */
|
| 268 |
.gradio-group {
|
| 269 |
background: rgba(255, 255, 255, 0.9) !important;
|
| 270 |
+
border: 2px solid #E9D5FF !important; border-radius: 12px !important;
|
|
|
|
| 271 |
box-shadow: 0 4px 24px rgba(168, 85, 247, 0.08) !important;
|
| 272 |
+
backdrop-filter: blur(10px); transition: all 0.3s ease; padding: 15px !important;
|
|
|
|
|
|
|
| 273 |
}
|
| 274 |
+
.gradio-group:hover { box-shadow: 0 8px 32px rgba(168, 85, 247, 0.12) !important; border-color: #C084FC !important; }
|
| 275 |
+
.dark .gradio-group { background: rgba(30, 30, 30, 0.9) !important; border-color: rgba(168, 85, 247, 0.3) !important; }
|
| 276 |
|
| 277 |
+
/* ======== 新增:1. 隐藏原生组件,自定义全局拖拽与点击上传区域 ======== */
|
| 278 |
+
#pdf-upload-box {
|
| 279 |
+
background: transparent !important;
|
| 280 |
+
border: 2px dashed rgba(192, 132, 252, 0.5) !important;
|
| 281 |
+
border-radius: 12px !important;
|
| 282 |
+
min-height: 100px !important;
|
| 283 |
+
position: relative;
|
| 284 |
+
overflow: hidden;
|
| 285 |
+
display: flex !important; align-items: center !important; justify-content: center !important;
|
| 286 |
+
transition: all 0.3s ease !important;
|
| 287 |
}
|
| 288 |
+
#pdf-upload-box:hover { border-color: #A855F7 !important; background: rgba(168, 85, 247, 0.05) !important; }
|
| 289 |
|
| 290 |
+
/* 将原生的输入框变为全透明但保留点击/拖拽属性 */
|
| 291 |
+
#pdf-upload-box > * { opacity: 0 !important; z-index: 10 !important; position: relative; width: 100%; height: 100%; cursor: pointer;}
|
|
|
|
|
|
|
| 292 |
|
| 293 |
+
/* 自定义文字显示 */
|
| 294 |
+
#pdf-upload-box::before {
|
| 295 |
+
content: "🖱️ 点击此处选择 PDF \A 🌍 或将文件拖拽至全局任意位置";
|
| 296 |
+
white-space: pre-wrap; text-align: center; color: #7E22CE;
|
| 297 |
+
font-weight: 600; font-size: 1.1rem; position: absolute;
|
| 298 |
+
z-index: 1; pointer-events: none; line-height: 1.6;
|
|
|
|
|
|
|
| 299 |
}
|
| 300 |
|
| 301 |
+
/* 激活全局拖拽时的魔术全屏放大样式 */
|
| 302 |
+
body.dragging-file #pdf-upload-box {
|
| 303 |
+
position: fixed !important; top: 0 !important; left: 0 !important;
|
| 304 |
+
width: 100vw !important; height: 100vh !important;
|
| 305 |
+
z-index: 99999 !important; background: rgba(250, 245, 255, 0.85) !important;
|
| 306 |
+
backdrop-filter: blur(8px) !important; border: 6px dashed #A855F7 !important; border-radius: 0 !important;
|
| 307 |
+
}
|
| 308 |
+
body.dragging-file #pdf-upload-box::before {
|
| 309 |
+
content: "📥 松开鼠标,立即上传 PDF"; font-size: 2.5rem; color: #9333EA; font-weight: 800;
|
| 310 |
}
|
| 311 |
|
| 312 |
+
/* ======== 新增:2. 等长圆角按钮及悬浮动效 ======== */
|
| 313 |
+
.action-row { display: flex !important; gap: 15px !important; margin-bottom: 10px !important; }
|
| 314 |
+
.action-btn {
|
| 315 |
+
border-radius: 30px !important;
|
| 316 |
+
background: linear-gradient(135deg, #A855F7, #9333EA) !important;
|
| 317 |
+
color: white !important; font-weight: 600 !important; border: none !important;
|
| 318 |
+
height: 55px !important; flex: 1 !important;
|
| 319 |
+
transition: transform 0.3s cubic-bezier(0.175, 0.885, 0.32, 1.275), box-shadow 0.3s ease !important;
|
| 320 |
+
box-shadow: 0 4px 15px rgba(147, 51, 234, 0.2) !important;
|
| 321 |
+
cursor: pointer !important; display: flex !important; align-items: center !important; justify-content: center !important;
|
| 322 |
}
|
| 323 |
+
.action-btn:hover {
|
| 324 |
+
transform: translateY(-5px) scale(1.03) !important;
|
| 325 |
+
box-shadow: 0 10px 25px rgba(147, 51, 234, 0.4) !important;
|
| 326 |
+
background: linear-gradient(135deg, #C084FC, #A855F7) !important;
|
| 327 |
+
}
|
| 328 |
+
.action-btn:active { transform: translateY(2px) scale(0.98) !important; box-shadow: 0 2px 10px rgba(147, 51, 234, 0.2) !important; }
|
| 329 |
|
| 330 |
+
/* Terminal Log Style */
|
| 331 |
+
.log-box textarea { font-family: 'IBM Plex Mono', monospace !important; font-size: 13px !important; background-color: #1e1e1e !important; color: #DAB2FF !important; border: 1px solid #C084FC !important; border-radius: 8px !important; }
|
| 332 |
+
.status-text textarea { background-color: transparent !important; border: none !important; box-shadow: none !important; font-weight: 600 !important; color: #6B21A8 !important; }
|
| 333 |
+
.dark .status-text textarea { color: #C084FC !important; }
|
| 334 |
::-webkit-scrollbar { width: 8px; height: 8px; }
|
| 335 |
::-webkit-scrollbar-track { background: rgba(168, 85, 247, 0.05); border-radius: 4px; }
|
| 336 |
::-webkit-scrollbar-thumb { background: linear-gradient(135deg, #A855F7, #C084FC); border-radius: 4px; }
|
| 337 |
::-webkit-scrollbar-thumb:hover { background: linear-gradient(135deg, #9333EA, #A855F7); }
|
| 338 |
"""
|
| 339 |
|
| 340 |
+
# ======== New: global drag-and-drop listener JavaScript ========
|
| 341 |
+
global_drag_js = """
|
| 342 |
+
function() {
|
| 343 |
+
let dragCounter = 0;
|
| 344 |
+
window.addEventListener('dragenter', (e) => {
|
| 345 |
+
e.preventDefault(); dragCounter++; document.body.classList.add('dragging-file');
|
| 346 |
+
});
|
| 347 |
+
window.addEventListener('dragleave', (e) => {
|
| 348 |
+
e.preventDefault(); dragCounter--;
|
| 349 |
+
if(dragCounter === 0) document.body.classList.remove('dragging-file');
|
| 350 |
+
});
|
| 351 |
+
window.addEventListener('drop', (e) => {
|
| 352 |
+
dragCounter = 0;
|
| 353 |
+
// 延迟移除class以确保Gradio能捕捉到底层组件的drop事件
|
| 354 |
+
setTimeout(() => document.body.classList.remove('dragging-file'), 150);
|
| 355 |
+
});
|
| 356 |
+
window.addEventListener('dragover', (e) => { e.preventDefault(); });
|
| 357 |
+
}
|
| 358 |
+
"""
|
| 359 |
+
|
| 360 |
with gr.Blocks(theme=purple_theme, css=custom_css) as demo:
|
| 361 |
with gr.Column(elem_id="col-container"):
|
| 362 |
gr.Markdown("# **PaperX / Mineru Parsing Platform**", elem_id="main-title")
|
|
|
|
| 376 |
# 2. Document Parsing
|
| 377 |
with gr.Group(elem_classes="gradio-group"):
|
| 378 |
gr.Markdown("### 📄 2. Document Parsing")
|
|
|
|
| 379 |
|
| 380 |
+
# Adjusted the control here: it is bound to CSS to hide the native UI and apply visual effects
|
| 381 |
+
pdf_input = gr.File(label="", file_types=[".pdf"], elem_id="pdf-upload-box")
|
| 382 |
|
| 383 |
+
parse_btn = gr.Button("🚀 Start Mineru & DAG Extraction", variant="primary", size="lg")
|
| 384 |
parse_status = gr.Textbox(
|
| 385 |
+
show_label=False, placeholder="Waiting for document upload...", lines=1, interactive=False, elem_classes="status-text"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 386 |
)
|
| 387 |
|
| 388 |
# 3. Asset Generation
|
|
|
|
| 390 |
gr.Markdown("### 🎯 3. Asset Generation")
|
| 391 |
gr.Markdown("Generate final formats based on DAG structure:")
|
| 392 |
|
| 393 |
+
# Use the custom .action-row and .action-btn classes for equal-width, equal-height buttons with a smooth hover effect
|
| 394 |
+
with gr.Row(elem_classes="action-row"):
|
| 395 |
+
gen_ppt_btn = gr.Button("📊 Gen PPT", elem_classes="action-btn")
|
| 396 |
+
gen_poster_btn = gr.Button("🖼️ Gen Poster", elem_classes="action-btn")
|
| 397 |
+
gen_pr_btn = gr.Button("📰 Gen Article", elem_classes="action-btn")
|
| 398 |
|
| 399 |
gen_all_btn = gr.Button("✨ Generate All Assets (ALL)", variant="primary")
|
| 400 |
|
|
|
|
| 405 |
with gr.Group(elem_classes="gradio-group"):
|
| 406 |
gr.Markdown("### 📦 Generation Results & Download")
|
| 407 |
gen_status = gr.Textbox(
|
| 408 |
+
show_label=False, placeholder="No generation task currently...", lines=2, interactive=False, elem_classes="status-text"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
)
|
| 410 |
download_file = gr.File(label="📥 Get Final Zip Archive", interactive=False, visible=False)
|
| 411 |
|
|
|
|
| 415 |
with gr.Tabs():
|
| 416 |
with gr.Tab("📜 Terminal Stream"):
|
| 417 |
cmd_logs = gr.Textbox(
|
| 418 |
+
label="Stdout / Stderr", placeholder="Waiting for task to start...", lines=14, interactive=False, elem_classes="log-box"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 419 |
)
|
| 420 |
|
| 421 |
with gr.Tab("🔍 System Snapshot"):
|
| 422 |
refresh_btn = gr.Button("🔄 Refresh Directory Tree")
|
| 423 |
debug_view = gr.Textbox(
|
| 424 |
+
label="Workspace Files", lines=13, interactive=False, value=get_debug_info(), elem_classes="log-box"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 425 |
)
|
| 426 |
|
| 427 |
# ================= LOGIC BINDINGS =================
|
|
|
|
| 430 |
pdf_input.upload(fn=save_pdf, inputs=pdf_input, outputs=[parse_status, debug_view])
|
| 431 |
pdf_input.clear(fn=lambda: ("ℹ️ File cleared", get_debug_info()), outputs=[parse_status, debug_view])
|
| 432 |
|
| 433 |
+
parse_btn.click(fn=run_mineru_parsing_and_dag_gen, outputs=[parse_status, debug_view, cmd_logs])
|
|
|
|
|
|
|
|
|
|
| 434 |
|
| 435 |
def trigger_gen(task):
|
| 436 |
status, debug, logs, file_path = run_final_generation(task)
|
|
|
|
| 444 |
|
| 445 |
refresh_btn.click(fn=get_debug_info, outputs=debug_view)
|
| 446 |
|
| 447 |
+
# Inject the global drag-and-drop script when the page loads
|
| 448 |
+
demo.load(js=global_drag_js)
|
| 449 |
+
|
| 450 |
if __name__ == "__main__":
|
| 451 |
demo.launch()
|