import os import sys import re import json import shutil import tempfile import traceback import zipfile import base64 import gradio as gr _MIME = {"png": "png", "jpg": "jpeg", "jpeg": "jpeg", "gif": "gif", "webp": "webp"} try: import markdown as _md def _md2html(text: str, img_dir: str = None) -> str: """Markdown โ†’ HTML. If img_dir is given, tags are replaced with base64 data URIs so the browser renders the actual image. Otherwise a grey badge placeholder is shown. """ html = _md.markdown(text, extensions=["nl2br", "tables"]) def _img_handler(m): tag = m.group(0) src_m = re.search(r'src="([^"]*)"', tag) alt_m = re.search(r'alt="([^"]*)"', tag) src = src_m.group(1) if src_m else "" alt = alt_m.group(1) if alt_m else "image" if img_dir and src: img_name = os.path.basename(src) img_path = os.path.join(img_dir, img_name) if os.path.exists(img_path): ext = img_name.rsplit(".", 1)[-1].lower() mime = _MIME.get(ext, "png") with open(img_path, "rb") as f: b64 = base64.b64encode(f.read()).decode() return ( f'{alt}' ) # Fallback badge when image file is not found return ( '๐Ÿ“ท {alt}' ) return re.sub(r']*/?>', _img_handler, html) except ImportError: def _md2html(text: str, img_dir: str = None) -> str: return text.replace("\n", "
") _REPO_ROOT = os.path.dirname(os.path.abspath(__file__)) sys.path.insert(0, _REPO_ROOT) print(f"[startup] Gradio: {gr.__version__}", flush=True) # โ”€โ”€ i18n โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ T = { "zh": dict( lang_btn="English", subtitle="ๅคšๆจกๆ€็Ÿฅ่ฏ†ๆๅ– Pipeline Demo", desc=( "ไธŠไผ ๆ•™ๆๆˆ–่ฏ•ๅท PDF๏ผŒ็”จ [MinerU](https://mineru.net) ่งฃๆž็‰ˆ้ขใ€ๅ†็”จ LLM ๆๅ–็ป“ๆž„ๅŒ– QA ๅฏน๏ผŒ" "่พ“ๅ‡บ `raw_vqa.jsonl`ใ€‚\n\n" "**ๆต็จ‹๏ผš** PDF ไธŠไผ  โ†’ MinerU ่งฃๆž โ†’ LLM ๆๅ– QA โ†’ ไธ‹่ฝฝ็ป“ๆžœ\n\n" "> ๆ‰€ๆœ‰ API ่ฐƒ็”จๅ‡้€š่ฟ‡ๆ‚จๆไพ›็š„ๅฏ†้’ฅๅฎŒๆˆ๏ผŒๆœฌ Space ไธๅญ˜ๅ‚จไปปไฝ•ๆ•ฐๆฎๆˆ–ๅฏ†้’ฅใ€‚" ), sec_upload="๐Ÿ“„ ไธŠไผ  PDF", upload_label="PDF ๆ–‡ไปถ๏ผˆๅ•ๆ–‡ไปถ๏ผš้ข˜็ญ”ๆททๆŽ’๏ผ›ๅŒๆ–‡ไปถ๏ผš็ฌฌ1ไธช้ข˜็›ฎ๏ผŒ็ฌฌ2ไธช็ญ”ๆกˆ๏ผ‰", task_label="ไปปๅŠกๅ็งฐ", sec_examples="๐Ÿ“‹ ๅ†…็ฝฎ็คบไพ‹ PDF๏ผˆ็‚นๅ‡ปๅŠ ่ฝฝ๏ผ‰", ex1_label="็คบไพ‹ 1๏ผšๅ•ๆ–‡ไปถ้ข˜็ญ”ๆททๆŽ’", ex2_label="็คบไพ‹ 2๏ผšๅŒๆ–‡ไปถ๏ผˆ้ข˜็›ฎ + ็ญ”ๆกˆ๏ผ‰", sec_llm="โš™๏ธ LLM ้…็ฝฎ", api_url_label="API Base URL", llm_key_label="LLM API Key๏ผˆDF_API_KEY๏ผ‰", llm_key_ph="sk-... / AIzaSy...", model_label="ๆจกๅž‹ๅ็งฐ", model_ph="gemini-2.5-pro / gpt-4o / deepseek-r1", sec_mineru="๐Ÿ—๏ธ MinerU ้…็ฝฎ", mineru_key_label="MinerU API Key๏ผˆMINERU_API_KEY๏ผ‰", mineru_key_info="โš ๏ธ ็‹ฌ็ซ‹ไบŽ LLM ็š„็ฌฌไบŒไธช Key๏ผŒๅŽป https://mineru.net/apiManage/token ๅ…่ดน็”ณ่ฏท", workers_label="ๅนถๅ‘ Worker ๆ•ฐ", run_btn="โ–ถ ๅผ€ๅง‹ๆๅ–", stop_btn="โน ไธญๆญข่ฟ่กŒ", sec_output="๐Ÿ“ค ่พ“ๅ‡บ", status_label="่ฟ่กŒ็Šถๆ€", status_ph="็‚นๅ‡ปใ€Œๅผ€ๅง‹ๆๅ–ใ€ๅŽ่ฟ›ๅบฆๆ˜พ็คบๅœจ่ฟ™้‡Œ๏ผˆ่ฟ่กŒ้œ€ๆ•ฐๅˆ†้’Ÿ๏ผŒ่ฏท่€ๅฟƒ็ญ‰ๅพ…๏ผ‰โ€ฆ", output_label="ไธ‹่ฝฝ็ป“ๆžœ๏ผˆvqa_output.zip๏ผŒๅซ JSONL + ๅ›พ็‰‡๏ผ‰", preview_label="็ป“ๆžœ้ข„่งˆ", ), "en": dict( lang_btn="ไธญๆ–‡", subtitle="Multimodal Knowledge Extraction Pipeline Demo", desc=( "Upload textbook or exam PDFs. [MinerU](https://mineru.net) parses the layout and an LLM " "extracts structured QA pairs, outputting `raw_vqa.jsonl`.\n\n" "**Pipeline:** PDF Upload โ†’ MinerU Parsing โ†’ LLM QA Extraction โ†’ Download Results\n\n" "> All API calls use your own keys. This Space does not store any data or keys." ), sec_upload="๐Ÿ“„ Upload PDF", upload_label="PDF File(s) โ€” single: Q&A interleaved; two files: 1st questions, 2nd answers", task_label="Task Name", sec_examples="๐Ÿ“‹ Example PDFs (click to load)", ex1_label="Example 1: Single file (Q&A mixed)", ex2_label="Example 2: Two files (questions + answers)", sec_llm="โš™๏ธ LLM Configuration", api_url_label="API Base URL", llm_key_label="LLM API Key (DF_API_KEY)", llm_key_ph="sk-... / AIzaSy...", model_label="Model Name", model_ph="gemini-2.5-pro / gpt-4o / deepseek-r1", sec_mineru="๐Ÿ—๏ธ MinerU Configuration", mineru_key_label="MinerU API Key (MINERU_API_KEY)", mineru_key_info="โš ๏ธ Independent from LLM key. Get yours at https://mineru.net/apiManage/token", workers_label="Max Workers", run_btn="โ–ถ Start Extraction", stop_btn="โน Stop", sec_output="๐Ÿ“ค Output", status_label="Status", status_ph="Click 'Start Extraction' to begin (may take several minutes)โ€ฆ", output_label="Download Result (vqa_output.zip โ€” JSONL + images)", preview_label="Result Preview", ), } _DEFAULT_LANG = "en" EXAMPLES = [ ("examples/VQA/questionextract_test.pdf",), ("examples/VQA/math_question.pdf", "examples/VQA/math_answer.pdf"), ] # โ”€โ”€ Helpers โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ def _render_preview(jsonl_path: str, lang: str = "en", output_dir: str = None) -> str: """Render up to 3 QA items as styled HTML cards with real image rendering.""" if not jsonl_path or not os.path.exists(jsonl_path): return "" items = [] with open(jsonl_path, encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue try: items.append(json.loads(line)) except Exception: pass if len(items) >= 3: break if not items: label_empty = "๏ผˆๆ—  QA ๆ•ฐๆฎ๏ผ‰" if lang == "zh" else "(No QA data)" return f'
{label_empty}
' label_q = "้ข˜็›ฎ" if lang == "zh" else "Question" label_a = "็ญ”ๆกˆ" if lang == "zh" else "Answer" label_s = "่งฃ้ข˜่ฟ‡็จ‹" if lang == "zh" else "Solution" cards = [] for i, item in enumerate(items): name = item.get("name", "") # Images live at output_dir/{name}/vqa_images/ img_dir = os.path.join(output_dir, name, "vqa_images") if output_dir else None q_html = _md2html(str(item.get("question", "")), img_dir) a_html = _md2html(str(item.get("answer", "")), img_dir) sol_raw = str(item.get("solution", "")) # Truncate solution before converting (avoids cutting mid-tag) sol_short = (sol_raw[:400] + "\n\nโ€ฆ") if len(sol_raw) > 400 else sol_raw sol_html = _md2html(sol_short, img_dir) sol_block = ( f'
' f'{label_s}:' f'
{sol_html}
' f'
' ) if sol_raw and sol_raw != item.get("answer", "") else "" cards.append(f"""
#{i+1}  ยท  {name}
{label_q}:
{q_html}
{label_a}:
{a_html}
{sol_block}
""") total_hint = "" try: with open(jsonl_path, encoding="utf-8") as f: total = sum(1 for l in f if l.strip()) if total > 3: more = f"๏ผˆๅ…ฑ {total} ๆก๏ผŒไป…ๅฑ•็คบๅ‰ 3 ๆก๏ผ‰" if lang == "zh" else f"{total} items total โ€” showing first 3" total_hint = f'
{more}
' except Exception: pass inner = total_hint + "".join(cards) # Wrap in a container that loads MathJax for $โ€ฆ$ / $$โ€ฆ$$ rendering return ( '
' + inner + "
" + """ """ ) # โ”€โ”€ Backend (generator โ†’ stop button works) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ def run_vqa_extraction( pdf_files, task_name, api_url, llm_api_key, mineru_api_key, model_name, max_workers, lang, ): if pdf_files is None or (isinstance(pdf_files, list) and len(pdf_files) == 0): yield None, "โŒ ่ฏทๅ…ˆไธŠไผ  PDF ๆ–‡ไปถใ€‚" if lang == "zh" else "โŒ Please upload a PDF file first.", "" return if not str(llm_api_key).strip(): msg = "โŒ ่ฏทๅกซๅ†™ LLM API Keyใ€‚" if lang == "zh" else "โŒ Please enter your LLM API Key." yield None, msg, ""; return if not str(mineru_api_key).strip(): msg = ( "โŒ ่ฏทๅกซๅ†™ MinerU API Key๏ผˆ็‹ฌ็ซ‹ไบŽ LLM Key๏ผŒๅŽป https://mineru.net/apiManage/token ็”ณ่ฏท๏ผ‰ใ€‚" if lang == "zh" else "โŒ Please enter your MinerU API Key (get it at https://mineru.net/apiManage/token)." ) yield None, msg, ""; return task_name = str(task_name).strip() or "task1" os.environ["DF_API_KEY"] = str(llm_api_key).strip() os.environ["MINERU_API_KEY"] = str(mineru_api_key).strip() workspace = tempfile.mkdtemp(prefix="dataflow_vqa_") cache_dir = os.path.join(workspace, "cache") os.makedirs(cache_dir, exist_ok=True) original_cwd = os.getcwd() try: os.chdir(workspace) yield None, "โณ [1/4] Preparing PDF filesโ€ฆ" if lang == "en" else "โณ [1/4] ๆ•ด็† PDF ๆ–‡ไปถโ€ฆ", "" if not isinstance(pdf_files, list): pdf_files = [pdf_files] pdf_paths = [] for i, f in enumerate(pdf_files): src = f if isinstance(f, str) else (f.name if hasattr(f, "name") else str(f)) dst = os.path.join(workspace, f"input_{i}.pdf") shutil.copy(src, dst) pdf_paths.append(dst) input_jsonl = os.path.join(workspace, "input.jsonl") with open(input_jsonl, "w") as fout: entry = { "input_pdf_paths": pdf_paths if len(pdf_paths) > 1 else pdf_paths[0], "name": task_name, } fout.write(json.dumps(entry, ensure_ascii=False) + "\n") yield None, "โณ [2/4] Loading pipeline moduleโ€ฆ" if lang == "en" else "โณ [2/4] ๅŠ ่ฝฝ Pipeline ๆจกๅ—โ€ฆ", "" try: from pipelines.vqa_extract_optimized_pipeline import PDF_VQA_extract_optimized_pipeline except Exception: err = f"โŒ Failed to import pipeline:\n{traceback.format_exc()}" yield None, err, ""; return try: pipeline = PDF_VQA_extract_optimized_pipeline( input_file = input_jsonl, api_url = str(api_url).rstrip("/"), model_name = str(model_name), max_workers = int(max_workers), ) pipeline.compile() except ValueError as e: msg = str(e) if "DF_API_KEY" in msg: yield None, "โŒ LLM API Key ่ฏปๅ–ๅคฑ่ดฅใ€‚" if lang == "zh" else "โŒ Failed to read LLM API Key.", "" elif "MINERU_API_KEY" in msg: yield None, "โŒ MinerU API Key ่ฏปๅ–ๅคฑ่ดฅใ€‚" if lang == "zh" else "โŒ Failed to read MinerU API Key.", "" else: yield None, f"โŒ {msg}", "" return yield None, ( "โณ [3/4] MinerU parsing + LLM QA extraction (may take several minutes)โ€ฆ" if lang == "en" else "โณ [3/4] MinerU ่งฃๆž PDF + LLM ๆๅ– QA๏ผˆๅฏ่ƒฝ้œ€่ฆๆ•ฐๅˆ†้’Ÿ๏ผ‰โ€ฆ" ), "" try: pipeline.forward() except RuntimeError as e: msg = str(e) if "no api found" in msg.lower() or "Apply upload urls failed" in msg: err = ( "โŒ MinerU API Key invalid or expired. Get a new one at https://mineru.net/apiManage/token\n\n" + msg if lang == "en" else "โŒ MinerU API Key ๆ— ๆ•ˆๆˆ–ๅทฒ่ฟ‡ๆœŸใ€‚่ฏทๅˆฐ https://mineru.net/apiManage/token ้‡ๆ–ฐ็”ณ่ฏทใ€‚\n\n" + msg ) elif "Cannot connect to LLM server" in msg: err = ("โŒ Cannot connect to LLM API. Check Base URL.\n\n" if lang == "en" else "โŒ ๆ— ๆณ•่ฟžๆŽฅ LLM API๏ผŒ่ฏทๆฃ€ๆŸฅ Base URLใ€‚\n\n") + msg else: err = f"โŒ {msg}" yield None, err, ""; return yield None, "โณ [4/4] Collecting outputโ€ฆ" if lang == "en" else "โณ [4/4] ๆ•ด็†่พ“ๅ‡บ็ป“ๆžœโ€ฆ", "" step_files = [f for f in os.listdir(cache_dir) if re.match(r"vqa_step\d+\.jsonl", f)] if not step_files: msg = "โŒ Pipeline finished but no output file found." if lang == "en" else "โŒ Pipeline ๅฎŒๆˆไฝ†ๆœชๆ‰พๅˆฐ่พ“ๅ‡บๆ–‡ไปถใ€‚" yield None, msg, ""; return max_step = max(int(re.findall(r"vqa_step(\d+)\.jsonl", f)[0]) for f in step_files) max_step_file = os.path.join(cache_dir, f"vqa_step{max_step}.jsonl") # โ”€โ”€ Collect QA pairs & copy per-task image directories โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ output_dir = os.path.join(workspace, "output") os.makedirs(output_dir, exist_ok=True) jsonl_path = os.path.join(output_dir, "raw_vqa.jsonl") count = 0 image_dirs_found = 0 with open(max_step_file) as f_in, open(jsonl_path, "w") as f_out: for line in f_in: data = json.loads(line) qa_item = data.get("vqa_pair") if not qa_item: continue name = data.get("name", task_name) out = {"name": name, **qa_item, "image_basedir": "."} if not out.get("solution"): out["solution"] = out.get("answer", "") f_out.write(json.dumps(out, ensure_ascii=False) + "\n") count += 1 # Copy cache/{name}/ โ†’ output/{name}/ (contains vqa_images/) src_task_dir = os.path.join(cache_dir, name) dst_task_dir = os.path.join(output_dir, name) if os.path.isdir(src_task_dir) and not os.path.exists(dst_task_dir): shutil.copytree(src_task_dir, dst_task_dir) image_dirs_found += 1 # โ”€โ”€ Pack into a zip so images + JSONL download together โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ zip_path = os.path.join(workspace, "vqa_output.zip") with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf: for root, dirs, files in os.walk(output_dir): for fname in files: full = os.path.join(root, fname) arcname = os.path.relpath(full, output_dir) zf.write(full, arcname) img_note = ( f" ({image_dirs_found} image folder(s) bundled)" if lang == "en" else f"๏ผˆๅซ {image_dirs_found} ไธชๅ›พ็‰‡ๆ–‡ไปถๅคน๏ผ‰" ) done = ( f"โœ… Done! Extracted {count} QA pairs{img_note}. Download the zip to get images + JSONL." if lang == "en" else f"โœ… ๅฎŒๆˆ๏ผๅ…ฑๆๅ– {count} ๆก QA ๅฏน{img_note}ใ€‚ไธ‹่ฝฝ zip ๅฏ่Žทๅพ— JSONL ๅ’Œๅ›พ็‰‡ใ€‚" ) yield zip_path, done, _render_preview(jsonl_path, lang, output_dir) except Exception: yield None, f"โŒ Unexpected error:\n{traceback.format_exc()}", "" finally: os.chdir(original_cwd) # โ”€โ”€ UI โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ CSS = """ #title-row { align-items: center; } #lang-btn { min-width: 90px; } .example-btn { margin: 4px 0 !important; } .md-body p { margin: 0 0 6px; } .md-body ul, .md-body ol { margin: 4px 0 4px 18px; padding: 0; } .md-body li { margin-bottom: 2px; } .md-body code { background:#f3f4f6; border-radius:3px; padding:1px 4px; font-size:12px; } .md-body pre { background:#f3f4f6; border-radius:6px; padding:8px; overflow-x:auto; font-size:12px; } .md-body table { border-collapse:collapse; width:100%; font-size:13px; } .md-body th, .md-body td { border:1px solid #e5e7eb; padding:4px 8px; } .md-body th { background:#f9fafb; } """ _L = _DEFAULT_LANG # shorthand for initial render with gr.Blocks( title="FlipVQA-Miner", theme=gr.themes.Soft(primary_hue="blue", neutral_hue="slate"), css=CSS, ) as demo: lang_state = gr.State(_DEFAULT_LANG) # โ”€โ”€ Header โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ with gr.Row(elem_id="title-row"): with gr.Column(scale=5): gr.Markdown("# FlipVQA-Miner: Multimodal Knowledge Extraction") with gr.Column(scale=0, min_width=110): lang_btn = gr.Button(T[_L]["lang_btn"], elem_id="lang-btn", size="sm") subtitle_md = gr.Markdown(T[_L]["subtitle"]) desc_md = gr.Markdown(T[_L]["desc"]) # โ”€โ”€ Main layout โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ with gr.Row(): # โ”€โ”€ Left column: inputs โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ with gr.Column(scale=1): # 1. Upload PDF sec_upload_md = gr.Markdown(f"### {T[_L]['sec_upload']}") pdf_files = gr.File( label=T[_L]["upload_label"], file_types=[".pdf"], file_count="multiple", ) task_name = gr.Textbox(label=T[_L]["task_label"], value="task1") # 2. Example PDFs (between upload and LLM config) sec_examples_md = gr.Markdown(f"### {T[_L]['sec_examples']}") with gr.Row(): ex1_btn = gr.Button(T[_L]["ex1_label"], elem_classes="example-btn", scale=1) ex2_btn = gr.Button(T[_L]["ex2_label"], elem_classes="example-btn", scale=1) # 3. LLM config sec_llm_md = gr.Markdown(f"### {T[_L]['sec_llm']}") api_url = gr.Textbox( label=T[_L]["api_url_label"], placeholder="https://api.openai.com/v1", ) llm_api_key = gr.Textbox( label=T[_L]["llm_key_label"], type="password", placeholder=T[_L]["llm_key_ph"], ) model_name = gr.Textbox( label=T[_L]["model_label"], value="gemini-2.5-pro", placeholder=T[_L]["model_ph"], ) # 4. MinerU config sec_mineru_md = gr.Markdown(f"### {T[_L]['sec_mineru']}") mineru_api_key = gr.Textbox( label=T[_L]["mineru_key_label"], type="password", placeholder="sk2-...", info=T[_L]["mineru_key_info"], ) max_workers = gr.Slider(label=T[_L]["workers_label"], minimum=1, maximum=30, value=5, step=1) with gr.Row(): run_btn = gr.Button(T[_L]["run_btn"], variant="primary", scale=4) stop_btn = gr.Button(T[_L]["stop_btn"], variant="stop", scale=1) # โ”€โ”€ Right column: outputs โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ with gr.Column(scale=1): sec_output_md = gr.Markdown(f"### {T[_L]['sec_output']}") status_box = gr.Textbox( label=T[_L]["status_label"], interactive=False, lines=6, placeholder=T[_L]["status_ph"], ) output_file = gr.File(label=T[_L]["output_label"], interactive=False) preview_md = gr.Markdown(f"#### {T[_L]['preview_label']}") preview_box = gr.HTML(value="") # โ”€โ”€ Event: Run โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ run_event = run_btn.click( fn=run_vqa_extraction, inputs=[pdf_files, task_name, api_url, llm_api_key, mineru_api_key, model_name, max_workers, lang_state], outputs=[output_file, status_box, preview_box], api_name="run_vqa_extraction", ) stop_btn.click(fn=None, cancels=[run_event]) # โ”€โ”€ Event: Example buttons โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ def load_example(ex_index): return [os.path.join(_REPO_ROOT, p) for p in EXAMPLES[ex_index]] ex1_btn.click(fn=lambda: load_example(0), inputs=[], outputs=[pdf_files]) ex2_btn.click(fn=lambda: load_example(1), inputs=[], outputs=[pdf_files]) # โ”€โ”€ Event: Language toggle โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ def toggle_lang(current_lang): new = "en" if current_lang == "zh" else "zh" t = T[new] return ( new, gr.update(value=t["lang_btn"]), gr.update(value=t["subtitle"]), gr.update(value=t["desc"]), gr.update(value=f"### {t['sec_upload']}"), gr.update(label=t["upload_label"]), gr.update(label=t["task_label"]), gr.update(value=f"### {t['sec_examples']}"), gr.update(value=t["ex1_label"]), gr.update(value=t["ex2_label"]), gr.update(value=f"### {t['sec_llm']}"), gr.update(label=t["api_url_label"]), gr.update(label=t["llm_key_label"], placeholder=t["llm_key_ph"]), gr.update(label=t["model_label"], placeholder=t["model_ph"]), gr.update(value=f"### {t['sec_mineru']}"), gr.update(label=t["mineru_key_label"], info=t["mineru_key_info"]), gr.update(label=t["workers_label"]), gr.update(value=t["run_btn"]), gr.update(value=t["stop_btn"]), gr.update(value=f"### {t['sec_output']}"), gr.update(label=t["status_label"], placeholder=t["status_ph"]), gr.update(label=t["output_label"]), gr.update(value=f"#### {t['preview_label']}"), ) lang_btn.click( fn=toggle_lang, inputs=[lang_state], outputs=[ lang_state, lang_btn, subtitle_md, desc_md, sec_upload_md, pdf_files, task_name, sec_examples_md, ex1_btn, ex2_btn, sec_llm_md, api_url, llm_api_key, model_name, sec_mineru_md, mineru_api_key, max_workers, run_btn, stop_btn, sec_output_md, status_box, output_file, preview_md, ], ) if __name__ == "__main__": demo.launch(allowed_paths=[os.path.join(_REPO_ROOT, "examples")])