# PDF-to-Word
#
# Runtime error
#
# File size: 27,582 Bytes

import gradio as gr
from pdf2docx import Converter
import os
import zipfile
import cv2
import numpy as np
import shutil
import uuid
import glob
import fitz
import logging
from docx import Document

# --- 1. SYSTEM CONFIGURATION ---
# Exported before paddleocr is imported below. Judging by its name this switches
# off Paddle's model-source check -- confirm against the PaddleX documentation.
os.environ.update({"PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK": "True"})

# OCR is optional: both globals keep these defaults whenever initialization fails.
ocr_pipeline = None
HAS_OCR = False

try:
    # Imported inside the try block so that a missing or broken install only
    # disables OCR instead of stopping the app.
    # NOTE: this file must not itself be named like the paddleocr package,
    # otherwise the import below becomes circular.
    from paddleocr import PaddleOCRVL

    # Build the pipeline once at import time, pinned to version "v1.5".
    ocr_pipeline = PaddleOCRVL(pipeline_version="v1.5")
    HAS_OCR = True
    print("[OK] PaddleOCRVL initialized successfully.")
except Exception as init_error:
    print(f"[WARN] Could not initialize PaddleOCRVL. Error: {init_error}")
    print("Ensure you have installed: pip install paddlepaddle paddleocr")

# --- 2. LANGUAGE DICTIONARY ---
# UI strings keyed by language code ("vi" / "en"); both tables expose the same keys.
TRANS = dict(
    vi=dict(
        # App header and navigation
        app_name="PRO DOCUMENT TOOLKIT",
        app_desc="Xử lý tài liệu thông minh: Chuyển PDF → Word & Phân tích bố cục OCR",
        menu_pdf="📄 PDF sang Word",
        menu_ocr="👁️ Phân tích tài liệu (OCR-VL)",
        lang_btn="🇬🇧 English",
        # PDF -> Word tool
        pdf_head="Chuyển đổi PDF",
        pdf_sub="Giữ định dạng gốc, hỗ trợ nhiều file cùng lúc.",
        pdf_label="Tải lên file PDF (có thể nhiều file)",
        pdf_btn="🚀 Chuyển đổi",
        pdf_success="Chuyển đổi thành công!",
        # OCR tool
        ocr_head="Phân tích tài liệu & OCR",
        ocr_sub="Sử dụng PaddleOCRVL để nhận diện văn bản và cấu trúc (Markdown/JSON).",
        ocr_label="Tải ảnh lên (PNG, JPG, BMP)",
        ocr_btn="🔍 Phân tích Bố cục",
        ocr_rs_text="Kết quả Markdown",
        ocr_rs_img="Hình ảnh gốc",
        # Messages and footer
        err_nofile="Vui lòng tải file lên!",
        err_ocr="OCR chưa sẵn sàng (kiểm tra cài đặt PaddleOCR).",
        footer="© 2024-2026 Developed by Chu Viet Kien. Powered by PaddleOCRVL & Gradio.",
    ),
    en=dict(
        # App header and navigation
        app_name="PRO DOCUMENT TOOLKIT",
        app_desc="Smart Document Processing: PDF to Word & OCR Layout Analysis",
        menu_pdf="📄 PDF to Word",
        menu_ocr="👁️ Doc Analysis (OCR-VL)",
        lang_btn="🇻🇳 Tiếng Việt",
        # PDF -> Word tool
        pdf_head="PDF Converter",
        pdf_sub="Preserve original layout, batch support.",
        pdf_label="Upload PDF files (multiple allowed)",
        pdf_btn="🚀 Convert",
        pdf_success="Conversion successful!",
        # OCR tool
        ocr_head="Document Analysis & OCR",
        ocr_sub="Uses PaddleOCRVL to extract text and structure (Markdown/JSON).",
        ocr_label="Upload Image (PNG, JPG, BMP)",
        ocr_btn="🔍 Analyze Layout",
        ocr_rs_text="Markdown Result",
        ocr_rs_img="Original Image",
        # Messages and footer
        err_nofile="Please upload a file first!",
        err_ocr="OCR not available (check PaddleOCR installation).",
        footer="© 2024-2026 Developed by Chu Viet Kien. Powered by PaddleOCRVL & Gradio.",
    ),
)

# --- 3. PROCESSING LOGIC ---

def _is_scanned_pdf(pdf_path):
    """Check if a PDF has no extractable text layer (i.e. scanned/image-only)."""
    try:
        doc = fitz.open(pdf_path)
        has_text = any(page.get_text().strip() for page in doc)
        doc.close()
        return not has_text
    except Exception:
        return False


def _docx_is_empty(docx_path):
    """Check if a DOCX has no meaningful text."""
    try:
        doc = Document(docx_path)
        for para in doc.paragraphs:
            if para.text.strip():
                return False
        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    if cell.text.strip():
                        return False
        return True
    except Exception:
        return True


def _ocr_pdf_to_docx(pdf_path, docx_path):
    """Fallback: convert a scanned PDF to DOCX using the PaddleOCRVL pipeline.

    Each page is rendered to a PNG, passed to ``ocr_pipeline.predict``, and the
    markdown the pipeline saves is copied line by line into a new Word
    document (one paragraph per non-blank line, markdown markup left as-is).

    Args:
        pdf_path: Path of the source PDF.
        docx_path: Path where the resulting DOCX is written.

    Raises:
        Exception: Anything raised by PyMuPDF, the OCR pipeline or python-docx
            propagates to the caller; the temporary working directory is
            removed either way.
    """
    word_doc = Document()
    temp_base = f"temp_ocr_pdf_{uuid.uuid4().hex}"
    os.makedirs(temp_base, exist_ok=True)

    try:
        # Opened inside the try block and via a context manager so the PDF
        # handle is released on every path.
        with fitz.open(pdf_path) as pdf_doc:
            # 2x zoom on both axes: PyMuPDF renders at 72 DPI by default, so
            # this yields ~144 DPI page images.
            zoom = fitz.Matrix(2, 2)
            last_index = len(pdf_doc) - 1

            for i, page in enumerate(pdf_doc):
                img_path = os.path.join(temp_base, f"page_{i}.png")
                page.get_pixmap(matrix=zoom).save(img_path)

                # Run PaddleOCRVL and let it write its markdown into a
                # per-page directory so pages never mix.
                page_out_dir = os.path.join(temp_base, f"out_{i}")
                os.makedirs(page_out_dir, exist_ok=True)
                for res in ocr_pipeline.predict(img_path):
                    res.save_to_markdown(save_path=page_out_dir)

                # The markdown file name is chosen by the library; sort so the
                # pick is deterministic if more than one file is present.
                md_files = sorted(glob.glob(os.path.join(page_out_dir, "*.md")))
                if not md_files:
                    continue

                with open(md_files[0], "r", encoding="utf-8") as f:
                    page_md = f.read().strip()
                if not page_md:
                    continue

                for line in page_md.splitlines():
                    stripped = line.strip()
                    if stripped:
                        word_doc.add_paragraph(stripped)
                # Separate pages, but do not leave a trailing break after the
                # final page of the PDF.
                if i < last_index:
                    word_doc.add_page_break()

        word_doc.save(docx_path)
    finally:
        shutil.rmtree(temp_base, ignore_errors=True)


def convert_pdfs_to_word(pdf_files, lang_code, progress=gr.Progress()):
    """Convert uploaded PDFs to DOCX, using OCR for PDFs without a text layer.

    Args:
        pdf_files: One upload or a list of uploads. Each item is either an
            object whose ``name`` attribute is the PDF path, or a plain path
            string.
        lang_code: Key into ``TRANS`` ("vi" or "en") selecting the language of
            the toast messages.
        progress: Gradio progress tracker, updated once per file.

    Returns:
        ``None`` when nothing was uploaded; the path of the DOCX when exactly
        one file was converted; otherwise the path of a ZIP holding every
        DOCX.

    Raises:
        gr.Error: If any step of the conversion fails. ``gr.Error`` is an
            exception and only reaches the user when raised.
    """
    T = TRANS[lang_code]
    if not pdf_files:
        gr.Warning(T["err_nofile"])
        return None

    if not isinstance(pdf_files, list):
        pdf_files = [pdf_files]

    try:
        progress(0, desc="Starting...")

        # Per-request output directory: concurrent requests (or repeated
        # uploads of the same file name) no longer overwrite each other.
        out_dir = f"converted_{uuid.uuid4().hex}"
        os.makedirs(out_dir, exist_ok=True)

        total = len(pdf_files)
        converted_files = []
        used_names = set()

        for idx, pdf_file in enumerate(pdf_files):
            # Accept both upload objects (path in .name) and plain strings.
            pdf_path = getattr(pdf_file, "name", pdf_file)
            file_name = os.path.basename(pdf_path)
            stem = os.path.splitext(file_name)[0]

            # Two uploads with the same base name get distinct outputs.
            docx_base = f"{stem}.docx"
            suffix = 1
            while docx_base in used_names:
                docx_base = f"{stem}_{suffix}.docx"
                suffix += 1
            used_names.add(docx_base)
            docx_name = os.path.join(out_dir, docx_base)

            # Route 1: obvious scanned PDF -> OCR directly. HAS_OCR is tested
            # first so the PDF is not probed when OCR is unavailable.
            if HAS_OCR and _is_scanned_pdf(pdf_path):
                progress((idx + 1) / total, desc=f"OCR: {file_name}")
                _ocr_pdf_to_docx(pdf_path, docx_name)
            else:
                progress((idx + 1) / total, desc=f"Converting: {file_name}")
                # Temporarily raise the root logger to ERROR so pdf2docx
                # info/warning output is hidden.
                root_logger = logging.getLogger()
                old_level = root_logger.level
                root_logger.setLevel(logging.ERROR)
                try:
                    cv = Converter(pdf_path)
                    try:
                        cv.convert(docx_name)
                    finally:
                        # Release the PDF even when convert() raises.
                        cv.close()
                finally:
                    root_logger.setLevel(old_level)

                # Route 2: pdf2docx produced an empty docx -> OCR fallback.
                if HAS_OCR and _docx_is_empty(docx_name):
                    if os.path.exists(docx_name):
                        os.remove(docx_name)
                    _ocr_pdf_to_docx(pdf_path, docx_name)

            converted_files.append(docx_name)

        if len(converted_files) == 1:
            result_path = converted_files[0]
        else:
            result_path = os.path.join(out_dir, "Converted_Documents.zip")
            with zipfile.ZipFile(result_path, 'w') as zf:
                for path in converted_files:
                    # Store entries flat, without the working-directory prefix.
                    zf.write(path, arcname=os.path.basename(path))

        # Announce success only once the result really exists.
        gr.Info(f"✅ {T['pdf_success']}")
        return result_path
    except Exception as e:
        raise gr.Error(f"Error: {str(e)}") from e

def run_ocr_func(image, lang_code):
    """Run the PaddleOCRVL pipeline on one image and package its output.

    Args:
        image: Image as a NumPy array in RGB channel order (the Gradio input
            is declared with ``type="numpy"``), or ``None`` if nothing was
            uploaded.
        lang_code: Key into ``TRANS`` ("vi" or "en") for user-facing messages.

    Returns:
        A tuple ``(markdown_text, image, zip_path)``: the markdown produced by
        the pipeline, the unchanged input image, and the path of a ZIP with
        the saved JSON/markdown output. ``(None, None, None)`` when no image
        was supplied.

    Raises:
        gr.Error: If OCR is unavailable or the pipeline fails. ``gr.Error`` is
            an exception and only reaches the user when raised.
    """
    T = TRANS[lang_code]

    # 1. Validation
    if not HAS_OCR:
        raise gr.Error(T["err_ocr"])
    if image is None:
        gr.Warning(T["err_nofile"])
        return None, None, None

    # Per-request working directory; the pipeline exchanges data via files.
    session_id = uuid.uuid4().hex
    temp_dir = f"temp_ocr_{session_id}"
    input_img_path = os.path.join(temp_dir, "input_image.png")
    output_save_path = os.path.join(temp_dir, "output")
    # Creating the output directory up front (this also creates temp_dir)
    # keeps the archive step working when the pipeline saves nothing.
    os.makedirs(output_save_path, exist_ok=True)

    try:
        gr.Info("⏳ Initializing OCR-VL Pipeline...")

        # 2. Save the image to disk. OpenCV writes BGR, Gradio supplies RGB.
        img_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        if not cv2.imwrite(input_img_path, img_bgr):
            # imwrite signals failure through its return value, not an exception.
            raise RuntimeError(f"could not write {input_img_path}")

        # 3. Run PaddleOCRVL
        # See https://www.paddleocr.ai/latest/version3.x/pipeline_usage/PaddleOCR-VL.html
        output = ocr_pipeline.predict(input_img_path)

        # 4. Persist every result as JSON and markdown
        for res in output:
            res.save_to_json(save_path=output_save_path)
            res.save_to_markdown(save_path=output_save_path)

        # 5. Read the markdown back. Its file name is chosen by the library;
        # sort so the pick is deterministic.
        md_files = sorted(glob.glob(os.path.join(output_save_path, "*.md")))
        if md_files:
            with open(md_files[0], "r", encoding="utf-8") as f:
                markdown_text = f.read()
        else:
            markdown_text = "Analysis complete, but no markdown file was found."

        # 6. Package results (ZIP of the output directory), kept outside
        # temp_dir so it survives the cleanup below.
        archive_base = f"OCR_Result_{session_id}"
        zip_output_path = f"{archive_base}.zip"
        shutil.make_archive(archive_base, 'zip', output_save_path)

        # The original image is returned as-is; nothing is drawn on it.
        return markdown_text, image, zip_output_path

    except Exception as e:
        raise gr.Error(f"OCR Pipeline Error: {str(e)}") from e
    finally:
        # Remove the working directory; the ZIP lives outside it.
        shutil.rmtree(temp_dir, ignore_errors=True)

# --- 4. UI HELPERS ---
def change_lang(lang):
    """Switch to the other UI language and return refreshed component values.

    "vi" flips to "en"; any other value flips to "vi". The returned 16-tuple
    is ordered to match the ``outputs`` list wired to the language button.
    """
    if lang == "vi":
        target = "en"
    else:
        target = "vi"
    strings = TRANS[target]

    # The radio is rebuilt with translated choices and reset to the PDF entry.
    menu_update = gr.update(
        choices=[strings["menu_pdf"], strings["menu_ocr"]],
        value=strings["menu_pdf"],
    )
    title_md = f"## {strings['app_name']}"
    pdf_heading_md = f"### {strings['pdf_head']}"
    ocr_heading_md = f"### {strings['ocr_head']}"

    return (
        # language state, toggle button, menu
        target,
        strings["lang_btn"],
        menu_update,
        # page title, description, footer
        title_md,
        strings["app_desc"],
        strings["footer"],
        # PDF tool: heading, subtitle, upload label, button text
        pdf_heading_md,
        strings["pdf_sub"],
        gr.update(label=strings["pdf_label"]),
        strings["pdf_btn"],
        # OCR tool: heading, subtitle, upload label, button text
        ocr_heading_md,
        strings["ocr_sub"],
        gr.update(label=strings["ocr_label"]),
        strings["ocr_btn"],
        # OCR result labels: markdown textbox, source image
        gr.update(label=strings["ocr_rs_text"]),
        gr.update(label=strings["ocr_rs_img"]),
    )

def toggle_view(menu_val, lang):
    """Show the PDF group or the OCR group depending on the selected menu entry.

    The selection is compared against the PDF menu label of both languages, so
    ``lang`` is accepted but not consulted. Returns visibility updates for the
    PDF group and the OCR group, in that order.
    """
    pdf_labels = tuple(TRANS[code]["menu_pdf"] for code in ("vi", "en"))
    show_pdf = menu_val in pdf_labels
    pdf_group_update = gr.update(visible=show_pdf)
    ocr_group_update = gr.update(visible=not show_pdf)
    return pdf_group_update, ocr_group_update

# --- 5. THEME ---
# Soft base theme (blue accents on slate neutrals, Inter with system fallbacks),
# then a few overrides for page/block backgrounds, borders and primary buttons.
theme = gr.themes.Soft(
    font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
    neutral_hue="slate",
    primary_hue="blue",
).set(
    # Surfaces
    body_background_fill="#f8fafc",
    block_background_fill="white",
    block_border_width="1px",
    # Primary buttons
    button_primary_text_color="white",
    button_primary_background_fill="linear-gradient(90deg, #3b82f6, #2563eb)",
)

# --- 6. INTERFACE ---
# Build the whole UI. Components are laid out in the order they are created
# inside the nested context managers, so statement order here is significant.
with gr.Blocks(title="Pro Document Toolkit") as demo:
    # Current language code; the app starts in Vietnamese.
    lang_state = gr.State("vi")

    with gr.Row():
        # Left column: language toggle, tool selector and a status line.
        with gr.Column(scale=1, min_width=250):
            gr.Markdown("### 🛠️ Dashboard")
            btn_lang = gr.Button(TRANS["vi"]["lang_btn"], variant="secondary")
            radio_menu = gr.Radio(
                choices=[TRANS["vi"]["menu_pdf"], TRANS["vi"]["menu_ocr"]],
                value=TRANS["vi"]["menu_pdf"],
                label="Function / Chức năng",
                type="value"
            )
            gr.Markdown("---")
            # HAS_OCR is read once, when the UI is built.
            gr.Markdown(f"**Status:** 🟢 Online | VL-OCR: {'Ready' if HAS_OCR else 'N/A'}")

        # Right column: title, description, the two tool panels and the footer.
        with gr.Column(scale=4):
            txt_title = gr.Markdown(f"## {TRANS['vi']['app_name']}")
            txt_desc = gr.Markdown(TRANS['vi']['app_desc'])

            # PDF tool panel (visible on start)
            with gr.Group(visible=True) as group_pdf:
                pdf_head_md = gr.Markdown(f"### {TRANS['vi']['pdf_head']}")
                pdf_sub_md = gr.Markdown(TRANS['vi']['pdf_sub'])
                with gr.Row(equal_height=True):
                    with gr.Column():
                        in_pdf = gr.File(
                            label=TRANS["vi"]["pdf_label"],
                            file_types=[".pdf"],
                            file_count="multiple",
                            height=300
                        )
                        btn_pdf = gr.Button(TRANS["vi"]["pdf_btn"], variant="primary", size="lg")
                    with gr.Column():
                        gr.Markdown("### Output")
                        out_word = gr.File(label="Download DOCX / ZIP", height=250)

            # OCR tool panel (hidden until selected in the radio menu)
            with gr.Group(visible=False) as group_ocr:
                ocr_head_md = gr.Markdown(f"### {TRANS['vi']['ocr_head']}")
                ocr_sub_md = gr.Markdown(TRANS['vi']['ocr_sub'])
                with gr.Row():
                    with gr.Column(scale=1):
                        # type="numpy": the handler receives the image as an array.
                        in_img = gr.Image(
                            label=TRANS["vi"]["ocr_label"],
                            type="numpy",
                            height=450,
                            sources=["upload", "clipboard"]
                        )
                        btn_ocr = gr.Button(TRANS["vi"]["ocr_btn"], variant="primary", size="lg")
                    with gr.Column(scale=1):
                        with gr.Tabs():
                            with gr.Tab("Markdown"):
                                out_txt = gr.Textbox(
                                    label=TRANS["vi"]["ocr_rs_text"],
                                    lines=20,
                                    placeholder="Markdown content will appear here...",
                                    buttons=["copy"]
                                )
                                out_file = gr.File(label="Download Full Results (.zip)")
                            with gr.Tab("Source Image"):
                                out_img_viz = gr.Image(
                                    label=TRANS["vi"]["ocr_rs_img"],
                                    interactive=False
                                )

            txt_footer = gr.Markdown(TRANS['vi']['footer'])

    # Event wiring
    # Menu selection toggles which tool panel is visible.
    radio_menu.change(toggle_view, inputs=[radio_menu, lang_state], outputs=[group_pdf, group_ocr])
    btn_pdf.click(convert_pdfs_to_word, inputs=[in_pdf, lang_state], outputs=out_word)
    btn_ocr.click(run_ocr_func, inputs=[in_img, lang_state], outputs=[out_txt, out_img_viz, out_file])
    # The outputs list must stay in the same order as the tuple returned by change_lang.
    btn_lang.click(
        change_lang,
        inputs=[lang_state],
        outputs=[
            lang_state, btn_lang, radio_menu,
            txt_title, txt_desc, txt_footer,
            pdf_head_md, pdf_sub_md, in_pdf, btn_pdf,
            ocr_head_md, ocr_sub_md, in_img, btn_ocr,
            out_txt, out_img_viz
        ]
    )

if __name__ == "__main__":
    # Script entry point: enable the request queue, then serve on every
    # network interface at port 7860 with the custom theme.
    queued_app = demo.queue()
    queued_app.launch(
        theme=theme,
        server_port=7860,
        server_name="0.0.0.0",
    )

# --- 1. SYSTEM CONFIGURATION ---
# Exported before paddleocr is imported below. Judging by its name this switches
# off Paddle's model-source check -- confirm against the PaddleX documentation.
os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"

# OCR is optional: both globals keep these defaults whenever initialization fails.
HAS_OCR = False
ocr_pipeline = None

try:
    # Imported inside the try block so that a missing or broken install only
    # disables OCR instead of stopping the app.
    from paddleocr import PaddleOCRVL

    # pipeline_version takes a bare version string. The previous value
    # "PaddlePaddle/v1.5" looks like a model-hub path rather than a version,
    # which would make initialization fail and leave OCR permanently disabled.
    # The earlier initialization block in this file already uses "v1.5".
    ocr_pipeline = PaddleOCRVL(pipeline_version="v1.5")
    HAS_OCR = True
    print("✅ PaddleOCRVL initialized successfully.")
except Exception as e:
    print(f"⚠️ Warning: Could not initialize PaddleOCRVL. Error: {e}")
    print("Ensure you have installed: pip install paddlepaddle paddleocr")

# --- 2. LANGUAGE DICTIONARY ---
# UI strings keyed by language code ("vi" / "en"); both tables expose the same keys.
TRANS = dict(
    vi=dict(
        # App header and navigation
        app_name="PRO DOCUMENT TOOLKIT",
        app_desc="Xử lý tài liệu thông minh: Chuyển PDF → Word & Phân tích bố cục OCR",
        menu_pdf="📄 PDF sang Word",
        menu_ocr="👁️ Phân tích tài liệu (OCR-VL)",
        lang_btn="🇬🇧 English",
        # PDF -> Word tool
        pdf_head="Chuyển đổi PDF",
        pdf_sub="Giữ định dạng gốc, hỗ trợ nhiều file cùng lúc.",
        pdf_label="Tải lên file PDF (có thể nhiều file)",
        pdf_btn="🚀 Chuyển đổi",
        pdf_success="Chuyển đổi thành công!",
        # OCR tool
        ocr_head="Phân tích tài liệu & OCR",
        ocr_sub="Sử dụng PaddleOCRVL để nhận diện văn bản và cấu trúc (Markdown/JSON).",
        ocr_label="Tải ảnh lên (PNG, JPG, BMP)",
        ocr_btn="🔍 Phân tích Bố cục",
        ocr_rs_text="Kết quả Markdown",
        ocr_rs_img="Hình ảnh gốc",
        # Messages and footer
        err_nofile="Vui lòng tải file lên!",
        err_ocr="OCR chưa sẵn sàng (kiểm tra cài đặt PaddleOCR).",
        footer="© 2024-2026 Developed by Chu Viet Kien. Powered by PaddleOCRVL & Gradio.",
    ),
    en=dict(
        # App header and navigation
        app_name="PRO DOCUMENT TOOLKIT",
        app_desc="Smart Document Processing: PDF to Word & OCR Layout Analysis",
        menu_pdf="📄 PDF to Word",
        menu_ocr="👁️ Doc Analysis (OCR-VL)",
        lang_btn="🇻🇳 Tiếng Việt",
        # PDF -> Word tool
        pdf_head="PDF Converter",
        pdf_sub="Preserve original layout, batch support.",
        pdf_label="Upload PDF files (multiple allowed)",
        pdf_btn="🚀 Convert",
        pdf_success="Conversion successful!",
        # OCR tool
        ocr_head="Document Analysis & OCR",
        ocr_sub="Uses PaddleOCRVL to extract text and structure (Markdown/JSON).",
        ocr_label="Upload Image (PNG, JPG, BMP)",
        ocr_btn="🔍 Analyze Layout",
        ocr_rs_text="Markdown Result",
        ocr_rs_img="Original Image",
        # Messages and footer
        err_nofile="Please upload a file first!",
        err_ocr="OCR not available (check PaddleOCR installation).",
        footer="© 2024-2026 Developed by Chu Viet Kien. Powered by PaddleOCRVL & Gradio.",
    ),
)

# --- 3. PROCESSING LOGIC ---
def convert_pdfs_to_word(pdf_files, lang_code, progress=gr.Progress()):
    """Convert uploaded PDFs to DOCX with pdf2docx.

    Args:
        pdf_files: One upload or a list of uploads. Each item is either an
            object whose ``name`` attribute is the PDF path, or a plain path
            string.
        lang_code: Key into ``TRANS`` ("vi" or "en") selecting the language of
            the toast messages.
        progress: Gradio progress tracker, updated once per file.

    Returns:
        ``None`` when nothing was uploaded; the path of the DOCX when exactly
        one file was converted; otherwise the path of a ZIP holding every
        DOCX.

    Raises:
        gr.Error: If any step of the conversion fails. ``gr.Error`` is an
            exception and only reaches the user when raised.
    """
    T = TRANS[lang_code]
    if not pdf_files:
        gr.Warning(T["err_nofile"])
        return None

    if not isinstance(pdf_files, list):
        pdf_files = [pdf_files]

    try:
        progress(0, desc="Starting...")

        # Per-request output directory: concurrent requests (or repeated
        # uploads of the same file name) no longer overwrite each other.
        out_dir = f"converted_{uuid.uuid4().hex}"
        os.makedirs(out_dir, exist_ok=True)

        total = len(pdf_files)
        converted_files = []
        used_names = set()

        for idx, pdf_file in enumerate(pdf_files):
            # Accept both upload objects (path in .name) and plain strings.
            pdf_path = getattr(pdf_file, "name", pdf_file)
            file_name = os.path.basename(pdf_path)
            progress((idx + 1) / total, desc=f"Processing: {file_name}")

            # Two uploads with the same base name get distinct outputs.
            stem = os.path.splitext(file_name)[0]
            docx_base = f"{stem}.docx"
            suffix = 1
            while docx_base in used_names:
                docx_base = f"{stem}_{suffix}.docx"
                suffix += 1
            used_names.add(docx_base)
            docx_name = os.path.join(out_dir, docx_base)

            cv = Converter(pdf_path)
            try:
                cv.convert(docx_name)
            finally:
                # Release the PDF even when convert() raises.
                cv.close()
            converted_files.append(docx_name)

        if len(converted_files) == 1:
            result_path = converted_files[0]
        else:
            result_path = os.path.join(out_dir, "Converted_Documents.zip")
            with zipfile.ZipFile(result_path, 'w') as zf:
                for path in converted_files:
                    # Store entries flat, without the working-directory prefix.
                    zf.write(path, arcname=os.path.basename(path))

        # Announce success only once the result really exists.
        gr.Info(f"✅ {T['pdf_success']}")
        return result_path
    except Exception as e:
        raise gr.Error(f"Error: {str(e)}") from e

def run_ocr_func(image, lang_code):
    """Run the PaddleOCRVL pipeline on one image and package its output.

    Args:
        image: Image as a NumPy array in RGB channel order (the Gradio input
            is declared with ``type="numpy"``), or ``None`` if nothing was
            uploaded.
        lang_code: Key into ``TRANS`` ("vi" or "en") for user-facing messages.

    Returns:
        A tuple ``(markdown_text, image, zip_path)``: the markdown produced by
        the pipeline, the unchanged input image, and the path of a ZIP with
        the saved JSON/markdown output. ``(None, None, None)`` when no image
        was supplied.

    Raises:
        gr.Error: If OCR is unavailable or the pipeline fails. ``gr.Error`` is
            an exception and only reaches the user when raised.
    """
    T = TRANS[lang_code]

    # 1. Validation
    if not HAS_OCR:
        raise gr.Error(T["err_ocr"])
    if image is None:
        gr.Warning(T["err_nofile"])
        return None, None, None

    # Per-request working directory; the pipeline exchanges data via files.
    session_id = uuid.uuid4().hex
    temp_dir = f"temp_ocr_{session_id}"
    input_img_path = os.path.join(temp_dir, "input_image.png")
    output_save_path = os.path.join(temp_dir, "output")
    # Creating the output directory up front (this also creates temp_dir)
    # keeps the archive step working when the pipeline saves nothing.
    os.makedirs(output_save_path, exist_ok=True)

    try:
        gr.Info("⏳ Initializing OCR-VL Pipeline...")

        # 2. Save the image to disk. OpenCV writes BGR, Gradio supplies RGB.
        img_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        if not cv2.imwrite(input_img_path, img_bgr):
            # imwrite signals failure through its return value, not an exception.
            raise RuntimeError(f"could not write {input_img_path}")

        # 3. Run PaddleOCRVL
        # See https://www.paddleocr.ai/latest/version3.x/pipeline_usage/PaddleOCR-VL.html
        output = ocr_pipeline.predict(input_img_path)

        # 4. Persist every result as JSON and markdown
        for res in output:
            res.save_to_json(save_path=output_save_path)
            res.save_to_markdown(save_path=output_save_path)

        # 5. Read the markdown back. Its file name is chosen by the library;
        # sort so the pick is deterministic.
        md_files = sorted(glob.glob(os.path.join(output_save_path, "*.md")))
        if md_files:
            with open(md_files[0], "r", encoding="utf-8") as f:
                markdown_text = f.read()
        else:
            markdown_text = "Analysis complete, but no markdown file was found."

        # 6. Package results (ZIP of the output directory), kept outside
        # temp_dir so it survives the cleanup below.
        archive_base = f"OCR_Result_{session_id}"
        zip_output_path = f"{archive_base}.zip"
        shutil.make_archive(archive_base, 'zip', output_save_path)

        # The original image is returned as-is; nothing is drawn on it.
        return markdown_text, image, zip_output_path

    except Exception as e:
        raise gr.Error(f"OCR Pipeline Error: {str(e)}") from e
    finally:
        # Remove the working directory; the ZIP lives outside it.
        shutil.rmtree(temp_dir, ignore_errors=True)

# --- 4. UI HELPERS ---
def change_lang(lang):
    """Switch to the other UI language and return refreshed component values.

    "vi" flips to "en"; any other value flips to "vi". The returned 16-tuple
    is ordered to match the ``outputs`` list wired to the language button.
    """
    if lang == "vi":
        target = "en"
    else:
        target = "vi"
    strings = TRANS[target]

    # The radio is rebuilt with translated choices and reset to the PDF entry.
    menu_update = gr.update(
        choices=[strings["menu_pdf"], strings["menu_ocr"]],
        value=strings["menu_pdf"],
    )
    title_md = f"## {strings['app_name']}"
    pdf_heading_md = f"### {strings['pdf_head']}"
    ocr_heading_md = f"### {strings['ocr_head']}"

    return (
        # language state, toggle button, menu
        target,
        strings["lang_btn"],
        menu_update,
        # page title, description, footer
        title_md,
        strings["app_desc"],
        strings["footer"],
        # PDF tool: heading, subtitle, upload label, button text
        pdf_heading_md,
        strings["pdf_sub"],
        gr.update(label=strings["pdf_label"]),
        strings["pdf_btn"],
        # OCR tool: heading, subtitle, upload label, button text
        ocr_heading_md,
        strings["ocr_sub"],
        gr.update(label=strings["ocr_label"]),
        strings["ocr_btn"],
        # OCR result labels: markdown textbox, source image
        gr.update(label=strings["ocr_rs_text"]),
        gr.update(label=strings["ocr_rs_img"]),
    )

def toggle_view(menu_val, lang):
    """Show the PDF group or the OCR group depending on the selected menu entry.

    The selection is compared against the PDF menu label of both languages, so
    ``lang`` is accepted but not consulted. Returns visibility updates for the
    PDF group and the OCR group, in that order.
    """
    pdf_labels = tuple(TRANS[code]["menu_pdf"] for code in ("vi", "en"))
    show_pdf = menu_val in pdf_labels
    pdf_group_update = gr.update(visible=show_pdf)
    ocr_group_update = gr.update(visible=not show_pdf)
    return pdf_group_update, ocr_group_update

# --- 5. THEME ---
# Soft base theme (blue accents on slate neutrals, Inter with system fallbacks),
# then a few overrides for page/block backgrounds, borders and primary buttons.
theme = gr.themes.Soft(
    font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
    neutral_hue="slate",
    primary_hue="blue",
).set(
    # Surfaces
    body_background_fill="#f8fafc",
    block_background_fill="white",
    block_border_width="1px",
    # Primary buttons
    button_primary_text_color="white",
    button_primary_background_fill="linear-gradient(90deg, #3b82f6, #2563eb)",
)

# --- 6. INTERFACE ---
# Build the whole UI. Components are laid out in the order they are created
# inside the nested context managers, so statement order here is significant.
with gr.Blocks(title="Pro Document Toolkit") as demo:
    # Current language code; the app starts in Vietnamese.
    lang_state = gr.State("vi")

    with gr.Row():
        # Left column: language toggle, tool selector and a status line.
        with gr.Column(scale=1, min_width=250):
            gr.Markdown("### 🛠️ Dashboard")
            btn_lang = gr.Button(TRANS["vi"]["lang_btn"], variant="secondary")
            radio_menu = gr.Radio(
                choices=[TRANS["vi"]["menu_pdf"], TRANS["vi"]["menu_ocr"]],
                value=TRANS["vi"]["menu_pdf"],
                label="Function / Chức năng",
                type="value"
            )
            gr.Markdown("---")
            # HAS_OCR is read once, when the UI is built.
            gr.Markdown(f"**Status:** 🟢 Online | VL-OCR: {'Ready' if HAS_OCR else 'N/A'}")

        # Right column: title, description, the two tool panels and the footer.
        with gr.Column(scale=4):
            txt_title = gr.Markdown(f"## {TRANS['vi']['app_name']}")
            txt_desc = gr.Markdown(TRANS['vi']['app_desc'])

            # PDF tool panel (visible on start)
            with gr.Group(visible=True) as group_pdf:
                pdf_head_md = gr.Markdown(f"### {TRANS['vi']['pdf_head']}")
                pdf_sub_md = gr.Markdown(TRANS['vi']['pdf_sub'])
                with gr.Row(equal_height=True):
                    with gr.Column():
                        in_pdf = gr.File(
                            label=TRANS["vi"]["pdf_label"],
                            file_types=[".pdf"],
                            file_count="multiple",
                            height=300
                        )
                        btn_pdf = gr.Button(TRANS["vi"]["pdf_btn"], variant="primary", size="lg")
                    with gr.Column():
                        gr.Markdown("### Output")
                        out_word = gr.File(label="Download DOCX / ZIP", height=250)

            # OCR tool panel (hidden until selected in the radio menu)
            with gr.Group(visible=False) as group_ocr:
                ocr_head_md = gr.Markdown(f"### {TRANS['vi']['ocr_head']}")
                ocr_sub_md = gr.Markdown(TRANS['vi']['ocr_sub'])
                with gr.Row():
                    with gr.Column(scale=1):
                        # type="numpy": the handler receives the image as an array.
                        in_img = gr.Image(
                            label=TRANS["vi"]["ocr_label"],
                            type="numpy",
                            height=450,
                            sources=["upload", "clipboard"]
                        )
                        btn_ocr = gr.Button(TRANS["vi"]["ocr_btn"], variant="primary", size="lg")
                    with gr.Column(scale=1):
                        with gr.Tabs():
                            with gr.Tab("Markdown"):
                                out_txt = gr.Textbox(
                                    label=TRANS["vi"]["ocr_rs_text"],
                                    lines=20,
                                    placeholder="Markdown content will appear here...",
                                    buttons=["copy"]
                                )
                                out_file = gr.File(label="Download Full Results (.zip)")
                            with gr.Tab("Source Image"):
                                out_img_viz = gr.Image(
                                    label=TRANS["vi"]["ocr_rs_img"],
                                    interactive=False
                                )

            txt_footer = gr.Markdown(TRANS['vi']['footer'])

    # Event wiring
    # Menu selection toggles which tool panel is visible.
    radio_menu.change(toggle_view, inputs=[radio_menu, lang_state], outputs=[group_pdf, group_ocr])
    btn_pdf.click(convert_pdfs_to_word, inputs=[in_pdf, lang_state], outputs=out_word)
    btn_ocr.click(run_ocr_func, inputs=[in_img, lang_state], outputs=[out_txt, out_img_viz, out_file])
    # The outputs list must stay in the same order as the tuple returned by change_lang.
    btn_lang.click(
        change_lang,
        inputs=[lang_state],
        outputs=[
            lang_state, btn_lang, radio_menu,
            txt_title, txt_desc, txt_footer,
            pdf_head_md, pdf_sub_md, in_pdf, btn_pdf,
            ocr_head_md, ocr_sub_md, in_img, btn_ocr,
            out_txt, out_img_viz
        ]
    )

if __name__ == "__main__":
    # Script entry point: enable the request queue, then serve on every
    # network interface at port 7860 with the custom theme.
    queued_app = demo.queue()
    queued_app.launch(
        theme=theme,
        server_port=7860,
        server_name="0.0.0.0",
    )