Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from pdf2docx import Converter | |
| import os | |
| import zipfile | |
| import cv2 | |
| import numpy as np | |
| import shutil | |
| import uuid | |
| import glob | |
| import fitz | |
| import logging | |
| from docx import Document | |
# --- 1. SYSTEM CONFIGURATION ---
os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"

# OCR is optional: when PaddleOCRVL cannot be imported or constructed the app
# still starts, with ``ocr_pipeline`` left as None and ``HAS_OCR`` False.
ocr_pipeline = None
try:
    # Note: this file must not be named like the package it imports,
    # otherwise the import below becomes circular.
    from paddleocr import PaddleOCRVL

    ocr_pipeline = PaddleOCRVL(pipeline_version="v1.5")
    print("[OK] PaddleOCRVL initialized successfully.")
except Exception as init_error:
    print(f"[WARN] Could not initialize PaddleOCRVL. Error: {init_error}")
    print("Ensure you have installed: pip install paddlepaddle paddleocr")
HAS_OCR = ocr_pipeline is not None
# --- 2. LANGUAGE DICTIONARY ---
# UI strings, keyed first by language code ("vi" / "en") and then by message
# key. Both languages define exactly the same set of keys.
TRANS = {
    "vi": dict(
        app_name="PRO DOCUMENT TOOLKIT",
        app_desc="Xử lý tài liệu thông minh: Chuyển PDF → Word & Phân tích bố cục OCR",
        menu_pdf="📄 PDF sang Word",
        menu_ocr="👁️ Phân tích tài liệu (OCR-VL)",
        lang_btn="🇬🇧 English",
        pdf_head="Chuyển đổi PDF",
        pdf_sub="Giữ định dạng gốc, hỗ trợ nhiều file cùng lúc.",
        pdf_label="Tải lên file PDF (có thể nhiều file)",
        pdf_btn="🚀 Chuyển đổi",
        pdf_success="Chuyển đổi thành công!",
        ocr_head="Phân tích tài liệu & OCR",
        ocr_sub="Sử dụng PaddleOCRVL để nhận diện văn bản và cấu trúc (Markdown/JSON).",
        ocr_label="Tải ảnh lên (PNG, JPG, BMP)",
        ocr_btn="🔍 Phân tích Bố cục",
        ocr_rs_text="Kết quả Markdown",
        ocr_rs_img="Hình ảnh gốc",
        err_nofile="Vui lòng tải file lên!",
        err_ocr="OCR chưa sẵn sàng (kiểm tra cài đặt PaddleOCR).",
        footer="© 2024-2026 Developed by Chu Viet Kien. Powered by PaddleOCRVL & Gradio.",
    ),
    "en": dict(
        app_name="PRO DOCUMENT TOOLKIT",
        app_desc="Smart Document Processing: PDF to Word & OCR Layout Analysis",
        menu_pdf="📄 PDF to Word",
        menu_ocr="👁️ Doc Analysis (OCR-VL)",
        lang_btn="🇻🇳 Tiếng Việt",
        pdf_head="PDF Converter",
        pdf_sub="Preserve original layout, batch support.",
        pdf_label="Upload PDF files (multiple allowed)",
        pdf_btn="🚀 Convert",
        pdf_success="Conversion successful!",
        ocr_head="Document Analysis & OCR",
        ocr_sub="Uses PaddleOCRVL to extract text and structure (Markdown/JSON).",
        ocr_label="Upload Image (PNG, JPG, BMP)",
        ocr_btn="🔍 Analyze Layout",
        ocr_rs_text="Markdown Result",
        ocr_rs_img="Original Image",
        err_nofile="Please upload a file first!",
        err_ocr="OCR not available (check PaddleOCR installation).",
        footer="© 2024-2026 Developed by Chu Viet Kien. Powered by PaddleOCRVL & Gradio.",
    ),
}
| # --- 3. PROCESSING LOGIC --- | |
| def _is_scanned_pdf(pdf_path): | |
| """Check if a PDF has no extractable text layer (i.e. scanned/image-only).""" | |
| try: | |
| doc = fitz.open(pdf_path) | |
| has_text = any(page.get_text().strip() for page in doc) | |
| doc.close() | |
| return not has_text | |
| except Exception: | |
| return False | |
| def _docx_is_empty(docx_path): | |
| """Check if a DOCX has no meaningful text.""" | |
| try: | |
| doc = Document(docx_path) | |
| for para in doc.paragraphs: | |
| if para.text.strip(): | |
| return False | |
| for table in doc.tables: | |
| for row in table.rows: | |
| for cell in row.cells: | |
| if cell.text.strip(): | |
| return False | |
| return True | |
| except Exception: | |
| return True | |
def _ocr_pdf_to_docx(pdf_path, docx_path):
    """Fallback: convert a scanned PDF to DOCX using PaddleOCRVL.

    Every page is rendered to a PNG, run through the module-level
    ``ocr_pipeline``, and the Markdown saved for that page is appended to a
    new Word document, one paragraph per non-blank line. Pages are separated
    by page breaks. Markdown markup is copied as plain text; it is not
    converted into Word formatting.

    Args:
        pdf_path: Path of the PDF to read.
        docx_path: Path the resulting DOCX is saved to.

    Raises:
        RuntimeError: If the OCR pipeline was never initialized.
    """
    if ocr_pipeline is None:
        raise RuntimeError("PaddleOCRVL pipeline is not initialized.")

    temp_base = f"temp_ocr_pdf_{uuid.uuid4().hex}"
    os.makedirs(temp_base, exist_ok=True)
    pdf_doc = None
    try:
        # Opened inside the try so the scratch directory and the PDF handle
        # are both released if any setup step fails.
        pdf_doc = fitz.open(pdf_path)
        word_doc = Document()
        last_page = len(pdf_doc) - 1
        # 2x zoom on both axes; PyMuPDF renders at 72 DPI by default, so this
        # gives roughly 144 DPI.
        zoom = fitz.Matrix(2, 2)

        for i, page in enumerate(pdf_doc):
            img_path = os.path.join(temp_base, f"page_{i}.png")
            page.get_pixmap(matrix=zoom).save(img_path)

            # Run PaddleOCRVL and let it write its Markdown for this page.
            page_out_dir = os.path.join(temp_base, f"out_{i}")
            os.makedirs(page_out_dir, exist_ok=True)
            for res in ocr_pipeline.predict(img_path):
                res.save_to_markdown(save_path=page_out_dir)

            md_files = glob.glob(os.path.join(page_out_dir, "*.md"))
            if md_files:
                with open(md_files[0], "r", encoding="utf-8") as f:
                    page_md = f.read()
                for line in page_md.splitlines():
                    stripped = line.strip()
                    if stripped:
                        word_doc.add_paragraph(stripped)

            if i < last_page:
                word_doc.add_page_break()

        word_doc.save(docx_path)
    finally:
        if pdf_doc is not None:
            pdf_doc.close()
        shutil.rmtree(temp_base, ignore_errors=True)
def _upload_path(upload):
    """Return the filesystem path of a Gradio upload.

    Accepts either a plain path string or an object exposing the path as
    ``.name`` (a string without that attribute is returned unchanged).
    """
    return str(getattr(upload, "name", upload))


def _convert_with_pdf2docx(pdf_path, docx_path):
    """Convert one PDF with pdf2docx while the root logger is quieted."""
    # Temporarily bump the root logger to ERROR so pdf2docx info/warning
    # output is hidden; the previous level is always restored.
    root_logger = logging.getLogger()
    old_level = root_logger.level
    root_logger.setLevel(logging.ERROR)
    try:
        cv = Converter(pdf_path)
        try:
            cv.convert(docx_path)
        finally:
            # Release the converter even when the conversion fails.
            cv.close()
    finally:
        root_logger.setLevel(old_level)


def convert_pdfs_to_word(pdf_files, lang_code, progress=gr.Progress()):
    """Convert uploaded PDFs to DOCX, using OCR for scanned documents.

    Args:
        pdf_files: One upload or a list of uploads from the file widget.
        lang_code: Key into ``TRANS`` selecting the message language.
        progress: Gradio progress tracker.

    Returns:
        Path of the single DOCX, path of a zip holding all DOCX files when
        several PDFs were given, or None when nothing was uploaded.

    Raises:
        gr.Error: If any conversion step fails, so the failure is reported
            instead of being silently dropped.
    """
    T = TRANS[lang_code]
    if not pdf_files:
        gr.Warning(T["err_nofile"])
        return None
    if not isinstance(pdf_files, list):
        pdf_files = [pdf_files]

    # Each request writes into its own directory so concurrent requests and
    # same-named uploads cannot overwrite each other's output.
    # NOTE: the directory is kept on success because the returned file lives
    # in it; nothing in this function removes it later.
    out_dir = f"converted_{uuid.uuid4().hex}"
    os.makedirs(out_dir, exist_ok=True)
    converted_files = []
    used_names = set()
    try:
        progress(0, desc="Starting...")
        total = len(pdf_files)
        for idx, pdf_file in enumerate(pdf_files):
            pdf_path = _upload_path(pdf_file)
            file_name = os.path.basename(pdf_path)
            stem = os.path.splitext(file_name)[0]

            # Keep output names unique within this batch.
            docx_base = f"{stem}.docx"
            suffix = 1
            while docx_base in used_names:
                docx_base = f"{stem}_{suffix}.docx"
                suffix += 1
            used_names.add(docx_base)
            docx_name = os.path.join(out_dir, docx_base)

            # Route 1: obvious scanned PDF -> OCR directly
            if HAS_OCR and _is_scanned_pdf(pdf_path):
                progress((idx + 1) / total, desc=f"OCR: {file_name}")
                _ocr_pdf_to_docx(pdf_path, docx_name)
            else:
                progress((idx + 1) / total, desc=f"Converting: {file_name}")
                _convert_with_pdf2docx(pdf_path, docx_name)
                # Route 2: pdf2docx produced empty docx -> OCR fallback
                if HAS_OCR and _docx_is_empty(docx_name):
                    if os.path.exists(docx_name):
                        os.remove(docx_name)
                    _ocr_pdf_to_docx(pdf_path, docx_name)
            converted_files.append(docx_name)

        gr.Info(f"✅ {T['pdf_success']}")
        if len(converted_files) == 1:
            return converted_files[0]

        zip_name = os.path.join(out_dir, "Converted_Documents.zip")
        with zipfile.ZipFile(zip_name, 'w') as zf:
            for docx_path in converted_files:
                # Store only the file name, not the scratch directory path.
                zf.write(docx_path, arcname=os.path.basename(docx_path))
        return zip_name
    except Exception as e:
        shutil.rmtree(out_dir, ignore_errors=True)
        # gr.Error is an exception: it is only shown to the user when raised.
        raise gr.Error(f"Error: {str(e)}") from e
def run_ocr_func(image, lang_code):
    """Run PaddleOCRVL on one image and package the results.

    Args:
        image: Image array from the Gradio image widget, or None when nothing
            was uploaded. It is converted with ``COLOR_RGB2BGR`` before being
            written to disk.
        lang_code: Key into ``TRANS`` selecting the message language.

    Returns:
        A ``(markdown_text, image, zip_path)`` tuple, or ``(None, None, None)``
        when no image was supplied.

    Raises:
        gr.Error: If OCR is unavailable or the pipeline fails.
    """
    T = TRANS[lang_code]

    # 1. Validation
    if not HAS_OCR:
        # gr.Error is an exception: it is only shown to the user when raised.
        raise gr.Error(T["err_ocr"])
    if image is None:
        gr.Warning(T["err_nofile"])
        return None, None, None

    # Setup temporary paths to handle file I/O for PaddleOCRVL
    session_id = uuid.uuid4().hex
    temp_dir = f"temp_ocr_{session_id}"
    input_img_path = os.path.join(temp_dir, "input_image.png")
    output_save_path = os.path.join(temp_dir, "output")
    # Creates temp_dir too. The output directory must exist even when the
    # pipeline yields no results, otherwise archiving it below fails.
    os.makedirs(output_save_path, exist_ok=True)

    try:
        gr.Info("⏳ Initializing OCR-VL Pipeline...")

        # 2. Save the image to a file for the pipeline, converting
        # RGB (Gradio) to BGR (OpenCV).
        img_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        # cv2.imwrite signals failure through its return value.
        if not cv2.imwrite(input_img_path, img_bgr):
            raise OSError(f"Could not write image to {input_img_path}")

        # 3. Run PaddleOCRVL Logic
        # See https://www.paddleocr.ai/latest/version3.x/pipeline_usage/PaddleOCR-VL.html
        # 4. Process Results
        for res in ocr_pipeline.predict(input_img_path):
            res.save_to_json(save_path=output_save_path)
            res.save_to_markdown(save_path=output_save_path)

        # 5. Retrieve Generated Content
        # The markdown file name varies with the library version, so look it
        # up by extension; sorting makes the choice deterministic.
        md_files = sorted(glob.glob(os.path.join(output_save_path, "*.md")))
        if md_files:
            with open(md_files[0], "r", encoding="utf-8") as f:
                markdown_text = f.read()
        else:
            markdown_text = "Analysis complete, but no markdown file was found."

        # 6. Package Results (Zip JSON & Markdown)
        archive_base = f"OCR_Result_{session_id}"
        shutil.make_archive(archive_base, 'zip', output_save_path)
        zip_output_path = f"{archive_base}.zip"

        # The original image is returned unchanged; nothing is drawn on it.
        return markdown_text, image, zip_output_path
    except Exception as e:
        raise gr.Error(f"OCR Pipeline Error: {str(e)}") from e
    finally:
        # Remove the scratch directory; the zip lives outside it and is kept.
        shutil.rmtree(temp_dir, ignore_errors=True)
# --- 4. UI HELPERS ---
def change_lang(lang):
    """Switch the UI to the other language.

    Returns a 16-tuple: the new language code followed by the new value or
    label update for each translated widget, in the order the
    ``btn_lang.click`` handler lists its outputs.
    """
    target = "en" if lang == "vi" else "vi"
    strings = TRANS[target]

    def relabel(key):
        # Update that sets only the widget label.
        return gr.update(label=strings[key])

    menu_choices = [strings["menu_pdf"], strings["menu_ocr"]]
    return (
        target,                                                  # lang_state
        strings["lang_btn"],                                     # btn_lang
        gr.update(choices=menu_choices, value=menu_choices[0]),  # radio_menu
        "## " + strings["app_name"],                             # txt_title
        strings["app_desc"],                                     # txt_desc
        strings["footer"],                                       # txt_footer
        "### " + strings["pdf_head"],                            # pdf_head_md
        strings["pdf_sub"],                                      # pdf_sub_md
        relabel("pdf_label"),                                    # in_pdf
        strings["pdf_btn"],                                      # btn_pdf
        "### " + strings["ocr_head"],                            # ocr_head_md
        strings["ocr_sub"],                                      # ocr_sub_md
        relabel("ocr_label"),                                    # in_img
        strings["ocr_btn"],                                      # btn_ocr
        relabel("ocr_rs_text"),                                  # out_txt
        relabel("ocr_rs_img"),                                   # out_img_viz
    )
def toggle_view(menu_val, lang):
    """Show the PDF group or the OCR group depending on the selected menu entry.

    The selection is compared against the PDF menu label of both languages;
    ``lang`` is accepted but not used.
    """
    show_pdf = any(
        menu_val == TRANS[code]["menu_pdf"] for code in ("vi", "en")
    )
    pdf_group_update = gr.update(visible=show_pdf)
    ocr_group_update = gr.update(visible=not show_pdf)
    return pdf_group_update, ocr_group_update
# --- 5. THEME ---
# Soft base theme with a blue/slate palette, then a few explicit overrides.
_THEME_OVERRIDES = {
    "body_background_fill": "#f8fafc",
    "block_background_fill": "white",
    "block_border_width": "1px",
    "button_primary_background_fill": "linear-gradient(90deg, #3b82f6, #2563eb)",
    "button_primary_text_color": "white",
}
theme = gr.themes.Soft(
    primary_hue="blue",
    neutral_hue="slate",
    font=[gr.themes.GoogleFont('Inter'), 'system-ui', 'sans-serif'],
).set(**_THEME_OVERRIDES)
# --- 6. INTERFACE ---
# Widgets are created with the Vietnamese strings; change_lang() supplies the
# replacements when the language button is clicked.
_vi = TRANS["vi"]
_ocr_status = "Ready" if HAS_OCR else "N/A"

with gr.Blocks(title="Pro Document Toolkit") as demo:
    lang_state = gr.State("vi")

    with gr.Row():
        # Left column: language switch, tool selector and status line.
        with gr.Column(scale=1, min_width=250):
            gr.Markdown("### 🛠️ Dashboard")
            btn_lang = gr.Button(_vi["lang_btn"], variant="secondary")
            radio_menu = gr.Radio(
                choices=[_vi["menu_pdf"], _vi["menu_ocr"]],
                value=_vi["menu_pdf"],
                label="Function / Chức năng",
                type="value",
            )
            gr.Markdown("---")
            gr.Markdown(f"**Status:** 🟢 Online | VL-OCR: {_ocr_status}")

        # Right column: title plus the two tool panels (one visible at a time).
        with gr.Column(scale=4):
            txt_title = gr.Markdown("## " + _vi["app_name"])
            txt_desc = gr.Markdown(_vi["app_desc"])

            # PDF Tool
            with gr.Group(visible=True) as group_pdf:
                pdf_head_md = gr.Markdown("### " + _vi["pdf_head"])
                pdf_sub_md = gr.Markdown(_vi["pdf_sub"])
                with gr.Row(equal_height=True):
                    with gr.Column():
                        in_pdf = gr.File(
                            label=_vi["pdf_label"],
                            file_types=[".pdf"],
                            file_count="multiple",
                            height=300,
                        )
                        btn_pdf = gr.Button(
                            _vi["pdf_btn"], variant="primary", size="lg"
                        )
                    with gr.Column():
                        gr.Markdown("### Output")
                        out_word = gr.File(
                            label="Download DOCX / ZIP", height=250
                        )

            # OCR Tool (Updated for VL)
            with gr.Group(visible=False) as group_ocr:
                ocr_head_md = gr.Markdown("### " + _vi["ocr_head"])
                ocr_sub_md = gr.Markdown(_vi["ocr_sub"])
                with gr.Row():
                    with gr.Column(scale=1):
                        in_img = gr.Image(
                            label=_vi["ocr_label"],
                            type="numpy",
                            height=450,
                            sources=["upload", "clipboard"],
                        )
                        btn_ocr = gr.Button(
                            _vi["ocr_btn"], variant="primary", size="lg"
                        )
                    with gr.Column(scale=1):
                        with gr.Tabs():
                            with gr.Tab("Markdown"):
                                out_txt = gr.Textbox(
                                    label=_vi["ocr_rs_text"],
                                    lines=20,
                                    placeholder="Markdown content will appear here...",
                                    buttons=["copy"],
                                )
                                out_file = gr.File(
                                    label="Download Full Results (.zip)"
                                )
                            with gr.Tab("Source Image"):
                                out_img_viz = gr.Image(
                                    label=_vi["ocr_rs_img"],
                                    interactive=False,
                                )

    txt_footer = gr.Markdown(_vi["footer"])

    # EVENTS
    radio_menu.change(
        toggle_view,
        inputs=[radio_menu, lang_state],
        outputs=[group_pdf, group_ocr],
    )
    btn_pdf.click(
        convert_pdfs_to_word,
        inputs=[in_pdf, lang_state],
        outputs=out_word,
    )
    btn_ocr.click(
        run_ocr_func,
        inputs=[in_img, lang_state],
        outputs=[out_txt, out_img_viz, out_file],
    )
    # Output order must match the tuple returned by change_lang().
    _translated_widgets = [
        lang_state, btn_lang, radio_menu,
        txt_title, txt_desc, txt_footer,
        pdf_head_md, pdf_sub_md, in_pdf, btn_pdf,
        ocr_head_md, ocr_sub_md, in_img, btn_ocr,
        out_txt, out_img_viz,
    ]
    btn_lang.click(change_lang, inputs=[lang_state], outputs=_translated_widgets)
if __name__ == "__main__":
    # Enable the request queue, then serve on all interfaces at port 7860.
    _launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "theme": theme,
    }
    demo.queue().launch(**_launch_options)
# --- 1. SYSTEM CONFIGURATION ---
os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"

# OCR is optional: when PaddleOCRVL cannot be imported or constructed the app
# still starts, with ``ocr_pipeline`` left as None and ``HAS_OCR`` False.
HAS_OCR = False
ocr_pipeline = None
try:
    from paddleocr import PaddleOCRVL

    # The pipeline version is a bare version tag ("v1.5"), matching the other
    # initialization block in this file, not a "PaddlePaddle/..." model path.
    ocr_pipeline = PaddleOCRVL(pipeline_version="v1.5")
    HAS_OCR = True
    print("✅ PaddleOCRVL initialized successfully.")
except Exception as e:
    print(f"⚠️ Warning: Could not initialize PaddleOCRVL. Error: {e}")
    print("Ensure you have installed: pip install paddlepaddle paddleocr")
# --- 2. LANGUAGE DICTIONARY ---
# UI strings, keyed first by language code ("vi" / "en") and then by message
# key. Both languages define exactly the same set of keys.
TRANS = {
    "vi": dict(
        app_name="PRO DOCUMENT TOOLKIT",
        app_desc="Xử lý tài liệu thông minh: Chuyển PDF → Word & Phân tích bố cục OCR",
        menu_pdf="📄 PDF sang Word",
        menu_ocr="👁️ Phân tích tài liệu (OCR-VL)",
        lang_btn="🇬🇧 English",
        pdf_head="Chuyển đổi PDF",
        pdf_sub="Giữ định dạng gốc, hỗ trợ nhiều file cùng lúc.",
        pdf_label="Tải lên file PDF (có thể nhiều file)",
        pdf_btn="🚀 Chuyển đổi",
        pdf_success="Chuyển đổi thành công!",
        ocr_head="Phân tích tài liệu & OCR",
        ocr_sub="Sử dụng PaddleOCRVL để nhận diện văn bản và cấu trúc (Markdown/JSON).",
        ocr_label="Tải ảnh lên (PNG, JPG, BMP)",
        ocr_btn="🔍 Phân tích Bố cục",
        ocr_rs_text="Kết quả Markdown",
        ocr_rs_img="Hình ảnh gốc",
        err_nofile="Vui lòng tải file lên!",
        err_ocr="OCR chưa sẵn sàng (kiểm tra cài đặt PaddleOCR).",
        footer="© 2024-2026 Developed by Chu Viet Kien. Powered by PaddleOCRVL & Gradio.",
    ),
    "en": dict(
        app_name="PRO DOCUMENT TOOLKIT",
        app_desc="Smart Document Processing: PDF to Word & OCR Layout Analysis",
        menu_pdf="📄 PDF to Word",
        menu_ocr="👁️ Doc Analysis (OCR-VL)",
        lang_btn="🇻🇳 Tiếng Việt",
        pdf_head="PDF Converter",
        pdf_sub="Preserve original layout, batch support.",
        pdf_label="Upload PDF files (multiple allowed)",
        pdf_btn="🚀 Convert",
        pdf_success="Conversion successful!",
        ocr_head="Document Analysis & OCR",
        ocr_sub="Uses PaddleOCRVL to extract text and structure (Markdown/JSON).",
        ocr_label="Upload Image (PNG, JPG, BMP)",
        ocr_btn="🔍 Analyze Layout",
        ocr_rs_text="Markdown Result",
        ocr_rs_img="Original Image",
        err_nofile="Please upload a file first!",
        err_ocr="OCR not available (check PaddleOCR installation).",
        footer="© 2024-2026 Developed by Chu Viet Kien. Powered by PaddleOCRVL & Gradio.",
    ),
}
# --- 3. PROCESSING LOGIC ---
def _upload_path(upload):
    """Return the filesystem path of a Gradio upload.

    Accepts either a plain path string or an object exposing the path as
    ``.name`` (a string without that attribute is returned unchanged).
    """
    return str(getattr(upload, "name", upload))


def convert_pdfs_to_word(pdf_files, lang_code, progress=gr.Progress()):
    """Convert uploaded PDFs to DOCX with pdf2docx.

    Args:
        pdf_files: One upload or a list of uploads from the file widget.
        lang_code: Key into ``TRANS`` selecting the message language.
        progress: Gradio progress tracker.

    Returns:
        Path of the single DOCX, path of a zip holding all DOCX files when
        several PDFs were given, or None when nothing was uploaded.

    Raises:
        gr.Error: If any conversion step fails, so the failure is reported
            instead of being silently dropped.
    """
    T = TRANS[lang_code]
    if not pdf_files:
        gr.Warning(T["err_nofile"])
        return None
    if not isinstance(pdf_files, list):
        pdf_files = [pdf_files]

    # Each request writes into its own directory so concurrent requests and
    # same-named uploads cannot overwrite each other's output.
    # NOTE: the directory is kept on success because the returned file lives
    # in it; nothing in this function removes it later.
    out_dir = f"converted_{uuid.uuid4().hex}"
    os.makedirs(out_dir, exist_ok=True)
    converted_files = []
    used_names = set()
    try:
        progress(0, desc="Starting...")
        total = len(pdf_files)
        for idx, pdf_file in enumerate(pdf_files):
            pdf_path = _upload_path(pdf_file)
            file_name = os.path.basename(pdf_path)
            progress((idx + 1) / total, desc=f"Processing: {file_name}")

            # Keep output names unique within this batch.
            stem = os.path.splitext(file_name)[0]
            docx_base = f"{stem}.docx"
            suffix = 1
            while docx_base in used_names:
                docx_base = f"{stem}_{suffix}.docx"
                suffix += 1
            used_names.add(docx_base)
            docx_name = os.path.join(out_dir, docx_base)

            cv = Converter(pdf_path)
            try:
                cv.convert(docx_name)
            finally:
                # Release the converter even when the conversion fails.
                cv.close()
            converted_files.append(docx_name)

        gr.Info(f"✅ {T['pdf_success']}")
        if len(converted_files) == 1:
            return converted_files[0]

        zip_name = os.path.join(out_dir, "Converted_Documents.zip")
        with zipfile.ZipFile(zip_name, 'w') as zf:
            for docx_path in converted_files:
                # Store only the file name, not the scratch directory path.
                zf.write(docx_path, arcname=os.path.basename(docx_path))
        return zip_name
    except Exception as e:
        shutil.rmtree(out_dir, ignore_errors=True)
        # gr.Error is an exception: it is only shown to the user when raised.
        raise gr.Error(f"Error: {str(e)}") from e
def run_ocr_func(image, lang_code):
    """Run PaddleOCRVL on one image and package the results.

    Args:
        image: Image array from the Gradio image widget, or None when nothing
            was uploaded. It is converted with ``COLOR_RGB2BGR`` before being
            written to disk.
        lang_code: Key into ``TRANS`` selecting the message language.

    Returns:
        A ``(markdown_text, image, zip_path)`` tuple, or ``(None, None, None)``
        when no image was supplied.

    Raises:
        gr.Error: If OCR is unavailable or the pipeline fails.
    """
    T = TRANS[lang_code]

    # 1. Validation
    if not HAS_OCR:
        # gr.Error is an exception: it is only shown to the user when raised.
        raise gr.Error(T["err_ocr"])
    if image is None:
        gr.Warning(T["err_nofile"])
        return None, None, None

    # Setup temporary paths to handle file I/O for PaddleOCRVL
    session_id = uuid.uuid4().hex
    temp_dir = f"temp_ocr_{session_id}"
    input_img_path = os.path.join(temp_dir, "input_image.png")
    output_save_path = os.path.join(temp_dir, "output")
    # Creates temp_dir too. The output directory must exist even when the
    # pipeline yields no results, otherwise archiving it below fails.
    os.makedirs(output_save_path, exist_ok=True)

    try:
        gr.Info("⏳ Initializing OCR-VL Pipeline...")

        # 2. Save the image to a file for the pipeline, converting
        # RGB (Gradio) to BGR (OpenCV).
        img_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        # cv2.imwrite signals failure through its return value.
        if not cv2.imwrite(input_img_path, img_bgr):
            raise OSError(f"Could not write image to {input_img_path}")

        # 3. Run PaddleOCRVL Logic
        # See https://www.paddleocr.ai/latest/version3.x/pipeline_usage/PaddleOCR-VL.html
        # 4. Process Results
        for res in ocr_pipeline.predict(input_img_path):
            res.save_to_json(save_path=output_save_path)
            res.save_to_markdown(save_path=output_save_path)

        # 5. Retrieve Generated Content
        # The markdown file name varies with the library version, so look it
        # up by extension; sorting makes the choice deterministic.
        md_files = sorted(glob.glob(os.path.join(output_save_path, "*.md")))
        if md_files:
            with open(md_files[0], "r", encoding="utf-8") as f:
                markdown_text = f.read()
        else:
            markdown_text = "Analysis complete, but no markdown file was found."

        # 6. Package Results (Zip JSON & Markdown)
        archive_base = f"OCR_Result_{session_id}"
        shutil.make_archive(archive_base, 'zip', output_save_path)
        zip_output_path = f"{archive_base}.zip"

        # The original image is returned unchanged; nothing is drawn on it.
        return markdown_text, image, zip_output_path
    except Exception as e:
        raise gr.Error(f"OCR Pipeline Error: {str(e)}") from e
    finally:
        # Remove the scratch directory; the zip lives outside it and is kept.
        shutil.rmtree(temp_dir, ignore_errors=True)
# --- 4. UI HELPERS ---
def change_lang(lang):
    """Switch the UI to the other language.

    Returns a 16-tuple: the new language code followed by the new value or
    label update for each translated widget, in the order the
    ``btn_lang.click`` handler lists its outputs.
    """
    target = "en" if lang == "vi" else "vi"
    strings = TRANS[target]

    def relabel(key):
        # Update that sets only the widget label.
        return gr.update(label=strings[key])

    menu_choices = [strings["menu_pdf"], strings["menu_ocr"]]
    return (
        target,                                                  # lang_state
        strings["lang_btn"],                                     # btn_lang
        gr.update(choices=menu_choices, value=menu_choices[0]),  # radio_menu
        "## " + strings["app_name"],                             # txt_title
        strings["app_desc"],                                     # txt_desc
        strings["footer"],                                       # txt_footer
        "### " + strings["pdf_head"],                            # pdf_head_md
        strings["pdf_sub"],                                      # pdf_sub_md
        relabel("pdf_label"),                                    # in_pdf
        strings["pdf_btn"],                                      # btn_pdf
        "### " + strings["ocr_head"],                            # ocr_head_md
        strings["ocr_sub"],                                      # ocr_sub_md
        relabel("ocr_label"),                                    # in_img
        strings["ocr_btn"],                                      # btn_ocr
        relabel("ocr_rs_text"),                                  # out_txt
        relabel("ocr_rs_img"),                                   # out_img_viz
    )
def toggle_view(menu_val, lang):
    """Show the PDF group or the OCR group depending on the selected menu entry.

    The selection is compared against the PDF menu label of both languages;
    ``lang`` is accepted but not used.
    """
    show_pdf = any(
        menu_val == TRANS[code]["menu_pdf"] for code in ("vi", "en")
    )
    pdf_group_update = gr.update(visible=show_pdf)
    ocr_group_update = gr.update(visible=not show_pdf)
    return pdf_group_update, ocr_group_update
# --- 5. THEME ---
# Soft base theme with a blue/slate palette, then a few explicit overrides.
_THEME_OVERRIDES = {
    "body_background_fill": "#f8fafc",
    "block_background_fill": "white",
    "block_border_width": "1px",
    "button_primary_background_fill": "linear-gradient(90deg, #3b82f6, #2563eb)",
    "button_primary_text_color": "white",
}
theme = gr.themes.Soft(
    primary_hue="blue",
    neutral_hue="slate",
    font=[gr.themes.GoogleFont('Inter'), 'system-ui', 'sans-serif'],
).set(**_THEME_OVERRIDES)
# --- 6. INTERFACE ---
# Widgets are created with the Vietnamese strings; change_lang() supplies the
# replacements when the language button is clicked.
_vi = TRANS["vi"]
_ocr_status = "Ready" if HAS_OCR else "N/A"

with gr.Blocks(title="Pro Document Toolkit") as demo:
    lang_state = gr.State("vi")

    with gr.Row():
        # Left column: language switch, tool selector and status line.
        with gr.Column(scale=1, min_width=250):
            gr.Markdown("### 🛠️ Dashboard")
            btn_lang = gr.Button(_vi["lang_btn"], variant="secondary")
            radio_menu = gr.Radio(
                choices=[_vi["menu_pdf"], _vi["menu_ocr"]],
                value=_vi["menu_pdf"],
                label="Function / Chức năng",
                type="value",
            )
            gr.Markdown("---")
            gr.Markdown(f"**Status:** 🟢 Online | VL-OCR: {_ocr_status}")

        # Right column: title plus the two tool panels (one visible at a time).
        with gr.Column(scale=4):
            txt_title = gr.Markdown("## " + _vi["app_name"])
            txt_desc = gr.Markdown(_vi["app_desc"])

            # PDF Tool
            with gr.Group(visible=True) as group_pdf:
                pdf_head_md = gr.Markdown("### " + _vi["pdf_head"])
                pdf_sub_md = gr.Markdown(_vi["pdf_sub"])
                with gr.Row(equal_height=True):
                    with gr.Column():
                        in_pdf = gr.File(
                            label=_vi["pdf_label"],
                            file_types=[".pdf"],
                            file_count="multiple",
                            height=300,
                        )
                        btn_pdf = gr.Button(
                            _vi["pdf_btn"], variant="primary", size="lg"
                        )
                    with gr.Column():
                        gr.Markdown("### Output")
                        out_word = gr.File(
                            label="Download DOCX / ZIP", height=250
                        )

            # OCR Tool (Updated for VL)
            with gr.Group(visible=False) as group_ocr:
                ocr_head_md = gr.Markdown("### " + _vi["ocr_head"])
                ocr_sub_md = gr.Markdown(_vi["ocr_sub"])
                with gr.Row():
                    with gr.Column(scale=1):
                        in_img = gr.Image(
                            label=_vi["ocr_label"],
                            type="numpy",
                            height=450,
                            sources=["upload", "clipboard"],
                        )
                        btn_ocr = gr.Button(
                            _vi["ocr_btn"], variant="primary", size="lg"
                        )
                    with gr.Column(scale=1):
                        with gr.Tabs():
                            with gr.Tab("Markdown"):
                                out_txt = gr.Textbox(
                                    label=_vi["ocr_rs_text"],
                                    lines=20,
                                    placeholder="Markdown content will appear here...",
                                    buttons=["copy"],
                                )
                                out_file = gr.File(
                                    label="Download Full Results (.zip)"
                                )
                            with gr.Tab("Source Image"):
                                out_img_viz = gr.Image(
                                    label=_vi["ocr_rs_img"],
                                    interactive=False,
                                )

    txt_footer = gr.Markdown(_vi["footer"])

    # EVENTS
    radio_menu.change(
        toggle_view,
        inputs=[radio_menu, lang_state],
        outputs=[group_pdf, group_ocr],
    )
    btn_pdf.click(
        convert_pdfs_to_word,
        inputs=[in_pdf, lang_state],
        outputs=out_word,
    )
    btn_ocr.click(
        run_ocr_func,
        inputs=[in_img, lang_state],
        outputs=[out_txt, out_img_viz, out_file],
    )
    # Output order must match the tuple returned by change_lang().
    _translated_widgets = [
        lang_state, btn_lang, radio_menu,
        txt_title, txt_desc, txt_footer,
        pdf_head_md, pdf_sub_md, in_pdf, btn_pdf,
        ocr_head_md, ocr_sub_md, in_img, btn_ocr,
        out_txt, out_img_viz,
    ]
    btn_lang.click(change_lang, inputs=[lang_state], outputs=_translated_widgets)
if __name__ == "__main__":
    # Enable the request queue, then serve on all interfaces at port 7860.
    _launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "theme": theme,
    }
    demo.queue().launch(**_launch_options)