import gradio as gr
from pdf2docx import Converter
import os
import zipfile
import cv2
import numpy as np
import shutil
import uuid
import glob
import fitz
import logging
from docx import Document

# --- 1. SYSTEM CONFIGURATION ---
# Must be set before paddleocr is imported below.
os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"

# HAS_OCR gates every OCR code path; ocr_pipeline stays None when init fails.
HAS_OCR = False
ocr_pipeline = None
try:
    # UPDATED: Using PaddleOCRVL as requested
    # Note: Import AFTER renaming this file to avoid circular import
    from paddleocr import PaddleOCRVL

    # Initialize the specific pipeline version
    ocr_pipeline = PaddleOCRVL(pipeline_version="v1.5")
    HAS_OCR = True
    print("[OK] PaddleOCRVL initialized successfully.")
except Exception as e:
    # Best effort: the app still starts (PDF conversion only) without OCR.
    print(f"[WARN] Could not initialize PaddleOCRVL. Error: {e}")
    print("Ensure you have installed: pip install paddlepaddle paddleocr")

# --- 2. LANGUAGE DICTIONARY ---
# UI strings keyed by language code ("vi" / "en"), then by message key.
TRANS = {
    "vi": {
        "app_name": "PRO DOCUMENT TOOLKIT",
        "app_desc": "Xử lý tài liệu thông minh: Chuyển PDF → Word & Phân tích bố cục OCR",
        "menu_pdf": "📄 PDF sang Word",
        "menu_ocr": "👁️ Phân tích tài liệu (OCR-VL)",
        "lang_btn": "🇬🇧 English",
        "pdf_head": "Chuyển đổi PDF",
        "pdf_sub": "Giữ định dạng gốc, hỗ trợ nhiều file cùng lúc.",
        "pdf_label": "Tải lên file PDF (có thể nhiều file)",
        "pdf_btn": "🚀 Chuyển đổi",
        "pdf_success": "Chuyển đổi thành công!",
        "ocr_head": "Phân tích tài liệu & OCR",
        "ocr_sub": "Sử dụng PaddleOCRVL để nhận diện văn bản và cấu trúc (Markdown/JSON).",
        "ocr_label": "Tải ảnh lên (PNG, JPG, BMP)",
        "ocr_btn": "🔍 Phân tích Bố cục",
        "ocr_rs_text": "Kết quả Markdown",
        "ocr_rs_img": "Hình ảnh gốc",
        "err_nofile": "Vui lòng tải file lên!",
        "err_ocr": "OCR chưa sẵn sàng (kiểm tra cài đặt PaddleOCR).",
        "footer": "© 2024-2026 Developed by Chu Viet Kien. Powered by PaddleOCRVL & Gradio."
    },
    "en": {
        "app_name": "PRO DOCUMENT TOOLKIT",
        "app_desc": "Smart Document Processing: PDF to Word & OCR Layout Analysis",
        "menu_pdf": "📄 PDF to Word",
        "menu_ocr": "👁️ Doc Analysis (OCR-VL)",
        "lang_btn": "🇻🇳 Tiếng Việt",
        "pdf_head": "PDF Converter",
        "pdf_sub": "Preserve original layout, batch support.",
        "pdf_label": "Upload PDF files (multiple allowed)",
        "pdf_btn": "🚀 Convert",
        "pdf_success": "Conversion successful!",
        "ocr_head": "Document Analysis & OCR",
        "ocr_sub": "Uses PaddleOCRVL to extract text and structure (Markdown/JSON).",
        "ocr_label": "Upload Image (PNG, JPG, BMP)",
        "ocr_btn": "🔍 Analyze Layout",
        "ocr_rs_text": "Markdown Result",
        "ocr_rs_img": "Original Image",
        "err_nofile": "Please upload a file first!",
        "err_ocr": "OCR not available (check PaddleOCR installation).",
        "footer": "© 2024-2026 Developed by Chu Viet Kien. Powered by PaddleOCRVL & Gradio."
    }
}


# --- 3. PROCESSING LOGIC ---
def _is_scanned_pdf(pdf_path):
    """Check if a PDF has no extractable text layer (i.e. scanned/image-only).

    Returns True when no page yields non-whitespace text, and False when any
    page has text or when the file cannot be opened or read.
    """
    try:
        doc = fitz.open(pdf_path)
        try:
            # any() stops at the first page that yields text.
            return not any(page.get_text().strip() for page in doc)
        finally:
            # Close the document even if text extraction raises.
            doc.close()
    except Exception:
        # Unreadable PDFs are treated as "not scanned" so the caller takes
        # the regular pdf2docx route.
        return False


def _docx_is_empty(docx_path):
    """Check if a DOCX has no meaningful text.

    Looks at body paragraphs first, then at every table cell. Returns True
    when nothing but whitespace is found, and also when the file cannot be
    opened (an unreadable result is treated as empty).
    """
    try:
        doc = Document(docx_path)
        if any(para.text.strip() for para in doc.paragraphs):
            return False
        for table in doc.tables:
            for row in table.rows:
                if any(cell.text.strip() for cell in row.cells):
                    return False
        return True
    except Exception:
        return True


def _ocr_pdf_to_docx(pdf_path, docx_path):
    """Fallback: convert a scanned PDF to DOCX using PaddleOCRVL.

    Each page is rendered to a PNG, passed to ``ocr_pipeline.predict``, and the
    first Markdown file the pipeline saves for that page is copied into the
    document one non-blank line per paragraph. Pages are separated by page
    breaks. The scratch directory is removed and the PDF closed on every exit
    path; exceptions propagate to the caller.
    """
    pdf_doc = fitz.open(pdf_path)
    word_doc = Document()
    temp_base = f"temp_ocr_pdf_{uuid.uuid4().hex}"
    os.makedirs(temp_base, exist_ok=True)
    try:
        # 2x zoom over PyMuPDF's 72 DPI base, i.e. roughly 144 DPI.
        mat = fitz.Matrix(2, 2)
        last_index = len(pdf_doc) - 1
        for i, page in enumerate(pdf_doc):
            # Render page to image
            pix = page.get_pixmap(matrix=mat)
            img_path = os.path.join(temp_base, f"page_{i}.png")
            pix.save(img_path)

            # Run PaddleOCRVL
            output = ocr_pipeline.predict(img_path)
            page_out_dir = os.path.join(temp_base, f"out_{i}")
            os.makedirs(page_out_dir, exist_ok=True)
            for res in output:
                res.save_to_markdown(save_path=page_out_dir)

            # Only the first Markdown file found for the page is used.
            md_files = glob.glob(os.path.join(page_out_dir, "*.md"))
            if md_files:
                with open(md_files[0], "r", encoding="utf-8") as f:
                    page_md = f.read().strip()
                for line in page_md.splitlines():
                    stripped = line.strip()
                    if stripped:
                        word_doc.add_paragraph(stripped)

            # No trailing page break after the final page.
            if i < last_index:
                word_doc.add_page_break()
        word_doc.save(docx_path)
    finally:
        pdf_doc.close()
        shutil.rmtree(temp_base, ignore_errors=True)


def convert_pdfs_to_word(pdf_files, lang_code, progress=gr.Progress()):
    """Convert uploaded PDFs to DOCX and return a path for download.

    Args:
        pdf_files: One upload or a list of uploads. Each item is either an
            object exposing the file path as ``.name`` or a plain path string.
        lang_code: Key into ``TRANS`` selecting the UI message language.
        progress: Gradio progress tracker.

    Returns:
        The DOCX path when exactly one file was converted, the path of
        ``Converted_Documents.zip`` when several were, or None when nothing
        was uploaded. Outputs are written to the current working directory.

    Raises:
        gr.Error: If any step of the conversion fails.
    """
    T = TRANS[lang_code]
    if not pdf_files:
        gr.Warning(T["err_nofile"])
        return None
    if not isinstance(pdf_files, list):
        pdf_files = [pdf_files]

    converted_files = []
    total = len(pdf_files)
    try:
        progress(0, desc="Starting...")
        for idx, pdf_file in enumerate(pdf_files):
            # Accept both file-like uploads (.name) and plain path strings.
            pdf_path = getattr(pdf_file, "name", pdf_file)
            file_name = os.path.basename(pdf_path)
            docx_name = os.path.splitext(file_name)[0] + ".docx"

            # Route 1: obvious scanned PDF -> OCR directly
            if _is_scanned_pdf(pdf_path) and HAS_OCR:
                progress((idx + 1) / total, desc=f"OCR: {file_name}")
                _ocr_pdf_to_docx(pdf_path, docx_name)
            else:
                progress((idx + 1) / total, desc=f"Converting: {file_name}")
                # Temporarily bump root logger to ERROR so pdf2docx
                # info/warning spam is hidden
                _root_logger = logging.getLogger()
                _old_level = _root_logger.level
                _root_logger.setLevel(logging.ERROR)
                try:
                    cv = Converter(pdf_path)
                    try:
                        cv.convert(docx_name)
                    finally:
                        # Release the PDF handle even when convert() fails.
                        cv.close()
                finally:
                    _root_logger.setLevel(_old_level)

                # Route 2: pdf2docx produced empty docx -> OCR fallback
                if HAS_OCR and _docx_is_empty(docx_name):
                    os.remove(docx_name)
                    _ocr_pdf_to_docx(pdf_path, docx_name)

            converted_files.append(docx_name)

        gr.Info(f"✅ {T['pdf_success']}")

        if len(converted_files) == 1:
            return converted_files[0]

        zip_name = "Converted_Documents.zip"
        with zipfile.ZipFile(zip_name, 'w') as zf:
            for f in converted_files:
                zf.write(f)
        return zip_name
    except Exception as e:
        # gr.Error is an exception: it only reaches the user when raised.
        raise gr.Error(f"Error: {e}") from e


def run_ocr_func(image, lang_code):
    """Run PaddleOCRVL on an uploaded image and package the results.

    Args:
        image: Image array from the Gradio image input (converted RGB -> BGR
            before being written to disk), or None when nothing was uploaded.
        lang_code: Key into ``TRANS`` selecting the UI message language.

    Returns:
        ``(markdown_text, image, zip_path)``: the Markdown read back from the
        pipeline output, the unmodified input image, and a zip of the saved
        JSON/Markdown results. ``(None, None, None)`` when no image was given.

    Raises:
        gr.Error: If OCR is unavailable or the pipeline fails.
    """
    T = TRANS[lang_code]

    # 1. Validation
    if not HAS_OCR:
        # gr.Error is an exception: it only reaches the user when raised.
        raise gr.Error(T["err_ocr"])
    if image is None:
        gr.Warning(T["err_nofile"])
        return None, None, None

    # Setup temporary paths to handle file I/O for PaddleOCRVL
    session_id = uuid.uuid4().hex
    temp_dir = f"temp_ocr_{session_id}"
    os.makedirs(temp_dir, exist_ok=True)
    input_img_path = os.path.join(temp_dir, "input_image.png")
    output_save_path = os.path.join(temp_dir, "output")

    try:
        gr.Info("⏳ Initializing OCR-VL Pipeline...")

        # 2. Save Gradio Image (Numpy) to File (Required by Pipeline)
        # Convert RGB (Gradio) to BGR (OpenCV)
        img_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        cv2.imwrite(input_img_path, img_bgr)

        # 3. Run PaddleOCRVL Logic
        # See https://www.paddleocr.ai/latest/version3.x/pipeline_usage/PaddleOCR-VL.html
        output = ocr_pipeline.predict(input_img_path)

        # 4. Process Results
        for res in output:
            # res.print()  # Optional: print to console logs
            res.save_to_json(save_path=output_save_path)
            res.save_to_markdown(save_path=output_save_path)

        # 5. Retrieve Generated Content
        # Find the generated markdown file (name varies based on library version)
        md_files = glob.glob(os.path.join(output_save_path, "*.md"))
        if md_files:
            with open(md_files[0], "r", encoding="utf-8") as f:
                markdown_text = f.read()
        else:
            markdown_text = "Analysis complete, but no markdown file was found."

        # 6. Package Results (Zip JSON & Markdown)
        # The zip lands in the working directory, outside temp_dir, so it
        # survives the cleanup below.
        zip_output_path = f"OCR_Result_{session_id}.zip"
        shutil.make_archive(zip_output_path.replace('.zip', ''), 'zip', output_save_path)

        # Return: Markdown text, Original Image (no drawing needed as VL
        # focuses on structure), Zip file
        return markdown_text, image, zip_output_path
    except Exception as e:
        raise gr.Error(f"OCR Pipeline Error: {e}") from e
    finally:
        # Cleanup temp directory (optional, keeping zip file)
        shutil.rmtree(temp_dir, ignore_errors=True)


# --- 4.
# UI HELPERS ---
def change_lang(lang):
    """Flip the UI language and return refreshed values for every bound widget.

    ``lang`` is the current language code; anything other than "vi" switches
    to "vi", and "vi" switches to "en". The returned tuple is positional and
    must stay in the same order as the ``outputs`` list of the language
    button's click handler.
    """
    target = "vi" if lang != "vi" else "en"
    strings = TRANS[target]

    # The menu radio gets new choices and is reset to the PDF entry.
    menu_update = gr.update(
        choices=[strings["menu_pdf"], strings["menu_ocr"]],
        value=strings["menu_pdf"],
    )
    title_md = f"## {strings['app_name']}"
    pdf_heading_md = f"### {strings['pdf_head']}"
    ocr_heading_md = f"### {strings['ocr_head']}"

    return (
        target,                                   # lang_state
        strings["lang_btn"],                      # btn_lang
        menu_update,                              # radio_menu
        title_md,                                 # txt_title
        strings["app_desc"],                      # txt_desc
        strings["footer"],                        # txt_footer
        pdf_heading_md,                           # pdf_head_md
        strings["pdf_sub"],                       # pdf_sub_md
        gr.update(label=strings["pdf_label"]),    # in_pdf label
        strings["pdf_btn"],                       # btn_pdf
        ocr_heading_md,                           # ocr_head_md
        strings["ocr_sub"],                       # ocr_sub_md
        gr.update(label=strings["ocr_label"]),    # in_img label
        strings["ocr_btn"],                       # btn_ocr
        gr.update(label=strings["ocr_rs_text"]),  # out_txt label
        gr.update(label=strings["ocr_rs_img"]),   # out_img_viz label
    )


def toggle_view(menu_val, lang):
    """Return visibility updates for the (PDF group, OCR group) pair.

    The PDF group is shown when ``menu_val`` equals the PDF menu label in
    either language; otherwise the OCR group is shown. ``lang`` is accepted
    because the event passes it, but it is not used.
    """
    pdf_labels = tuple(TRANS[code]["menu_pdf"] for code in ("vi", "en"))
    show_pdf = menu_val in pdf_labels
    return gr.update(visible=show_pdf), gr.update(visible=not show_pdf)


# --- 5. THEME ---
# Soft base theme first, then the colour/border overrides applied on top.
theme = gr.themes.Soft(
    primary_hue="blue",
    neutral_hue="slate",
    font=[gr.themes.GoogleFont('Inter'), 'system-ui', 'sans-serif'],
)
theme = theme.set(
    body_background_fill="#f8fafc",
    block_background_fill="white",
    block_border_width="1px",
    button_primary_background_fill="linear-gradient(90deg, #3b82f6, #2563eb)",
    button_primary_text_color="white",
)

# --- 6.
# INTERFACE ---
# NOTE(review): indentation was reconstructed from a whitespace-collapsed
# source. The nesting of btn_pdf, out_file and txt_footer is inferred —
# confirm against the intended layout.
with gr.Blocks(title="Pro Document Toolkit") as demo:
    # Current UI language code; the app starts in Vietnamese.
    lang_state = gr.State("vi")

    with gr.Row():
        # Sidebar: language switch, tool selector, status line.
        with gr.Column(scale=1, min_width=250):
            gr.Markdown("### 🛠️ Dashboard")
            btn_lang = gr.Button(TRANS["vi"]["lang_btn"], variant="secondary")
            radio_menu = gr.Radio(
                choices=[TRANS["vi"]["menu_pdf"], TRANS["vi"]["menu_ocr"]],
                value=TRANS["vi"]["menu_pdf"],
                label="Function / Chức năng",
                type="value"
            )
            gr.Markdown("---")
            # Evaluated once at build time from the module-level HAS_OCR flag.
            gr.Markdown(f"**Status:** 🟢 Online | VL-OCR: {'Ready' if HAS_OCR else 'N/A'}")

        # Main area: title plus the two tool groups (only one visible at a time).
        with gr.Column(scale=4):
            txt_title = gr.Markdown(f"## {TRANS['vi']['app_name']}")
            txt_desc = gr.Markdown(TRANS['vi']['app_desc'])

            # PDF Tool
            with gr.Group(visible=True) as group_pdf:
                pdf_head_md = gr.Markdown(f"### {TRANS['vi']['pdf_head']}")
                pdf_sub_md = gr.Markdown(TRANS['vi']['pdf_sub'])
                with gr.Row(equal_height=True):
                    with gr.Column():
                        in_pdf = gr.File(
                            label=TRANS["vi"]["pdf_label"],
                            file_types=[".pdf"],
                            file_count="multiple",
                            height=300
                        )
                        btn_pdf = gr.Button(TRANS["vi"]["pdf_btn"], variant="primary", size="lg")
                    with gr.Column():
                        gr.Markdown("### Output")
                        out_word = gr.File(label="Download DOCX / ZIP", height=250)

            # OCR Tool (Updated for VL)
            with gr.Group(visible=False) as group_ocr:
                ocr_head_md = gr.Markdown(f"### {TRANS['vi']['ocr_head']}")
                ocr_sub_md = gr.Markdown(TRANS['vi']['ocr_sub'])
                with gr.Row():
                    with gr.Column(scale=1):
                        in_img = gr.Image(
                            label=TRANS["vi"]["ocr_label"],
                            type="numpy",
                            height=450,
                            sources=["upload", "clipboard"]
                        )
                        btn_ocr = gr.Button(TRANS["vi"]["ocr_btn"], variant="primary", size="lg")
                    with gr.Column(scale=1):
                        with gr.Tabs():
                            with gr.Tab("Markdown"):
                                out_txt = gr.Textbox(
                                    label=TRANS["vi"]["ocr_rs_text"],
                                    lines=20,
                                    placeholder="Markdown content will appear here...",
                                    buttons=["copy"]
                                )
                                out_file = gr.File(label="Download Full Results (.zip)")
                            with gr.Tab("Source Image"):
                                out_img_viz = gr.Image(
                                    label=TRANS["vi"]["ocr_rs_img"],
                                    interactive=False
                                )

    txt_footer = gr.Markdown(TRANS['vi']['footer'])

    # EVENTS
    radio_menu.change(toggle_view, inputs=[radio_menu, lang_state], outputs=[group_pdf, group_ocr])
    btn_pdf.click(convert_pdfs_to_word, inputs=[in_pdf, lang_state], outputs=out_word)
    btn_ocr.click(run_ocr_func, inputs=[in_img, lang_state], outputs=[out_txt, out_img_viz, out_file])
    # The outputs list must stay in the same order as change_lang's return tuple.
    btn_lang.click(
        change_lang,
        inputs=[lang_state],
        outputs=[
            lang_state, btn_lang, radio_menu, txt_title, txt_desc, txt_footer,
            pdf_head_md, pdf_sub_md, in_pdf, btn_pdf,
            ocr_head_md, ocr_sub_md, in_img, btn_ocr, out_txt, out_img_viz
        ]
    )

if __name__ == "__main__":
    demo.queue().launch(
        server_name="0.0.0.0",
        server_port=7860,
        theme=theme,
    )

# NOTE(review): everything from here to the end of the file looks like a second
# copy of the script appended after the entry point. It re-runs the OCR setup
# and redefines names already defined above — confirm whether it should exist.

# --- 1. SYSTEM CONFIGURATION ---
os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"

# Resets the flags, so a failure below disables OCR even if the earlier
# initialization succeeded.
HAS_OCR = False
ocr_pipeline = None
try:
    # UPDATED: Using PaddleOCRVL as requested
    from paddleocr import PaddleOCRVL

    # Initialize the specific pipeline version.
    # Uses the same "v1.5" identifier as the first initialization in this
    # file; this copy previously passed "PaddlePaddle/v1.5".
    ocr_pipeline = PaddleOCRVL(pipeline_version="v1.5")
    HAS_OCR = True
    print("✅ PaddleOCRVL initialized successfully.")
except Exception as e:
    # Best effort: the app still runs (PDF conversion only) without OCR.
    print(f"⚠️ Warning: Could not initialize PaddleOCRVL. Error: {e}")
    print("Ensure you have installed: pip install paddlepaddle paddleocr")

# --- 2.
# LANGUAGE DICTIONARY ---
# UI strings keyed by language code ("vi" / "en"), then by message key.
# Both languages define the same set of keys.
TRANS = {
    "vi": {
        "app_name": "PRO DOCUMENT TOOLKIT",
        "app_desc": "Xử lý tài liệu thông minh: Chuyển PDF → Word & Phân tích bố cục OCR",
        "menu_pdf": "📄 PDF sang Word",
        "menu_ocr": "👁️ Phân tích tài liệu (OCR-VL)",
        "lang_btn": "🇬🇧 English",
        "pdf_head": "Chuyển đổi PDF",
        "pdf_sub": "Giữ định dạng gốc, hỗ trợ nhiều file cùng lúc.",
        "pdf_label": "Tải lên file PDF (có thể nhiều file)",
        "pdf_btn": "🚀 Chuyển đổi",
        "pdf_success": "Chuyển đổi thành công!",
        "ocr_head": "Phân tích tài liệu & OCR",
        "ocr_sub": "Sử dụng PaddleOCRVL để nhận diện văn bản và cấu trúc (Markdown/JSON).",
        "ocr_label": "Tải ảnh lên (PNG, JPG, BMP)",
        "ocr_btn": "🔍 Phân tích Bố cục",
        "ocr_rs_text": "Kết quả Markdown",
        "ocr_rs_img": "Hình ảnh gốc",
        "err_nofile": "Vui lòng tải file lên!",
        "err_ocr": "OCR chưa sẵn sàng (kiểm tra cài đặt PaddleOCR).",
        "footer": "© 2024-2026 Developed by Chu Viet Kien. Powered by PaddleOCRVL & Gradio."
    },
    "en": {
        "app_name": "PRO DOCUMENT TOOLKIT",
        "app_desc": "Smart Document Processing: PDF to Word & OCR Layout Analysis",
        "menu_pdf": "📄 PDF to Word",
        "menu_ocr": "👁️ Doc Analysis (OCR-VL)",
        "lang_btn": "🇻🇳 Tiếng Việt",
        "pdf_head": "PDF Converter",
        "pdf_sub": "Preserve original layout, batch support.",
        "pdf_label": "Upload PDF files (multiple allowed)",
        "pdf_btn": "🚀 Convert",
        "pdf_success": "Conversion successful!",
        "ocr_head": "Document Analysis & OCR",
        "ocr_sub": "Uses PaddleOCRVL to extract text and structure (Markdown/JSON).",
        "ocr_label": "Upload Image (PNG, JPG, BMP)",
        "ocr_btn": "🔍 Analyze Layout",
        "ocr_rs_text": "Markdown Result",
        "ocr_rs_img": "Original Image",
        "err_nofile": "Please upload a file first!",
        "err_ocr": "OCR not available (check PaddleOCR installation).",
        "footer": "© 2024-2026 Developed by Chu Viet Kien. Powered by PaddleOCRVL & Gradio."
    }
}

# --- 3.
# PROCESSING LOGIC ---
# NOTE(review): this section repeats function names defined earlier in the
# file. This convert_pdfs_to_word has no scanned-PDF / empty-DOCX OCR fallback
# — confirm which definition is meant to be live.
def convert_pdfs_to_word(pdf_files, lang_code, progress=gr.Progress()):
    """Convert uploaded PDFs to DOCX with pdf2docx and return a download path.

    Args:
        pdf_files: One upload or a list of uploads. Each item is either an
            object exposing the file path as ``.name`` or a plain path string.
        lang_code: Key into ``TRANS`` selecting the UI message language.
        progress: Gradio progress tracker.

    Returns:
        The DOCX path when exactly one file was converted, the path of
        ``Converted_Documents.zip`` when several were, or None when nothing
        was uploaded. Outputs are written to the current working directory.

    Raises:
        gr.Error: If any step of the conversion fails.
    """
    T = TRANS[lang_code]
    if not pdf_files:
        gr.Warning(T["err_nofile"])
        return None
    if not isinstance(pdf_files, list):
        pdf_files = [pdf_files]

    converted_files = []
    total = len(pdf_files)
    try:
        progress(0, desc="Starting...")
        for idx, pdf_file in enumerate(pdf_files):
            # Accept both file-like uploads (.name) and plain path strings.
            pdf_path = getattr(pdf_file, "name", pdf_file)
            file_name = os.path.basename(pdf_path)
            progress((idx + 1) / total, desc=f"Processing: {file_name}")

            docx_name = os.path.splitext(file_name)[0] + ".docx"
            cv = Converter(pdf_path)
            try:
                cv.convert(docx_name)
            finally:
                # Release the PDF handle even when convert() fails.
                cv.close()
            converted_files.append(docx_name)

        gr.Info(f"✅ {T['pdf_success']}")

        if len(converted_files) == 1:
            return converted_files[0]

        zip_name = "Converted_Documents.zip"
        with zipfile.ZipFile(zip_name, 'w') as zf:
            for f in converted_files:
                zf.write(f)
        return zip_name
    except Exception as e:
        # gr.Error is an exception: it only reaches the user when raised.
        raise gr.Error(f"Error: {e}") from e


def run_ocr_func(image, lang_code):
    """Run PaddleOCRVL on an uploaded image and package the results.

    Args:
        image: Image array from the Gradio image input (converted RGB -> BGR
            before being written to disk), or None when nothing was uploaded.
        lang_code: Key into ``TRANS`` selecting the UI message language.

    Returns:
        ``(markdown_text, image, zip_path)``: the Markdown read back from the
        pipeline output, the unmodified input image, and a zip of the saved
        JSON/Markdown results. ``(None, None, None)`` when no image was given.

    Raises:
        gr.Error: If OCR is unavailable or the pipeline fails.
    """
    T = TRANS[lang_code]

    # 1. Validation
    if not HAS_OCR:
        # gr.Error is an exception: it only reaches the user when raised.
        raise gr.Error(T["err_ocr"])
    if image is None:
        gr.Warning(T["err_nofile"])
        return None, None, None

    # Setup temporary paths to handle file I/O for PaddleOCRVL
    session_id = uuid.uuid4().hex
    temp_dir = f"temp_ocr_{session_id}"
    os.makedirs(temp_dir, exist_ok=True)
    input_img_path = os.path.join(temp_dir, "input_image.png")
    output_save_path = os.path.join(temp_dir, "output")

    try:
        gr.Info("⏳ Initializing OCR-VL Pipeline...")

        # 2. Save Gradio Image (Numpy) to File (Required by Pipeline)
        # Convert RGB (Gradio) to BGR (OpenCV)
        img_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        cv2.imwrite(input_img_path, img_bgr)

        # 3. Run PaddleOCRVL Logic
        # See https://www.paddleocr.ai/latest/version3.x/pipeline_usage/PaddleOCR-VL.html
        output = ocr_pipeline.predict(input_img_path)

        # 4. Process Results
        for res in output:
            # res.print()  # Optional: print to console logs
            res.save_to_json(save_path=output_save_path)
            res.save_to_markdown(save_path=output_save_path)

        # 5. Retrieve Generated Content
        # Find the generated markdown file (name varies based on library version)
        md_files = glob.glob(os.path.join(output_save_path, "*.md"))
        if md_files:
            with open(md_files[0], "r", encoding="utf-8") as f:
                markdown_text = f.read()
        else:
            markdown_text = "Analysis complete, but no markdown file was found."

        # 6. Package Results (Zip JSON & Markdown)
        # The zip lands in the working directory, outside temp_dir, so it
        # survives the cleanup below.
        zip_output_path = f"OCR_Result_{session_id}.zip"
        shutil.make_archive(zip_output_path.replace('.zip', ''), 'zip', output_save_path)

        # Return: Markdown text, Original Image (no drawing needed as VL
        # focuses on structure), Zip file
        return markdown_text, image, zip_output_path
    except Exception as e:
        raise gr.Error(f"OCR Pipeline Error: {e}") from e
    finally:
        # Cleanup temp directory (optional, keeping zip file)
        shutil.rmtree(temp_dir, ignore_errors=True)


# --- 4. UI HELPERS ---
def change_lang(lang):
    """Toggle the UI language and return new values for every bound widget.

    ``lang`` is the current language code: "vi" switches to "en", anything
    else switches to "vi". The tuple is positional and must stay in the same
    order as the ``outputs`` list of the language button's click handler.
    """
    new = "en" if lang == "vi" else "vi"
    T = TRANS[new]
    return (
        new,                                  # lang_state
        T["lang_btn"],                        # btn_lang
        # Menu choices are replaced and the selection reset to the PDF entry.
        gr.update(choices=[T["menu_pdf"], T["menu_ocr"]], value=T["menu_pdf"]),  # radio_menu
        f"## {T['app_name']}",                # txt_title
        T["app_desc"],                        # txt_desc
        T["footer"],                          # txt_footer
        f"### {T['pdf_head']}",               # pdf_head_md
        T["pdf_sub"],                         # pdf_sub_md
        gr.update(label=T["pdf_label"]),      # in_pdf label
        T["pdf_btn"],                         # btn_pdf
        f"### {T['ocr_head']}",               # ocr_head_md
        T["ocr_sub"],                         # ocr_sub_md
        gr.update(label=T["ocr_label"]),      # in_img label
        T["ocr_btn"],                         # btn_ocr
        gr.update(label=T["ocr_rs_text"]),    # out_txt label
        gr.update(label=T["ocr_rs_img"])      # out_img_viz label
    )


def toggle_view(menu_val, lang):
    """Return visibility updates for the (PDF group, OCR group) pair.

    The PDF group is shown when ``menu_val`` equals the PDF menu label in
    either language; otherwise the OCR group is shown. ``lang`` is accepted
    because the event passes it, but it is not used.
    """
    is_pdf = menu_val in [TRANS["vi"]["menu_pdf"], TRANS["en"]["menu_pdf"]]
    return gr.update(visible=is_pdf), gr.update(visible=not is_pdf)


# --- 5.
# THEME ---
# Soft base theme with blue/slate hues, then colour and border overrides.
theme = gr.themes.Soft(
    primary_hue="blue",
    neutral_hue="slate",
    font=[gr.themes.GoogleFont('Inter'), 'system-ui', 'sans-serif'],
).set(
    body_background_fill="#f8fafc",
    block_background_fill="white",
    block_border_width="1px",
    button_primary_background_fill="linear-gradient(90deg, #3b82f6, #2563eb)",
    button_primary_text_color="white",
)

# --- 6. INTERFACE ---
# NOTE(review): indentation was reconstructed from a whitespace-collapsed
# source. The nesting of btn_pdf, out_file and txt_footer is inferred —
# confirm against the intended layout.
with gr.Blocks(title="Pro Document Toolkit") as demo:
    # Current UI language code; the app starts in Vietnamese.
    lang_state = gr.State("vi")

    with gr.Row():
        # Sidebar: language switch, tool selector, status line.
        with gr.Column(scale=1, min_width=250):
            gr.Markdown("### 🛠️ Dashboard")
            btn_lang = gr.Button(TRANS["vi"]["lang_btn"], variant="secondary")
            radio_menu = gr.Radio(
                choices=[TRANS["vi"]["menu_pdf"], TRANS["vi"]["menu_ocr"]],
                value=TRANS["vi"]["menu_pdf"],
                label="Function / Chức năng",
                type="value"
            )
            gr.Markdown("---")
            # Evaluated once at build time from the module-level HAS_OCR flag.
            gr.Markdown(f"**Status:** 🟢 Online | VL-OCR: {'Ready' if HAS_OCR else 'N/A'}")

        # Main area: title plus the two tool groups (only one visible at a time).
        with gr.Column(scale=4):
            txt_title = gr.Markdown(f"## {TRANS['vi']['app_name']}")
            txt_desc = gr.Markdown(TRANS['vi']['app_desc'])

            # PDF Tool
            with gr.Group(visible=True) as group_pdf:
                pdf_head_md = gr.Markdown(f"### {TRANS['vi']['pdf_head']}")
                pdf_sub_md = gr.Markdown(TRANS['vi']['pdf_sub'])
                with gr.Row(equal_height=True):
                    with gr.Column():
                        in_pdf = gr.File(
                            label=TRANS["vi"]["pdf_label"],
                            file_types=[".pdf"],
                            file_count="multiple",
                            height=300
                        )
                        btn_pdf = gr.Button(TRANS["vi"]["pdf_btn"], variant="primary", size="lg")
                    with gr.Column():
                        gr.Markdown("### Output")
                        out_word = gr.File(label="Download DOCX / ZIP", height=250)

            # OCR Tool (Updated for VL)
            with gr.Group(visible=False) as group_ocr:
                ocr_head_md = gr.Markdown(f"### {TRANS['vi']['ocr_head']}")
                ocr_sub_md = gr.Markdown(TRANS['vi']['ocr_sub'])
                with gr.Row():
                    with gr.Column(scale=1):
                        in_img = gr.Image(
                            label=TRANS["vi"]["ocr_label"],
                            type="numpy",
                            height=450,
                            sources=["upload", "clipboard"]
                        )
                        btn_ocr = gr.Button(TRANS["vi"]["ocr_btn"], variant="primary", size="lg")
                    with gr.Column(scale=1):
                        with gr.Tabs():
                            with gr.Tab("Markdown"):
                                out_txt = gr.Textbox(
                                    label=TRANS["vi"]["ocr_rs_text"],
                                    lines=20,
                                    placeholder="Markdown content will appear here...",
                                    buttons=["copy"]
                                )
                                out_file = gr.File(label="Download Full Results (.zip)")
                            with gr.Tab("Source Image"):
                                out_img_viz = gr.Image(
                                    label=TRANS["vi"]["ocr_rs_img"],
                                    interactive=False
                                )

    txt_footer = gr.Markdown(TRANS['vi']['footer'])

    # EVENTS
    # Menu selection switches which tool group is visible.
    radio_menu.change(toggle_view, inputs=[radio_menu, lang_state], outputs=[group_pdf, group_ocr])
    btn_pdf.click(convert_pdfs_to_word, inputs=[in_pdf, lang_state], outputs=out_word)
    btn_ocr.click(run_ocr_func, inputs=[in_img, lang_state], outputs=[out_txt, out_img_viz, out_file])
    # The outputs list must stay in the same order as change_lang's return tuple.
    btn_lang.click(
        change_lang,
        inputs=[lang_state],
        outputs=[
            lang_state, btn_lang, radio_menu, txt_title, txt_desc, txt_footer,
            pdf_head_md, pdf_sub_md, in_pdf, btn_pdf,
            ocr_head_md, ocr_sub_md, in_img, btn_ocr, out_txt, out_img_viz
        ]
    )

# Script entry point: enable the queue and serve on all interfaces, port 7860.
if __name__ == "__main__":
    demo.queue().launch(
        server_name="0.0.0.0",
        server_port=7860,
        theme=theme,
    )