PDF-to-Word

Runtime error

App Files Files Community

PDF-to-Word / app.py

hantech

Update app.py

42a4119 verified 19 days ago

raw

history blame contribute delete

27.6 kB

	import gradio as gr
	from pdf2docx import Converter
	import os
	import zipfile
	import cv2
	import numpy as np
	import shutil
	import uuid
	import glob
	import fitz
	import logging
	from docx import Document

	# --- 1. SYSTEM CONFIGURATION ---
	# Set before the paddleocr import further down.
	# NOTE(review): presumably turns off PaddleX's model-source check - confirm.
	os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"

	# Defaults describe the "OCR unavailable" state; they are only overwritten
	# once the pipeline object has been constructed without raising.
	ocr_pipeline = None
	HAS_OCR = False

	try:
	    # The import lives inside the try so that a missing or broken install
	    # leaves HAS_OCR False instead of aborting module import.
	    # Note: import AFTER renaming this file to avoid a circular import.
	    from paddleocr import PaddleOCRVL

	    ocr_pipeline = PaddleOCRVL(pipeline_version="v1.5")
	    HAS_OCR = True
	    print("[OK] PaddleOCRVL initialized successfully.")
	except Exception as init_error:
	    print(f"[WARN] Could not initialize PaddleOCRVL. Error: {init_error}")
	    print("Ensure you have installed: pip install paddlepaddle paddleocr")

	# --- 2. LANGUAGE DICTIONARY ---
	# UI strings keyed by language code ("vi" / "en"), then by widget key.
	# Both languages expose exactly the same set of keys.
	TRANS = {
	    "vi": {
	        # Header and navigation
	        "app_name": "PRO DOCUMENT TOOLKIT",
	        "app_desc": "Xử lý tài liệu thông minh: Chuyển PDF → Word & Phân tích bố cục OCR",
	        "menu_pdf": "📄 PDF sang Word",
	        "menu_ocr": "👁️ Phân tích tài liệu (OCR-VL)",
	        "lang_btn": "🇬🇧 English",
	        # PDF -> Word tool
	        "pdf_head": "Chuyển đổi PDF",
	        "pdf_sub": "Giữ định dạng gốc, hỗ trợ nhiều file cùng lúc.",
	        "pdf_label": "Tải lên file PDF (có thể nhiều file)",
	        "pdf_btn": "🚀 Chuyển đổi",
	        "pdf_success": "Chuyển đổi thành công!",
	        # OCR tool
	        "ocr_head": "Phân tích tài liệu & OCR",
	        "ocr_sub": "Sử dụng PaddleOCRVL để nhận diện văn bản và cấu trúc (Markdown/JSON).",
	        "ocr_label": "Tải ảnh lên (PNG, JPG, BMP)",
	        "ocr_btn": "🔍 Phân tích Bố cục",
	        "ocr_rs_text": "Kết quả Markdown",
	        "ocr_rs_img": "Hình ảnh gốc",
	        # Messages and footer
	        "err_nofile": "Vui lòng tải file lên!",
	        "err_ocr": "OCR chưa sẵn sàng (kiểm tra cài đặt PaddleOCR).",
	        "footer": "© 2024-2026 Developed by Chu Viet Kien. Powered by PaddleOCRVL & Gradio.",
	    },
	    "en": {
	        # Header and navigation
	        "app_name": "PRO DOCUMENT TOOLKIT",
	        "app_desc": "Smart Document Processing: PDF to Word & OCR Layout Analysis",
	        "menu_pdf": "📄 PDF to Word",
	        "menu_ocr": "👁️ Doc Analysis (OCR-VL)",
	        "lang_btn": "🇻🇳 Tiếng Việt",
	        # PDF -> Word tool
	        "pdf_head": "PDF Converter",
	        "pdf_sub": "Preserve original layout, batch support.",
	        "pdf_label": "Upload PDF files (multiple allowed)",
	        "pdf_btn": "🚀 Convert",
	        "pdf_success": "Conversion successful!",
	        # OCR tool
	        "ocr_head": "Document Analysis & OCR",
	        "ocr_sub": "Uses PaddleOCRVL to extract text and structure (Markdown/JSON).",
	        "ocr_label": "Upload Image (PNG, JPG, BMP)",
	        "ocr_btn": "🔍 Analyze Layout",
	        "ocr_rs_text": "Markdown Result",
	        "ocr_rs_img": "Original Image",
	        # Messages and footer
	        "err_nofile": "Please upload a file first!",
	        "err_ocr": "OCR not available (check PaddleOCR installation).",
	        "footer": "© 2024-2026 Developed by Chu Viet Kien. Powered by PaddleOCRVL & Gradio.",
	    },
	}

	# --- 3. PROCESSING LOGIC ---

	def _is_scanned_pdf(pdf_path):
	    """Check if a PDF has no extractable text layer (i.e. scanned/image-only).

	    Args:
	        pdf_path: Path of the PDF file to inspect.

	    Returns:
	        True when no page yields non-whitespace text; False when any page has
	        text, or when the file cannot be opened or read at all.
	    """
	    try:
	        doc = fitz.open(pdf_path)
	        try:
	            # any() stops at the first page that has text.
	            return not any(page.get_text().strip() for page in doc)
	        finally:
	            # Close even if get_text() raised, so the handle is not leaked.
	            doc.close()
	    except Exception:
	        # Best effort: an unreadable file is reported as "not scanned".
	        return False


	def _docx_is_empty(docx_path):
	    """Report whether a DOCX contains no non-whitespace text.

	    Body paragraphs are inspected first, then every cell of every table.
	    A file that cannot be opened or parsed is reported as empty.
	    """
	    try:
	        document = Document(docx_path)
	        if any(paragraph.text.strip() for paragraph in document.paragraphs):
	            return False
	        cell_has_text = any(
	            cell.text.strip()
	            for table in document.tables
	            for row in table.rows
	            for cell in row.cells
	        )
	        return not cell_has_text
	    except Exception:
	        return True


	def _ocr_pdf_to_docx(pdf_path, docx_path):
	    """Fallback: convert a scanned PDF to DOCX using PaddleOCRVL.

	    Each page is rendered to a PNG and passed to ``ocr_pipeline.predict``. The
	    Markdown the pipeline saves is then copied into a new Word document, one
	    paragraph per non-blank line, with a page break between PDF pages.

	    Args:
	        pdf_path: Path of the PDF to read.
	        docx_path: Path the resulting DOCX is saved to.

	    Raises:
	        RuntimeError: If the module-level OCR pipeline was never initialised.
	    """
	    import tempfile

	    if ocr_pipeline is None:
	        # Fail with a clear message rather than an AttributeError on None.
	        raise RuntimeError("PaddleOCRVL pipeline is not initialized.")

	    word_doc = Document()
	    # Scratch space in the system temp dir, so the working directory does
	    # not have to be writable and is not littered on a crash.
	    temp_base = tempfile.mkdtemp(prefix="temp_ocr_pdf_")
	    pdf_doc = None

	    try:
	        pdf_doc = fitz.open(pdf_path)
	        page_count = len(pdf_doc)
	        # 2x zoom on both axes (about 144 DPI given the 72 DPI PDF base).
	        zoom = fitz.Matrix(2, 2)

	        for i, page in enumerate(pdf_doc):
	            pix = page.get_pixmap(matrix=zoom)
	            img_path = os.path.join(temp_base, f"page_{i}.png")
	            pix.save(img_path)

	            # Run PaddleOCRVL and let it write its Markdown for this page.
	            output = ocr_pipeline.predict(img_path)
	            page_out_dir = os.path.join(temp_base, f"out_{i}")
	            os.makedirs(page_out_dir, exist_ok=True)
	            for res in output:
	                res.save_to_markdown(save_path=page_out_dir)

	            # Sorted so the file picked is deterministic.
	            md_files = sorted(glob.glob(os.path.join(page_out_dir, "*.md")))
	            if md_files:
	                with open(md_files[0], "r", encoding="utf-8") as f:
	                    page_md = f.read().strip()
	                for line in page_md.splitlines():
	                    stripped = line.strip()
	                    if stripped:
	                        word_doc.add_paragraph(stripped)

	            # No trailing page break after the last page.
	            if i < page_count - 1:
	                word_doc.add_page_break()

	        word_doc.save(docx_path)
	    finally:
	        if pdf_doc is not None:
	            pdf_doc.close()
	        shutil.rmtree(temp_base, ignore_errors=True)


	def convert_pdfs_to_word(pdf_files, lang_code, progress=gr.Progress()):
	    """Convert uploaded PDFs to DOCX, falling back to OCR for scanned files.

	    Routing per file:
	      1. No text layer and OCR available -> OCR straight away.
	      2. Otherwise pdf2docx; if that yields an empty DOCX and OCR is
	         available, the DOCX is rebuilt through OCR.

	    Args:
	        pdf_files: One uploaded file or a list of them. Each item may be a
	            path string or an object exposing the path as ``.name``.
	        lang_code: Key into ``TRANS`` selecting the message language.
	        progress: Gradio progress tracker.

	    Returns:
	        Path of the single DOCX, path of a ZIP holding all DOCX files when
	        several PDFs were given, or None when nothing was uploaded.

	    Raises:
	        gr.Error: If any conversion step fails.
	    """
	    import tempfile

	    T = TRANS[lang_code]
	    if not pdf_files:
	        gr.Warning(T["err_nofile"])
	        return None

	    if not isinstance(pdf_files, list):
	        pdf_files = [pdf_files]

	    # A per-request directory keeps concurrent users (and the fixed ZIP name)
	    # from overwriting each other's output in the working directory.
	    out_dir = tempfile.mkdtemp(prefix="pdf2word_")
	    total = len(pdf_files)
	    converted_files = []
	    used_names = set()

	    try:
	        progress(0, desc="Starting...")
	        for idx, pdf_file in enumerate(pdf_files):
	            # Accept both plain path strings and objects carrying ``.name``.
	            pdf_path = getattr(pdf_file, "name", pdf_file)
	            file_name = os.path.basename(pdf_path)
	            stem = os.path.splitext(file_name)[0]

	            # Two uploads with the same basename must not clobber each other.
	            docx_base = f"{stem}.docx"
	            suffix = 1
	            while docx_base in used_names:
	                docx_base = f"{stem}_{suffix}.docx"
	                suffix += 1
	            used_names.add(docx_base)
	            docx_name = os.path.join(out_dir, docx_base)

	            # Report the work already finished, so the bar is not at 100%
	            # while the last file is still being processed.
	            fraction_done = idx / total

	            # Route 1: obvious scanned PDF -> OCR directly
	            if HAS_OCR and _is_scanned_pdf(pdf_path):
	                progress(fraction_done, desc=f"OCR: {file_name}")
	                _ocr_pdf_to_docx(pdf_path, docx_name)
	            else:
	                progress(fraction_done, desc=f"Converting: {file_name}")
	                # Temporarily raise the root logger to ERROR so pdf2docx
	                # info/warning spam is hidden.
	                _root_logger = logging.getLogger()
	                _old_level = _root_logger.level
	                _root_logger.setLevel(logging.ERROR)
	                try:
	                    cv = Converter(pdf_path)
	                    try:
	                        cv.convert(docx_name)
	                    finally:
	                        # Close the converter even when convert() raises.
	                        cv.close()
	                finally:
	                    _root_logger.setLevel(_old_level)

	                # Route 2: pdf2docx produced empty docx -> OCR fallback
	                if HAS_OCR and _docx_is_empty(docx_name):
	                    # _docx_is_empty is also True for a missing file.
	                    if os.path.exists(docx_name):
	                        os.remove(docx_name)
	                    _ocr_pdf_to_docx(pdf_path, docx_name)

	            converted_files.append(docx_name)

	        progress(1, desc="Done")
	        gr.Info(f"✅ {T['pdf_success']}")

	        if len(converted_files) == 1:
	            return converted_files[0]

	        zip_name = os.path.join(out_dir, "Converted_Documents.zip")
	        with zipfile.ZipFile(zip_name, "w") as zf:
	            for docx_path in converted_files:
	                # Store only the file name, not the temp directory path.
	                zf.write(docx_path, arcname=os.path.basename(docx_path))
	        return zip_name
	    except Exception as e:
	        shutil.rmtree(out_dir, ignore_errors=True)
	        # gr.Error only reaches the user when it is raised.
	        raise gr.Error(f"Error: {str(e)}") from e

	def run_ocr_func(image, lang_code):
	    """Run PaddleOCRVL on one uploaded image and package the results.

	    Args:
	        image: Image array from the Gradio image input (RGB order, per the
	            conversion below), or None if nothing was uploaded.
	        lang_code: Key into ``TRANS`` selecting the message language.

	    Returns:
	        ``(markdown_text, image, zip_path)``: the Markdown text read back from
	        the pipeline output, the unchanged input image, and the path of a ZIP
	        of the pipeline's output directory. ``(None, None, None)`` when no
	        image was supplied.

	    Raises:
	        gr.Error: If OCR is unavailable or the pipeline fails.
	    """
	    import tempfile

	    T = TRANS[lang_code]

	    # 1. Validation
	    if not HAS_OCR:
	        # gr.Error only reaches the user when it is raised.
	        raise gr.Error(T["err_ocr"])
	    if image is None:
	        gr.Warning(T["err_nofile"])
	        return None, None, None

	    # Scratch directory for the file-based pipeline I/O; removed in finally.
	    session_id = uuid.uuid4().hex
	    temp_dir = tempfile.mkdtemp(prefix="temp_ocr_")
	    input_img_path = os.path.join(temp_dir, "input_image.png")
	    output_save_path = os.path.join(temp_dir, "output")

	    try:
	        gr.Info("⏳ Initializing OCR-VL Pipeline...")

	        # 2. The pipeline is fed a file path, so write the array to disk.
	        #    OpenCV writes BGR, hence the channel swap.
	        img_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
	        if not cv2.imwrite(input_img_path, img_bgr):
	            raise RuntimeError("Could not write the uploaded image to disk.")

	        # 3. Run PaddleOCRVL
	        # See https://www.paddleocr.ai/latest/version3.x/pipeline_usage/PaddleOCR-VL.html
	        output = ocr_pipeline.predict(input_img_path)

	        # 4. Save results. The directory is created up front so that archiving
	        #    still works when the pipeline yields no results.
	        os.makedirs(output_save_path, exist_ok=True)
	        for res in output:
	            res.save_to_json(save_path=output_save_path)
	            res.save_to_markdown(save_path=output_save_path)

	        # 5. Read back the generated Markdown (file name is not fixed here).
	        md_files = sorted(glob.glob(os.path.join(output_save_path, "*.md")))
	        if md_files:
	            with open(md_files[0], "r", encoding="utf-8") as f:
	                markdown_text = f.read()
	        else:
	            markdown_text = "Analysis complete, but no markdown file was found."

	        # 6. Zip JSON + Markdown outside temp_dir so it survives the cleanup.
	        archive_base = os.path.join(
	            tempfile.gettempdir(), f"OCR_Result_{session_id}"
	        )
	        zip_output_path = shutil.make_archive(archive_base, "zip", output_save_path)

	        return markdown_text, image, zip_output_path

	    except Exception as e:
	        raise gr.Error(f"OCR Pipeline Error: {str(e)}") from e
	    finally:
	        shutil.rmtree(temp_dir, ignore_errors=True)

	# --- 4. UI HELPERS ---
	def change_lang(lang):
	    """Switch between "vi" and "en" and return fresh values for the UI.

	    The returned tuple has 16 entries, in the order of the ``outputs`` list of
	    the language button's click handler: the new language code, plain strings
	    for text widgets, and ``gr.update`` objects where only the label (or the
	    radio choices) changes.
	    """
	    target = "vi" if lang != "vi" else "en"
	    T = TRANS[target]

	    def relabel(key):
	        # Update that changes nothing but the component's label.
	        return gr.update(label=T[key])

	    menu_update = gr.update(
	        choices=[T["menu_pdf"], T["menu_ocr"]],
	        value=T["menu_pdf"],
	    )

	    return (
	        target,                     # lang_state
	        T["lang_btn"],              # btn_lang
	        menu_update,                # radio_menu
	        f"## {T['app_name']}",      # txt_title
	        T["app_desc"],              # txt_desc
	        T["footer"],                # txt_footer
	        f"### {T['pdf_head']}",     # pdf_head_md
	        T["pdf_sub"],               # pdf_sub_md
	        relabel("pdf_label"),       # in_pdf
	        T["pdf_btn"],               # btn_pdf
	        f"### {T['ocr_head']}",     # ocr_head_md
	        T["ocr_sub"],               # ocr_sub_md
	        relabel("ocr_label"),       # in_img
	        T["ocr_btn"],               # btn_ocr
	        relabel("ocr_rs_text"),     # out_txt
	        relabel("ocr_rs_img"),      # out_img_viz
	    )

	def toggle_view(menu_val, lang):
	    """Show the PDF group or the OCR group depending on the menu selection.

	    The selection is matched against the PDF menu label of both languages;
	    ``lang`` is accepted but not used.

	    Returns:
	        Visibility updates for (PDF group, OCR group), always opposite.
	    """
	    pdf_labels = tuple(TRANS[code]["menu_pdf"] for code in ("vi", "en"))
	    show_pdf = menu_val in pdf_labels
	    return gr.update(visible=show_pdf), gr.update(visible=not show_pdf)

	# --- 5. THEME ---
	# Soft base theme (blue / slate, Inter font) with a few colour overrides.
	_font_stack = [gr.themes.GoogleFont('Inter'), 'system-ui', 'sans-serif']
	theme = gr.themes.Soft(
	    primary_hue="blue",
	    neutral_hue="slate",
	    font=_font_stack,
	).set(
	    body_background_fill="#f8fafc",
	    block_background_fill="white",
	    block_border_width="1px",
	    button_primary_background_fill="linear-gradient(90deg, #3b82f6, #2563eb)",
	    button_primary_text_color="white",
	)

	# --- 6. INTERFACE ---
	# Builds the whole UI and wires its events. Widgets start in Vietnamese;
	# change_lang() swaps the texts later.
	# NOTE(review): the nesting below was reconstructed from statement order
	# because the original indentation was lost - confirm the layout,
	# especially the placement of out_file and txt_footer.
	with gr.Blocks(title="Pro Document Toolkit") as demo:
	    # Current UI language code; passed to every handler.
	    lang_state = gr.State("vi")

	    with gr.Row():
	        # Sidebar: language switch, tool selector, status line.
	        with gr.Column(scale=1, min_width=250):
	            gr.Markdown("### 🛠️ Dashboard")
	            btn_lang = gr.Button(TRANS["vi"]["lang_btn"], variant="secondary")
	            radio_menu = gr.Radio(
	                choices=[TRANS["vi"]["menu_pdf"], TRANS["vi"]["menu_ocr"]],
	                value=TRANS["vi"]["menu_pdf"],
	                label="Function / Chức năng",
	                type="value"
	            )
	            gr.Markdown("---")
	            # "\\|" yields the same backslash-pipe text as before without the
	            # invalid-escape warning that a bare "\|" triggers.
	            gr.Markdown(f"Status: 🟢 Online \\| VL-OCR: {'Ready' if HAS_OCR else 'N/A'}")

	        # Main area: title, description and the two tool groups.
	        with gr.Column(scale=4):
	            txt_title = gr.Markdown(f"## {TRANS['vi']['app_name']}")
	            txt_desc = gr.Markdown(TRANS['vi']['app_desc'])

	            # PDF Tool (visible by default)
	            with gr.Group(visible=True) as group_pdf:
	                pdf_head_md = gr.Markdown(f"### {TRANS['vi']['pdf_head']}")
	                pdf_sub_md = gr.Markdown(TRANS['vi']['pdf_sub'])
	                with gr.Row(equal_height=True):
	                    with gr.Column():
	                        in_pdf = gr.File(
	                            label=TRANS["vi"]["pdf_label"],
	                            file_types=[".pdf"],
	                            file_count="multiple",
	                            height=300
	                        )
	                        btn_pdf = gr.Button(TRANS["vi"]["pdf_btn"], variant="primary", size="lg")
	                    with gr.Column():
	                        gr.Markdown("### Output")
	                        out_word = gr.File(label="Download DOCX / ZIP", height=250)

	            # OCR Tool (hidden until selected in the menu)
	            with gr.Group(visible=False) as group_ocr:
	                ocr_head_md = gr.Markdown(f"### {TRANS['vi']['ocr_head']}")
	                ocr_sub_md = gr.Markdown(TRANS['vi']['ocr_sub'])
	                with gr.Row():
	                    with gr.Column(scale=1):
	                        in_img = gr.Image(
	                            label=TRANS["vi"]["ocr_label"],
	                            type="numpy",
	                            height=450,
	                            sources=["upload", "clipboard"]
	                        )
	                        btn_ocr = gr.Button(TRANS["vi"]["ocr_btn"], variant="primary", size="lg")
	                    with gr.Column(scale=1):
	                        with gr.Tabs():
	                            with gr.Tab("Markdown"):
	                                out_txt = gr.Textbox(
	                                    label=TRANS["vi"]["ocr_rs_text"],
	                                    lines=20,
	                                    placeholder="Markdown content will appear here...",
	                                    buttons=["copy"]
	                                )
	                                out_file = gr.File(label="Download Full Results (.zip)")
	                            with gr.Tab("Source Image"):
	                                out_img_viz = gr.Image(
	                                    label=TRANS["vi"]["ocr_rs_img"],
	                                    interactive=False
	                                )

	            txt_footer = gr.Markdown(TRANS['vi']['footer'])

	    # EVENTS
	    radio_menu.change(toggle_view, inputs=[radio_menu, lang_state], outputs=[group_pdf, group_ocr])
	    btn_pdf.click(convert_pdfs_to_word, inputs=[in_pdf, lang_state], outputs=out_word)
	    btn_ocr.click(run_ocr_func, inputs=[in_img, lang_state], outputs=[out_txt, out_img_viz, out_file])
	    # The output order must match the tuple returned by change_lang().
	    btn_lang.click(
	        change_lang,
	        inputs=[lang_state],
	        outputs=[
	            lang_state, btn_lang, radio_menu,
	            txt_title, txt_desc, txt_footer,
	            pdf_head_md, pdf_sub_md, in_pdf, btn_pdf,
	            ocr_head_md, ocr_sub_md, in_img, btn_ocr,
	            out_txt, out_img_viz
	        ]
	    )

	# Script entry point: enable the request queue, then serve on all
	# interfaces at port 7860 with the custom theme.
	if __name__ == "__main__":
	    queued_app = demo.queue()
	    queued_app.launch(
	        server_name="0.0.0.0",
	        server_port=7860,
	        theme=theme,
	    )

	# --- 1. SYSTEM CONFIGURATION ---
	# Set before the paddleocr import further down.
	# NOTE(review): presumably turns off PaddleX's model-source check - confirm.
	os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"

	# NOTE(review): this repeats the configuration section found earlier in the
	# file and rebinds the same module-level names.
	HAS_OCR = False
	ocr_pipeline = None

	try:
	    # The import lives inside the try so that a missing or broken install
	    # leaves HAS_OCR False instead of aborting module import.
	    from paddleocr import PaddleOCRVL

	    # Use the same version string as the earlier initialisation in this file
	    # ("v1.5"), so both copies build the same pipeline.
	    # NOTE(review): confirm "v1.5" against the installed paddleocr release.
	    ocr_pipeline = PaddleOCRVL(pipeline_version="v1.5")
	    HAS_OCR = True
	    print("✅ PaddleOCRVL initialized successfully.")
	except Exception as e:
	    print(f"⚠️ Warning: Could not initialize PaddleOCRVL. Error: {e}")
	    print("Ensure you have installed: pip install paddlepaddle paddleocr")

	# --- 2. LANGUAGE DICTIONARY ---
	# Translation table: language code -> widget key -> display string.
	# The "vi" and "en" entries carry the same keys.
	TRANS = {
	    "vi": {
	        # General
	        "app_name": "PRO DOCUMENT TOOLKIT",
	        "app_desc": "Xử lý tài liệu thông minh: Chuyển PDF → Word & Phân tích bố cục OCR",
	        "menu_pdf": "📄 PDF sang Word",
	        "menu_ocr": "👁️ Phân tích tài liệu (OCR-VL)",
	        "lang_btn": "🇬🇧 English",
	        # PDF converter
	        "pdf_head": "Chuyển đổi PDF",
	        "pdf_sub": "Giữ định dạng gốc, hỗ trợ nhiều file cùng lúc.",
	        "pdf_label": "Tải lên file PDF (có thể nhiều file)",
	        "pdf_btn": "🚀 Chuyển đổi",
	        "pdf_success": "Chuyển đổi thành công!",
	        # OCR analysis
	        "ocr_head": "Phân tích tài liệu & OCR",
	        "ocr_sub": "Sử dụng PaddleOCRVL để nhận diện văn bản và cấu trúc (Markdown/JSON).",
	        "ocr_label": "Tải ảnh lên (PNG, JPG, BMP)",
	        "ocr_btn": "🔍 Phân tích Bố cục",
	        "ocr_rs_text": "Kết quả Markdown",
	        "ocr_rs_img": "Hình ảnh gốc",
	        # Errors and footer
	        "err_nofile": "Vui lòng tải file lên!",
	        "err_ocr": "OCR chưa sẵn sàng (kiểm tra cài đặt PaddleOCR).",
	        "footer": "© 2024-2026 Developed by Chu Viet Kien. Powered by PaddleOCRVL & Gradio.",
	    },
	    "en": {
	        # General
	        "app_name": "PRO DOCUMENT TOOLKIT",
	        "app_desc": "Smart Document Processing: PDF to Word & OCR Layout Analysis",
	        "menu_pdf": "📄 PDF to Word",
	        "menu_ocr": "👁️ Doc Analysis (OCR-VL)",
	        "lang_btn": "🇻🇳 Tiếng Việt",
	        # PDF converter
	        "pdf_head": "PDF Converter",
	        "pdf_sub": "Preserve original layout, batch support.",
	        "pdf_label": "Upload PDF files (multiple allowed)",
	        "pdf_btn": "🚀 Convert",
	        "pdf_success": "Conversion successful!",
	        # OCR analysis
	        "ocr_head": "Document Analysis & OCR",
	        "ocr_sub": "Uses PaddleOCRVL to extract text and structure (Markdown/JSON).",
	        "ocr_label": "Upload Image (PNG, JPG, BMP)",
	        "ocr_btn": "🔍 Analyze Layout",
	        "ocr_rs_text": "Markdown Result",
	        "ocr_rs_img": "Original Image",
	        # Errors and footer
	        "err_nofile": "Please upload a file first!",
	        "err_ocr": "OCR not available (check PaddleOCR installation).",
	        "footer": "© 2024-2026 Developed by Chu Viet Kien. Powered by PaddleOCRVL & Gradio.",
	    },
	}

	# --- 3. PROCESSING LOGIC ---
	def convert_pdfs_to_word(pdf_files, lang_code, progress=gr.Progress()):
	    """Convert uploaded PDFs to DOCX with pdf2docx.

	    NOTE(review): this redefines a function of the same name declared earlier
	    in the file; that earlier version also has an OCR fallback for scanned
	    PDFs, which this one lacks.

	    Args:
	        pdf_files: One uploaded file or a list of them. Each item may be a
	            path string or an object exposing the path as ``.name``.
	        lang_code: Key into ``TRANS`` selecting the message language.
	        progress: Gradio progress tracker.

	    Returns:
	        Path of the single DOCX, path of a ZIP holding all DOCX files when
	        several PDFs were given, or None when nothing was uploaded.

	    Raises:
	        gr.Error: If any conversion step fails.
	    """
	    import tempfile

	    T = TRANS[lang_code]
	    if not pdf_files:
	        gr.Warning(T["err_nofile"])
	        return None

	    if not isinstance(pdf_files, list):
	        pdf_files = [pdf_files]

	    # A per-request directory keeps concurrent users (and the fixed ZIP name)
	    # from overwriting each other's output in the working directory.
	    out_dir = tempfile.mkdtemp(prefix="pdf2word_")
	    total = len(pdf_files)
	    converted_files = []
	    used_names = set()

	    try:
	        progress(0, desc="Starting...")
	        for idx, pdf_file in enumerate(pdf_files):
	            # Accept both plain path strings and objects carrying ``.name``.
	            pdf_path = getattr(pdf_file, "name", pdf_file)
	            file_name = os.path.basename(pdf_path)
	            stem = os.path.splitext(file_name)[0]
	            # Report the work already finished rather than work just started.
	            progress(idx / total, desc=f"Processing: {file_name}")

	            # Two uploads with the same basename must not clobber each other.
	            docx_base = f"{stem}.docx"
	            suffix = 1
	            while docx_base in used_names:
	                docx_base = f"{stem}_{suffix}.docx"
	                suffix += 1
	            used_names.add(docx_base)
	            docx_name = os.path.join(out_dir, docx_base)

	            cv = Converter(pdf_path)
	            try:
	                cv.convert(docx_name)
	            finally:
	                # Close the converter even when convert() raises.
	                cv.close()
	            converted_files.append(docx_name)

	        progress(1, desc="Done")
	        gr.Info(f"✅ {T['pdf_success']}")

	        if len(converted_files) == 1:
	            return converted_files[0]

	        zip_name = os.path.join(out_dir, "Converted_Documents.zip")
	        with zipfile.ZipFile(zip_name, "w") as zf:
	            for docx_path in converted_files:
	                # Store only the file name, not the temp directory path.
	                zf.write(docx_path, arcname=os.path.basename(docx_path))
	        return zip_name
	    except Exception as e:
	        shutil.rmtree(out_dir, ignore_errors=True)
	        # gr.Error only reaches the user when it is raised.
	        raise gr.Error(f"Error: {str(e)}") from e

	def run_ocr_func(image, lang_code):
	    """Run PaddleOCRVL on one uploaded image and package the results.

	    NOTE(review): a function of the same name is also defined earlier in the
	    file; this definition replaces it when both are executed.

	    Args:
	        image: Image array from the Gradio image input (RGB order, per the
	            conversion below), or None if nothing was uploaded.
	        lang_code: Key into ``TRANS`` selecting the message language.

	    Returns:
	        ``(markdown_text, image, zip_path)``: the Markdown text read back from
	        the pipeline output, the unchanged input image, and the path of a ZIP
	        of the pipeline's output directory. ``(None, None, None)`` when no
	        image was supplied.

	    Raises:
	        gr.Error: If OCR is unavailable or the pipeline fails.
	    """
	    import tempfile

	    T = TRANS[lang_code]

	    # 1. Validation
	    if not HAS_OCR:
	        # gr.Error only reaches the user when it is raised.
	        raise gr.Error(T["err_ocr"])
	    if image is None:
	        gr.Warning(T["err_nofile"])
	        return None, None, None

	    # Scratch directory for the file-based pipeline I/O; removed in finally.
	    session_id = uuid.uuid4().hex
	    temp_dir = tempfile.mkdtemp(prefix="temp_ocr_")
	    input_img_path = os.path.join(temp_dir, "input_image.png")
	    output_save_path = os.path.join(temp_dir, "output")

	    try:
	        gr.Info("⏳ Initializing OCR-VL Pipeline...")

	        # 2. The pipeline is fed a file path, so write the array to disk.
	        #    OpenCV writes BGR, hence the channel swap.
	        img_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
	        if not cv2.imwrite(input_img_path, img_bgr):
	            raise RuntimeError("Could not write the uploaded image to disk.")

	        # 3. Run PaddleOCRVL
	        # See https://www.paddleocr.ai/latest/version3.x/pipeline_usage/PaddleOCR-VL.html
	        output = ocr_pipeline.predict(input_img_path)

	        # 4. Save results. The directory is created up front so that archiving
	        #    still works when the pipeline yields no results.
	        os.makedirs(output_save_path, exist_ok=True)
	        for res in output:
	            res.save_to_json(save_path=output_save_path)
	            res.save_to_markdown(save_path=output_save_path)

	        # 5. Read back the generated Markdown (file name is not fixed here).
	        md_files = sorted(glob.glob(os.path.join(output_save_path, "*.md")))
	        if md_files:
	            with open(md_files[0], "r", encoding="utf-8") as f:
	                markdown_text = f.read()
	        else:
	            markdown_text = "Analysis complete, but no markdown file was found."

	        # 6. Zip JSON + Markdown outside temp_dir so it survives the cleanup.
	        archive_base = os.path.join(
	            tempfile.gettempdir(), f"OCR_Result_{session_id}"
	        )
	        zip_output_path = shutil.make_archive(archive_base, "zip", output_save_path)

	        return markdown_text, image, zip_output_path

	    except Exception as e:
	        raise gr.Error(f"OCR Pipeline Error: {str(e)}") from e
	    finally:
	        shutil.rmtree(temp_dir, ignore_errors=True)

	# --- 4. UI HELPERS ---
	def change_lang(lang):
	    """Toggle the UI language and produce the replacement widget values.

	    "vi" switches to "en"; any other value switches to "vi". The 16 returned
	    items follow the order of the ``outputs`` list of the language button's
	    click handler.
	    """
	    next_lang = "vi" if lang != "vi" else "en"
	    T = TRANS[next_lang]

	    # Label-only updates for the input/output components.
	    label_updates = {
	        key: gr.update(label=T[key])
	        for key in ("pdf_label", "ocr_label", "ocr_rs_text", "ocr_rs_img")
	    }
	    menu_choices = [T["menu_pdf"], T["menu_ocr"]]

	    return (
	        next_lang,                                             # lang_state
	        T["lang_btn"],                                         # btn_lang
	        gr.update(choices=menu_choices, value=T["menu_pdf"]),  # radio_menu
	        f"## {T['app_name']}",                                 # txt_title
	        T["app_desc"],                                         # txt_desc
	        T["footer"],                                           # txt_footer
	        f"### {T['pdf_head']}",                                # pdf_head_md
	        T["pdf_sub"],                                          # pdf_sub_md
	        label_updates["pdf_label"],                            # in_pdf
	        T["pdf_btn"],                                          # btn_pdf
	        f"### {T['ocr_head']}",                                # ocr_head_md
	        T["ocr_sub"],                                          # ocr_sub_md
	        label_updates["ocr_label"],                            # in_img
	        T["ocr_btn"],                                          # btn_ocr
	        label_updates["ocr_rs_text"],                          # out_txt
	        label_updates["ocr_rs_img"],                           # out_img_viz
	    )

	def toggle_view(menu_val, lang):
	    """Return visibility updates for the PDF group and the OCR group.

	    The PDF group is shown when ``menu_val`` equals the PDF menu label in
	    either language; otherwise the OCR group is shown. ``lang`` is unused.
	    """
	    if menu_val in (TRANS["vi"]["menu_pdf"], TRANS["en"]["menu_pdf"]):
	        pdf_visible, ocr_visible = True, False
	    else:
	        pdf_visible, ocr_visible = False, True
	    return gr.update(visible=pdf_visible), gr.update(visible=ocr_visible)

	# --- 5. THEME ---
	# Soft base theme (blue / slate, Inter font) with a few colour overrides.
	_font_stack = [gr.themes.GoogleFont('Inter'), 'system-ui', 'sans-serif']
	theme = gr.themes.Soft(
	    primary_hue="blue",
	    neutral_hue="slate",
	    font=_font_stack,
	).set(
	    body_background_fill="#f8fafc",
	    block_background_fill="white",
	    block_border_width="1px",
	    button_primary_background_fill="linear-gradient(90deg, #3b82f6, #2563eb)",
	    button_primary_text_color="white",
	)

	# --- 6. INTERFACE ---
	# Builds the whole UI and wires its events. Widgets start in Vietnamese;
	# change_lang() swaps the texts later.
	# NOTE(review): an identical interface is built earlier in the file; this
	# one rebinds ``demo`` and every widget name.
	# NOTE(review): the nesting below was reconstructed from statement order
	# because the original indentation was lost - confirm the layout,
	# especially the placement of out_file and txt_footer.
	with gr.Blocks(title="Pro Document Toolkit") as demo:
	    # Current UI language code; passed to every handler.
	    lang_state = gr.State("vi")

	    with gr.Row():
	        # Sidebar: language switch, tool selector, status line.
	        with gr.Column(scale=1, min_width=250):
	            gr.Markdown("### 🛠️ Dashboard")
	            btn_lang = gr.Button(TRANS["vi"]["lang_btn"], variant="secondary")
	            radio_menu = gr.Radio(
	                choices=[TRANS["vi"]["menu_pdf"], TRANS["vi"]["menu_ocr"]],
	                value=TRANS["vi"]["menu_pdf"],
	                label="Function / Chức năng",
	                type="value"
	            )
	            gr.Markdown("---")
	            # "\\|" yields the same backslash-pipe text as before without the
	            # invalid-escape warning that a bare "\|" triggers.
	            gr.Markdown(f"Status: 🟢 Online \\| VL-OCR: {'Ready' if HAS_OCR else 'N/A'}")

	        # Main area: title, description and the two tool groups.
	        with gr.Column(scale=4):
	            txt_title = gr.Markdown(f"## {TRANS['vi']['app_name']}")
	            txt_desc = gr.Markdown(TRANS['vi']['app_desc'])

	            # PDF Tool (visible by default)
	            with gr.Group(visible=True) as group_pdf:
	                pdf_head_md = gr.Markdown(f"### {TRANS['vi']['pdf_head']}")
	                pdf_sub_md = gr.Markdown(TRANS['vi']['pdf_sub'])
	                with gr.Row(equal_height=True):
	                    with gr.Column():
	                        in_pdf = gr.File(
	                            label=TRANS["vi"]["pdf_label"],
	                            file_types=[".pdf"],
	                            file_count="multiple",
	                            height=300
	                        )
	                        btn_pdf = gr.Button(TRANS["vi"]["pdf_btn"], variant="primary", size="lg")
	                    with gr.Column():
	                        gr.Markdown("### Output")
	                        out_word = gr.File(label="Download DOCX / ZIP", height=250)

	            # OCR Tool (hidden until selected in the menu)
	            with gr.Group(visible=False) as group_ocr:
	                ocr_head_md = gr.Markdown(f"### {TRANS['vi']['ocr_head']}")
	                ocr_sub_md = gr.Markdown(TRANS['vi']['ocr_sub'])
	                with gr.Row():
	                    with gr.Column(scale=1):
	                        in_img = gr.Image(
	                            label=TRANS["vi"]["ocr_label"],
	                            type="numpy",
	                            height=450,
	                            sources=["upload", "clipboard"]
	                        )
	                        btn_ocr = gr.Button(TRANS["vi"]["ocr_btn"], variant="primary", size="lg")
	                    with gr.Column(scale=1):
	                        with gr.Tabs():
	                            with gr.Tab("Markdown"):
	                                out_txt = gr.Textbox(
	                                    label=TRANS["vi"]["ocr_rs_text"],
	                                    lines=20,
	                                    placeholder="Markdown content will appear here...",
	                                    buttons=["copy"]
	                                )
	                                out_file = gr.File(label="Download Full Results (.zip)")
	                            with gr.Tab("Source Image"):
	                                out_img_viz = gr.Image(
	                                    label=TRANS["vi"]["ocr_rs_img"],
	                                    interactive=False
	                                )

	            txt_footer = gr.Markdown(TRANS['vi']['footer'])

	    # EVENTS
	    radio_menu.change(toggle_view, inputs=[radio_menu, lang_state], outputs=[group_pdf, group_ocr])
	    btn_pdf.click(convert_pdfs_to_word, inputs=[in_pdf, lang_state], outputs=out_word)
	    btn_ocr.click(run_ocr_func, inputs=[in_img, lang_state], outputs=[out_txt, out_img_viz, out_file])
	    # The output order must match the tuple returned by change_lang().
	    btn_lang.click(
	        change_lang,
	        inputs=[lang_state],
	        outputs=[
	            lang_state, btn_lang, radio_menu,
	            txt_title, txt_desc, txt_footer,
	            pdf_head_md, pdf_sub_md, in_pdf, btn_pdf,
	            ocr_head_md, ocr_sub_md, in_img, btn_ocr,
	            out_txt, out_img_viz
	        ]
	    )

	# Script entry point: enable the request queue, then serve on all
	# interfaces at port 7860 with the custom theme.
	# NOTE(review): an identical guard appears earlier in the file.
	if __name__ == "__main__":
	    queued_app = demo.queue()
	    queued_app.launch(
	        server_name="0.0.0.0",
	        server_port=7860,
	        theme=theme,
	    )