# PDF-to-Word / app.py
# hantech's picture
# Update app.py
# 42a4119 verified
import gradio as gr
from pdf2docx import Converter
import os
import zipfile
import cv2
import numpy as np
import shutil
import uuid
import glob
import fitz
import logging
from docx import Document
# --- 1. SYSTEM CONFIGURATION ---
# Set before paddleocr is imported below.
# NOTE(review): presumably disables a model-source check (inferred from the
# variable name) -- confirm against the PaddleOCR/PaddleX docs.
os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"
# Module-level OCR state: HAS_OCR becomes True only after the pipeline object
# has been constructed without raising.
HAS_OCR = False
ocr_pipeline = None
try:
    # UPDATED: Using PaddleOCRVL as requested
    # Note: Import AFTER renaming this file to avoid circular import
    from paddleocr import PaddleOCRVL
    # Initialize the specific pipeline version
    ocr_pipeline = PaddleOCRVL(pipeline_version="v1.5")
    HAS_OCR = True
    print("[OK] PaddleOCRVL initialized successfully.")
except Exception as e:
    # Best-effort startup: any import or initialization failure is reported on
    # stdout and the module continues loading with HAS_OCR left as False.
    print(f"[WARN] Could not initialize PaddleOCRVL. Error: {e}")
    print("Ensure you have installed: pip install paddlepaddle paddleocr")
# --- 2. LANGUAGE DICTIONARY ---
# UI strings, keyed first by language code ("vi" / "en") and then by message
# key. Both languages define the same set of message keys.
TRANS = {
    # Vietnamese strings.
    "vi": {
        "app_name": "PRO DOCUMENT TOOLKIT",
        "app_desc": "Xử lý tài liệu thông minh: Chuyển PDF → Word & Phân tích bố cục OCR",
        "menu_pdf": "📄 PDF sang Word",
        "menu_ocr": "👁️ Phân tích tài liệu (OCR-VL)",
        "lang_btn": "🇬🇧 English",
        "pdf_head": "Chuyển đổi PDF",
        "pdf_sub": "Giữ định dạng gốc, hỗ trợ nhiều file cùng lúc.",
        "pdf_label": "Tải lên file PDF (có thể nhiều file)",
        "pdf_btn": "🚀 Chuyển đổi",
        "pdf_success": "Chuyển đổi thành công!",
        "ocr_head": "Phân tích tài liệu & OCR",
        "ocr_sub": "Sử dụng PaddleOCRVL để nhận diện văn bản và cấu trúc (Markdown/JSON).",
        "ocr_label": "Tải ảnh lên (PNG, JPG, BMP)",
        "ocr_btn": "🔍 Phân tích Bố cục",
        "ocr_rs_text": "Kết quả Markdown",
        "ocr_rs_img": "Hình ảnh gốc",
        "err_nofile": "Vui lòng tải file lên!",
        "err_ocr": "OCR chưa sẵn sàng (kiểm tra cài đặt PaddleOCR).",
        "footer": "© 2024-2026 Developed by Chu Viet Kien. Powered by PaddleOCRVL & Gradio."
    },
    # English strings.
    "en": {
        "app_name": "PRO DOCUMENT TOOLKIT",
        "app_desc": "Smart Document Processing: PDF to Word & OCR Layout Analysis",
        "menu_pdf": "📄 PDF to Word",
        "menu_ocr": "👁️ Doc Analysis (OCR-VL)",
        "lang_btn": "🇻🇳 Tiếng Việt",
        "pdf_head": "PDF Converter",
        "pdf_sub": "Preserve original layout, batch support.",
        "pdf_label": "Upload PDF files (multiple allowed)",
        "pdf_btn": "🚀 Convert",
        "pdf_success": "Conversion successful!",
        "ocr_head": "Document Analysis & OCR",
        "ocr_sub": "Uses PaddleOCRVL to extract text and structure (Markdown/JSON).",
        "ocr_label": "Upload Image (PNG, JPG, BMP)",
        "ocr_btn": "🔍 Analyze Layout",
        "ocr_rs_text": "Markdown Result",
        "ocr_rs_img": "Original Image",
        "err_nofile": "Please upload a file first!",
        "err_ocr": "OCR not available (check PaddleOCR installation).",
        "footer": "© 2024-2026 Developed by Chu Viet Kien. Powered by PaddleOCRVL & Gradio."
    }
}
# --- 3. PROCESSING LOGIC ---
def _is_scanned_pdf(pdf_path):
"""Check if a PDF has no extractable text layer (i.e. scanned/image-only)."""
try:
doc = fitz.open(pdf_path)
has_text = any(page.get_text().strip() for page in doc)
doc.close()
return not has_text
except Exception:
return False
def _docx_is_empty(docx_path):
"""Check if a DOCX has no meaningful text."""
try:
doc = Document(docx_path)
for para in doc.paragraphs:
if para.text.strip():
return False
for table in doc.tables:
for row in table.rows:
for cell in row.cells:
if cell.text.strip():
return False
return True
except Exception:
return True
def _ocr_pdf_to_docx(pdf_path, docx_path):
    """Fallback: convert a scanned PDF to DOCX using PaddleOCRVL.

    Each page is rendered to a PNG, passed to the module-level
    ``ocr_pipeline``, and the markdown the pipeline saves is copied into the
    Word document as one paragraph per non-blank line. A page break is
    inserted between pages.

    Args:
        pdf_path: Path of the source PDF.
        docx_path: Path where the resulting DOCX is written.

    Raises:
        Any exception from PyMuPDF, the OCR pipeline, python-docx or file
        I/O propagates to the caller. The PDF handle and the scratch
        directory are released in every case.
    """
    pdf_doc = fitz.open(pdf_path)
    temp_base = f"temp_ocr_pdf_{uuid.uuid4().hex}"
    try:
        # Everything after the open() lives inside the try so the PDF handle
        # is closed even if document/directory creation fails.
        word_doc = Document()
        os.makedirs(temp_base, exist_ok=True)
        last_index = len(pdf_doc) - 1
        # 2x zoom on both axes (about 144 DPI with PyMuPDF's 72 DPI default).
        mat = fitz.Matrix(2, 2)
        for i, page in enumerate(pdf_doc):
            pix = page.get_pixmap(matrix=mat)
            img_path = os.path.join(temp_base, f"page_{i}.png")
            pix.save(img_path)
            # Run PaddleOCRVL and let every result write its markdown file.
            output = ocr_pipeline.predict(img_path)
            page_out_dir = os.path.join(temp_base, f"out_{i}")
            os.makedirs(page_out_dir, exist_ok=True)
            for res in output:
                res.save_to_markdown(save_path=page_out_dir)
            md_files = glob.glob(os.path.join(page_out_dir, "*.md"))
            if md_files:
                with open(md_files[0], "r", encoding="utf-8") as f:
                    page_md = f.read().strip()
                # Blank lines are dropped; each remaining line is a paragraph.
                for line in page_md.splitlines():
                    stripped = line.strip()
                    if stripped:
                        word_doc.add_paragraph(stripped)
            if i < last_index:
                word_doc.add_page_break()
        word_doc.save(docx_path)
    finally:
        pdf_doc.close()
        shutil.rmtree(temp_base, ignore_errors=True)
def convert_pdfs_to_word(pdf_files, lang_code, progress=gr.Progress()):
    """Convert one or more uploaded PDFs to DOCX.

    PDFs without a text layer are sent straight to the OCR fallback when OCR
    is available; all others go through pdf2docx, with OCR used as a second
    chance when pdf2docx produces an empty document.

    Args:
        pdf_files: A single uploaded file or a list of them. Each item is
            either a path string or an object exposing the path as ``.name``.
        lang_code: Key into ``TRANS`` selecting the UI language.
        progress: Gradio progress tracker.

    Returns:
        Path to the single DOCX, or to a ZIP holding all DOCX files when
        several PDFs were supplied. None when nothing was uploaded.

    Raises:
        gr.Error: If any step of the conversion fails.
    """
    T = TRANS[lang_code]
    if not pdf_files:
        gr.Warning(T["err_nofile"])
        return None
    if not isinstance(pdf_files, list):
        pdf_files = [pdf_files]
    # One output folder per request, so concurrent requests (or files sharing
    # a basename) cannot overwrite each other's results.
    out_dir = f"converted_{uuid.uuid4().hex}"
    converted_files = []
    try:
        os.makedirs(out_dir, exist_ok=True)
        progress(0, desc="Starting...")
        total = len(pdf_files)
        for idx, pdf_file in enumerate(pdf_files):
            # Accept both plain path strings and objects carrying ``.name``.
            pdf_path = getattr(pdf_file, "name", pdf_file)
            file_name = os.path.basename(pdf_path)
            stem = os.path.splitext(file_name)[0]
            docx_name = os.path.join(out_dir, stem + ".docx")
            if os.path.exists(docx_name):
                # Same basename uploaded twice in one batch: disambiguate.
                docx_name = os.path.join(out_dir, f"{stem}_{idx + 1}.docx")
            # Route 1: obvious scanned PDF -> OCR directly
            if _is_scanned_pdf(pdf_path) and HAS_OCR:
                progress((idx + 1) / total, desc=f"OCR: {file_name}")
                _ocr_pdf_to_docx(pdf_path, docx_name)
            else:
                progress((idx + 1) / total, desc=f"Converting: {file_name}")
                # Temporarily bump root logger to ERROR so pdf2docx
                # info/warning spam is hidden
                _root_logger = logging.getLogger()
                _old_level = _root_logger.level
                _root_logger.setLevel(logging.ERROR)
                try:
                    cv = Converter(pdf_path)
                    try:
                        cv.convert(docx_name)
                    finally:
                        # Close the converter even when convert() fails.
                        cv.close()
                finally:
                    _root_logger.setLevel(_old_level)
                # Route 2: pdf2docx produced empty docx -> OCR fallback
                if HAS_OCR and _docx_is_empty(docx_name):
                    os.remove(docx_name)
                    _ocr_pdf_to_docx(pdf_path, docx_name)
            converted_files.append(docx_name)
        gr.Info(f"✅ {T['pdf_success']}")
        if len(converted_files) == 1:
            return converted_files[0]
        zip_name = os.path.join(out_dir, "Converted_Documents.zip")
        with zipfile.ZipFile(zip_name, 'w') as zf:
            for f in converted_files:
                # Store only the file name, not the on-disk folder.
                zf.write(f, arcname=os.path.basename(f))
        return zip_name
    except Exception as e:
        # gr.Error is an exception class: it is only shown when raised.
        raise gr.Error(f"Error: {str(e)}") from e
def run_ocr_func(image, lang_code):
    """Run the PaddleOCRVL pipeline on one uploaded image.

    Args:
        image: The uploaded image as an array, or None when nothing was
            uploaded. It is converted with ``cv2.COLOR_RGB2BGR`` before
            being written to disk for the pipeline.
        lang_code: Key into ``TRANS`` selecting the UI language.

    Returns:
        A ``(markdown_text, image, zip_path)`` tuple on success, where
        ``zip_path`` names an archive of everything the pipeline saved.
        ``(None, None, None)`` when no image was supplied.

    Raises:
        gr.Error: If OCR is unavailable or the pipeline fails.
    """
    T = TRANS[lang_code]
    # 1. Validation
    if not HAS_OCR:
        # gr.Error is an exception class: it is only shown when raised.
        raise gr.Error(T["err_ocr"])
    if image is None:
        gr.Warning(T["err_nofile"])
        return None, None, None
    # Setup temporary paths to handle file I/O for PaddleOCRVL
    session_id = uuid.uuid4().hex
    temp_dir = f"temp_ocr_{session_id}"
    input_img_path = os.path.join(temp_dir, "input_image.png")
    output_save_path = os.path.join(temp_dir, "output")
    try:
        # Creates temp_dir as well; guarantees the folder archived below
        # exists even if the pipeline yields no results.
        os.makedirs(output_save_path, exist_ok=True)
        gr.Info("⏳ Initializing OCR-VL Pipeline...")
        # 2. Save Gradio Image (Numpy) to File (Required by Pipeline)
        # Convert RGB (Gradio) to BGR (OpenCV)
        img_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        cv2.imwrite(input_img_path, img_bgr)
        # 3. Run PaddleOCRVL Logic
        # See https://www.paddleocr.ai/latest/version3.x/pipeline_usage/PaddleOCR-VL.html
        output = ocr_pipeline.predict(input_img_path)
        # 4. Process Results
        for res in output:
            res.save_to_json(save_path=output_save_path)
            res.save_to_markdown(save_path=output_save_path)
        # 5. Retrieve Generated Content
        # Find the generated markdown file (name varies based on library version)
        md_files = glob.glob(os.path.join(output_save_path, "*.md"))
        if md_files:
            with open(md_files[0], "r", encoding="utf-8") as f:
                markdown_text = f.read()
        else:
            markdown_text = "Analysis complete, but no markdown file was found."
        # 6. Package Results (Zip JSON & Markdown)
        archive_base = f"OCR_Result_{session_id}"
        shutil.make_archive(archive_base, 'zip', output_save_path)
        zip_output_path = archive_base + ".zip"
        # The original image is returned unchanged for the "Source Image" tab.
        return markdown_text, image, zip_output_path
    except Exception as e:
        raise gr.Error(f"OCR Pipeline Error: {str(e)}") from e
    finally:
        # The zip lives outside temp_dir, so it survives this cleanup.
        shutil.rmtree(temp_dir, ignore_errors=True)
# --- 4. UI HELPERS ---
def change_lang(lang):
    """Switch the UI to the other language and return the refreshed values.

    ``"vi"`` switches to ``"en"``; any other value switches to ``"vi"``.
    The returned tuple is ordered to match the ``outputs`` list wired to the
    language button: new language code, button text, radio update, title,
    description, footer, then the PDF and OCR section texts and labels.
    """
    if lang == "vi":
        target = "en"
    else:
        target = "vi"
    strings = TRANS[target]

    def relabel(key):
        # Update that only swaps a component's label.
        return gr.update(label=strings[key])

    menu_update = gr.update(
        choices=[strings["menu_pdf"], strings["menu_ocr"]],
        value=strings["menu_pdf"],
    )
    return (
        target,                          # lang_state
        strings["lang_btn"],             # btn_lang
        menu_update,                     # radio_menu
        f"## {strings['app_name']}",     # txt_title
        strings["app_desc"],             # txt_desc
        strings["footer"],               # txt_footer
        f"### {strings['pdf_head']}",    # pdf_head_md
        strings["pdf_sub"],              # pdf_sub_md
        relabel("pdf_label"),            # in_pdf label
        strings["pdf_btn"],              # btn_pdf
        f"### {strings['ocr_head']}",    # ocr_head_md
        strings["ocr_sub"],              # ocr_sub_md
        relabel("ocr_label"),            # in_img label
        strings["ocr_btn"],              # btn_ocr
        relabel("ocr_rs_text"),          # out_txt label
        relabel("ocr_rs_img"),           # out_img_viz label
    )
def toggle_view(menu_val, lang):
    """Show the PDF group or the OCR group according to the selected menu item.

    The selection counts as "PDF" when it equals the PDF menu label of either
    language; ``lang`` is accepted but not consulted. Returns visibility
    updates for (PDF group, OCR group).
    """
    pdf_labels = [TRANS[code]["menu_pdf"] for code in ("vi", "en")]
    show_pdf = menu_val in pdf_labels
    pdf_update = gr.update(visible=show_pdf)
    ocr_update = gr.update(visible=not show_pdf)
    return pdf_update, ocr_update
# --- 5. THEME ---
# Font stack: Inter from Google Fonts, then generic fallbacks.
_theme_fonts = [gr.themes.GoogleFont('Inter'), 'system-ui', 'sans-serif']
# Start from the Soft preset with blue/slate hues ...
_soft_theme = gr.themes.Soft(
    primary_hue="blue",
    neutral_hue="slate",
    font=_theme_fonts,
)
# ... then override a handful of individual style variables.
theme = _soft_theme.set(
    body_background_fill="#f8fafc",
    block_background_fill="white",
    block_border_width="1px",
    button_primary_background_fill="linear-gradient(90deg, #3b82f6, #2563eb)",
    button_primary_text_color="white",
)
# --- 6. INTERFACE ---
# NOTE(review): the source had lost its indentation; the nesting below is
# reconstructed from the order of the context managers -- confirm the layout
# (in particular the placement of the footer and the download box).
with gr.Blocks(title="Pro Document Toolkit") as demo:
    # The UI starts in Vietnamese; the current language is kept in state.
    _vi = TRANS["vi"]
    lang_state = gr.State("vi")
    with gr.Row():
        # Sidebar: language switch, tool selector and status line.
        with gr.Column(scale=1, min_width=250):
            gr.Markdown("### 🛠️ Dashboard")
            btn_lang = gr.Button(_vi["lang_btn"], variant="secondary")
            radio_menu = gr.Radio(
                choices=[_vi["menu_pdf"], _vi["menu_ocr"]],
                value=_vi["menu_pdf"],
                label="Function / Chức năng",
                type="value",
            )
            gr.Markdown("---")
            gr.Markdown(f"**Status:** 🟢 Online | VL-OCR: {'Ready' if HAS_OCR else 'N/A'}")
        # Main area: title, description and the two tool panels.
        with gr.Column(scale=4):
            txt_title = gr.Markdown(f"## {_vi['app_name']}")
            txt_desc = gr.Markdown(_vi['app_desc'])
            # PDF Tool (visible at start-up)
            with gr.Group(visible=True) as group_pdf:
                pdf_head_md = gr.Markdown(f"### {_vi['pdf_head']}")
                pdf_sub_md = gr.Markdown(_vi['pdf_sub'])
                with gr.Row(equal_height=True):
                    with gr.Column():
                        in_pdf = gr.File(
                            label=_vi["pdf_label"],
                            file_types=[".pdf"],
                            file_count="multiple",
                            height=300,
                        )
                        btn_pdf = gr.Button(_vi["pdf_btn"], variant="primary", size="lg")
                    with gr.Column():
                        gr.Markdown("### Output")
                        out_word = gr.File(label="Download DOCX / ZIP", height=250)
            # OCR Tool (hidden until selected in the radio menu)
            with gr.Group(visible=False) as group_ocr:
                ocr_head_md = gr.Markdown(f"### {_vi['ocr_head']}")
                ocr_sub_md = gr.Markdown(_vi['ocr_sub'])
                with gr.Row():
                    with gr.Column(scale=1):
                        in_img = gr.Image(
                            label=_vi["ocr_label"],
                            type="numpy",
                            height=450,
                            sources=["upload", "clipboard"],
                        )
                        btn_ocr = gr.Button(_vi["ocr_btn"], variant="primary", size="lg")
                    with gr.Column(scale=1):
                        with gr.Tabs():
                            with gr.Tab("Markdown"):
                                out_txt = gr.Textbox(
                                    label=_vi["ocr_rs_text"],
                                    lines=20,
                                    placeholder="Markdown content will appear here...",
                                    buttons=["copy"],
                                )
                                out_file = gr.File(label="Download Full Results (.zip)")
                            with gr.Tab("Source Image"):
                                out_img_viz = gr.Image(
                                    label=_vi["ocr_rs_img"],
                                    interactive=False,
                                )
    txt_footer = gr.Markdown(_vi['footer'])
    # EVENTS
    radio_menu.change(
        toggle_view,
        inputs=[radio_menu, lang_state],
        outputs=[group_pdf, group_ocr],
    )
    btn_pdf.click(
        convert_pdfs_to_word,
        inputs=[in_pdf, lang_state],
        outputs=out_word,
    )
    btn_ocr.click(
        run_ocr_func,
        inputs=[in_img, lang_state],
        outputs=[out_txt, out_img_viz, out_file],
    )
    # Order must match the tuple returned by change_lang.
    _lang_outputs = [
        lang_state, btn_lang, radio_menu,
        txt_title, txt_desc, txt_footer,
        pdf_head_md, pdf_sub_md, in_pdf, btn_pdf,
        ocr_head_md, ocr_sub_md, in_img, btn_ocr,
        out_txt, out_img_viz,
    ]
    btn_lang.click(change_lang, inputs=[lang_state], outputs=_lang_outputs)
if __name__ == "__main__":
    # Enable queuing, then serve on every network interface at port 7860
    # with the custom theme.
    queued_app = demo.queue()
    queued_app.launch(server_name="0.0.0.0", server_port=7860, theme=theme)
# --- 1. SYSTEM CONFIGURATION ---
# Set before paddleocr is imported below.
# NOTE(review): presumably disables a model-source check (inferred from the
# variable name) -- confirm against the PaddleOCR/PaddleX docs.
os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"
# Module-level OCR state: HAS_OCR becomes True only after the pipeline object
# has been constructed without raising.
HAS_OCR = False
ocr_pipeline = None
try:
    # UPDATED: Using PaddleOCRVL as requested
    from paddleocr import PaddleOCRVL
    # Initialize the specific pipeline version. Uses the same "v1.5"
    # identifier as the other initialization in this file.
    ocr_pipeline = PaddleOCRVL(pipeline_version="v1.5")
    HAS_OCR = True
    print("✅ PaddleOCRVL initialized successfully.")
except Exception as e:
    # Best-effort startup: any import or initialization failure is reported on
    # stdout and the module continues loading with HAS_OCR left as False.
    print(f"⚠️ Warning: Could not initialize PaddleOCRVL. Error: {e}")
    print("Ensure you have installed: pip install paddlepaddle paddleocr")
# --- 2. LANGUAGE DICTIONARY ---
# UI strings, keyed first by language code ("vi" / "en") and then by message
# key. Both languages define the same set of message keys.
TRANS = {
    # Vietnamese strings.
    "vi": {
        "app_name": "PRO DOCUMENT TOOLKIT",
        "app_desc": "Xử lý tài liệu thông minh: Chuyển PDF → Word & Phân tích bố cục OCR",
        "menu_pdf": "📄 PDF sang Word",
        "menu_ocr": "👁️ Phân tích tài liệu (OCR-VL)",
        "lang_btn": "🇬🇧 English",
        "pdf_head": "Chuyển đổi PDF",
        "pdf_sub": "Giữ định dạng gốc, hỗ trợ nhiều file cùng lúc.",
        "pdf_label": "Tải lên file PDF (có thể nhiều file)",
        "pdf_btn": "🚀 Chuyển đổi",
        "pdf_success": "Chuyển đổi thành công!",
        "ocr_head": "Phân tích tài liệu & OCR",
        "ocr_sub": "Sử dụng PaddleOCRVL để nhận diện văn bản và cấu trúc (Markdown/JSON).",
        "ocr_label": "Tải ảnh lên (PNG, JPG, BMP)",
        "ocr_btn": "🔍 Phân tích Bố cục",
        "ocr_rs_text": "Kết quả Markdown",
        "ocr_rs_img": "Hình ảnh gốc",
        "err_nofile": "Vui lòng tải file lên!",
        "err_ocr": "OCR chưa sẵn sàng (kiểm tra cài đặt PaddleOCR).",
        "footer": "© 2024-2026 Developed by Chu Viet Kien. Powered by PaddleOCRVL & Gradio."
    },
    # English strings.
    "en": {
        "app_name": "PRO DOCUMENT TOOLKIT",
        "app_desc": "Smart Document Processing: PDF to Word & OCR Layout Analysis",
        "menu_pdf": "📄 PDF to Word",
        "menu_ocr": "👁️ Doc Analysis (OCR-VL)",
        "lang_btn": "🇻🇳 Tiếng Việt",
        "pdf_head": "PDF Converter",
        "pdf_sub": "Preserve original layout, batch support.",
        "pdf_label": "Upload PDF files (multiple allowed)",
        "pdf_btn": "🚀 Convert",
        "pdf_success": "Conversion successful!",
        "ocr_head": "Document Analysis & OCR",
        "ocr_sub": "Uses PaddleOCRVL to extract text and structure (Markdown/JSON).",
        "ocr_label": "Upload Image (PNG, JPG, BMP)",
        "ocr_btn": "🔍 Analyze Layout",
        "ocr_rs_text": "Markdown Result",
        "ocr_rs_img": "Original Image",
        "err_nofile": "Please upload a file first!",
        "err_ocr": "OCR not available (check PaddleOCR installation).",
        "footer": "© 2024-2026 Developed by Chu Viet Kien. Powered by PaddleOCRVL & Gradio."
    }
}
# --- 3. PROCESSING LOGIC ---
def convert_pdfs_to_word(pdf_files, lang_code, progress=gr.Progress()):
    """Convert one or more uploaded PDFs to DOCX with pdf2docx.

    Args:
        pdf_files: A single uploaded file or a list of them. Each item is
            either a path string or an object exposing the path as ``.name``.
        lang_code: Key into ``TRANS`` selecting the UI language.
        progress: Gradio progress tracker.

    Returns:
        Path to the single DOCX, or to a ZIP holding all DOCX files when
        several PDFs were supplied. None when nothing was uploaded.

    Raises:
        gr.Error: If any step of the conversion fails.
    """
    T = TRANS[lang_code]
    if not pdf_files:
        gr.Warning(T["err_nofile"])
        return None
    if not isinstance(pdf_files, list):
        pdf_files = [pdf_files]
    # One output folder per request, so concurrent requests (or files sharing
    # a basename) cannot overwrite each other's results.
    out_dir = f"converted_{uuid.uuid4().hex}"
    converted_files = []
    try:
        os.makedirs(out_dir, exist_ok=True)
        progress(0, desc="Starting...")
        total = len(pdf_files)
        for idx, pdf_file in enumerate(pdf_files):
            # Accept both plain path strings and objects carrying ``.name``.
            pdf_path = getattr(pdf_file, "name", pdf_file)
            file_name = os.path.basename(pdf_path)
            progress((idx + 1) / total, desc=f"Processing: {file_name}")
            stem = os.path.splitext(file_name)[0]
            docx_name = os.path.join(out_dir, stem + ".docx")
            if os.path.exists(docx_name):
                # Same basename uploaded twice in one batch: disambiguate.
                docx_name = os.path.join(out_dir, f"{stem}_{idx + 1}.docx")
            cv = Converter(pdf_path)
            try:
                cv.convert(docx_name)
            finally:
                # Close the converter even when convert() fails.
                cv.close()
            converted_files.append(docx_name)
        gr.Info(f"✅ {T['pdf_success']}")
        if len(converted_files) == 1:
            return converted_files[0]
        zip_name = os.path.join(out_dir, "Converted_Documents.zip")
        with zipfile.ZipFile(zip_name, 'w') as zf:
            for f in converted_files:
                # Store only the file name, not the on-disk folder.
                zf.write(f, arcname=os.path.basename(f))
        return zip_name
    except Exception as e:
        # gr.Error is an exception class: it is only shown when raised.
        raise gr.Error(f"Error: {str(e)}") from e
def run_ocr_func(image, lang_code):
    """Run the PaddleOCRVL pipeline on one uploaded image.

    Args:
        image: The uploaded image as an array, or None when nothing was
            uploaded. It is converted with ``cv2.COLOR_RGB2BGR`` before
            being written to disk for the pipeline.
        lang_code: Key into ``TRANS`` selecting the UI language.

    Returns:
        A ``(markdown_text, image, zip_path)`` tuple on success, where
        ``zip_path`` names an archive of everything the pipeline saved.
        ``(None, None, None)`` when no image was supplied.

    Raises:
        gr.Error: If OCR is unavailable or the pipeline fails.
    """
    T = TRANS[lang_code]
    # 1. Validation
    if not HAS_OCR:
        # gr.Error is an exception class: it is only shown when raised.
        raise gr.Error(T["err_ocr"])
    if image is None:
        gr.Warning(T["err_nofile"])
        return None, None, None
    # Setup temporary paths to handle file I/O for PaddleOCRVL
    session_id = uuid.uuid4().hex
    temp_dir = f"temp_ocr_{session_id}"
    input_img_path = os.path.join(temp_dir, "input_image.png")
    output_save_path = os.path.join(temp_dir, "output")
    try:
        # Creates temp_dir as well; guarantees the folder archived below
        # exists even if the pipeline yields no results.
        os.makedirs(output_save_path, exist_ok=True)
        gr.Info("⏳ Initializing OCR-VL Pipeline...")
        # 2. Save Gradio Image (Numpy) to File (Required by Pipeline)
        # Convert RGB (Gradio) to BGR (OpenCV)
        img_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        cv2.imwrite(input_img_path, img_bgr)
        # 3. Run PaddleOCRVL Logic
        # See https://www.paddleocr.ai/latest/version3.x/pipeline_usage/PaddleOCR-VL.html
        output = ocr_pipeline.predict(input_img_path)
        # 4. Process Results
        for res in output:
            res.save_to_json(save_path=output_save_path)
            res.save_to_markdown(save_path=output_save_path)
        # 5. Retrieve Generated Content
        # Find the generated markdown file (name varies based on library version)
        md_files = glob.glob(os.path.join(output_save_path, "*.md"))
        if md_files:
            with open(md_files[0], "r", encoding="utf-8") as f:
                markdown_text = f.read()
        else:
            markdown_text = "Analysis complete, but no markdown file was found."
        # 6. Package Results (Zip JSON & Markdown)
        archive_base = f"OCR_Result_{session_id}"
        shutil.make_archive(archive_base, 'zip', output_save_path)
        zip_output_path = archive_base + ".zip"
        # The original image is returned unchanged for the "Source Image" tab.
        return markdown_text, image, zip_output_path
    except Exception as e:
        raise gr.Error(f"OCR Pipeline Error: {str(e)}") from e
    finally:
        # The zip lives outside temp_dir, so it survives this cleanup.
        shutil.rmtree(temp_dir, ignore_errors=True)
# --- 4. UI HELPERS ---
def change_lang(lang):
    """Switch the UI to the other language and return the refreshed values.

    ``"vi"`` switches to ``"en"``; any other value switches to ``"vi"``.
    The returned tuple is ordered to match the ``outputs`` list wired to the
    language button: new language code, button text, radio update, title,
    description, footer, then the PDF and OCR section texts and labels.
    """
    if lang == "vi":
        target = "en"
    else:
        target = "vi"
    strings = TRANS[target]

    def relabel(key):
        # Update that only swaps a component's label.
        return gr.update(label=strings[key])

    menu_update = gr.update(
        choices=[strings["menu_pdf"], strings["menu_ocr"]],
        value=strings["menu_pdf"],
    )
    return (
        target,                          # lang_state
        strings["lang_btn"],             # btn_lang
        menu_update,                     # radio_menu
        f"## {strings['app_name']}",     # txt_title
        strings["app_desc"],             # txt_desc
        strings["footer"],               # txt_footer
        f"### {strings['pdf_head']}",    # pdf_head_md
        strings["pdf_sub"],              # pdf_sub_md
        relabel("pdf_label"),            # in_pdf label
        strings["pdf_btn"],              # btn_pdf
        f"### {strings['ocr_head']}",    # ocr_head_md
        strings["ocr_sub"],              # ocr_sub_md
        relabel("ocr_label"),            # in_img label
        strings["ocr_btn"],              # btn_ocr
        relabel("ocr_rs_text"),          # out_txt label
        relabel("ocr_rs_img"),           # out_img_viz label
    )
def toggle_view(menu_val, lang):
    """Show the PDF group or the OCR group according to the selected menu item.

    The selection counts as "PDF" when it equals the PDF menu label of either
    language; ``lang`` is accepted but not consulted. Returns visibility
    updates for (PDF group, OCR group).
    """
    pdf_labels = [TRANS[code]["menu_pdf"] for code in ("vi", "en")]
    show_pdf = menu_val in pdf_labels
    pdf_update = gr.update(visible=show_pdf)
    ocr_update = gr.update(visible=not show_pdf)
    return pdf_update, ocr_update
# --- 5. THEME ---
# Font stack: Inter from Google Fonts, then generic fallbacks.
_theme_fonts = [gr.themes.GoogleFont('Inter'), 'system-ui', 'sans-serif']
# Start from the Soft preset with blue/slate hues ...
_soft_theme = gr.themes.Soft(
    primary_hue="blue",
    neutral_hue="slate",
    font=_theme_fonts,
)
# ... then override a handful of individual style variables.
theme = _soft_theme.set(
    body_background_fill="#f8fafc",
    block_background_fill="white",
    block_border_width="1px",
    button_primary_background_fill="linear-gradient(90deg, #3b82f6, #2563eb)",
    button_primary_text_color="white",
)
# --- 6. INTERFACE ---
# NOTE(review): the source had lost its indentation; the nesting below is
# reconstructed from the order of the context managers -- confirm the layout
# (in particular the placement of the footer and the download box).
with gr.Blocks(title="Pro Document Toolkit") as demo:
    # The UI starts in Vietnamese; the current language is kept in state.
    _vi = TRANS["vi"]
    lang_state = gr.State("vi")
    with gr.Row():
        # Sidebar: language switch, tool selector and status line.
        with gr.Column(scale=1, min_width=250):
            gr.Markdown("### 🛠️ Dashboard")
            btn_lang = gr.Button(_vi["lang_btn"], variant="secondary")
            radio_menu = gr.Radio(
                choices=[_vi["menu_pdf"], _vi["menu_ocr"]],
                value=_vi["menu_pdf"],
                label="Function / Chức năng",
                type="value",
            )
            gr.Markdown("---")
            gr.Markdown(f"**Status:** 🟢 Online | VL-OCR: {'Ready' if HAS_OCR else 'N/A'}")
        # Main area: title, description and the two tool panels.
        with gr.Column(scale=4):
            txt_title = gr.Markdown(f"## {_vi['app_name']}")
            txt_desc = gr.Markdown(_vi['app_desc'])
            # PDF Tool (visible at start-up)
            with gr.Group(visible=True) as group_pdf:
                pdf_head_md = gr.Markdown(f"### {_vi['pdf_head']}")
                pdf_sub_md = gr.Markdown(_vi['pdf_sub'])
                with gr.Row(equal_height=True):
                    with gr.Column():
                        in_pdf = gr.File(
                            label=_vi["pdf_label"],
                            file_types=[".pdf"],
                            file_count="multiple",
                            height=300,
                        )
                        btn_pdf = gr.Button(_vi["pdf_btn"], variant="primary", size="lg")
                    with gr.Column():
                        gr.Markdown("### Output")
                        out_word = gr.File(label="Download DOCX / ZIP", height=250)
            # OCR Tool (hidden until selected in the radio menu)
            with gr.Group(visible=False) as group_ocr:
                ocr_head_md = gr.Markdown(f"### {_vi['ocr_head']}")
                ocr_sub_md = gr.Markdown(_vi['ocr_sub'])
                with gr.Row():
                    with gr.Column(scale=1):
                        in_img = gr.Image(
                            label=_vi["ocr_label"],
                            type="numpy",
                            height=450,
                            sources=["upload", "clipboard"],
                        )
                        btn_ocr = gr.Button(_vi["ocr_btn"], variant="primary", size="lg")
                    with gr.Column(scale=1):
                        with gr.Tabs():
                            with gr.Tab("Markdown"):
                                out_txt = gr.Textbox(
                                    label=_vi["ocr_rs_text"],
                                    lines=20,
                                    placeholder="Markdown content will appear here...",
                                    buttons=["copy"],
                                )
                                out_file = gr.File(label="Download Full Results (.zip)")
                            with gr.Tab("Source Image"):
                                out_img_viz = gr.Image(
                                    label=_vi["ocr_rs_img"],
                                    interactive=False,
                                )
    txt_footer = gr.Markdown(_vi['footer'])
    # EVENTS
    radio_menu.change(
        toggle_view,
        inputs=[radio_menu, lang_state],
        outputs=[group_pdf, group_ocr],
    )
    btn_pdf.click(
        convert_pdfs_to_word,
        inputs=[in_pdf, lang_state],
        outputs=out_word,
    )
    btn_ocr.click(
        run_ocr_func,
        inputs=[in_img, lang_state],
        outputs=[out_txt, out_img_viz, out_file],
    )
    # Order must match the tuple returned by change_lang.
    _lang_outputs = [
        lang_state, btn_lang, radio_menu,
        txt_title, txt_desc, txt_footer,
        pdf_head_md, pdf_sub_md, in_pdf, btn_pdf,
        ocr_head_md, ocr_sub_md, in_img, btn_ocr,
        out_txt, out_img_viz,
    ]
    btn_lang.click(change_lang, inputs=[lang_state], outputs=_lang_outputs)
if __name__ == "__main__":
    # Enable queuing, then serve on every network interface at port 7860
    # with the custom theme.
    queued_app = demo.queue()
    queued_app.launch(server_name="0.0.0.0", server_port=7860, theme=theme)