Spaces:

ranbac
/

PaddleOCR

Sleeping

App Files Files Community

PaddleOCR / app.py

ranbac

Update app.py

cfc7ec8 verified 15 days ago

raw

history blame contribute delete

8.85 kB

	import os

	# --- SYSTEM CONFIGURATION ---
	# These flags are exported before paddleocr is imported further down, so
	# they are already in the process environment when that import runs.
	# NOTE(review): "DN_ENABLE_MKLDNN" and "CPP_MIN_LOG_LEVEL" look like they
	# may be truncated variable names -- confirm that something reads them.
	os.environ.update(
	    {
	        "FLAGS_use_mkldnn": "0",
	        "FLAGS_enable_mkldnn": "0",
	        "DN_ENABLE_MKLDNN": "0",
	        "CPP_MIN_LOG_LEVEL": "3",
	    }
	)

	import logging
	import re
	import gradio as gr
	from paddleocr import PaddleOCR
	from PIL import Image, ImageDraw, ImageFont
	import numpy as np
	import requests

	# Silence everything below WARNING from the "ppocr" logger
	logging.getLogger("ppocr").setLevel(logging.WARNING)

	print("Đang khởi tạo PaddleOCR (Coordinate Sync Mode)...")

	try:
	    # Preferred setup: document orientation classification, document
	    # unwarping and text-line orientation are all switched off.
	    ocr = PaddleOCR(
	        lang='ch',
	        use_doc_unwarping=False,
	        use_doc_orientation_classify=False,
	        use_textline_orientation=False,
	    )
	except Exception as init_error:
	    # Fall back to a language-only constructor call (presumably for
	    # PaddleOCR versions that reject the keyword arguments above).
	    print(f"Lỗi khởi tạo: {init_error}. Chuyển về chế độ mặc định.")
	    ocr = PaddleOCR(lang='ch')

	print("Model đã sẵn sàng!")

	# --- FONT DOWNLOAD ---
	def check_and_download_font():
	    """Return the local path of the SimFang font, downloading it if missing.

	    Returns:
	        ``"./simfang.ttf"`` when the file already exists or was downloaded
	        successfully, otherwise ``None``.
	    """
	    font_path = "./simfang.ttf"
	    if os.path.exists(font_path):
	        return font_path

	    url = "https://github.com/StellarCN/scp_zh/raw/master/fonts/SimFang.ttf"
	    partial_path = font_path + ".part"
	    try:
	        # A timeout keeps start-up from hanging forever on a stalled connection.
	        response = requests.get(url, allow_redirects=True, timeout=30)
	        # Refuse HTTP error pages instead of saving them as a "font".
	        response.raise_for_status()
	        # Write to a side file and rename afterwards, so an interrupted
	        # download never leaves a truncated file at font_path that the
	        # os.path.exists() check above would trust on the next start.
	        with open(partial_path, 'wb') as f:
	            f.write(response.content)
	        os.replace(partial_path, font_path)
	    except (requests.RequestException, OSError):
	        # Best effort: the caller treats None as "no custom font available".
	        try:
	            os.remove(partial_path)
	        except OSError:
	            pass
	        return None
	    return font_path

	# Resolved once at import time; None means the font file could not be obtained.
	FONT_PATH = check_and_download_font()

	# --- GENERIC DRAWING ROUTINE ---
	def _parse_box(b):
	    """Normalise one detection box into a list of ``(x, y)`` tuples.

	    Accepts a sequence of points (a polygon) or a flat ``[x1, y1, x2, y2]``
	    rectangle; objects exposing ``tolist()`` are converted first.
	    Returns ``None`` for any other shape.
	    """
	    try:
	        if hasattr(b, 'tolist'):
	            b = b.tolist()
	        if len(b) > 0 and isinstance(b[0], (list, tuple)):
	            return [tuple(p) for p in b]
	        if len(b) == 4 and isinstance(b[0], (int, float)):
	            # Expand the two corners into the four polygon vertices.
	            return [(b[0], b[1]), (b[2], b[1]), (b[2], b[3]), (b[0], b[3])]
	        return None
	    except (TypeError, IndexError, KeyError, ValueError):
	        return None


	def _first_non_empty(mapping, keys):
	    """Return the first non-empty sized value stored under ``keys``, else None."""
	    for key in keys:
	        value = mapping.get(key)
	        if value is None:
	            continue
	        # Convert array-like values first so the emptiness test below never
	        # hits the ambiguous truth value of a multi-element array.
	        if hasattr(value, 'tolist'):
	            value = value.tolist()
	        try:
	            if len(value) > 0:
	                return value
	        except TypeError:
	            continue
	    return None


	def _collect_items(raw_data):
	    """Extract ``(box, text)`` pairs from an OCR result of unknown layout."""
	    items = []

	    # Preferred layout: a list whose first element is a dict holding
	    # parallel 'rec_texts' and box lists.
	    if isinstance(raw_data, list) and len(raw_data) > 0 and isinstance(raw_data[0], dict):
	        data_dict = raw_data[0]
	        texts = data_dict.get('rec_texts')
	        # 'rec_polys' is tried first: it is the box list reported alongside
	        # 'rec_texts', whereas 'dt_polys' may also contain boxes whose text
	        # was filtered out, which would shift every label.
	        boxes = _first_non_empty(
	            data_dict, ('rec_polys', 'dt_polys', 'rec_boxes', 'dt_boxes')
	        )
	        if isinstance(texts, list) and isinstance(boxes, list) and texts and boxes:
	            for txt, raw_box in zip(texts, boxes):
	                box = _parse_box(raw_box)
	                if box and txt:
	                    items.append((box, txt))
	            return items

	    # Fallback: walk the whole structure looking for anything box/text shaped.
	    def hunt(data):
	        if isinstance(data, dict):
	            box = None
	            text = None
	            for k in ('points', 'box', 'dt_boxes', 'poly'):
	                if k in data:
	                    box = _parse_box(data[k])
	                    break
	            for k in ('transcription', 'text', 'rec_text', 'label'):
	                if k in data:
	                    text = data[k]
	                    break
	            if box and text:
	                items.append((box, text))
	                return
	            for v in data.values():
	                hunt(v)
	        elif isinstance(data, (list, tuple)):
	            # Pair layout: [four_points, text] or [four_points, (text, ...)].
	            if len(data) == 2 and isinstance(data[0], list) and len(data[0]) == 4:
	                box = _parse_box(data[0])
	                txt_obj = data[1]
	                if isinstance(txt_obj, (list, tuple)) and txt_obj:
	                    text = txt_obj[0]
	                else:
	                    text = txt_obj
	                if box and isinstance(text, str):
	                    items.append((box, text))
	                    return
	            for item in data:
	                hunt(item)

	    hunt(raw_data)
	    return items


	def universal_draw(image, raw_data, font_path):
	    """Draw red boxes and text labels for every OCR hit onto a copy of ``image``.

	    Args:
	        image: PIL image or numpy array; ``None`` is returned unchanged.
	        raw_data: OCR output. Either a list whose first element is a dict
	            with 'rec_texts' plus a box list, or any nesting of
	            dicts/lists containing box/text pairs.
	        font_path: Path of a TrueType font, or a falsy value to use
	            Pillow's default font.

	    Returns:
	        A new PIL image with the overlay; the input image is not modified.
	    """
	    if image is None:
	        return image

	    # Make sure we are working with a PIL image
	    if isinstance(image, np.ndarray):
	        image = Image.fromarray(image)

	    # Draw on a copy
	    canvas = image.copy()
	    draw = ImageDraw.Draw(canvas)

	    font_size = 24
	    try:
	        font = ImageFont.truetype(font_path, font_size) if font_path else ImageFont.load_default()
	    except (OSError, ValueError):
	        # Unreadable or corrupt font file: fall back to the built-in font.
	        font = ImageFont.load_default()

	    for box, txt in _collect_items(raw_data):
	        try:
	            # Red outline
	            draw.polygon(box, outline="red", width=3)
	            # Label anchored at the first vertex of the box
	            txt_x, txt_y = box[0]
	            if hasattr(draw, "textbbox"):
	                text_bbox = draw.textbbox((txt_x, txt_y), txt, font=font, anchor="lb")
	                draw.rectangle(text_bbox, fill="red")
	                draw.text((txt_x, txt_y), txt, fill="white", font=font, anchor="lb")
	            else:
	                draw.text((txt_x, txt_y - font_size), txt, fill="white", font=font)
	        except Exception:
	            # Best effort: one label that cannot be rendered must not
	            # abort the remaining ones.
	            continue

	    return canvas

	# --- TEXT HANDLING ---
	def deep_extract_text(data):
	    """Recursively collect every non-blank string found inside ``data``.

	    Lists, tuples, dict values and the ``__dict__`` of arbitrary objects are
	    traversed depth-first. Strings are returned unmodified (not stripped),
	    in traversal order.

	    Args:
	        data: Any nesting of strings, lists, tuples, dicts and objects.

	    Returns:
	        A list of the strings found; whitespace-only strings are omitted.
	    """
	    found_texts = []
	    # ids of the containers on the current traversal path. Guarding on the
	    # path (not on everything ever visited) breaks reference cycles, which
	    # previously ended in RecursionError, while a container that is merely
	    # referenced twice still contributes its strings twice.
	    on_path = set()

	    def walk(node):
	        if isinstance(node, str):
	            if node.strip():
	                found_texts.append(node)
	            return
	        if isinstance(node, (list, tuple)):
	            children = node
	        elif isinstance(node, dict):
	            children = node.values()
	        elif hasattr(node, '__dict__'):
	            children = (node.__dict__,)
	        else:
	            return

	        marker = id(node)
	        if marker in on_path:
	            return
	        on_path.add(marker)
	        try:
	            for child in children:
	                walk(child)
	        finally:
	            on_path.discard(marker)

	    walk(data)
	    return found_texts

	def clean_text_result(text_list):
	    """Strip each string and drop the entries that look like noise.

	    An entry is dropped when, after stripping, it
	    - is shorter than two characters and holds no character in the
	      U+4E00..U+9FFF range,
	    - ends (case-insensitively) with one of the listed file suffixes,
	    - equals (case-insensitively) one of the listed words, or
	    - contains no word character at all.

	    Args:
	        text_list: Iterable of strings.

	    Returns:
	        The stripped strings that were kept, in their original order.
	    """
	    ignored_words = ['min', 'max', 'general', 'header', 'footer', 'structure']
	    ignored_suffixes = ('.ttf', '.json', '.pdparams', '.yml', '.log')

	    def is_noise(candidate):
	        has_cjk = any(u'\u4e00' <= ch <= u'\u9fff' for ch in candidate)
	        if len(candidate) < 2 and not has_cjk:
	            return True
	        lowered = candidate.lower()
	        if lowered.endswith(ignored_suffixes) or lowered in ignored_words:
	            return True
	        return re.search(r'[\w\u4e00-\u9fff]', candidate) is None

	    stripped_items = (raw.strip() for raw in text_list)
	    return [item for item in stripped_items if not is_noise(item)]

	# --- MAIN PREDICT ---
	def predict(image):
	    """Run OCR on ``image`` and build the overlay, the text and the debug info.

	    Args:
	        image: PIL image or numpy array; ``None`` when nothing was supplied.

	    Returns:
	        A 3-tuple ``(annotated_image, text_output, debug_info)``. If any step
	        raises, the tuple is instead the untouched input image, an error
	        message and the formatted traceback.
	    """
	    if image is None:
	        return None, "Chưa có ảnh.", "No Data"

	    try:
	        # Prepare the input
	        original_pil = image.copy() if isinstance(image, Image.Image) else Image.fromarray(image).copy()
	        image_np = np.array(image)

	        # 1. OCR
	        raw_result = ocr.predict(image_np)

	        # Normalise to a list for the drawing logic. An iterator/generator
	        # has to be materialised -- wrapping it as [generator] would hide
	        # its contents from everything that follows.
	        if hasattr(raw_result, "__next__"):
	            raw_result = list(raw_result)
	        elif not isinstance(raw_result, list):
	            raw_result = [raw_result]

	        # 2. Choose the image to draw on: if the result carries a
	        # preprocessed output image, use it so that the box coordinates
	        # and the pixels belong to the same image.
	        target_image_for_drawing = original_pil
	        first_result = raw_result[0] if raw_result else None
	        if isinstance(first_result, dict) and 'doc_preprocessor_res' in first_result:
	            proc_res = first_result['doc_preprocessor_res']
	            if 'output_img' in proc_res:
	                print("Phát hiện ảnh đã qua xử lý hình học. Đang đồng bộ tọa độ...")
	                numpy_img = proc_res['output_img']
	                target_image_for_drawing = Image.fromarray(numpy_img)

	        # 3. Draw on the selected image
	        annotated_image = universal_draw(target_image_for_drawing, raw_result, FONT_PATH)

	        # 4. Text: collect every string in the result, then filter the noise
	        all_texts = deep_extract_text(raw_result)
	        final_texts = clean_text_result(all_texts)
	        text_output = "\n".join(final_texts) if final_texts else "Không tìm thấy văn bản."

	        # Debug info. Identity, not "!=": comparing two PIL images with
	        # "!=" compares their pixel data, which is slow and would report
	        # "Original" whenever the preprocessed pixels happen to be equal.
	        used_preprocessed = target_image_for_drawing is not original_pil
	        source_label = 'Preprocessed' if used_preprocessed else 'Original'
	        debug_str = str(raw_result)[:1000]
	        debug_info = f"Used Image Source: {source_label}\nData Preview:\n{debug_str}..."

	        return annotated_image, text_output, debug_info

	    except Exception as e:
	        # UI boundary: report the failure in the output fields instead of raising.
	        import traceback
	        return image, f"Lỗi: {str(e)}", traceback.format_exc()

	# --- USER INTERFACE ---
	with gr.Blocks(title="PaddleOCR Perfect Overlay") as iface:
	    gr.Markdown("## PaddleOCR Chinese - High Precision Overlay")

	    with gr.Row():
	        # First column: image input and the run button
	        with gr.Column():
	            input_img = gr.Image(label="Input Image", type="pil")
	            submit_btn = gr.Button("RUN OCR", variant="primary")

	        # Second column: one tab for each of the three values predict returns
	        with gr.Column(), gr.Tabs():
	            with gr.TabItem("🖼️ Kết quả Khớp Tọa Độ"):
	                output_img = gr.Image(label="Overlay Result", type="pil")
	            with gr.TabItem("📝 Văn bản"):
	                output_txt = gr.Textbox(lines=15, label="Text Content")
	            with gr.TabItem("🐞 Debug"):
	                output_debug = gr.Textbox(lines=15, label="Debug Info")

	    # Wire the button: predict(input_img) -> (overlay image, text, debug info)
	    submit_btn.click(
	        predict,
	        input_img,
	        [output_img, output_txt, output_debug],
	        api_name="predict",
	    )

	if __name__ == "__main__":
	    # Start the Gradio server on 0.0.0.0:7860 only when run as a script.
	    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
	    iface.launch(**launch_options)