import os
# --- SYSTEM CONFIGURATION ---
# These variables are exported ahead of the paddleocr import further down.
# Values are strings because os.environ only stores strings.
# NOTE(review): the three *MKLDNN switches presumably disable oneDNN kernels and
# the last entry presumably quiets native logging -- confirm the exact variable
# names ("DN_ENABLE_MKLDNN", "CPP_MIN_LOG_LEVEL") against the Paddle docs.
os.environ.update({
    "FLAGS_use_mkldnn": "0",
    "FLAGS_enable_mkldnn": "0",
    "DN_ENABLE_MKLDNN": "0",
    "CPP_MIN_LOG_LEVEL": "3",
})
import logging
import re
import gradio as gr
from paddleocr import PaddleOCR
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import requests
# Keep the "ppocr" logger at WARNING so routine messages stay out of the console.
logging.getLogger("ppocr").setLevel(logging.WARNING)

print("Đang khởi tạo PaddleOCR (Coordinate Sync Mode)...")
try:
    # First attempt: Chinese model with the orientation / unwarping options
    # switched off.
    ocr = PaddleOCR(
        lang='ch',
        use_doc_orientation_classify=False,
        use_doc_unwarping=False,
        use_textline_orientation=False,
    )
except Exception as init_error:
    # If the constructor rejects those options, retry with only the language set.
    print(f"Lỗi khởi tạo: {init_error}. Chuyển về chế độ mặc định.")
    ocr = PaddleOCR(lang='ch')
print("Model đã sẵn sàng!")
# --- FONT DOWNLOAD ---
def check_and_download_font(
    font_path="./simfang.ttf",
    url="https://github.com/StellarCN/scp_zh/raw/master/fonts/SimFang.ttf",
    timeout=30,
):
    """Return the path of the overlay font, downloading it when it is missing.

    Args:
        font_path: Where the font file is expected / stored on disk.
        url: Location to fetch the font from when ``font_path`` does not exist.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``font_path`` when the file already exists or was downloaded
        successfully, otherwise ``None`` (callers fall back to a default font).
    """
    if os.path.exists(font_path):
        return font_path

    # Download into a sibling temp file and rename it at the end, so that an
    # interrupted or failed transfer never leaves a truncated file at
    # ``font_path`` that would be trusted on the next start.
    partial_path = f"{font_path}.part"
    try:
        response = requests.get(url, allow_redirects=True, timeout=timeout)
        # Without this an HTTP error page would be saved as if it were a font.
        response.raise_for_status()
        with open(partial_path, 'wb') as handle:
            handle.write(response.content)
        os.replace(partial_path, font_path)
    except (requests.RequestException, OSError) as exc:
        logging.getLogger(__name__).warning("Font download failed: %s", exc)
        try:
            os.remove(partial_path)
        except OSError:
            pass  # nothing was written, or it is already gone
        return None
    return font_path
# Resolved once when the module is loaded; None when the font is unavailable.
FONT_PATH = check_and_download_font()
# --- GENERIC DRAWING ---
def _as_sequence(value):
    """Return *value* as a plain list (unwrapping ndarray-likes), else None."""
    if hasattr(value, 'tolist'):
        value = value.tolist()
    return list(value) if isinstance(value, (list, tuple)) else None


def _parse_box(raw_box):
    """Normalise one box into a list of (x, y) tuples, or None if unrecognised.

    Accepts a sequence of points, or a flat ``[x0, y0, x1, y1]`` rectangle.
    """
    if hasattr(raw_box, 'tolist'):
        raw_box = raw_box.tolist()
    if not isinstance(raw_box, (list, tuple)) or not raw_box:
        return None
    # A plain list may still hold ndarray points / numpy scalars.
    if hasattr(raw_box[0], 'tolist'):
        raw_box = [v.tolist() if hasattr(v, 'tolist') else v for v in raw_box]
    head = raw_box[0]
    if isinstance(head, (list, tuple)):
        try:
            return [tuple(point) for point in raw_box]
        except TypeError:
            return None
    if len(raw_box) == 4 and isinstance(head, (int, float)):
        x0, y0, x1, y1 = raw_box
        return [(x0, y0), (x1, y0), (x1, y1), (x0, y1)]
    return None


def _structured_items(raw_data):
    """Pair ``rec_texts`` with polygons from a dict-style result.

    Returns a list of ``(box, text)`` tuples, or None when *raw_data* does not
    have that shape (the caller then uses the recursive fallback).
    """
    if not (isinstance(raw_data, list) and raw_data and isinstance(raw_data[0], dict)):
        return None
    result = raw_data[0]
    texts = _as_sequence(result.get('rec_texts'))
    if not texts:
        return None
    # 'rec_polys' is tried first because it is the polygon list that lines up
    # index-for-index with 'rec_texts'; 'dt_polys' may contain detections that
    # were dropped before recognition results were recorded.
    boxes = None
    for key in ('rec_polys', 'dt_polys', 'dt_boxes'):
        candidate = _as_sequence(result.get(key))
        if candidate:
            boxes = candidate
            break
    if not boxes:
        return None
    pairs = []
    for text, raw_box in zip(texts, boxes):
        box = _parse_box(raw_box)
        if box and text:
            pairs.append((box, text))
    return pairs


def _hunt_items(node, found):
    """Recursively search arbitrary nested data for (box, text) pairs."""
    if isinstance(node, dict):
        box = next(
            (_parse_box(node[k]) for k in ('points', 'box', 'dt_boxes', 'poly') if k in node),
            None,
        )
        text = next(
            (node[k] for k in ('transcription', 'text', 'rec_text', 'label') if k in node),
            None,
        )
        if box and text:
            found.append((box, text))
            return
        for value in node.values():
            _hunt_items(value, found)
    elif isinstance(node, (list, tuple)):
        # Legacy shape: [four_points, (text, score)] or [four_points, text].
        if len(node) == 2 and isinstance(node[0], list) and len(node[0]) == 4:
            box = _parse_box(node[0])
            text = node[1]
            if isinstance(text, (list, tuple)):
                text = text[0] if text else None
            if box and isinstance(text, str):
                found.append((box, text))
                return
        for child in node:
            _hunt_items(child, found)


def universal_draw(image, raw_data, font_path):
    """Draw red outlines and text labels for every OCR hit found in *raw_data*.

    Args:
        image: PIL image or numpy array to annotate; ``None`` is returned as-is.
        raw_data: OCR output. A list whose first element is a dict with
            ``rec_texts`` plus ``rec_polys`` / ``dt_polys`` / ``dt_boxes`` is
            read directly; anything else is searched recursively.
        font_path: TrueType font used for labels, or ``None`` for PIL's default.

    Returns:
        An annotated copy of the image (the input is never modified).
    """
    if image is None:
        return image
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)

    canvas = image.copy()
    draw = ImageDraw.Draw(canvas)

    font_size = 24
    try:
        font = ImageFont.truetype(font_path, font_size) if font_path else ImageFont.load_default()
    except Exception:
        # Unreadable or invalid font file: labels fall back to the default font.
        font = ImageFont.load_default()

    items_to_draw = _structured_items(raw_data)
    if items_to_draw is None:
        items_to_draw = []
        _hunt_items(raw_data, items_to_draw)

    for box, txt in items_to_draw:
        try:
            draw.polygon(box, outline="red", width=3)
            txt_x, txt_y = box[0]
            if hasattr(draw, "textbbox"):
                # Label sits on a filled strip anchored at the first box point.
                text_bbox = draw.textbbox((txt_x, txt_y), txt, font=font, anchor="lb")
                draw.rectangle(text_bbox, fill="red")
                draw.text((txt_x, txt_y), txt, fill="white", font=font, anchor="lb")
            else:
                draw.text((txt_x, txt_y - font_size), txt, fill="white", font=font)
        except Exception:
            # Best effort: one malformed box or unrenderable label must not
            # prevent the remaining items from being drawn.
            continue
    return canvas
# --- TEXT PROCESSING ---
def deep_extract_text(data):
    """Collect every non-blank string reachable from *data*, in traversal order.

    Lists, tuples and dict values are walked recursively; any other object that
    exposes ``__dict__`` is walked through that attribute dictionary. Strings
    are returned unstripped.

    Containers that (directly or indirectly) contain themselves are only
    entered once per path, so self-referential data no longer ends in a
    ``RecursionError``. Objects that are merely shared between siblings are
    still visited each time they appear.
    """
    collected = []
    on_path = set()  # ids of the containers currently being descended into

    def walk(node):
        if isinstance(node, str):
            if node.strip():
                collected.append(node)
            return
        if isinstance(node, (list, tuple)):
            children = node
        elif isinstance(node, dict):
            children = node.values()
        elif hasattr(node, '__dict__'):
            children = (node.__dict__,)
        else:
            return
        marker = id(node)
        if marker in on_path:
            return  # cycle: this container is already an ancestor
        on_path.add(marker)
        try:
            for child in children:
                walk(child)
        finally:
            on_path.discard(marker)

    walk(data)
    return collected
def clean_text_result(text_list):
    """Strip each string and drop entries that look like noise.

    An entry is discarded when, after stripping, it
      * is shorter than two characters and has no character in U+4E00..U+9FFF,
      * ends (case-insensitively) with a known file suffix,
      * equals (case-insensitively) one of the blocked words, or
      * contains no word character at all.
    The surviving strings are returned in their original order.
    """
    blocked_words = ['min', 'max', 'general', 'header', 'footer', 'structure']
    file_suffixes = ('.ttf', '.json', '.pdparams', '.yml', '.log')

    def in_cjk_range(ch):
        return '\u4e00' <= ch <= '\u9fff'

    def worth_keeping(candidate):
        if len(candidate) < 2 and not any(map(in_cjk_range, candidate)):
            return False
        lowered = candidate.lower()
        if lowered.endswith(file_suffixes) or lowered in blocked_words:
            return False
        return re.search(r'[\w\u4e00-\u9fff]', candidate) is not None

    stripped = (raw.strip() for raw in text_list)
    return [entry for entry in stripped if worth_keeping(entry)]
# --- MAIN PREDICT ---
def predict(image):
    """Run OCR on *image* and build the three outputs shown in the UI.

    Args:
        image: PIL image (or array convertible with ``Image.fromarray``), or
            ``None`` when nothing was uploaded.

    Returns:
        A tuple ``(annotated_image, text_output, debug_info)``. On failure the
        untouched input image, an error message and the traceback are returned
        instead of raising.
    """
    if image is None:
        return None, "Chưa có ảnh.", "No Data"
    try:
        # Prepare the input image.
        source_pil = image if isinstance(image, Image.Image) else Image.fromarray(image)
        original_pil = source_pil.copy()
        # Normalise to three channels: RGBA / palette / greyscale uploads would
        # otherwise reach the model as 4-channel or 2-D arrays.
        image_np = np.array(source_pil.convert("RGB"))

        # 1. OCR
        raw_result = ocr.predict(image_np)
        # Downstream code expects a list of per-image results.
        if isinstance(raw_result, dict):
            raw_result = [raw_result]
        elif isinstance(raw_result, tuple) or hasattr(raw_result, "__next__"):
            # Generators / iterators must be consumed, not wrapped.
            raw_result = list(raw_result)
        elif not isinstance(raw_result, list):
            raw_result = [raw_result]

        # 2. Choose the image to draw on. When the result carries a
        #    'doc_preprocessor_res' -> 'output_img' array, draw on that image
        #    so that the box coordinates match what is displayed.
        target_image_for_drawing = original_pil
        used_preprocessed = False
        first_result = raw_result[0] if raw_result else None
        if isinstance(first_result, dict):
            proc_res = first_result.get('doc_preprocessor_res')
            output_img = proc_res.get('output_img') if isinstance(proc_res, dict) else None
            if isinstance(output_img, np.ndarray):
                print("Phát hiện ảnh đã qua xử lý hình học. Đang đồng bộ tọa độ...")
                target_image_for_drawing = Image.fromarray(output_img)
                used_preprocessed = True

        # 3. Draw on the selected image.
        annotated_image = universal_draw(target_image_for_drawing, raw_result, FONT_PATH)

        # 4. Text extraction.
        all_texts = deep_extract_text(raw_result)
        final_texts = clean_text_result(all_texts)
        text_output = "\n".join(final_texts) if final_texts else "Không tìm thấy văn bản."

        # Debug info. An explicit flag is used instead of comparing the two PIL
        # images, which would compare their full pixel buffers.
        debug_str = str(raw_result)[:1000]
        source_label = 'Preprocessed' if used_preprocessed else 'Original'
        debug_info = f"Used Image Source: {source_label}\nData Preview:\n{debug_str}..."
        return annotated_image, text_output, debug_info
    except Exception as e:
        # UI boundary: surface the failure in the text/debug tabs.
        import traceback
        return image, f"Lỗi: {str(e)}", traceback.format_exc()
# --- USER INTERFACE ---
# Two-column layout: the left column holds the upload widget and the run button,
# the right column holds three tabs that receive predict()'s three return values.
with gr.Blocks(title="PaddleOCR Perfect Overlay") as iface:
    gr.Markdown("## PaddleOCR Chinese - High Precision Overlay")
    with gr.Row():
        with gr.Column():
            # type="pil" means predict() is handed a PIL image.
            input_img = gr.Image(type="pil", label="Input Image")
            submit_btn = gr.Button("RUN OCR", variant="primary")
        with gr.Column():
            with gr.Tabs():
                # Tab 1: annotated image with boxes and labels.
                with gr.TabItem("🖼️ Kết quả Khớp Tọa Độ"):
                    output_img = gr.Image(type="pil", label="Overlay Result")
                # Tab 2: recognised text, one entry per line.
                with gr.TabItem("📝 Văn bản"):
                    output_txt = gr.Textbox(label="Text Content", lines=15)
                # Tab 3: debug preview, or the traceback on failure.
                with gr.TabItem("🐞 Debug"):
                    output_debug = gr.Textbox(label="Debug Info", lines=15)
    # The order of `outputs` matches the order of predict()'s returned tuple.
    submit_btn.click(
        fn=predict,
        inputs=input_img,
        outputs=[output_img, output_txt, output_debug],
        api_name="predict"
    )
if __name__ == "__main__":
    # Listen on every network interface, port 7860, when run as a script.
    iface.launch(server_name="0.0.0.0", server_port=7860)