| | import os |
| |
|
| | |
# Exported ahead of the third-party imports that follow, so the values are
# already in the process environment when those modules are loaded.
# NOTE(review): the names suggest they disable MKL-DNN and quieten native
# logging -- confirm each key against the Paddle documentation.
os.environ.update(
    {
        "FLAGS_use_mkldnn": "0",
        "FLAGS_enable_mkldnn": "0",
        "DN_ENABLE_MKLDNN": "0",
        "CPP_MIN_LOG_LEVEL": "3",
    }
)
| |
|
| | import logging |
| | import re |
| | import gradio as gr |
| | from paddleocr import PaddleOCR |
| | from PIL import Image, ImageDraw, ImageFont |
| | import numpy as np |
| | import requests |
| |
|
| | |
# Only WARNING and above get through on the "ppocr" logger.
logging.getLogger("ppocr").setLevel(logging.WARNING)

print("Đang khởi tạo PaddleOCR (Coordinate Sync Mode)...")

# Preferred configuration: Chinese model with the three optional
# orientation / unwarping switches turned off.
_PREFERRED_OCR_OPTIONS = dict(
    use_textline_orientation=False,
    use_doc_orientation_classify=False,
    use_doc_unwarping=False,
    lang='ch',
)

try:
    ocr = PaddleOCR(**_PREFERRED_OCR_OPTIONS)
except Exception as init_error:
    # Any failure with the preferred options falls back to a plain
    # language-only construction.
    print(f"Lỗi khởi tạo: {init_error}. Chuyển về chế độ mặc định.")
    ocr = PaddleOCR(lang='ch')

print("Model đã sẵn sàng!")
| |
|
| | |
def check_and_download_font():
    """Return the path of the local SimFang font, downloading it if absent.

    Returns:
        ``"./simfang.ttf"`` when the file already exists or was downloaded
        successfully; ``None`` when the download or the write fails.
    """
    font_path = "./simfang.ttf"
    if os.path.exists(font_path):
        return font_path

    url = "https://github.com/StellarCN/scp_zh/raw/master/fonts/SimFang.ttf"
    # Download into a side file first so an interrupted or failed transfer
    # never leaves a truncated font at the final path.
    partial_path = font_path + ".part"
    try:
        # A timeout keeps start-up from hanging on a stalled connection.
        response = requests.get(url, allow_redirects=True, timeout=30)
        # Without this an HTTP error page would be saved as the "font" and
        # then be reused on every later run because the file exists.
        response.raise_for_status()
        with open(partial_path, 'wb') as f:
            f.write(response.content)
        os.replace(partial_path, font_path)
    except (requests.RequestException, OSError) as exc:
        logging.getLogger(__name__).warning("Font download failed: %s", exc)
        try:
            os.remove(partial_path)
        except OSError:
            # Nothing to clean up (the side file was never created).
            pass
        return None
    return font_path
| |
|
# Resolved once when the module is loaded; None when no font file could be
# obtained.
FONT_PATH = check_and_download_font()
| |
|
| | |
def _parse_box(raw_box):
    """Normalise one OCR box into a list of ``(x, y)`` corner tuples.

    Accepts a polygon given as a list of point lists, or a flat
    ``[x1, y1, x2, y2]`` rectangle; objects exposing ``tolist()`` are
    converted first. Returns ``None`` for anything else.
    """
    try:
        if hasattr(raw_box, 'tolist'):
            raw_box = raw_box.tolist()
        if len(raw_box) > 0 and isinstance(raw_box[0], list):
            return [tuple(point) for point in raw_box]
        if len(raw_box) == 4 and isinstance(raw_box[0], (int, float)):
            left, top = raw_box[0], raw_box[1]
            right, bottom = raw_box[2], raw_box[3]
            return [(left, top), (right, top), (right, bottom), (left, bottom)]
        return None
    except (TypeError, KeyError, IndexError, ValueError):
        return None


def _hunt_ocr_items(data, items):
    """Recursively append every ``(box, text)`` pair found in ``data``."""
    if isinstance(data, dict):
        box = None
        text = None
        for key in ('points', 'box', 'dt_boxes', 'poly'):
            if key in data:
                box = _parse_box(data[key])
                break
        for key in ('transcription', 'text', 'rec_text', 'label'):
            if key in data:
                text = data[key]
                break
        if box and text:
            items.append((box, text))
            return
        for value in data.values():
            _hunt_ocr_items(value, items)
    elif isinstance(data, (list, tuple)):
        # Classic ``[box, (text, score)]`` / ``[box, text]`` pair.
        if len(data) == 2 and isinstance(data[0], list) and len(data[0]) == 4:
            box = _parse_box(data[0])
            txt_obj = data[1]
            if isinstance(txt_obj, (list, tuple)):
                # An empty sequence has no text; do not index into it.
                text = txt_obj[0] if txt_obj else None
            else:
                text = txt_obj
            if box and isinstance(text, str):
                items.append((box, text))
                return
        for element in data:
            _hunt_ocr_items(element, items)


def _collect_ocr_items(raw_data):
    """Return the ``(box, text)`` pairs to draw for ``raw_data``."""
    items = []

    # Fast path: a list whose first element is a dict carrying parallel
    # ``rec_texts`` and box lists.
    if isinstance(raw_data, list) and len(raw_data) > 0 and isinstance(raw_data[0], dict):
        data_dict = raw_data[0]
        texts = data_dict.get('rec_texts')
        # ``rec_polys`` is preferred: per the PaddleOCR result documentation
        # it holds the boxes kept after recognition filtering, so it lines up
        # index-for-index with ``rec_texts``; ``dt_polys`` may contain extra
        # boxes and would shift every later label.
        boxes = None
        for key in ('rec_polys', 'dt_polys', 'dt_boxes'):
            if data_dict.get(key) is not None:
                boxes = data_dict[key]
                break

        # Type checks come first so array-valued fields are never evaluated
        # for truthiness.
        if isinstance(texts, list) and isinstance(boxes, list) and texts and boxes:
            for txt, raw_box in zip(texts, boxes):
                box = _parse_box(raw_box)
                if box and txt:
                    items.append((box, txt))
            return items

    # Fallback: walk the whole structure looking for box/text pairs.
    _hunt_ocr_items(raw_data, items)
    return items


def universal_draw(image, raw_data, font_path):
    """Draw OCR boxes and their text onto a copy of ``image``.

    Args:
        image: PIL image or numpy array; ``None`` is returned unchanged.
        raw_data: OCR output in any of the layouts understood by
            ``_collect_ocr_items``.
        font_path: path of a TrueType font, or a falsy value to use
            Pillow's default font.

    Returns:
        A new PIL image with a red outline and a white-on-red label per
        recognised item, or ``None`` when ``image`` is ``None``.
    """
    if image is None:
        return image

    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)

    canvas = image.copy()
    draw = ImageDraw.Draw(canvas)

    font_size = 24
    try:
        font = ImageFont.truetype(font_path, font_size) if font_path else ImageFont.load_default()
    except (OSError, ValueError):
        # Missing or unreadable font file: fall back to the built-in font.
        font = ImageFont.load_default()

    for box, txt in _collect_ocr_items(raw_data):
        try:
            draw.polygon(box, outline="red", width=3)

            # Label is anchored at the first corner of the box.
            txt_x, txt_y = box[0]
            if hasattr(draw, "textbbox"):
                text_bbox = draw.textbbox((txt_x, txt_y), txt, font=font, anchor="lb")
                draw.rectangle(text_bbox, fill="red")
                draw.text((txt_x, txt_y), txt, fill="white", font=font, anchor="lb")
            else:
                draw.text((txt_x, txt_y - font_size), txt, fill="white", font=font)
        except Exception:
            # Drawing is best-effort per item: one malformed box or an
            # unsupported font feature must not lose the remaining overlays.
            continue

    return canvas
| |
|
| | |
def deep_extract_text(data):
    """Collect every non-blank string nested anywhere inside ``data``.

    Lists, tuples and dict values are walked depth-first in order; any other
    object that has a ``__dict__`` is walked through that ``__dict__``.
    Strings are returned exactly as found (not stripped); strings that are
    empty or whitespace-only are skipped.
    """
    def _strings(node):
        if isinstance(node, str):
            if node.strip():
                yield node
            return

        if isinstance(node, (list, tuple)):
            children = node
        elif isinstance(node, dict):
            children = node.values()
        elif hasattr(node, '__dict__'):
            # Re-enter with the attribute mapping itself as the only child.
            children = (node.__dict__,)
        else:
            return

        for child in children:
            yield from _strings(child)

    return list(_strings(data))
| |
|
def clean_text_result(text_list):
    """Strip each string and drop the ones that look like noise.

    A stripped string is discarded when it
      * is shorter than two characters and contains no character in the
        range U+4E00..U+9FFF,
      * ends (case-insensitively) with one of the listed file suffixes,
      * equals (case-insensitively) one of the listed noise words, or
      * contains no word character at all.

    Returns the surviving stripped strings in their original order.
    """
    noise_words = ('min', 'max', 'general', 'header', 'footer', 'structure')
    file_suffixes = ('.ttf', '.json', '.pdparams', '.yml', '.log')

    def _is_noise(candidate):
        if len(candidate) < 2 and not any(u'\u4e00' <= ch <= u'\u9fff' for ch in candidate):
            return True
        lowered = candidate.lower()
        if lowered.endswith(file_suffixes):
            return True
        if lowered in noise_words:
            return True
        return re.search(r'[\w\u4e00-\u9fff]', candidate) is None

    stripped = (entry.strip() for entry in text_list)
    return [candidate for candidate in stripped if not _is_noise(candidate)]
| |
|
| | |
def predict(image):
    """Run OCR on ``image`` and build the three Gradio outputs.

    Args:
        image: PIL image or numpy array from the UI, or ``None``.

    Returns:
        ``(annotated_image, text_output, debug_info)``. When ``image`` is
        ``None`` a fixed placeholder triple is returned. On any failure the
        input image is returned together with the error message and the
        formatted traceback.
    """
    if image is None:
        return None, "Chưa có ảnh.", "No Data"

    try:
        original_pil = image.copy() if isinstance(image, Image.Image) else Image.fromarray(image).copy()

        # Feed the recogniser a plain 3-channel array: RGBA, greyscale or
        # palette uploads are converted instead of being passed through as-is.
        ocr_input = original_pil if original_pil.mode == "RGB" else original_pil.convert("RGB")
        image_np = np.array(ocr_input)

        raw_result = ocr.predict(image_np)
        if not isinstance(raw_result, list):
            raw_result = [raw_result]

        # Draw on the original unless the result carries a preprocessed
        # image, in which case the boxes are expressed in that image's
        # coordinates.
        target_image_for_drawing = original_pil
        used_preprocessed = False

        first_result = raw_result[0] if raw_result else None
        if isinstance(first_result, dict):
            proc_res = first_result.get('doc_preprocessor_res')
            if proc_res is not None and 'output_img' in proc_res:
                numpy_img = proc_res['output_img']
                if numpy_img is not None:
                    print("Phát hiện ảnh đã qua xử lý hình học. Đang đồng bộ tọa độ...")
                    target_image_for_drawing = Image.fromarray(numpy_img)
                    used_preprocessed = True

        annotated_image = universal_draw(target_image_for_drawing, raw_result, FONT_PATH)

        all_texts = deep_extract_text(raw_result)
        final_texts = clean_text_result(all_texts)
        text_output = "\n".join(final_texts) if final_texts else "Không tìm thấy văn bản."

        debug_str = str(raw_result)[:1000]
        # A flag is used rather than comparing the two images: PIL equality
        # compares metadata and pixel bytes, not which image was chosen.
        debug_info = f"Used Image Source: {'Preprocessed' if used_preprocessed else 'Original'}\nData Preview:\n{debug_str}..."

        return annotated_image, text_output, debug_info

    except Exception as e:
        # UI boundary: surface the failure in the text and debug panes
        # instead of letting the request crash.
        import traceback
        return image, f"Lỗi: {str(e)}", traceback.format_exc()
| |
|
| | |
# Two-column layout: upload and trigger on the left, one tab per predict()
# output on the right.
with gr.Blocks(title="PaddleOCR Perfect Overlay") as iface:
    gr.Markdown("## PaddleOCR Chinese - High Precision Overlay")

    with gr.Row():
        # Left column: input image and the run button.
        with gr.Column():
            input_img = gr.Image(type="pil", label="Input Image")
            submit_btn = gr.Button("RUN OCR", variant="primary")

        # Right column: annotated image, extracted text, debug dump.
        with gr.Column(), gr.Tabs():
            with gr.TabItem("🖼️ Kết quả Khớp Tọa Độ"):
                output_img = gr.Image(type="pil", label="Overlay Result")
            with gr.TabItem("📝 Văn bản"):
                output_txt = gr.Textbox(label="Text Content", lines=15)
            with gr.TabItem("🐞 Debug"):
                output_debug = gr.Textbox(label="Debug Info", lines=15)

    # The button feeds the uploaded image to predict() and spreads its
    # three return values over the three output components, in order.
    submit_btn.click(
        fn=predict,
        inputs=input_img,
        outputs=[output_img, output_txt, output_debug],
        api_name="predict",
    )


if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)