Spaces:

ranbac
/

PaddleOCR

Sleeping

App Files Files Community

ranbac committed 24 days ago

Commit

469e4bb

verified ·

1 Parent(s): ac29fc5

Update app.py

Browse files

Files changed (1) hide show

app.py +180 -136

app.py CHANGED Viewed

@@ -1,176 +1,220 @@
 import os
-import cv2
-import numpy as np
-import requests
-import gradio as gr
-from paddleocr import PaddleOCR
-from PIL import Image, ImageDraw, ImageFont
-# --- 1. CẤU HÌNH HỆ THỐNG ---
 os.environ["FLAGS_use_mkldnn"] = "0"
 os.environ["FLAGS_enable_mkldnn"] = "0"
 os.environ["CPP_MIN_LOG_LEVEL"] = "3"
-# --- 2. TẢI FONT CHỮ TRUNG QUỐC ---
 def check_and_download_font():
     font_path = "./simfang.ttf"
     if not os.path.exists(font_path):
-        print("Đang tải font SimFang để hiển thị tiếng Trung...")
         try:
             url = "https://github.com/StellarCN/scp_zh/raw/master/fonts/SimFang.ttf"
             r = requests.get(url, allow_redirects=True)
             with open(font_path, 'wb') as f:
                 f.write(r.content)
-            print("Đã tải font thành công!")
-        except Exception as e:
-            print(f"Lỗi tải font: {e}")
             return None
     return font_path
 FONT_PATH = check_and_download_font()
-# --- 3. KHỞI TẠO PADDLE OCR (TỐI ƯU CHO CHỮ VIẾT TAY) ---
-print("Đang khởi tạo PaddleOCR...")
-# det_db_thresh=0.3: Giảm ngưỡng để bắt nét mỏng
-# use_angle_cls=True: Tự động xoay ảnh nếu chữ bị nghiêng
-ocr = PaddleOCR(
-    use_angle_cls=True,
-    lang='ch',
-    det_db_thresh=0.3,
-    det_db_box_thresh=0.5,
-)
-print("Model đã sẵn sàng!")
-# --- 4. HÀM XỬ LÝ ẢNH NÂNG CAO ---
-def preprocess_red_handwriting(pil_image):
-    """
-    Kỹ thuật tách mực đỏ trên giấy kẻ:
-    - Mực đỏ hấp thụ ánh sáng xanh lá (Green).
-    - Trong kênh Green, mực đỏ sẽ rất tối (gần đen), còn nền trắng/kẻ xanh sẽ sáng.
-    """
-    # Convert PIL sang OpenCV
-    img = np.array(pil_image)
-    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
-    # 1. Tách lấy kênh Green (Kênh hiệu quả nhất với mực đỏ)
-    b, g, r = cv2.split(img)
-    # 2. Tăng tương phản cục bộ (CLAHE) để làm rõ nét chữ
-    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
-    enhanced = clahe.apply(g)
-    # 3. Nhị phân hóa (Adaptive Threshold) để loại bỏ nền giấy và dòng kẻ mờ
-    # Block size 21, C 10 giúp giữ lại nét chữ mà xóa nhiễu nền
-    binary = cv2.adaptiveThreshold(
-        enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-        cv2.THRESH_BINARY, 21, 10
-    )
-    # 4. Khử nhiễu đốm nhỏ (Denoise)
-    clean = cv2.fastNlMeansDenoising(binary, None, 10, 7, 21)
-    # Chuyển lại sang RGB để đưa vào Model
-    processed_pil = Image.fromarray(cv2.cvtColor(clean, cv2.COLOR_GRAY2RGB))
-    return processed_pil
-# --- 5. HÀM DỰ ĐOÁN CHÍNH ---
 def predict(image):
-    if image is None:
-        return None, None, "Vui lòng tải ảnh lên.", ""
     try:
-        # Bước 1: Xử lý ảnh để làm rõ chữ
-        processed_image = preprocess_red_handwriting(image)
-        # Bước 2: Chạy OCR trên ảnh ĐÃ XỬ LÝ
-        # Chuyển sang numpy array cho Paddle
-        img_np = np.array(processed_image)
-        result = ocr.ocr(img_np, cls=True)
-        # Bước 3: Vẽ kết quả lên ảnh GỐC (để người dùng dễ đối chiếu)
-        draw_img = image.copy()
-        draw = ImageDraw.Draw(draw_img)
-        try:
-            font = ImageFont.truetype(FONT_PATH, 24) if FONT_PATH else ImageFont.load_default()
-        except:
-            font = ImageFont.load_default()
-        texts_output = []
-        raw_data_log = []
-        if result and result[0]:
-            # Sắp xếp kết quả từ trên xuống dưới theo tọa độ Y
-            # result[0] là list các box. Mỗi item: [ [[x1,y1],...], (text, confidence) ]
-            sorted_res = sorted(result[0], key=lambda x: x[0][0][1])
-            for line in sorted_res:
-                box = line[0]        # Tọa độ 4 điểm
-                txt_obj = line[1]    # (Text, Score)
-                text_content = txt_obj[0]
-                score = txt_obj[1]
-                # Log debug
-                raw_data_log.append(f"Confidence: {score:.2f} | Text: {text_content}")
-                # Chỉ lấy kết quả có độ tin cậy > 0.5 để lọc rác
-                if score > 0.5:
-                    texts_output.append(text_content)
-                    # Vẽ khung và chữ
-                    poly = [(p[0], p[1]) for p in box]
-                    draw.polygon(poly, outline="red", width=2)
-                    # Vẽ nền cho chữ để dễ đọc
-                    if hasattr(draw, "textbbox"):
-                        bbox = draw.textbbox((poly[0][0], poly[0][1]-25), text_content, font=font)
-                        draw.rectangle(bbox, fill="red")
-                    draw.text((poly[0][0], poly[0][1]-25), text_content, fill="white", font=font)
-        else:
-            texts_output.append("Không tìm thấy văn bản nào.")
-        final_text = "\n".join(texts_output)
-        debug_info = "\n".join(raw_data_log)
-        return draw_img, processed_image, final_text, debug_info
     except Exception as e:
         import traceback
-        return image, image, f"Lỗi hệ thống: {str(e)}", traceback.format_exc()
-# --- 6. GIAO DIỆN GRADIO ---
-css = """
-.container { max-width: 1200px; margin: auto; }
-"""
-with gr.Blocks(css=css, title="Handwriting OCR Pro") as iface:
-    gr.Markdown("# 🧧 AI Nhận Diện Chữ Viết Tay Tiếng Trung (Bản Tối Ưu)")
-    gr.Markdown("Hệ thống tối ưu riêng cho **Mực đỏ** trên **Giấy kẻ ngang**.")
-    with gr.Row():
-        with gr.Column(scale=1):
-            input_img = gr.Image(type="pil", label="Tải ảnh gốc lên")
-            run_btn = gr.Button("🚀 BẮT ĐẦU NHẬN DIỆN", variant="primary", size="lg")
-        with gr.Column(scale=1):
-            with gr.Tabs():
-                with gr.TabItem("🖼️ Kết quả Overlay"):
-                    output_overlay = gr.Image(type="pil", label="Ảnh gốc + Chữ nhận diện")
-                with gr.TabItem("🌑 Ảnh đã xử lý (AI Vision)"):
-                    output_processed = gr.Image(type="pil", label="Cách AI nhìn thấy ảnh này")
-                    gr.Markdown("*Đây là ảnh sau khi lọc bỏ dòng kẻ và tách mực đỏ.*")
     with gr.Row():
         with gr.Column():
-            output_text = gr.Textbox(label="📄 Văn bản trích xuất", lines=10, show_copy_button=True)
         with gr.Column():
-            output_debug = gr.Textbox(label="🐞 Debug Log (Độ tin cậy)", lines=10)
-    run_btn.click(
         fn=predict,
         inputs=input_img,
-        outputs=[output_overlay, output_processed, output_text, output_debug]
     )
 if __name__ == "__main__":

 import os
+# --- SYSTEM CONFIGURATION ---
+# These variables are written before paddleocr is imported further down.
+# NOTE(review): presumably they disable MKL-DNN/oneDNN kernels and quiet the
+# native logger - confirm each name against the Paddle flag list.
 os.environ["FLAGS_use_mkldnn"] = "0"
 os.environ["FLAGS_enable_mkldnn"] = "0"
+os.environ["DN_ENABLE_MKLDNN"] = "0"
 os.environ["CPP_MIN_LOG_LEVEL"] = "3"
+import logging
+import re
+import gradio as gr
+from paddleocr import PaddleOCR
+from PIL import Image, ImageDraw, ImageFont
+import numpy as np
+import requests
+# Silence redundant log output from the "ppocr" logger.
+logging.getLogger("ppocr").setLevel(logging.WARNING)
+print("Đang khởi tạo PaddleOCR (Coordinate Sync Mode)...")
+# First try the explicit configuration (text-line orientation on, document
+# orientation classification and unwarping off). If the constructor raises for
+# any reason, report it and fall back to a model built with only lang='ch'.
+try:
+    ocr = PaddleOCR(use_textline_orientation=True, use_doc_orientation_classify=False,
+        use_doc_unwarping=False, lang='ch')
+except Exception as e:
+    print(f"Lỗi khởi tạo: {e}. Chuyển về chế độ mặc định.")
+    ocr = PaddleOCR(lang='ch')
+print("Model đã sẵn sàng!")
+# --- TẢI FONT ---
 def check_and_download_font():
     font_path = "./simfang.ttf"
     if not os.path.exists(font_path):
         try:
             url = "https://github.com/StellarCN/scp_zh/raw/master/fonts/SimFang.ttf"
             r = requests.get(url, allow_redirects=True)
             with open(font_path, 'wb') as f:
                 f.write(r.content)
+        except:
             return None
     return font_path
 FONT_PATH = check_and_download_font()
+# --- GENERIC DRAWING HELPER ---
+def universal_draw(image, raw_data, font_path):
+    """Draw every (box, text) pair found in raw_data onto a copy of image.
+
+    Args:
+        image: PIL image or numpy array; None is returned unchanged.
+        raw_data: OCR output whose shape is not known in advance; two lookup
+            strategies are tried (see the comments below).
+        font_path: path of a TrueType font, or a falsy value to use PIL's
+            default font.
+
+    Returns:
+        A copy of the image with a red outline per box and the recognised
+        text drawn as a white label on a red background. The input image
+        itself is not drawn on.
+    """
+    if image is None: return image
+    # Make sure we are working with a PIL image.
+    if isinstance(image, np.ndarray):
+        image = Image.fromarray(image)
+    # Draw on a copy.
+    canvas = image.copy()
+    draw = ImageDraw.Draw(canvas)
+    # Any failure while loading the TrueType font falls back to PIL's default.
+    # NOTE(review): bare except also hides unrelated errors.
+    try:
+        font_size = 24
+        font = ImageFont.truetype(font_path, font_size) if font_path else ImageFont.load_default()
+    except:
+        font = ImageFont.load_default()
+    # Box parser: returns a list of (x, y) tuples, or None when the value is
+    # not recognised as a box.
+    def parse_box(b):
+        try:
+            # Objects exposing tolist() (e.g. numpy arrays) become plain lists.
+            if hasattr(b, 'tolist'): b = b.tolist()
+            # A list of point lists is returned as a list of point tuples.
+            if len(b) > 0 and isinstance(b[0], list): return [tuple(p) for p in b]
+            # Four flat numbers are read as (x1, y1, x2, y2) and expanded to
+            # the four corners of the axis-aligned rectangle.
+            if len(b) == 4 and isinstance(b[0], (int, float)):
+                 return [(b[0], b[1]), (b[2], b[1]), (b[2], b[3]), (b[0], b[3])]
+            return None
+        except: return None
+    items_to_draw = []
+    # Locate the boxes and texts.
+    # Preferred layout: a list whose first element is a dict with parallel
+    # lists 'rec_texts' and 'dt_polys' (or 'rec_polys' / 'dt_boxes').
+    processed = False
+    if isinstance(raw_data, list) and len(raw_data) > 0 and isinstance(raw_data[0], dict):
+        data_dict = raw_data[0]
+        texts = data_dict.get('rec_texts')
+        boxes = data_dict.get('dt_polys', data_dict.get('rec_polys', data_dict.get('dt_boxes')))
+        if texts and boxes and isinstance(texts, list) and isinstance(boxes, list):
+            # Pair texts and boxes by index, up to the shorter list.
+            for i in range(min(len(texts), len(boxes))):
+                txt = texts[i]
+                box = parse_box(boxes[i])
+                if box and txt: items_to_draw.append((box, txt))
+            # Set even when no pair survived parsing, so the fallback search
+            # below is skipped whenever both lists were present.
+            processed = True
+    # Fallback: walk the whole structure looking for box/text pairs.
+    if not processed:
+        def hunt(data):
+            if isinstance(data, dict):
+                box = None; text = None
+                # Only the first key that is present is consulted in each
+                # group, even if its value does not parse as a box.
+                for k in ['points', 'box', 'dt_boxes', 'poly']:
+                    if k in data: box = parse_box(data[k]); break
+                for k in ['transcription', 'text', 'rec_text', 'label']:
+                    if k in data: text = data[k]; break
+                # A dict that yields both is recorded and not descended into.
+                if box and text: items_to_draw.append((box, text)); return
+                for v in data.values(): hunt(v)
+            elif isinstance(data, (list, tuple)):
+                # A two-element sequence whose first element is a list of
+                # length 4 is read as (box, text) or (box, (text, ...)).
+                if len(data) == 2 and isinstance(data[0], list) and len(data[0]) == 4:
+                    box = parse_box(data[0])
+                    txt_obj = data[1]
+                    text = txt_obj[0] if isinstance(txt_obj, (list, tuple)) else txt_obj
+                    if box and isinstance(text, str): items_to_draw.append((box, text)); return
+                for item in data: hunt(item)
+        hunt(raw_data)
+    # Draw.
+    for box, txt in items_to_draw:
+        try:
+            # Red outline.
+            draw.polygon(box, outline="red", width=3)
+            # Text label; anchor "lb" puts the label's left-bottom corner at
+            # the first vertex of the box.
+            txt_x, txt_y = box[0]
+            if hasattr(draw, "textbbox"):
+                text_bbox = draw.textbbox((txt_x, txt_y), txt, font=font, anchor="lb")
+                draw.rectangle(text_bbox, fill="red")
+                draw.text((txt_x, txt_y), txt, fill="white", font=font, anchor="lb")
+            else:
+                # No textbbox available: draw the text one font height above
+                # the vertex, without a background rectangle.
+                draw.text((txt_x, txt_y - font_size), txt, fill="white", font=font)
+        # NOTE(review): any error while drawing an item is silently skipped;
+        # the outline may already have been drawn when the label fails.
+        except: continue
+    return canvas
+# --- TEXT PROCESSING HELPERS ---
+def deep_extract_text(data):
+    """Recursively collect every non-blank string contained in data.
+
+    Lists and tuples are walked element by element, dicts by value (keys are
+    ignored), and any other object that has a __dict__ by its attributes.
+    Values of any other type contribute nothing.
+
+    Returns:
+        A list of the strings found, in traversal order and not stripped.
+    """
+    found_texts = []
+    if isinstance(data, str):
+        # Whitespace-only strings are dropped; others are returned as-is.
+        if len(data.strip()) > 0: return [data]
+        return []
+    if isinstance(data, (list, tuple)):
+        for item in data: found_texts.extend(deep_extract_text(item))
+    elif isinstance(data, dict):
+        for val in data.values(): found_texts.extend(deep_extract_text(val))
+    # NOTE(review): there is no guard against reference cycles here.
+    elif hasattr(data, '__dict__'): found_texts.extend(deep_extract_text(data.__dict__))
+    return found_texts
+def clean_text_result(text_list):
+    """Strip each string in text_list and drop those that match a filter.
+
+    Returns:
+        The surviving strings, stripped, in their original order.
+    """
+    cleaned = []
+    # Exact (case-insensitive) values to discard.
+    # NOTE(review): presumably metadata strings picked up by the recursive
+    # extraction rather than recognised text - confirm against real output.
+    block_list = ['min', 'max', 'general', 'header', 'footer', 'structure']
+    for t in text_list:
+        t = t.strip()
+        # Drop strings shorter than two characters unless they contain a
+        # character in the range U+4E00..U+9FFF.
+        if len(t) < 2 and not any(u'\u4e00' <= c <= u'\u9fff' for c in t): continue
+        # Drop strings that end like a file name.
+        if t.lower().endswith(('.ttf', '.json', '.pdparams', '.yml', '.log')): continue
+        if t.lower() in block_list: continue
+        # Drop strings with no word character and no U+4E00..U+9FFF character.
+        if not re.search(r'[\w\u4e00-\u9fff]', t): continue
+        cleaned.append(t)
+    return cleaned
+# --- MAIN PREDICT ---
 def predict(image):
+    if image is None: return None, "Chưa có ảnh.", "No Data"
     try:
+        # Chuẩn bị ảnh đầu vào
+        original_pil = image.copy() if isinstance(image, Image.Image) else Image.fromarray(image).copy()
+        image_np = np.array(image)
+        # 1. OCR
+        raw_result = ocr.ocr(image_np)
+        # 2. XỬ LÝ ẢNH ĐỂ VẼ (KEY FIX: Lấy ảnh từ Preprocessor nếu có)
+        target_image_for_drawing = original_pil
+        # Kiểm tra xem Paddle có chỉnh sửa ảnh không (dựa vào key 'doc_preprocessor_res')
+        if isinstance(raw_result, list) and len(raw_result) > 0 and isinstance(raw_result[0], dict):
+            if 'doc_preprocessor_res' in raw_result[0]:
+                proc_res = raw_result[0]['doc_preprocessor_res']
+                # Nếu có ảnh đầu ra đã chỉnh sửa (output_img)
+                if 'output_img' in proc_res:
+                    print("Phát hiện ảnh đã qua xử lý hình học. Đang đồng bộ tọa độ...")
+                    numpy_img = proc_res['output_img']
+                    target_image_for_drawing = Image.fromarray(numpy_img)
+        # 3. Vẽ lên ảnh ĐÚNG (Target Image)
+        annotated_image = universal_draw(target_image_for_drawing, raw_result, FONT_PATH)
+        # 4. Xử lý Text
+        all_texts = deep_extract_text(raw_result)
+        final_texts = clean_text_result(all_texts)
+        text_output = "\n".join(final_texts) if final_texts else "Không tìm thấy văn bản."
+        # Debug Info
+        debug_str = str(raw_result)[:1000]
+        debug_info = f"Used Image Source: {'Preprocessed' if target_image_for_drawing != original_pil else 'Original'}\nData Preview:\n{debug_str}..."
+        return annotated_image, text_output, debug_info
     except Exception as e:
         import traceback
+        return image, f"Lỗi: {str(e)}", traceback.format_exc()
+# --- USER INTERFACE ---
+# One row with two columns: the input image and run button on one side, and
+# three tabs (overlay image, extracted text, debug text) on the other.
+with gr.Blocks(title="PaddleOCR Perfect Overlay") as iface:
+    gr.Markdown("## PaddleOCR Chinese - High Precision Overlay")
     with gr.Row():
         with gr.Column():
+            input_img = gr.Image(type="pil", label="Input Image")
+            submit_btn = gr.Button("RUN OCR", variant="primary")
         with gr.Column():
+            with gr.Tabs():
+                with gr.TabItem("🖼️ Kết quả Khớp Tọa Độ"):
+                    output_img = gr.Image(type="pil", label="Overlay Result")
+                with gr.TabItem("📝 Văn bản"):
+                    output_txt = gr.Textbox(label="Text Content", lines=15)
+                with gr.TabItem("🐞 Debug"):
+                    output_debug = gr.Textbox(label="Debug Info", lines=15)
+    # The three values returned by predict fill the outputs in this order.
+    submit_btn.click(
         fn=predict,
         inputs=input_img,
+        outputs=[output_img, output_txt, output_debug]
     )
 if __name__ == "__main__":