Spaces:

ranbac
/

PaddleOCR

Running

App Files Community

ranbac committed 24 days ago

Commit

d3f0d33

verified ·

1 Parent(s): 37b255c

Update app.py

Browse files

Files changed (1) hide show

app.py +139 -65

app.py CHANGED Viewed

@@ -1,118 +1,192 @@
-import gradio as gr
-import logging
 import os
 import re
 from paddleocr import PaddleOCR
-from PIL import Image
 import numpy as np
-# Tắt log hệ thống
-os.environ["CPP_MIN_LOG_LEVEL"] = "3"
 logging.getLogger("ppocr").setLevel(logging.WARNING)
-print("Đang khởi tạo PaddleOCR (PaddleX 3.0)...")
-# --- PHẦN SỬA LỖI QUAN TRỌNG ---
-# Đã xóa hoàn toàn 'use_gpu=False' để tránh lỗi ValueError
 try:
-    ocr = PaddleOCR(
-        use_textline_orientation=True,
-        lang='ch'
-    )
 except Exception as e:
-    print(f"Lỗi khởi tạo tham số: {e}. Chuyển sang chế độ mặc định.")
     ocr = PaddleOCR(lang='ch')
 print("Model đã sẵn sàng!")
-# --- HÀM QUÉT ĐỆ QUY (Để lấy text từ cấu trúc phức tạp) ---
 def deep_extract_text(data):
     found_texts = []
-    # Nếu là chuỗi -> Lấy luôn
     if isinstance(data, str):
-        if len(data.strip()) > 0:
-            return [data]
         return []
-    # Nếu là List/Tuple -> Quét từng phần tử
     if isinstance(data, (list, tuple)):
         for item in data:
             found_texts.extend(deep_extract_text(item))
-    # Nếu là Dict -> Quét Values
     elif isinstance(data, dict):
         for val in data.values():
             found_texts.extend(deep_extract_text(val))
-    # Nếu là Object lạ -> Quét thuộc tính
     elif hasattr(data, '__dict__'):
         found_texts.extend(deep_extract_text(data.__dict__))
     return found_texts
-# --- HÀM LỌC RÁC (Loại bỏ min, general, .ttf) ---
 def clean_text_result(text_list):
     cleaned = []
-    # Danh sách từ khóa rác cần chặn
     block_list = ['min', 'max', 'general', 'header', 'footer', 'structure']
     for t in text_list:
         t = t.strip()
-        # 1. Bỏ qua chuỗi quá ngắn (trừ khi là chữ Hán)
-        if len(t) < 2:
-            # Kiểm tra Unicode range chữ Hán
-            if not any(u'\u4e00' <= c <= u'\u9fff' for c in t):
-                continue
-        # 2. Bỏ qua file hệ thống
-        if t.lower().endswith(('.ttf', '.json', '.pdparams', '.yml', '.yaml')):
-            continue
-        # 3. Bỏ qua từ khóa hệ thống
-        if t.lower() in block_list:
-            continue
-        # 4. Chỉ lấy dòng có nội dung thực sự
-        if not re.search(r'[\w\u4e00-\u9fff]', t):
-            continue
         cleaned.append(t)
     return cleaned
 def predict(image):
     if image is None:
-        return "请上传图片 / Vui lòng tải ảnh lên."
     try:
         if isinstance(image, Image.Image):
             image = np.array(image)
-        # Gọi OCR (Không truyền tham số nào khác)
         raw_result = ocr.ocr(image)
-        # Trích xuất toàn bộ text
         all_texts = deep_extract_text(raw_result)
-        # Lọc sạch kết quả
         final_texts = clean_text_result(all_texts)
-        if len(final_texts) > 0:
-            return "\n".join(final_texts)
-        else:
-            return "Không tìm thấy văn bản hợp lệ."
     except Exception as e:
         import traceback
         traceback.print_exc()
-        return f"Lỗi xử lý: {str(e)}"
-# Giao diện Gradio
-iface = gr.Interface(
-    fn=predict,
-    inputs=gr.Image(type="pil", label="Input Image"),
-    # Bỏ show_copy_button=True để tránh lỗi với Gradio cũ
-    outputs=gr.Textbox(label="Kết quả (Đã lọc nhiễu)", lines=15),
-    title="PaddleOCR Tiếng Trung (PaddleX 3.0 Clean)",
-    description="Phiên bản đã Fix lỗi use_gpu và tích hợp bộ lọc rác thông minh.",
-    examples=[]
-)
 if __name__ == "__main__":
     iface.launch(server_name="0.0.0.0", server_port=7860)

 import os
# --- SYSTEM CONFIGURATION ---
# These variables are written to the process environment before the
# `paddleocr` import that follows, so they are already set when it loads.
# NOTE(review): the three *mkldnn* switches presumably turn off Paddle's
# MKL-DNN/oneDNN CPU kernels -- confirm the exact variable names.
os.environ["FLAGS_use_mkldnn"] = "0"
os.environ["FLAGS_enable_mkldnn"] = "0"
os.environ["DN_ENABLE_MKLDNN"] = "0"
# NOTE(review): presumably intended to silence native (C++) log output;
# verify that the installed runtime actually reads this variable name.
os.environ["CPP_MIN_LOG_LEVEL"] = "3"
+import logging
 import re
+import gradio as gr
 from paddleocr import PaddleOCR
+from PIL import Image, ImageDraw, ImageFont
 import numpy as np
+import requests
# Quiet Python-side logging: the "ppocr" logger only emits WARNING and above.
logging.getLogger("ppocr").setLevel(logging.WARNING)
print("Đang khởi tạo PaddleOCR (Overlay Mode)...")
try:
    # Build the module-level OCR engine once, at import time, requesting
    # text-line orientation handling and lang='ch'.
    ocr = PaddleOCR(use_textline_orientation=True, lang='ch')
except Exception as e:
    # Construction with the extra option failed for some reason: report it and
    # retry with only the language argument.
    print(f"Lỗi khởi tạo: {e}. Fallback...")
    ocr = PaddleOCR(lang='ch')
print("Model đã sẵn sàng!")
# --- FONT DOWNLOAD HELPER (a CJK-capable font is required to draw Chinese text) ---
def check_and_download_font():
    """Return the path of the local SimFang font, downloading it if missing.

    The font is stored as ``./simfang.ttf`` relative to the current working
    directory. If that file already exists it is reused without any network
    access. Otherwise it is fetched over HTTP.

    The download is written to a temporary ``.part`` file and only moved into
    place after the server answered with a success status, so a failed or
    interrupted download never leaves a bogus ``simfang.ttf`` behind that
    later runs would mistake for a valid font.

    Returns:
        str | None: ``"./simfang.ttf"`` when the font is available, or ``None``
        when it had to be downloaded and the download failed. The failure is
        printed, not raised, so the app can still start with a default font.
    """
    font_path = "./simfang.ttf"
    if os.path.exists(font_path):
        return font_path

    print("Đang tải font tiếng Trung (SimFang)...")
    # SimFang download link (a standard Chinese font).
    url = "https://github.com/StellarCN/scp_zh/raw/master/fonts/SimFang.ttf"
    tmp_path = font_path + ".part"
    try:
        # The timeout keeps app start-up from hanging on a stalled connection.
        r = requests.get(url, allow_redirects=True, timeout=30)
        # Without this an HTTP error page would be saved as the "font".
        r.raise_for_status()
        with open(tmp_path, 'wb') as f:
            f.write(r.content)
        # Publish the finished file in one step.
        os.replace(tmp_path, font_path)
    except Exception as e:
        # Best-effort boundary: any failure degrades to "no custom font".
        try:
            os.remove(tmp_path)
        except OSError:
            pass  # nothing was written, or it is already gone
        print(f"Lỗi tải font: {e}. Sẽ dùng font mặc định (có thể lỗi hiển thị).")
        return None
    print("Đã tải font thành công!")
    return font_path
# Resolve (and if necessary download) the font once, when the app starts.
# The value is None when the download failed.
FONT_PATH = check_and_download_font()
# --- OVERLAY DRAWING ---
def _is_legacy_line(item):
    """Return True if *item* looks like a classic ``[box, (text, score)]`` entry."""
    if not isinstance(item, (list, tuple)) or len(item) != 2:
        return False
    text_info = item[1]
    # Requiring a string here is what tells a real line apart from a page that
    # merely happens to contain exactly two lines.
    return (
        isinstance(text_info, (list, tuple))
        and len(text_info) > 0
        and isinstance(text_info[0], str)
    )


def _iter_ocr_lines(ocr_result):
    """Yield ``(box, text)`` pairs from the result layouts this app handles.

    Recognised layouts:
      * nested lists, one list of ``[box, (text, score)]`` per image;
      * a flat list of ``[box, (text, score)]`` entries;
      * dict-like results carrying parallel ``rec_polys`` / ``rec_texts``
        sequences (the layout documented for PaddleOCR 3.x), either alone or
        in a list with one entry per image.
    Anything else is ignored.
    """
    if isinstance(ocr_result, (list, tuple)):
        pages = ocr_result
    else:
        pages = [ocr_result]
    for page in pages:
        if isinstance(page, dict):
            texts = page.get("rec_texts")
            polys = page.get("rec_polys")
            # Compare with None explicitly: polys may be a numpy array, whose
            # truth value is ambiguous.
            if texts is None or polys is None:
                continue
            for poly, text in zip(polys, texts):
                yield poly, text
        elif _is_legacy_line(page):
            yield page[0], page[1][0]
        elif isinstance(page, (list, tuple)):
            for line in page:
                if _is_legacy_line(line):
                    yield line[0], line[1][0]


def draw_ocr_results(image, ocr_result, font_path):
    """Draw the detected text regions and their text onto *image*.

    Args:
        image: A PIL image or a numpy array. An array is converted to a PIL
            image first; a PIL image is drawn on in place, so pass a copy if
            the original must stay untouched.
        ocr_result: The raw OCR engine output (see ``_iter_ocr_lines`` for the
            accepted layouts).
        font_path: Path to a TrueType font able to render the recognised
            text, or ``None`` to use PIL's default font.

    Returns:
        The annotated PIL image, or *image* unchanged when either *image* or
        *ocr_result* is ``None``.
    """
    if image is None or ocr_result is None:
        return image
    # Work on a PIL image (convert if needed).
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    draw = ImageDraw.Draw(image)

    font_size = 20
    try:
        font = ImageFont.truetype(font_path, font_size) if font_path else ImageFont.load_default()
    except (OSError, ValueError):
        # Unreadable or corrupt font file: fall back instead of failing.
        font = ImageFont.load_default()

    for box, txt in _iter_ocr_lines(ocr_result):
        try:
            # Plain float pairs work for nested lists and numpy polygons alike.
            poly_points = [(float(pt[0]), float(pt[1])) for pt in box]
            # 1. Outline of the text region.
            draw.polygon(poly_points, outline="red", width=2)
            # 2. Label, anchored just above the first corner of the region and
            #    clamped so it never starts above the top edge of the image.
            txt_x, txt_y = poly_points[0]
            label_pos = (txt_x, max(txt_y - font_size, 0))
            label = str(txt)
            # Red backdrop behind the label so the white text stays readable.
            text_bbox = draw.textbbox(label_pos, label, font=font)
            draw.rectangle(text_bbox, fill="red")
            draw.text(label_pos, label, fill="white", font=font)
        except Exception:
            # Deliberate best effort: one malformed box or undrawable string
            # must not prevent the remaining lines from being drawn.
            continue
    return image
# --- TEXT-PROCESSING HELPERS (carried over from the previous version) ---
def deep_extract_text(data):
    """Collect every non-blank string found anywhere inside *data*.

    Strings are gathered depth-first, in encounter order, from lists, tuples,
    dict values (keys are not inspected) and the ``__dict__`` of any other
    object. Values of any other kind are ignored. Strings are returned
    exactly as found; they are only stripped to decide whether they are blank.

    A container that (directly or indirectly) contains itself is not
    re-entered, so cyclic structures terminate instead of recursing until the
    interpreter's recursion limit is hit. A container that is merely shared,
    i.e. reachable twice without a cycle, is still visited each time.

    Args:
        data: Any value, typically the raw OCR engine result.

    Returns:
        list[str]: The strings found, possibly empty.
    """
    found_texts = []
    # ids of the containers on the current recursion path (not every container
    # seen so far -- that would drop legitimate duplicates).
    active = set()

    def walk(node):
        if isinstance(node, str):
            if node.strip():
                found_texts.append(node)
            return
        if isinstance(node, (list, tuple)):
            children = node
        elif isinstance(node, dict):
            children = node.values()
        elif hasattr(node, '__dict__'):
            children = (node.__dict__,)
        else:
            return
        marker = id(node)
        if marker in active:
            return  # cycle: this container is already being walked
        active.add(marker)
        try:
            for child in children:
                walk(child)
        finally:
            active.discard(marker)

    walk(data)
    return found_texts
 def clean_text_result(text_list):
     cleaned = []
     block_list = ['min', 'max', 'general', 'header', 'footer', 'structure']
     for t in text_list:
         t = t.strip()
+        if len(t) < 2 and not any(u'\u4e00' <= c <= u'\u9fff' for c in t): continue
+        if t.lower().endswith(('.ttf', '.json', '.pdparams', '.yml', '.log')): continue
+        if t.lower() in block_list: continue
+        if not re.search(r'[\w\u4e00-\u9fff]', t): continue
         cleaned.append(t)
     return cleaned
# --- MAIN PREDICT HANDLER ---
def predict(image):
    """Run OCR on *image* and return ``(annotated_image, text)``.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.

    Returns:
        tuple: ``(image_or_None, message)``. With no input, or when any step
        other than drawing fails, the first element is ``None`` and the
        second is a message for the user. If only the drawing step fails, the
        untouched copy of the input is returned together with the text.
    """
    if image is None:
        return None, "Vui lòng tải ảnh."

    try:
        # Keep a separate copy for drawing so the OCR input is never modified.
        original_image = image.copy()

        # The OCR engine is fed a numpy array.
        ocr_input = np.array(image) if isinstance(image, Image.Image) else image

        # 1. OCR. `cls=True` is deliberately not passed (it caused errors);
        #    orientation handling is left to the model.
        raw_result = ocr.ocr(ocr_input)

        # 2. Text output: pull every string out of the raw result, then filter.
        final_texts = clean_text_result(deep_extract_text(raw_result))
        if final_texts:
            text_output = "\n".join(final_texts)
        else:
            text_output = "Không tìm thấy văn bản."

        # 3. Image output: overlay the raw result on the preserved copy.
        try:
            annotated_image = draw_ocr_results(original_image, raw_result, FONT_PATH)
        except Exception as e:
            print(f"Lỗi vẽ ảnh: {e}")
            # Drawing is optional: fall back to the unannotated copy.
            annotated_image = original_image
    except Exception as e:
        import traceback
        traceback.print_exc()
        return None, f"Lỗi hệ thống: {str(e)}"
    else:
        return annotated_image, text_output
# --- GRADIO INTERFACE ---
# One row with two columns: the first holds the image input and the trigger
# button, the second holds the two outputs.
with gr.Blocks(title="PaddleOCR Overlay") as iface:
    gr.Markdown("# PaddleOCR Chinese - Overlay Mode")
    gr.Markdown("Nhận dạng tiếng Trung và vẽ trực tiếp lên ảnh.")
    with gr.Row():
        with gr.Column():
            input_img = gr.Image(type="pil", label="Input Image")
            submit_btn = gr.Button("Nhận dạng / Predict", variant="primary")
        with gr.Column():
            # Output 1: the image with the OCR overlay drawn on it
            output_img = gr.Image(type="pil", label="Image Result")
            # Output 2: the extracted text
            output_txt = gr.Textbox(label="Text Result", lines=10)
    # `predict` returns (image, text), matching the order of `outputs`.
    submit_btn.click(
        fn=predict,
        inputs=input_img,
        outputs=[output_img, output_txt]
    )
if __name__ == "__main__":
    # Bind to all interfaces on port 7860.
    iface.launch(server_name="0.0.0.0", server_port=7860)