Spaces:

ranbac
/

PaddleOCR

Sleeping

App Files Files Community

ranbac committed 21 days ago

Commit

095a128

verified ·

1 Parent(s): acd370b

Update app.py

Browse files

Files changed (1) hide show

app.py +132 -151

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import os
 # --- CẤU HÌNH HỆ THỐNG ---
-# Tắt MKLDNN để tránh lỗi Segmentation Fault trên Spaces
 os.environ["FLAGS_use_mkldnn"] = "0"
 os.environ["FLAGS_enable_mkldnn"] = "0"
 os.environ["DN_ENABLE_MKLDNN"] = "0"
@@ -15,95 +14,44 @@ from PIL import Image, ImageDraw, ImageFont
 import numpy as np
 import requests
-# Tắt log thừa của Paddle
 logging.getLogger("ppocr").setLevel(logging.WARNING)
-# --- DANH SÁCH NGÔN NGỮ HỖ TRỢ ---
-# Model Latin hỗ trợ tất cả các ngôn ngữ này
-LATIN_LANGUAGES = [
-    "vi", "en", "fr", "de", "es", "it", "pt", "id", "ms", "tr", "pl", "cs", "nl"
-]
-LANG_CHOICES = [
-    ("Tiếng Việt (Vietnamese)", "vi"),
-    ("Tiếng Anh (English)", "en"),
-    ("Tiếng Trung (Chinese)", "ch"),
-    ("Tiếng Pháp (French)", "fr"),
-    ("Tiếng Đức (German)", "de"),
-    ("Tiếng Tây Ban Nha (Spanish)", "es"),
-    ("Tiếng Indonesia", "id"),
-    ("Đa ngôn ngữ Latin (General Latin)", "latin_group")
-]
-# --- QUẢN LÝ MODEL ---
-OCR_CACHE = {}
-def get_ocr_model(user_selection):
-    """
-    Chiến lược tối ưu:
-    - Nếu chọn 'ch' -> Load model Trung Quốc (lang='ch')
-    - Nếu chọn BẤT KỲ ngôn ngữ Latin nào (vi, en, fr...) -> Load model 'en'
-      (Trong Paddle, 'en' chính là model PP-OCR Latin đa ngôn ngữ)
-    """
-    # Xác định 'backend_lang' thực sự cần load
-    if user_selection == 'ch':
-        backend_lang = 'ch'
-    else:
-        # Tất cả ngôn ngữ Latin dùng chung model 'en' để tiết kiệm RAM
-        backend_lang = 'en'
-    if backend_lang in OCR_CACHE:
-        return OCR_CACHE[backend_lang]
-    print(f"📡 Đang tải Model gốc cho nhóm: {backend_lang} ...")
-    try:
-        model = PaddleOCR(
-            use_angle_cls=True,
-            lang=backend_lang,
-            use_textline_orientation=True,
-            show_log=False
-        )
-        OCR_CACHE[backend_lang] = model
-        print(f"✅ Đã tải xong model {backend_lang}!")
-        return model
-    except Exception as e:
-        print(f"❌ Lỗi khởi tạo model {backend_lang}: {e}")
-        return None
-# --- QUẢN LÝ FONT CHỮ ---
-def check_and_download_font(lang_code):
-    """
-    Tự động chọn font:
-    - ch: SimFang
-    - Nhóm Latin (vi, en, fr...): Roboto
-    """
-    if lang_code == 'ch':
-        font_name = "simfang.ttf"
-        url = "https://github.com/StellarCN/scp_zh/raw/master/fonts/SimFang.ttf"
-    else:
-        font_name = "roboto.ttf"
-        url = "https://github.com/googlefonts/roboto/raw/main/src/hinted/Roboto-Regular.ttf"
-    font_path = f"./{font_name}"
-    # Chỉ tải nếu file chưa tồn tại (hoặc kích thước file = 0)
-    if not os.path.exists(font_path) or os.path.getsize(font_path) == 0:
-        print(f"📥 Đang tải font {font_name}...")
         try:
             r = requests.get(url, allow_redirects=True)
             with open(font_path, 'wb') as f:
                 f.write(r.content)
-            print(f"✅ Font {font_name} sẵn sàng.")
-        except Exception as e:
-            print(f"⚠️ Lỗi tải font: {e}")
-            return None
     return font_path
-# --- HÀM VẼ KHUNG & TEXT ---
 def universal_draw(image, raw_data, font_path):
     if image is None: return image
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
     canvas = image.copy()
     draw = ImageDraw.Draw(canvas)
@@ -113,9 +61,7 @@ def universal_draw(image, raw_data, font_path):
     except:
         font = ImageFont.load_default()
-    items_to_draw = []
-    # Hàm chuẩn hóa box
     def parse_box(b):
         try:
             if hasattr(b, 'tolist'): b = b.tolist()
@@ -125,114 +71,149 @@ def universal_draw(image, raw_data, font_path):
             return None
         except: return None
-    # Logic trích xuất box từ kết quả Paddle
-    if isinstance(raw_data, list) and len(raw_data) > 0:
-        # Trường hợp chuẩn (List of Lists)
-        for line in raw_data:
-            if isinstance(line, list):
-                for res in line:
-                    if isinstance(res, list) and len(res) == 2:
-                        box = parse_box(res[0])
-                        txt = res[1][0]
-                        if box and txt: items_to_draw.append((box, txt))
     # Vẽ
     for box, txt in items_to_draw:
         try:
             draw.polygon(box, outline="red", width=3)
             txt_x, txt_y = box[0]
-            # Vẽ nền đỏ cho chữ
             if hasattr(draw, "textbbox"):
                 text_bbox = draw.textbbox((txt_x, txt_y), txt, font=font, anchor="lb")
                 draw.rectangle(text_bbox, fill="red")
                 draw.text((txt_x, txt_y), txt, fill="white", font=font, anchor="lb")
             else:
-                draw.text((txt_x, txt_y - 20), txt, fill="white", font=font)
         except: continue
     return canvas
-# --- XỬ LÝ TEXT ---
-def extract_clean_text(raw_data):
-    texts = []
-    if isinstance(raw_data, list):
-        for line in raw_data:
-            if isinstance(line, list):
-                for res in line:
-                    if isinstance(res, list) and len(res) == 2:
-                        texts.append(res[1][0])
-    return "\n".join(texts)
 # --- MAIN PREDICT ---
-def predict(image, lang_selection):
-    if image is None: return None, "Vui lòng upload ảnh.", "No Data"
     try:
-        # 1. Lấy Model (Cache)
-        ocr = get_ocr_model(lang_selection)
-        if ocr is None:
-            return image, "Lỗi khởi tạo Model. Vui lòng kiểm tra Log.", "Init Error"
-        # 2. Lấy Font
-        font_path = check_and_download_font(lang_selection)
-        # 3. Chuẩn bị ảnh
-        original_pil = image.copy()
         image_np = np.array(image)
-        # 4. Chạy OCR
-        # cls=True để tự động xoay chiều văn bản
-        raw_result = ocr.ocr(image_np, cls=True)
-        if not raw_result or raw_result[0] is None:
-            return image, "Không tìm thấy văn bản nào.", str(raw_result)
-        # 5. Xử lý kết quả hình học (Unwarping nếu có)
-        # (Đơn giản hóa: Vẽ trực tiếp lên ảnh gốc vì ta đã tắt unwarping để tăng tốc)
-        annotated_image = universal_draw(original_pil, raw_result, font_path)
-        # 6. Xuất Text
-        final_text = extract_clean_text(raw_result)
-        debug_info = f"Language Mode: {lang_selection}\nModel Loaded: {'Chinese' if lang_selection == 'ch' else 'Latin (En/Vi/Fr...)'}\nFont: {font_path}\nRaw Data Sample:\n{str(raw_result)[:500]}..."
-        return annotated_image, final_text, debug_info
     except Exception as e:
         import traceback
-        return image, f"Lỗi xử lý: {str(e)}", traceback.format_exc()
 # --- GIAO DIỆN ---
-with gr.Blocks(title="Universal Latin & Chinese OCR") as iface:
-    gr.Markdown("## 🌐 Universal OCR (Hỗ trợ Tiếng Việt, Anh, Trung, Pháp, Đức...)")
-    gr.Markdown("Sử dụng model **Latin PP-OCR** đa ngôn ngữ và **Chinese PP-OCR**.")
     with gr.Row():
         with gr.Column():
-            input_img = gr.Image(type="pil", label="Ảnh đầu vào")
-            lang_dropdown = gr.Dropdown(
-                choices=LANG_CHOICES,
-                value="vi",
-                label="Chọn Ngôn Ngữ",
-                info="Nhóm Latin (Vi, En, Fr...) dùng chung 1 model siêu nhẹ."
-            )
-            submit_btn = gr.Button("🚀 CHẠY NHẬN DIỆN", variant="primary")
         with gr.Column():
             with gr.Tabs():
-                with gr.TabItem("🖼️ Kết quả"):
-                    output_img = gr.Image(type="pil", label="Ảnh đã vẽ khung")
                 with gr.TabItem("📝 Văn bản"):
-                    # Đã xóa show_copy_button=True để sửa lỗi
-                    output_txt = gr.Textbox(label="Nội dung", lines=15, interactive=True)
                 with gr.TabItem("🐞 Debug"):
-                    output_debug = gr.Textbox(label="Log hệ thống", lines=10)
     submit_btn.click(
         fn=predict,
-        inputs=[input_img, lang_dropdown],
         outputs=[output_img, output_txt, output_debug]
     )

 import os
 # --- CẤU HÌNH HỆ THỐNG ---
 os.environ["FLAGS_use_mkldnn"] = "0"
 os.environ["FLAGS_enable_mkldnn"] = "0"
 os.environ["DN_ENABLE_MKLDNN"] = "0"
 import numpy as np
 import requests
+# Tắt log thừa
 logging.getLogger("ppocr").setLevel(logging.WARNING)
+print("Đang khởi tạo PaddleOCR (Coordinate Sync Mode)...")
+try:
+    ocr = PaddleOCR(use_textline_orientation=True, use_doc_orientation_classify=False,
+        use_doc_unwarping=False, lang='ch')
+except Exception as e:
+    print(f"Lỗi khởi tạo: {e}. Chuyển về chế độ mặc định.")
+    ocr = PaddleOCR(lang='ch')
+print("Model đã sẵn sàng!")
+# --- TẢI FONT ---
def check_and_download_font():
    """Make sure the SimFang font exists locally and return its path.

    If ``./simfang.ttf`` is already present and non-empty it is reused as-is.
    Otherwise the font is downloaded to a temporary file and moved into place
    only after the HTTP request succeeded, so a failed or interrupted
    download never leaves a truncated or bogus font behind.

    Returns:
        The font path (``"./simfang.ttf"``) on success, or ``None`` when the
        font could not be obtained.
    """
    font_path = "./simfang.ttf"

    # A zero-byte file is the footprint of an interrupted download; treat it
    # as missing instead of handing an unusable font to the drawing code.
    if os.path.exists(font_path) and os.path.getsize(font_path) > 0:
        return font_path

    url = "https://github.com/StellarCN/scp_zh/raw/master/fonts/SimFang.ttf"
    tmp_path = font_path + ".part"
    try:
        # Bound the wait so a stalled connection cannot hang application startup.
        r = requests.get(url, allow_redirects=True, timeout=30)
        # Without this, a 404/500 error page would be saved as the "font".
        r.raise_for_status()
        if not r.content:
            raise ValueError("empty font payload")
        with open(tmp_path, 'wb') as f:
            f.write(r.content)
        # Atomic rename: the final path only ever holds a complete file.
        os.replace(tmp_path, font_path)
    except (requests.RequestException, OSError, ValueError) as e:
        print(f"Lỗi tải font: {e}")
        try:
            os.remove(tmp_path)
        except OSError:
            pass  # nothing was written, or it is already gone
        return None
    return font_path
+FONT_PATH = check_and_download_font()
+# --- HÀM VẼ ĐA NĂNG ---
 def universal_draw(image, raw_data, font_path):
     if image is None: return image
+    # Đảm bảo image là PIL
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
+    # Copy để vẽ
     canvas = image.copy()
     draw = ImageDraw.Draw(canvas)
     except:
         font = ImageFont.load_default()
+    # Hàm parse box
     def parse_box(b):
         try:
             if hasattr(b, 'tolist'): b = b.tolist()
             return None
         except: return None
+    items_to_draw = []
+    # Logic tìm box/text
+    # Ưu tiên cấu trúc PaddleX: rec_texts + dt_polys
+    processed = False
+    if isinstance(raw_data, list) and len(raw_data) > 0 and isinstance(raw_data[0], dict):
+        data_dict = raw_data[0]
+        texts = data_dict.get('rec_texts')
+        boxes = data_dict.get('dt_polys', data_dict.get('rec_polys', data_dict.get('dt_boxes')))
+        if texts and boxes and isinstance(texts, list) and isinstance(boxes, list):
+            for i in range(min(len(texts), len(boxes))):
+                txt = texts[i]
+                box = parse_box(boxes[i])
+                if box and txt: items_to_draw.append((box, txt))
+            processed = True
+    # Fallback Logic
+    if not processed:
+        def hunt(data):
+            if isinstance(data, dict):
+                box = None; text = None
+                for k in ['points', 'box', 'dt_boxes', 'poly']:
+                    if k in data: box = parse_box(data[k]); break
+                for k in ['transcription', 'text', 'rec_text', 'label']:
+                    if k in data: text = data[k]; break
+                if box and text: items_to_draw.append((box, text)); return
+                for v in data.values(): hunt(v)
+            elif isinstance(data, (list, tuple)):
+                if len(data) == 2 and isinstance(data[0], list) and len(data[0]) == 4:
+                    box = parse_box(data[0])
+                    txt_obj = data[1]
+                    text = txt_obj[0] if isinstance(txt_obj, (list, tuple)) else txt_obj
+                    if box and isinstance(text, str): items_to_draw.append((box, text)); return
+                for item in data: hunt(item)
+        hunt(raw_data)
     # Vẽ
     for box, txt in items_to_draw:
         try:
+            # Vẽ khung đỏ
             draw.polygon(box, outline="red", width=3)
+            # Vẽ chữ
             txt_x, txt_y = box[0]
             if hasattr(draw, "textbbox"):
                 text_bbox = draw.textbbox((txt_x, txt_y), txt, font=font, anchor="lb")
                 draw.rectangle(text_bbox, fill="red")
                 draw.text((txt_x, txt_y), txt, fill="white", font=font, anchor="lb")
             else:
+                draw.text((txt_x, txt_y - font_size), txt, fill="white", font=font)
         except: continue
     return canvas
+# --- HÀM XỬ LÝ TEXT ---
def deep_extract_text(data, _active=None):
    """Recursively collect every non-blank string found inside *data*.

    Lists, tuples, dict values and the ``__dict__`` of arbitrary objects are
    walked depth-first; strings are returned unmodified, in encounter order.
    Any other leaf value (numbers, ``None``, arrays, ...) contributes nothing.

    Args:
        data: The value to search.
        _active: Internal. Ids of the containers on the current recursion
            path, used to stop at self-referential structures. Callers
            should not pass it.

    Returns:
        A list of the strings found (possibly empty).
    """
    if isinstance(data, str):
        return [data] if data.strip() else []

    if isinstance(data, (list, tuple)):
        children = data
    elif isinstance(data, dict):
        children = data.values()
    elif hasattr(data, '__dict__'):
        children = (data.__dict__,)
    else:
        return []

    if _active is None:
        _active = set()
    key = id(data)
    # Only containers on the *current path* are tracked, so a cycle is cut
    # while an object that is merely referenced twice is still visited twice.
    if key in _active:
        return []

    _active.add(key)
    found_texts = []
    try:
        for child in children:
            found_texts.extend(deep_extract_text(child, _active))
    finally:
        _active.discard(key)
    return found_texts
def clean_text_result(text_list):
    """Strip each string in *text_list* and drop the ones that look like noise.

    An entry is discarded when, after stripping, it
      * is shorter than two characters and contains no character in the
        range U+4E00..U+9FFF,
      * ends (case-insensitively) with one of the listed file suffixes,
      * equals (case-insensitively) one of the blocked words, or
      * contains no word character at all.

    Returns:
        The surviving stripped strings, in their original order.
    """
    noise_words = ['min', 'max', 'general', 'header', 'footer', 'structure']
    file_suffixes = ('.ttf', '.json', '.pdparams', '.yml', '.log')

    def is_keepable(candidate):
        has_cjk = any(u'\u4e00' <= ch <= u'\u9fff' for ch in candidate)
        if len(candidate) < 2 and not has_cjk:
            return False
        lowered = candidate.lower()
        if lowered.endswith(file_suffixes) or lowered in noise_words:
            return False
        return re.search(r'[\w\u4e00-\u9fff]', candidate) is not None

    stripped_entries = (raw.strip() for raw in text_list)
    return [entry for entry in stripped_entries if is_keepable(entry)]
 # --- MAIN PREDICT ---
def _recognized_texts(raw_result):
    """Return the lines stored under ``rec_texts`` in dict-style results.

    Returns ``None`` when no entry of *raw_result* carries a ``rec_texts``
    sequence, so the caller can fall back to a generic search.
    """
    if not isinstance(raw_result, list):
        return None
    lines = []
    key_seen = False
    for entry in raw_result:
        if not isinstance(entry, dict):
            continue
        texts = entry.get('rec_texts')
        if isinstance(texts, (list, tuple)):
            key_seen = True
            lines.extend(t.strip() for t in texts if isinstance(t, str) and t.strip())
    return lines if key_seen else None


def predict(image):
    """Run OCR on *image* and build the three UI outputs.

    Args:
        image: A PIL image or a numpy array; ``None`` when nothing was uploaded.

    Returns:
        A tuple ``(annotated_image, text_output, debug_info)``. On failure the
        input image is returned unchanged together with the error message and
        the formatted traceback.
    """
    if image is None:
        return None, "Chưa có ảnh.", "No Data"
    try:
        # Keep an untouched copy to draw on; the OCR engine receives an array.
        original_pil = image.copy() if isinstance(image, Image.Image) else Image.fromarray(image).copy()
        image_np = np.array(image)

        # 1. OCR
        raw_result = ocr.ocr(image_np)

        # 2. Choose the image the detected boxes refer to. When the result
        #    carries a preprocessed image, the coordinates belong to that
        #    image rather than to the upload.
        target_image_for_drawing = original_pil
        first = raw_result[0] if isinstance(raw_result, list) and raw_result else None
        proc_res = first.get('doc_preprocessor_res') if isinstance(first, dict) else None
        processed_img = proc_res.get('output_img') if isinstance(proc_res, dict) else None
        if processed_img is not None:
            print("Phát hiện ảnh đã qua xử lý hình học. Đang đồng bộ tọa độ...")
            target_image_for_drawing = Image.fromarray(processed_img)

        # 3. Draw boxes and labels on the matching image.
        annotated_image = universal_draw(target_image_for_drawing, raw_result, FONT_PATH)

        # 4. Text. Prefer the recognizer's own `rec_texts` list (the same key
        #    universal_draw reads) so genuine words are not lost to the
        #    metadata blocklist; otherwise fall back to the deep search.
        final_texts = _recognized_texts(raw_result)
        if final_texts is None:
            final_texts = clean_text_result(deep_extract_text(raw_result))
        text_output = "\n".join(final_texts) if final_texts else "Không tìm thấy văn bản."

        # Debug info. Identity, not `!=`: comparing two PIL images with `!=`
        # compares their pixel content instead of asking "was it swapped?".
        debug_str = str(raw_result)[:1000]
        image_source = 'Preprocessed' if target_image_for_drawing is not original_pil else 'Original'
        debug_info = f"Used Image Source: {image_source}\nData Preview:\n{debug_str}..."
        return annotated_image, text_output, debug_info
    except Exception as e:
        # UI boundary: report the failure in the output panes instead of crashing.
        import traceback
        return image, f"Lỗi: {str(e)}", traceback.format_exc()
 # --- GIAO DIỆN ---
+with gr.Blocks(title="PaddleOCR Perfect Overlay") as iface:
+    gr.Markdown("## PaddleOCR Chinese - High Precision Overlay")
     with gr.Row():
         with gr.Column():
+            input_img = gr.Image(type="pil", label="Input Image")
+            submit_btn = gr.Button("RUN OCR", variant="primary")
         with gr.Column():
             with gr.Tabs():
+                with gr.TabItem("🖼️ Kết quả Khớp Tọa Độ"):
+                    output_img = gr.Image(type="pil", label="Overlay Result")
                 with gr.TabItem("📝 Văn bản"):
+                    output_txt = gr.Textbox(label="Text Content", lines=15)
                 with gr.TabItem("🐞 Debug"):
+                    output_debug = gr.Textbox(label="Debug Info", lines=15)
     submit_btn.click(
         fn=predict,
+        inputs=input_img,
         outputs=[output_img, output_txt, output_debug]
     )