Spaces:

ranbac
/

PaddleOCR

Sleeping

App Files Files Community

ranbac committed 21 days ago

Commit

233fd7a

verified ·

1 Parent(s): 802ddd9

Update app.py

Browse files

Files changed (1) hide show

app.py +141 -102

app.py CHANGED Viewed

@@ -13,69 +13,88 @@ from paddleocr import PaddleOCR
 from PIL import Image, ImageDraw, ImageFont
 import numpy as np
 import requests
 # Tắt log thừa
 logging.getLogger("ppocr").setLevel(logging.WARNING)
-# --- QUẢN LÝ MODEL (Cache Model) ---
-print("Đang khởi tạo hệ thống quản lý Model...")
-OCR_ENGINES = {}
-def get_ocr_model(lang_code):
-    if lang_code in OCR_ENGINES:
-        return OCR_ENGINES[lang_code]
-    print(f"🔄 Đang tải model ngôn ngữ: {lang_code}...")
-    try:
-        # lang='vi' trong PaddleOCR sử dụng latin_dict, hỗ trợ:
-        # Tiếng Việt, Anh, Pháp, Đức, Ý, Tây Ban Nha, Bồ Đào Nha, v.v...
-        engine = PaddleOCR(
-            use_angle_cls=True,  # Bật tự động xoay ảnh
-            use_textline_orientation=True,
-            lang=lang_code
-        )
-        OCR_ENGINES[lang_code] = engine
-        return engine
-    except Exception as e:
-        print(f"⚠️ Lỗi tải model {lang_code}: {e}. Chuyển về 'en'.")
-        engine = PaddleOCR(lang='en')
-        OCR_ENGINES[lang_code] = engine
-        return engine
-# --- TẢI FONT HỖ TRỢ TIẾNG VIỆT (NOTO SANS) ---
-def check_and_download_font():
-    # Đổi sang NotoSans để hỗ trợ Tiếng Việt và Latin đầy đủ
-    font_filename = "NotoSans-Regular.ttf"
-    font_path = f"./{font_filename}"
     if not os.path.exists(font_path):
-        print("⬇️ Đang tải font hỗ trợ Tiếng Việt (Noto Sans)...")
         try:
-            # Link tải font Noto Sans chính chủ Google
-            url = "https://github.com/google/fonts/raw/main/ofl/notosans/NotoSans-Regular.ttf"
             r = requests.get(url, allow_redirects=True)
             with open(font_path, 'wb') as f:
                 f.write(r.content)
-            print("✅ Đã tải font thành công.")
-        except Exception as e:
-            print(f"⚠️ Lỗi tải font: {e}")
             return None
     return font_path
-FONT_PATH = check_and_download_font()
-# --- HÀM VẼ ĐA NĂNG (GIỮ NGUYÊN LOGIC VẼ) ---
 def universal_draw(image, raw_data, font_path):
     if image is None: return image
-    if isinstance(image, np.ndarray):
-        image = Image.fromarray(image)
     canvas = image.copy()
     draw = ImageDraw.Draw(canvas)
     try:
-        # Tăng kích thước font một chút cho dễ nhìn
-        font_size = 26
         font = ImageFont.truetype(font_path, font_size) if font_path else ImageFont.load_default()
     except:
         font = ImageFont.load_default()
@@ -90,16 +109,19 @@ def universal_draw(image, raw_data, font_path):
         except: return None
     items_to_draw = []
-    # Logic lấy dữ liệu
     processed = False
     if isinstance(raw_data, list) and len(raw_data) > 0 and isinstance(raw_data[0], dict):
         data_dict = raw_data[0]
         texts = data_dict.get('rec_texts')
         boxes = data_dict.get('dt_polys', data_dict.get('rec_polys', data_dict.get('dt_boxes')))
-        if texts and boxes:
             for i in range(min(len(texts), len(boxes))):
-                items_to_draw.append((parse_box(boxes[i]), texts[i]))
             processed = True
     if not processed:
@@ -108,7 +130,7 @@ def universal_draw(image, raw_data, font_path):
                 box = None; text = None
                 for k in ['points', 'box', 'dt_boxes', 'poly']:
                     if k in data: box = parse_box(data[k]); break
-                for k in ['transcription', 'text', 'rec_text']:
                     if k in data: text = data[k]; break
                 if box and text: items_to_draw.append((box, text)); return
                 for v in data.values(): hunt(v)
@@ -121,17 +143,13 @@ def universal_draw(image, raw_data, font_path):
                 for item in data: hunt(item)
         hunt(raw_data)
-    # Thực hiện vẽ
     for box, txt in items_to_draw:
-        if not box or not txt: continue
         try:
-            draw.polygon(box, outline="#00FF00", width=3) # Đổi sang màu xanh lá cho nổi
-            # Vẽ nền chữ
             txt_x, txt_y = box[0]
             if hasattr(draw, "textbbox"):
-                bbox = draw.textbbox((txt_x, txt_y), txt, font=font, anchor="lb")
-                draw.rectangle(bbox, fill="#00AA00") # Nền xanh
                 draw.text((txt_x, txt_y), txt, fill="white", font=font, anchor="lb")
             else:
                 draw.text((txt_x, txt_y - font_size), txt, fill="white", font=font)
@@ -139,83 +157,104 @@ def universal_draw(image, raw_data, font_path):
     return canvas
-# --- XỬ LÝ TEXT ---
 def deep_extract_text(data):
-    found = []
-    if isinstance(data, str): return [data] if data.strip() else []
-    if isinstance(data, (list, tuple)):
-        for i in data: found.extend(deep_extract_text(i))
     elif isinstance(data, dict):
-        for v in data.values(): found.extend(deep_extract_text(v))
-    return found
 def clean_text_result(text_list):
     cleaned = []
-    ignore = ['min', 'max', 'header', 'footer']
     for t in text_list:
         t = t.strip()
-        if len(t) < 2 and not any(u'\u00C0' <= c <= u'\u1EF9' for c in t) and not t.isalnum(): continue
-        if t.lower() in ignore: continue
         cleaned.append(t)
     return cleaned
 # --- MAIN PREDICT ---
 def predict(image, lang_code):
     if image is None: return None, "Chưa có ảnh.", "No Data"
     try:
-        # Load model đúng ngôn ngữ
-        ocr_engine = get_ocr_model(lang_code)
-        img_np = np.array(image)
-        original_pil = image.copy() if isinstance(image, Image.Image) else Image.fromarray(image)
-        # RUN OCR
-        raw_result = ocr_engine.ocr(img_np)
-        # Xử lý kết quả & Vẽ
-        annotated_image = universal_draw(original_pil, raw_result, FONT_PATH)
-        # Lấy text
-        texts = deep_extract_text(raw_result)
-        final_text = "\n".join(clean_text_result(texts)) if texts else "Không tìm thấy văn bản."
-        debug_info = f"Language: {lang_code}\nFont Loaded: {FONT_PATH}\nRaw Data Sample:\n{str(raw_result)[:800]}..."
-        return annotated_image, final_text, debug_info
     except Exception as e:
         import traceback
-        return image, f"Lỗi: {e}", traceback.format_exc()
 # --- GIAO DIỆN ---
-with gr.Blocks(title="PaddleOCR Multi-Lang Pro") as demo:
-    gr.Markdown("## 🇻🇳 PaddleOCR: Hỗ trợ Tiếng Việt & Đa Ngôn Ngữ")
     with gr.Row():
-        with gr.Column(scale=1):
-            input_img = gr.Image(type="pil", label="Ảnh đầu vào")
-            # Dropdown chọn ngôn ngữ (Mặc định là 'vi' để sửa lỗi của bạn)
             lang_dropdown = gr.Dropdown(
-                choices=["vi", "en", "ch", "japan", "korean", "french", "german"],
-                value="vi",
-                label="Ngôn Ngữ (Language Model)",
-                info="Chọn 'vi' để hỗ trợ tốt nhất: Tiếng Việt + Toàn bộ các ngôn ngữ Latin (Anh, Pháp, Đức, Ý...)"
             )
-            btn = gr.Button("CHẠY OCR", variant="primary")
-        with gr.Column(scale=1):
             with gr.Tabs():
                 with gr.TabItem("🖼️ Kết quả"):
-                    output_img = gr.Image(type="pil", label="Ảnh đã nhận diện")
                 with gr.TabItem("📝 Text"):
-                    output_txt = gr.Textbox(label="Nội dung", lines=15)
                 with gr.TabItem("🐞 Debug"):
-                    output_debug = gr.Textbox(label="Log", lines=10)
-    btn.click(predict, [input_img, lang_dropdown], [output_img, output_txt, output_debug])
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 from PIL import Image, ImageDraw, ImageFont
 import numpy as np
 import requests
+import traceback # Thêm để in lỗi chi tiết
 # Tắt log thừa
 logging.getLogger("ppocr").setLevel(logging.WARNING)
# --- MULTI-LANGUAGE PADDLEOCR INITIALISATION ---
print("Đang khởi tạo các Model OCR...")

# UI language key -> ready PaddleOCR engine. A key is absent when its model
# failed to load; predict() reports that to the user instead of crashing.
ocr_engines = {}


def _init_ocr_engine(key, lang, loading_msg, ready_msg, error_label):
    """Build one PaddleOCR engine and register it in ``ocr_engines``.

    Args:
        key: Key under which the engine is stored in ``ocr_engines``.
        lang: Value passed to PaddleOCR's ``lang`` argument.
        loading_msg: Line printed before construction starts.
        ready_msg: Line printed after the engine was built.
        error_label: Model name interpolated into the failure message.

    Nothing is raised: a failure is printed together with its traceback and
    the key is simply left out of ``ocr_engines``, so one broken model does
    not stop the remaining ones from loading.
    """
    try:
        print(loading_msg)
        # NOTE(review): `show_log` is passed next to the
        # `use_doc_*` / `use_textline_orientation` flags -- confirm the
        # installed PaddleOCR release accepts all of them together.
        ocr_engines[key] = PaddleOCR(
            use_textline_orientation=True,
            use_doc_orientation_classify=False,
            use_doc_unwarping=False,
            lang=lang,
            show_log=False,
        )
        print(ready_msg)
    except Exception as e:
        print(f" -> LỖI khởi tạo Model {error_label}: {e}")
        # Always print the full traceback so every engine failure can be
        # diagnosed from the start-up log, not only the Latin one.
        print(traceback.format_exc())


# 1. Chinese model.
_init_ocr_engine(
    'ch',
    'ch',
    " - Đang tải Model Tiếng Trung (ch)...",
    " -> OK: Model Trung Quốc đã sẵn sàng.",
    "Trung Quốc",
)

# 2. Latin model. It is deliberately built with lang='en' rather than 'latin'.
# NOTE(review): the original author states that lang='en' in PP-OCRv3/v4
# covers the extended Latin character set (Vietnamese, French, German, ...)
# -- verify against the recognition dictionary that is actually shipped.
_init_ocr_engine(
    'latin',
    'en',
    " - Đang tải Model Latin/Việt (en)...",
    " -> OK: Model Latin/Việt đã sẵn sàng.",
    "Latin",
)

print("Quá trình khởi tạo hoàn tất!")
# --- FONT DOWNLOAD ---
def check_and_download_font(font_name, url):
    """Return the local path of *font_name*, downloading it first if needed.

    Args:
        font_name: File name of the font; it is stored as ``./<font_name>``.
        url: Location the font is fetched from when the file is missing.

    Returns:
        The path ``./<font_name>`` when the file already exists or was
        downloaded successfully, otherwise ``None``.
    """
    font_path = f"./{font_name}"
    if os.path.exists(font_path):
        return font_path

    print(f"Đang tải font {font_name}...")
    try:
        # A timeout keeps a stalled connection from blocking start-up forever.
        r = requests.get(url, allow_redirects=True, timeout=30)
        # Without this check an HTTP error page (e.g. a 404 body) would be
        # written to disk as the "font" and then be reused on every later
        # start because the file exists.
        r.raise_for_status()
        with open(font_path, 'wb') as f:
            f.write(r.content)
    except (requests.RequestException, OSError):
        print(f"Không thể tải {font_name}. Vui lòng kiểm tra internet.")
        return None

    print(f"Đã tải xong {font_name}.")
    return font_path
# Font used to render Chinese text on the annotated image.
FONT_CH_PATH = check_and_download_font("simfang.ttf", "https://github.com/StellarCN/scp_zh/raw/master/fonts/SimFang.ttf")

# Font used to render Latin-script text.
# NOTE(review): the local file is named "Roboto-Regular.ttf" but the URL
# points at Roboto Slab -- confirm which typeface is intended and that the
# URL still resolves.
FONT_LATIN_PATH = check_and_download_font("Roboto-Regular.ttf", "https://github.com/google/fonts/raw/main/apache/robotoslab/RobotoSlab-Regular.ttf")

# UI language key -> font path (a path is None when its download failed).
FONT_MAP = dict(ch=FONT_CH_PATH, latin=FONT_LATIN_PATH)
+# --- HÀM VẼ ĐA NĂNG ---
 def universal_draw(image, raw_data, font_path):
     if image is None: return image
+    if isinstance(image, np.ndarray): image = Image.fromarray(image)
     canvas = image.copy()
     draw = ImageDraw.Draw(canvas)
     try:
+        font_size = 24
         font = ImageFont.truetype(font_path, font_size) if font_path else ImageFont.load_default()
     except:
         font = ImageFont.load_default()
         except: return None
     items_to_draw = []
+    # Logic tìm box/text
     processed = False
     if isinstance(raw_data, list) and len(raw_data) > 0 and isinstance(raw_data[0], dict):
         data_dict = raw_data[0]
         texts = data_dict.get('rec_texts')
         boxes = data_dict.get('dt_polys', data_dict.get('rec_polys', data_dict.get('dt_boxes')))
+        if texts and boxes and isinstance(texts, list) and isinstance(boxes, list):
             for i in range(min(len(texts), len(boxes))):
+                txt = texts[i]
+                box = parse_box(boxes[i])
+                if box and txt: items_to_draw.append((box, txt))
             processed = True
     if not processed:
                 box = None; text = None
                 for k in ['points', 'box', 'dt_boxes', 'poly']:
                     if k in data: box = parse_box(data[k]); break
+                for k in ['transcription', 'text', 'rec_text', 'label']:
                     if k in data: text = data[k]; break
                 if box and text: items_to_draw.append((box, text)); return
                 for v in data.values(): hunt(v)
                 for item in data: hunt(item)
         hunt(raw_data)
     for box, txt in items_to_draw:
         try:
+            draw.polygon(box, outline="red", width=3)
             txt_x, txt_y = box[0]
             if hasattr(draw, "textbbox"):
+                text_bbox = draw.textbbox((txt_x, txt_y), txt, font=font, anchor="lb")
+                draw.rectangle(text_bbox, fill="red")
                 draw.text((txt_x, txt_y), txt, fill="white", font=font, anchor="lb")
             else:
                 draw.text((txt_x, txt_y - font_size), txt, fill="white", font=font)
     return canvas
# --- TEXT HANDLING ---
def deep_extract_text(data):
    """Collect every non-blank string found anywhere inside *data*.

    Lists and tuples are walked element by element, dicts by their values,
    and any other object exposing ``__dict__`` through that attribute
    dictionary. Strings are returned unmodified, in depth-first encounter
    order; everything else is ignored.

    Args:
        data: Arbitrary (possibly nested) OCR result structure.

    Returns:
        A list of the strings found; empty when there are none.
    """
    found_texts = []
    # ids of the containers currently being walked (the recursion path).
    # Tracking only the path breaks reference cycles while still visiting a
    # sub-structure every time it is legitimately shared.
    active = set()

    def _walk(node):
        if isinstance(node, str):
            if node.strip():
                found_texts.append(node)
            return

        if isinstance(node, (list, tuple)):
            children = node
        elif isinstance(node, dict):
            children = node.values()
        elif hasattr(node, '__dict__'):
            children = (node.__dict__,)
        else:
            return

        node_id = id(node)
        if node_id in active:
            # Self-referential structure: stop instead of recursing forever.
            return
        active.add(node_id)
        try:
            for child in children:
                _walk(child)
        finally:
            active.discard(node_id)

    _walk(data)
    return found_texts
 def clean_text_result(text_list):
     cleaned = []
+    block_list = ['min', 'max', 'general', 'header', 'footer', 'structure']
     for t in text_list:
         t = t.strip()
+        if len(t) < 1: continue
+        if t.lower().endswith(('.ttf', '.json', '.pdparams', '.yml', '.log')): continue
+        if t.lower() in block_list: continue
         cleaned.append(t)
     return cleaned
 # --- MAIN PREDICT ---
 def predict(image, lang_code):
     if image is None: return None, "Chưa có ảnh.", "No Data"
+    # Lấy model từ dict
+    current_ocr = ocr_engines.get(lang_code)
+    # Kiểm tra kỹ model có tồn tại không
+    if not current_ocr:
+        loaded_keys = list(ocr_engines.keys())
+        msg = f"Lỗi: Model '{lang_code}' chưa tải được. Danh sách model đang có: {loaded_keys}.\nVui lòng xem log khởi tạo (terminal) để biết lý do."
+        return image, msg, "Initialization Failed"
+    current_font = FONT_MAP.get(lang_code, FONT_LATIN_PATH)
     try:
+        original_pil = image.copy() if isinstance(image, Image.Image) else Image.fromarray(image).copy()
+        image_np = np.array(image)
+        # 1. RUN OCR
+        raw_result = current_ocr.ocr(image_np)
+        # 2. XỬ LÝ ẢNH
+        target_image_for_drawing = original_pil
+        if isinstance(raw_result, list) and len(raw_result) > 0 and isinstance(raw_result[0], dict):
+            if 'doc_preprocessor_res' in raw_result[0]:
+                proc_res = raw_result[0]['doc_preprocessor_res']
+                if 'output_img' in proc_res:
+                    numpy_img = proc_res['output_img']
+                    target_image_for_drawing = Image.fromarray(numpy_img)
+        # 3. VẼ & TEXT
+        annotated_image = universal_draw(target_image_for_drawing, raw_result, current_font)
+        all_texts = deep_extract_text(raw_result)
+        final_texts = clean_text_result(all_texts)
+        text_output = "\n".join(final_texts) if final_texts else "Không tìm thấy văn bản."
+        debug_info = f"Engine: {lang_code} (Mapped to Paddle 'en' for Latin)\nFont: {current_font}\nRaw Data Head:\n{str(raw_result)[:800]}..."
+        return annotated_image, text_output, debug_info
     except Exception as e:
         import traceback
+        return image, f"Runtime Error: {str(e)}", traceback.format_exc()
 # --- GIAO DIỆN ---
+with gr.Blocks(title="PaddleOCR Ultimate") as iface:
+    gr.Markdown("## PaddleOCR Multi-Language (v4/v3)")
     with gr.Row():
+        with gr.Column():
+            input_img = gr.Image(type="pil", label="Input Image")
             lang_dropdown = gr.Dropdown(
+                choices=["ch", "latin"],
+                value="latin",
+                label="Chọn Ngôn ngữ / Language",
+                info="ch: Chinese | latin: English, Vietnamese, French, German, etc."
             )
+            submit_btn = gr.Button("RUN OCR", variant="primary")
+        with gr.Column():
             with gr.Tabs():
                 with gr.TabItem("🖼️ Kết quả"):
+                    output_img = gr.Image(type="pil", label="Overlay")
                 with gr.TabItem("📝 Text"):
+                    output_txt = gr.Textbox(label="Content", lines=15)
                 with gr.TabItem("🐞 Debug"):
+                    output_debug = gr.Textbox(label="Log", lines=15)
+    submit_btn.click(
+        fn=predict,
+        inputs=[input_img, lang_dropdown],
+        outputs=[output_img, output_txt, output_debug]
+    )
 if __name__ == "__main__":
+    iface.launch(server_name="0.0.0.0", server_port=7860)