Spaces:

ranbac
/

PaddleOCR

Sleeping

App Files Files Community

ranbac committed 23 days ago

Commit

0396510

verified ·

1 Parent(s): e5634f6

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -60

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import os
-# --- CẤU HÌNH HỆ THỐNG ---
 os.environ["FLAGS_use_mkldnn"] = "0"
 os.environ["FLAGS_enable_mkldnn"] = "0"
 os.environ["DN_ENABLE_MKLDNN"] = "0"
@@ -17,56 +17,52 @@ import requests
 # Tắt log thừa
 logging.getLogger("ppocr").setLevel(logging.WARNING)
-print("Đang khởi tạo hệ thống...")
-# --- QUẢN LÝ MODEL (CACHE) ---
-# Dictionary để lưu các model đã load, tránh load lại gây chậm
 OCR_ENGINES = {}
-def get_ocr_engine(lang_code):
-    """
-    Hàm lấy model từ bộ nhớ đệm, nếu chưa có thì tải mới.
-    """
     if lang_code in OCR_ENGINES:
         return OCR_ENGINES[lang_code]
-    print(f"🔄 Đang nạp model ngôn ngữ: {lang_code} ...")
     try:
-        # Cấu hình chung cho các model
-        ocr = PaddleOCR(
             use_textline_orientation=True,
             use_doc_orientation_classify=False,
             use_doc_unwarping=False,
-            lang=lang_code,                 # Ngôn ngữ được chọn
         )
-        OCR_ENGINES[lang_code] = ocr
-        print(f"✅ Đã nạp xong model: {lang_code}")
-        return ocr
     except Exception as e:
-        print(f"❌ Lỗi khởi tạo model {lang_code}: {e}")
-        return None
-# Pre-load model mặc định (Tiếng Trung) để khởi động nhanh
-get_ocr_engine('ch')
-# --- TẢI FONT ---
 def check_and_download_font():
     font_path = "./simfang.ttf"
     if not os.path.exists(font_path):
         try:
             url = "https://github.com/StellarCN/scp_zh/raw/master/fonts/SimFang.ttf"
-            print("Đang tải font SimFang...")
             r = requests.get(url, allow_redirects=True)
             with open(font_path, 'wb') as f:
                 f.write(r.content)
-            print("Đã tải font xong.")
         except:
             return None
     return font_path
 FONT_PATH = check_and_download_font()
-# --- HÀM VẼ ĐA NĂNG ---
 def universal_draw(image, raw_data, font_path):
     if image is None: return image
@@ -97,6 +93,7 @@ def universal_draw(image, raw_data, font_path):
     items_to_draw = []
     # Logic tìm box/text
     processed = False
     if isinstance(raw_data, list) and len(raw_data) > 0 and isinstance(raw_data[0], dict):
         data_dict = raw_data[0]
@@ -110,18 +107,23 @@ def universal_draw(image, raw_data, font_path):
                 if box and txt: items_to_draw.append((box, txt))
             processed = True
-    # Fallback Logic (Dành cho output chuẩn của PaddleOCR)
     if not processed:
         def hunt(data):
-            if isinstance(data, (list, tuple)):
-                # Cấu trúc [[box, [text, conf]], ...]
                 if len(data) == 2 and isinstance(data[0], list) and len(data[0]) == 4:
                     box = parse_box(data[0])
                     txt_obj = data[1]
                     text = txt_obj[0] if isinstance(txt_obj, (list, tuple)) else txt_obj
-                    if box and isinstance(text, str):
-                        items_to_draw.append((box, text))
-                        return
                 for item in data: hunt(item)
         hunt(raw_data)
@@ -142,54 +144,59 @@ def universal_draw(image, raw_data, font_path):
     return canvas
-# --- HÀM XỬ LÝ TEXT ---
 def deep_extract_text(data):
     found_texts = []
     if isinstance(data, str):
         if len(data.strip()) > 0: return [data]
         return []
     if isinstance(data, (list, tuple)):
-        # Bỏ qua con số confidence (float)
-        if len(data) == 2 and isinstance(data[1], (int, float)):
-             return deep_extract_text(data[0])
         for item in data: found_texts.extend(deep_extract_text(item))
     elif isinstance(data, dict):
         for val in data.values(): found_texts.extend(deep_extract_text(val))
     return found_texts
 def clean_text_result(text_list):
     cleaned = []
     block_list = ['min', 'max', 'general', 'header', 'footer', 'structure']
     for t in text_list:
-        if not isinstance(t, str): continue
         t = t.strip()
-        if len(t) < 1: continue
-        # Giữ lại text nếu có ký tự chữ hoặc số
         if not re.search(r'[\w\u4e00-\u9fff]', t): continue
         cleaned.append(t)
     return cleaned
-# --- MAIN PREDICT ---
-def predict(image, lang_choice):
     if image is None: return None, "Chưa có ảnh.", "No Data"
     try:
-        # Lấy model tương ứng với ngôn ngữ user chọn
-        ocr_model = get_ocr_engine(lang_choice)
-        if ocr_model is None:
-            return image, "Lỗi khởi tạo model.", "Error loading model"
         # Chuẩn bị ảnh đầu vào
         original_pil = image.copy() if isinstance(image, Image.Image) else Image.fromarray(image).copy()
         image_np = np.array(image)
-        # 1. OCR
-        raw_result = ocr_model.ocr(image_np)
         # 2. XỬ LÝ ẢNH ĐỂ VẼ
         target_image_for_drawing = original_pil
-        # 3. Vẽ lên ảnh
         annotated_image = universal_draw(target_image_for_drawing, raw_result, FONT_PATH)
         # 4. Xử lý Text
@@ -199,7 +206,7 @@ def predict(image, lang_choice):
         # Debug Info
         debug_str = str(raw_result)[:1000]
-        debug_info = f"Model Used: {lang_choice}\nRaw Output Preview:\n{debug_str}..."
         return annotated_image, text_output, debug_info
@@ -207,36 +214,39 @@ def predict(image, lang_choice):
         import traceback
         return image, f"Lỗi: {str(e)}", traceback.format_exc()
-# --- GIAO DIỆN ---
-with gr.Blocks(title="Multi-Language OCR") as iface:
-    gr.Markdown("## PaddleOCR Multi-Language (Latin/Vietnamese/Chinese)")
     with gr.Row():
-        with gr.Column(scale=1):
             input_img = gr.Image(type="pil", label="Input Image")
-            # --- DROPDOWN CHỌN NGÔN NGỮ ---
             lang_dropdown = gr.Dropdown(
-                choices=["ch", "en", "vi"],
-                value="ch",
-                label="Chọn Ngôn Ngữ / Language",
-                info="ch: Chinese | en: Latin/Euro (Anh, Pháp, Đức...) | vi: Vietnamese"
             )
             submit_btn = gr.Button("RUN OCR", variant="primary")
-        with gr.Column(scale=2):
             with gr.Tabs():
-                with gr.TabItem("🖼️ Kết quả / Result"):
                     output_img = gr.Image(type="pil", label="Overlay Result")
-                with gr.TabItem("📝 Văn bản / Text"):
                     output_txt = gr.Textbox(label="Text Content", lines=15)
                 with gr.TabItem("🐞 Debug"):
                     output_debug = gr.Textbox(label="Debug Info", lines=15)
     submit_btn.click(
         fn=predict,
-        inputs=[input_img, lang_dropdown], # Truyền thêm lựa chọn ngôn ngữ
         outputs=[output_img, output_txt, output_debug]
     )

 import os
+# --- CẤU HÌNH HỆ THỐNG (GIỮ NGUYÊN) ---
 os.environ["FLAGS_use_mkldnn"] = "0"
 os.environ["FLAGS_enable_mkldnn"] = "0"
 os.environ["DN_ENABLE_MKLDNN"] = "0"
 # Tắt log thừa
 logging.getLogger("ppocr").setLevel(logging.WARNING)
+# --- QUẢN LÝ MODEL ĐA NGÔN NGỮ (THAY ĐỔI ĐỂ HỖ TRỢ THÊM NGÔN NGỮ) ---
+print("Đang khởi tạo hệ thống quản lý Model...")
+# Cache để lưu các model đã tải, tránh load lại gây chậm
 OCR_ENGINES = {}
+def get_ocr_model(lang_code):
+    # Nếu model ngôn ngữ này đã load rồi thì dùng luôn
     if lang_code in OCR_ENGINES:
         return OCR_ENGINES[lang_code]
+    print(f"Đang tải model ngôn ngữ: {lang_code}...")
     try:
+        # Khởi tạo PaddleOCR với ngôn ngữ được chọn
+        # 'vi' hỗ trợ tốt Tiếng Việt và Latin. 'en' hỗ trợ chung Latin. 'ch' là Tiếng Trung.
+        engine = PaddleOCR(
             use_textline_orientation=True,
             use_doc_orientation_classify=False,
             use_doc_unwarping=False,
+            lang=lang_code
         )
+        OCR_ENGINES[lang_code] = engine
+        return engine
     except Exception as e:
+        print(f"Lỗi khởi tạo ngôn ngữ {lang_code}: {e}. Thử khởi tạo lại chế độ cơ bản.")
+        # Fallback nếu tham số lỗi
+        engine = PaddleOCR(lang='en')
+        OCR_ENGINES[lang_code] = engine
+        return engine
+# --- TẢI FONT (GIỮ NGUYÊN) ---
 def check_and_download_font():
     """Ensure the SimFang font exists locally, downloading it on first use.
     Returns:
         "./simfang.ttf" when the font file is present (already on disk or
         freshly downloaded), otherwise None if the download failed.
     """
     font_path = "./simfang.ttf"
     if os.path.exists(font_path):
         return font_path
     url = "https://github.com/StellarCN/scp_zh/raw/master/fonts/SimFang.ttf"
     # Download to a side file first so an interrupted write can never leave a
     # truncated font at font_path that later runs would accept as valid.
     tmp_path = font_path + ".part"
     try:
         # A timeout keeps start-up from hanging forever on a stalled connection.
         r = requests.get(url, allow_redirects=True, timeout=30)
         # Without this check an HTTP error page would be saved as the "font".
         r.raise_for_status()
         with open(tmp_path, 'wb') as f:
             f.write(r.content)
         os.replace(tmp_path, font_path)
     except (requests.RequestException, OSError):
         # Best effort, as before: report failure as None instead of raising.
         # A stale ".part" file is harmless; the next attempt overwrites it.
         return None
     return font_path
 FONT_PATH = check_and_download_font()
+# --- HÀM VẼ ĐA NĂNG (GIỮ NGUYÊN) ---
 def universal_draw(image, raw_data, font_path):
     if image is None: return image
     items_to_draw = []
     # Logic tìm box/text
+    # Ưu tiên cấu trúc PaddleX: rec_texts + dt_polys
     processed = False
     if isinstance(raw_data, list) and len(raw_data) > 0 and isinstance(raw_data[0], dict):
         data_dict = raw_data[0]
                 if box and txt: items_to_draw.append((box, txt))
             processed = True
+    # Fallback Logic
     if not processed:
         def hunt(data):
+            if isinstance(data, dict):
+                box = None; text = None
+                for k in ['points', 'box', 'dt_boxes', 'poly']:
+                    if k in data: box = parse_box(data[k]); break
+                for k in ['transcription', 'text', 'rec_text', 'label']:
+                    if k in data: text = data[k]; break
+                if box and text: items_to_draw.append((box, text)); return
+                for v in data.values(): hunt(v)
+            elif isinstance(data, (list, tuple)):
                 if len(data) == 2 and isinstance(data[0], list) and len(data[0]) == 4:
                     box = parse_box(data[0])
                     txt_obj = data[1]
                     text = txt_obj[0] if isinstance(txt_obj, (list, tuple)) else txt_obj
+                    if box and isinstance(text, str): items_to_draw.append((box, text)); return
                 for item in data: hunt(item)
         hunt(raw_data)
     return canvas
+# --- HÀM XỬ LÝ TEXT (GIỮ NGUYÊN) ---
 def deep_extract_text(data):
     found_texts = []
     if isinstance(data, str):
         if len(data.strip()) > 0: return [data]
         return []
     if isinstance(data, (list, tuple)):
         for item in data: found_texts.extend(deep_extract_text(item))
     elif isinstance(data, dict):
         for val in data.values(): found_texts.extend(deep_extract_text(val))
+    elif hasattr(data, '__dict__'): found_texts.extend(deep_extract_text(data.__dict__))
     return found_texts
 def clean_text_result(text_list):
     cleaned = []
     block_list = ['min', 'max', 'general', 'header', 'footer', 'structure']
     for t in text_list:
         t = t.strip()
+        if len(t) < 2 and not any(u'\u4e00' <= c <= u'\u9fff' for c in t): continue
+        if t.lower().endswith(('.ttf', '.json', '.pdparams', '.yml', '.log')): continue
+        if t.lower() in block_list: continue
         if not re.search(r'[\w\u4e00-\u9fff]', t): continue
         cleaned.append(t)
     return cleaned
+# --- MAIN PREDICT (CẬP NHẬT THAM SỐ LANG) ---
+def predict(image, lang_code):
     if image is None: return None, "Chưa có ảnh.", "No Data"
     try:
+        # Lấy model tương ứng với ngôn ngữ đã chọn
+        current_ocr = get_ocr_model(lang_code)
         # Chuẩn bị ảnh đầu vào
         original_pil = image.copy() if isinstance(image, Image.Image) else Image.fromarray(image).copy()
         image_np = np.array(image)
+        # 1. OCR (Sử dụng model đã chọn)
+        raw_result = current_ocr.ocr(image_np)
         # 2. XỬ LÝ ẢNH ĐỂ VẼ
         target_image_for_drawing = original_pil
+        # Kiểm tra xem Paddle có chỉnh sửa ảnh không
+        if isinstance(raw_result, list) and len(raw_result) > 0 and isinstance(raw_result[0], dict):
+            if 'doc_preprocessor_res' in raw_result[0]:
+                proc_res = raw_result[0]['doc_preprocessor_res']
+                if 'output_img' in proc_res:
+                    print("Phát hiện ảnh đã qua xử lý hình học. Đang đồng bộ tọa độ...")
+                    numpy_img = proc_res['output_img']
+                    target_image_for_drawing = Image.fromarray(numpy_img)
+        # 3. Vẽ lên ảnh ĐÚNG (Target Image)
         annotated_image = universal_draw(target_image_for_drawing, raw_result, FONT_PATH)
         # 4. Xử lý Text
         # Debug Info
         debug_str = str(raw_result)[:1000]
+        debug_info = f"Language Mode: {lang_code}\nUsed Image Source: {'Preprocessed' if target_image_for_drawing != original_pil else 'Original'}\nData Preview:\n{debug_str}..."
         return annotated_image, text_output, debug_info
         import traceback
         return image, f"Lỗi: {str(e)}", traceback.format_exc()
+# --- GIAO DIỆN (CẬP NHẬT DROPDOWN) ---
+with gr.Blocks(title="PaddleOCR Multi-Language Overlay") as iface:
+    gr.Markdown("## PaddleOCR Multi-Language - High Precision Overlay")
     with gr.Row():
+        with gr.Column():
             input_img = gr.Image(type="pil", label="Input Image")
+            # Thêm Dropdown chọn ngôn ngữ
+            # 'vi' bao phủ hầu hết các ngôn ngữ Latin + Tiếng Việt
+            # 'ch' cho tiếng Trung
+            # 'en', 'fr', 'de' cho các model chuyên biệt nếu cần
             lang_dropdown = gr.Dropdown(
+                choices=["vi", "ch", "en", "fr", "de", "es", "it", "pt", "ru", "ja", "ko"],
+                value="vi",
+                label="Chọn Ngôn Ngữ (Language)",
+                info="Chọn 'vi' cho Tiếng Việt & hầu hết ngôn ngữ Latin (Anh, Pháp, Đức...)."
             )
             submit_btn = gr.Button("RUN OCR", variant="primary")
+        with gr.Column():
             with gr.Tabs():
+                with gr.TabItem("🖼️ Kết quả Khớp Tọa Độ"):
                     output_img = gr.Image(type="pil", label="Overlay Result")
+                with gr.TabItem("📝 Văn bản"):
                     output_txt = gr.Textbox(label="Text Content", lines=15)
                 with gr.TabItem("🐞 Debug"):
                     output_debug = gr.Textbox(label="Debug Info", lines=15)
     submit_btn.click(
         fn=predict,
+        inputs=[input_img, lang_dropdown], # Truyền thêm lang_dropdown
         outputs=[output_img, output_txt, output_debug]
     )