Spaces:

ranbac
/

PaddleOCR

Running

App Files Community

ranbac committed 23 days ago

Commit

7dd9372

verified ·

1 Parent(s): 0396510

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -44

app.py CHANGED Viewed

@@ -17,21 +17,50 @@ import requests
 # Tắt log thừa
 logging.getLogger("ppocr").setLevel(logging.WARNING)
-# --- QUẢN LÝ MODEL ĐA NGÔN NGỮ (THAY ĐỔI ĐỂ HỖ TRỢ THÊM NGÔN NGỮ) ---
 print("Đang khởi tạo hệ thống quản lý Model...")
-# Cache để lưu các model đã tải, tránh load lại gây chậm
 OCR_ENGINES = {}
-def get_ocr_model(lang_code):
-    # Nếu model ngôn ngữ này đã load rồi thì dùng luôn
     if lang_code in OCR_ENGINES:
         return OCR_ENGINES[lang_code]
-    print(f"Đang tải model ngôn ngữ: {lang_code}...")
     try:
         # Khởi tạo PaddleOCR với ngôn ngữ được chọn
-        # 'vi' hỗ trợ tốt Tiếng Việt và Latin. 'en' hỗ trợ chung Latin. 'ch' là Tiếng Trung.
         engine = PaddleOCR(
             use_textline_orientation=True,
             use_doc_orientation_classify=False,
@@ -41,8 +70,8 @@ def get_ocr_model(lang_code):
         OCR_ENGINES[lang_code] = engine
         return engine
     except Exception as e:
-        print(f"Lỗi khởi tạo ngôn ngữ {lang_code}: {e}. Thử khởi tạo lại chế độ cơ bản.")
-        # Fallback nếu tham số lỗi
         engine = PaddleOCR(lang='en')
         OCR_ENGINES[lang_code] = engine
         return engine
@@ -66,11 +95,9 @@ FONT_PATH = check_and_download_font()
 def universal_draw(image, raw_data, font_path):
     if image is None: return image
-    # Đảm bảo image là PIL
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
-    # Copy để vẽ
     canvas = image.copy()
     draw = ImageDraw.Draw(canvas)
@@ -80,7 +107,6 @@ def universal_draw(image, raw_data, font_path):
     except:
         font = ImageFont.load_default()
-    # Hàm parse box
     def parse_box(b):
         try:
             if hasattr(b, 'tolist'): b = b.tolist()
@@ -92,8 +118,6 @@ def universal_draw(image, raw_data, font_path):
     items_to_draw = []
-    # Logic tìm box/text
-    # Ưu tiên cấu trúc PaddleX: rec_texts + dt_polys
     processed = False
     if isinstance(raw_data, list) and len(raw_data) > 0 and isinstance(raw_data[0], dict):
         data_dict = raw_data[0]
@@ -107,7 +131,6 @@ def universal_draw(image, raw_data, font_path):
                 if box and txt: items_to_draw.append((box, txt))
             processed = True
-    # Fallback Logic
     if not processed:
         def hunt(data):
             if isinstance(data, dict):
@@ -127,12 +150,9 @@ def universal_draw(image, raw_data, font_path):
                 for item in data: hunt(item)
         hunt(raw_data)
-    # Vẽ
     for box, txt in items_to_draw:
         try:
-            # Vẽ khung đỏ
             draw.polygon(box, outline="red", width=3)
-            # Vẽ chữ
             txt_x, txt_y = box[0]
             if hasattr(draw, "textbbox"):
                 text_bbox = draw.textbbox((txt_x, txt_y), txt, font=font, anchor="lb")
@@ -169,44 +189,43 @@ def clean_text_result(text_list):
         cleaned.append(t)
     return cleaned
-# --- MAIN PREDICT (CẬP NHẬT THAM SỐ LANG) ---
-def predict(image, lang_code):
     if image is None: return None, "Chưa có ảnh.", "No Data"
     try:
-        # Lấy model tương ứng với ngôn ngữ đã chọn
-        current_ocr = get_ocr_model(lang_code)
-        # Chuẩn bị ảnh đầu vào
         original_pil = image.copy() if isinstance(image, Image.Image) else Image.fromarray(image).copy()
         image_np = np.array(image)
-        # 1. OCR (Sử dụng model đã chọn)
         raw_result = current_ocr.ocr(image_np)
-        # 2. XỬ LÝ ẢNH ĐỂ VẼ
         target_image_for_drawing = original_pil
-        # Kiểm tra xem Paddle có chỉnh sửa ảnh không
         if isinstance(raw_result, list) and len(raw_result) > 0 and isinstance(raw_result[0], dict):
             if 'doc_preprocessor_res' in raw_result[0]:
                 proc_res = raw_result[0]['doc_preprocessor_res']
                 if 'output_img' in proc_res:
-                    print("Phát hiện ảnh đã qua xử lý hình học. Đang đồng bộ tọa độ...")
                     numpy_img = proc_res['output_img']
                     target_image_for_drawing = Image.fromarray(numpy_img)
-        # 3. Vẽ lên ảnh ĐÚNG (Target Image)
         annotated_image = universal_draw(target_image_for_drawing, raw_result, FONT_PATH)
-        # 4. Xử lý Text
         all_texts = deep_extract_text(raw_result)
         final_texts = clean_text_result(all_texts)
         text_output = "\n".join(final_texts) if final_texts else "Không tìm thấy văn bản."
-        # Debug Info
         debug_str = str(raw_result)[:1000]
-        debug_info = f"Language Mode: {lang_code}\nUsed Image Source: {'Preprocessed' if target_image_for_drawing != original_pil else 'Original'}\nData Preview:\n{debug_str}..."
         return annotated_image, text_output, debug_info
@@ -214,30 +233,27 @@ def predict(image, lang_code):
         import traceback
         return image, f"Lỗi: {str(e)}", traceback.format_exc()
-# --- GIAO DIỆN (CẬP NHẬT DROPDOWN) ---
-with gr.Blocks(title="PaddleOCR Multi-Language Overlay") as iface:
-    gr.Markdown("## PaddleOCR Multi-Language - High Precision Overlay")
     with gr.Row():
         with gr.Column():
             input_img = gr.Image(type="pil", label="Input Image")
-            # Thêm Dropdown chọn ngôn ngữ
-            # 'vi' bao phủ hầu hết các ngôn ngữ Latin + Tiếng Việt
-            # 'ch' cho tiếng Trung
-            # 'en', 'fr', 'de' cho các model chuyên biệt nếu cần
             lang_dropdown = gr.Dropdown(
-                choices=["vi", "ch", "en", "fr", "de", "es", "it", "pt", "ru", "ja", "ko"],
-                value="vi",
-                label="Chọn Ngôn Ngữ (Language)",
-                info="Chọn 'vi' cho Tiếng Việt & hầu hết ngôn ngữ Latin (Anh, Pháp, Đức...)."
             )
             submit_btn = gr.Button("RUN OCR", variant="primary")
         with gr.Column():
             with gr.Tabs():
-                with gr.TabItem("🖼️ Kết quả Khớp Tọa Độ"):
                     output_img = gr.Image(type="pil", label="Overlay Result")
                 with gr.TabItem("📝 Văn bản"):
                     output_txt = gr.Textbox(label="Text Content", lines=15)
@@ -246,7 +262,7 @@ with gr.Blocks(title="PaddleOCR Multi-Language Overlay") as iface:
     submit_btn.click(
         fn=predict,
-        inputs=[input_img, lang_dropdown], # Truyền thêm lang_dropdown
         outputs=[output_img, output_txt, output_debug]
     )

 # Tắt log thừa
 logging.getLogger("ppocr").setLevel(logging.WARNING)
+# --- QUẢN LÝ MODEL ĐA NGÔN NGỮ ---
 print("Đang khởi tạo hệ thống quản lý Model...")
+# 1. Định nghĩa Mapping từ tên ngôn ngữ sang mã PaddleOCR
+# Danh sách này bao gồm các ngôn ngữ bạn yêu cầu
+LANG_MAP = {
+    "Vietnamese (Tiếng Việt)": "vi",
+    "English (Tiếng Anh)": "en",
+    "Chinese (Trung Quốc)": "ch",
+    "French (Pháp)": "fr",
+    "German (Đức)": "de",
+    "Korean (Hàn Quốc)": "korean",
+    "Japan (Nhật Bản)": "japan",
+    "Spanish (Tây Ban Nha)": "es",
+    "Portuguese (Bồ Đào Nha)": "pt",
+    "Italian (Ý)": "it",
+    "Russian (Nga)": "ru",
+    # Các ngôn ngữ Latin mở rộng (Mapping về mã code cụ thể hoặc 'latin' nếu model chung)
+    "Afrikaans": "af", "Albanian": "sq", "Azerbaijani": "az", "Basque": "eu", "Bosnian": "bs",
+    "Catalan": "ca", "Croatian": "hr", "Czech": "cs", "Danish": "da", "Dutch": "nl",
+    "Estonian": "et", "Finnish": "fi", "Galician": "gl", "Hungarian": "hu", "Icelandic": "is",
+    "Indonesian": "id", "Irish": "ga", "Kurdish": "ku", "Latin": "la", "Latvian": "lv",
+    "Lithuanian": "lt", "Malay": "ms", "Maltese": "mt", "Maori": "mi", "Norwegian": "no",
+    "Occitan": "oc", "Polish": "pl", "Romanian": "ro", "Slovak": "sk", "Slovenian": "sl",
+    "Swahili": "sw", "Swedish": "sv", "Tagalog": "tl", "Turkish": "tr", "Uzbek": "uz",
+    "Welsh": "cy",
+    # Fallback cho các ngôn ngữ hiếm khác vào model Latin chung
+    "General Latin (Other)": "latin"
+}
+# Cache để lưu các model đã tải
 OCR_ENGINES = {}
+def get_ocr_model(lang_name):
+    # Lấy mã code từ tên hiển thị, mặc định là 'en' nếu không tìm thấy
+    lang_code = LANG_MAP.get(lang_name, 'en')
+    # Nếu model này đã load rồi thì dùng lại ngay
     if lang_code in OCR_ENGINES:
         return OCR_ENGINES[lang_code]
+    print(f"🔄 Đang tải model ngôn ngữ: {lang_code}...")
     try:
         # Khởi tạo PaddleOCR với ngôn ngữ được chọn
         engine = PaddleOCR(
             use_textline_orientation=True,
             use_doc_orientation_classify=False,
         OCR_ENGINES[lang_code] = engine
         return engine
     except Exception as e:
+        print(f"⚠️ Lỗi khởi tạo ngôn ngữ {lang_code}: {e}. Chuyển về chế độ Latin mặc định.")
+        # Fallback an toàn
         engine = PaddleOCR(lang='en')
         OCR_ENGINES[lang_code] = engine
         return engine
 def universal_draw(image, raw_data, font_path):
     if image is None: return image
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
     canvas = image.copy()
     draw = ImageDraw.Draw(canvas)
     except:
         font = ImageFont.load_default()
     def parse_box(b):
         try:
             if hasattr(b, 'tolist'): b = b.tolist()
     items_to_draw = []
     processed = False
     if isinstance(raw_data, list) and len(raw_data) > 0 and isinstance(raw_data[0], dict):
         data_dict = raw_data[0]
                 if box and txt: items_to_draw.append((box, txt))
             processed = True
     if not processed:
         def hunt(data):
             if isinstance(data, dict):
                 for item in data: hunt(item)
         hunt(raw_data)
     for box, txt in items_to_draw:
         try:
             draw.polygon(box, outline="red", width=3)
             txt_x, txt_y = box[0]
             if hasattr(draw, "textbbox"):
                 text_bbox = draw.textbbox((txt_x, txt_y), txt, font=font, anchor="lb")
         cleaned.append(t)
     return cleaned
+# --- MAIN PREDICT (CẬP NHẬT: Nhận thêm tham số ngôn ngữ) ---
+def predict(image, selected_lang):
     if image is None: return None, "Chưa có ảnh.", "No Data"
     try:
+        # 1. Lấy model dựa trên ngôn ngữ người dùng chọn
+        current_ocr = get_ocr_model(selected_lang)
+        # Chuẩn bị ảnh
         original_pil = image.copy() if isinstance(image, Image.Image) else Image.fromarray(image).copy()
         image_np = np.array(image)
+        # 2. OCR
         raw_result = current_ocr.ocr(image_np)
+        # 3. XỬ LÝ ẢNH ĐỂ VẼ (Giữ nguyên logic cũ)
         target_image_for_drawing = original_pil
         if isinstance(raw_result, list) and len(raw_result) > 0 and isinstance(raw_result[0], dict):
             if 'doc_preprocessor_res' in raw_result[0]:
                 proc_res = raw_result[0]['doc_preprocessor_res']
                 if 'output_img' in proc_res:
+                    print("Phát hiện ảnh đã qua xử lý hình học.")
                     numpy_img = proc_res['output_img']
                     target_image_for_drawing = Image.fromarray(numpy_img)
+        # 4. Vẽ
         annotated_image = universal_draw(target_image_for_drawing, raw_result, FONT_PATH)
+        # 5. Xử lý Text
         all_texts = deep_extract_text(raw_result)
         final_texts = clean_text_result(all_texts)
         text_output = "\n".join(final_texts) if final_texts else "Không tìm thấy văn bản."
+        # Debug
         debug_str = str(raw_result)[:1000]
+        lang_code_used = LANG_MAP.get(selected_lang, 'unknown')
+        debug_info = f"Language: {selected_lang} (Code: {lang_code_used})\nData Preview:\n{debug_str}..."
         return annotated_image, text_output, debug_info
         import traceback
         return image, f"Lỗi: {str(e)}", traceback.format_exc()
+# --- GIAO DIỆN (CẬP NHẬT: Thêm Dropdown) ---
+with gr.Blocks(title="PaddleOCR Multi-Lang Pro") as iface:
+    gr.Markdown("## PaddleOCR Multi-Language - Precision Overlay")
     with gr.Row():
         with gr.Column():
             input_img = gr.Image(type="pil", label="Input Image")
+            # Dropdown chọn ngôn ngữ
             lang_dropdown = gr.Dropdown(
+                choices=list(LANG_MAP.keys()),
+                value="Vietnamese (Tiếng Việt)",  # Mặc định chọn Tiếng Việt
+                label="Chọn Ngôn Ngữ (Select Language)",
+                info="Chọn đúng ngôn ngữ để nhận diện dấu tốt nhất."
             )
             submit_btn = gr.Button("RUN OCR", variant="primary")
         with gr.Column():
             with gr.Tabs():
+                with gr.TabItem("🖼️ Kết quả"):
                     output_img = gr.Image(type="pil", label="Overlay Result")
                 with gr.TabItem("📝 Văn bản"):
                     output_txt = gr.Textbox(label="Text Content", lines=15)
     submit_btn.click(
         fn=predict,
+        inputs=[input_img, lang_dropdown], # Truyền thêm input ngôn ngữ
         outputs=[output_img, output_txt, output_debug]
     )