Spaces:

ranbac
/

PaddleOCR

Running

App Files Community

ranbac committed 22 days ago

Commit

d72652a

verified ·

1 Parent(s): f082006

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -57

app.py CHANGED Viewed

@@ -17,52 +17,64 @@ import requests
 # Tắt log thừa
 logging.getLogger("ppocr").setLevel(logging.WARNING)
-# --- QUẢN LÝ MODEL ĐA NGÔN NGỮ (THAY ĐỔI ĐỂ HỖ TRỢ THÊM NGÔN NGỮ) ---
-print("Đang khởi tạo hệ thống quản lý Model...")
-# Cache để lưu các model đã tải, tránh load lại gây chậm
-OCR_ENGINES = {}
-def get_ocr_model(lang_code):
-    # Nếu model ngôn ngữ này đã load rồi thì dùng luôn
-    if lang_code in OCR_ENGINES:
-        return OCR_ENGINES[lang_code]
-    print(f"Đang tải model ngôn ngữ: {lang_code}...")
-    try:
-        # Khởi tạo PaddleOCR với ngôn ngữ được chọn
-        # 'vi' hỗ trợ tốt Tiếng Việt và Latin. 'en' hỗ trợ chung Latin. 'ch' là Tiếng Trung.
-        engine = PaddleOCR(
-            use_textline_orientation=True,
-            use_doc_orientation_classify=False,
-            use_doc_unwarping=False,
-            lang=lang_code
-        )
-        OCR_ENGINES[lang_code] = engine
-        return engine
-    except Exception as e:
-        print(f"Lỗi khởi tạo ngôn ngữ {lang_code}: {e}. Thử khởi tạo lại chế độ cơ bản.")
-        # Fallback nếu tham số lỗi
-        engine = PaddleOCR(lang='en')
-        OCR_ENGINES[lang_code] = engine
-        return engine
-# --- TẢI FONT (GIỮ NGUYÊN) ---
-def check_and_download_font():
-    font_path = "./simfang.ttf"
     if not os.path.exists(font_path):
         try:
-            url = "https://github.com/StellarCN/scp_zh/raw/master/fonts/SimFang.ttf"
             r = requests.get(url, allow_redirects=True)
             with open(font_path, 'wb') as f:
                 f.write(r.content)
         except:
             return None
     return font_path
-FONT_PATH = check_and_download_font()
-# --- HÀM VẼ ĐA NĂNG (GIỮ NGUYÊN) ---
 def universal_draw(image, raw_data, font_path):
     if image is None: return image
@@ -76,6 +88,7 @@ def universal_draw(image, raw_data, font_path):
     try:
         font_size = 24
         font = ImageFont.truetype(font_path, font_size) if font_path else ImageFont.load_default()
     except:
         font = ImageFont.load_default()
@@ -93,7 +106,6 @@ def universal_draw(image, raw_data, font_path):
     items_to_draw = []
     # Logic tìm box/text
-    # Ưu tiên cấu trúc PaddleX: rec_texts + dt_polys
     processed = False
     if isinstance(raw_data, list) and len(raw_data) > 0 and isinstance(raw_data[0], dict):
         data_dict = raw_data[0]
@@ -162,32 +174,36 @@ def clean_text_result(text_list):
     block_list = ['min', 'max', 'general', 'header', 'footer', 'structure']
     for t in text_list:
         t = t.strip()
-        if len(t) < 2 and not any(u'\u4e00' <= c <= u'\u9fff' for c in t): continue
         if t.lower().endswith(('.ttf', '.json', '.pdparams', '.yml', '.log')): continue
         if t.lower() in block_list: continue
-        if not re.search(r'[\w\u4e00-\u9fff]', t): continue
         cleaned.append(t)
     return cleaned
-# --- MAIN PREDICT (CẬP NHẬT THAM SỐ LANG) ---
 def predict(image, lang_code):
     if image is None: return None, "Chưa có ảnh.", "No Data"
-    try:
-        # Lấy model tương ứng với ngôn ngữ đã chọn
-        current_ocr = get_ocr_model(lang_code)
         # Chuẩn bị ảnh đầu vào
         original_pil = image.copy() if isinstance(image, Image.Image) else Image.fromarray(image).copy()
         image_np = np.array(image)
-        # 1. OCR (Sử dụng model đã chọn)
         raw_result = current_ocr.ocr(image_np)
         # 2. XỬ LÝ ẢNH ĐỂ VẼ
         target_image_for_drawing = original_pil
-        # Kiểm tra xem Paddle có chỉnh sửa ảnh không
         if isinstance(raw_result, list) and len(raw_result) > 0 and isinstance(raw_result[0], dict):
             if 'doc_preprocessor_res' in raw_result[0]:
                 proc_res = raw_result[0]['doc_preprocessor_res']
@@ -196,8 +212,8 @@ def predict(image, lang_code):
                     numpy_img = proc_res['output_img']
                     target_image_for_drawing = Image.fromarray(numpy_img)
-        # 3. Vẽ lên ảnh ĐÚNG (Target Image)
-        annotated_image = universal_draw(target_image_for_drawing, raw_result, FONT_PATH)
         # 4. Xử lý Text
         all_texts = deep_extract_text(raw_result)
@@ -206,7 +222,7 @@ def predict(image, lang_code):
         # Debug Info
         debug_str = str(raw_result)[:1000]
-        debug_info = f"Language Mode: {lang_code}\nUsed Image Source: {'Preprocessed' if target_image_for_drawing != original_pil else 'Original'}\nData Preview:\n{debug_str}..."
         return annotated_image, text_output, debug_info
@@ -214,23 +230,20 @@ def predict(image, lang_code):
         import traceback
         return image, f"Lỗi: {str(e)}", traceback.format_exc()
-# --- GIAO DIỆN (CẬP NHẬT DROPDOWN) ---
 with gr.Blocks(title="PaddleOCR Multi-Language Overlay") as iface:
-    gr.Markdown("## PaddleOCR Multi-Language - High Precision Overlay")
     with gr.Row():
         with gr.Column():
             input_img = gr.Image(type="pil", label="Input Image")
-            # Thêm Dropdown chọn ngôn ngữ
-            # 'vi' bao phủ hầu hết các ngôn ngữ Latin + Tiếng Việt
-            # 'ch' cho tiếng Trung
-            # 'en', 'fr', 'de' cho các model chuyên biệt nếu cần
             lang_dropdown = gr.Dropdown(
-                choices=["vi", "ch", "en", "fr", "de", "es", "it", "pt", "ru", "ja", "ko"],
-                value="vi",
-                label="Chọn Ngôn Ngữ (Language)",
-                info="Chọn 'vi' cho Tiếng Việt & hầu hết ngôn ngữ Latin (Anh, Pháp, Đức...)."
             )
             submit_btn = gr.Button("RUN OCR", variant="primary")
@@ -246,7 +259,7 @@ with gr.Blocks(title="PaddleOCR Multi-Language Overlay") as iface:
     submit_btn.click(
         fn=predict,
-        inputs=[input_img, lang_dropdown], # Truyền thêm lang_dropdown
         outputs=[output_img, output_txt, output_debug]
     )

 # Tắt log thừa
 logging.getLogger("ppocr").setLevel(logging.WARNING)
+# --- INITIALISE MULTI-LANGUAGE PADDLEOCR ENGINES ---
+# Engines are built once at import time and stored in `ocr_engines`, keyed by
+# language code. A failed load is logged and leaves its key absent.
+# NOTE(review): `show_log` is passed together with `use_textline_orientation`
+# and the `use_doc_*` options; confirm the installed PaddleOCR release accepts
+# all of them, otherwise each constructor call raises and no engine is stored.
+print("Đang khởi tạo các Model OCR...")
+ocr_engines = {}
+# 1. Chinese model (the previous default)
+try:
+    print(" - Loading Chinese Model...")
+    ocr_engines['ch'] = PaddleOCR(use_textline_orientation=True, use_doc_orientation_classify=False,
+        use_doc_unwarping=False, lang='ch', show_log=False)
+except Exception as e:
+    print(f"Lỗi khởi tạo Chinese Model: {e}")
+# 2. Latin model (new; intended for European languages, Vietnamese, etc.)
+try:
+    print(" - Loading Latin/European/Vietnamese Model...")
+    # Per the original author's note, lang='latin' is meant to cover
+    # Latin-script languages in general (Vietnamese, French, German, ...).
+    ocr_engines['latin'] = PaddleOCR(use_textline_orientation=True, use_doc_orientation_classify=False,
+        use_doc_unwarping=False, lang='latin', show_log=False)
+except Exception as e:
+    print(f"Lỗi khởi tạo Latin Model: {e}")
+# Announce readiness only for engines that actually loaded; previously the
+# "ready" message was printed even when both constructors had failed.
+if ocr_engines:
+    print(f"Các Model đã sẵn sàng: {', '.join(ocr_engines)}")
+else:
+    print("Cảnh báo: không có Model OCR nào được khởi tạo thành công.")
+# --- TẢI FONT (CẬP NHẬT) ---
+def check_and_download_font(font_name, url):
+    """Return the local path of a font, downloading it first if it is missing.
+
+    Args:
+        font_name: File name to store the font under in the working directory.
+        url: HTTP(S) location to fetch the font from when it is not on disk.
+
+    Returns:
+        The path ``./<font_name>`` when the file already exists or was
+        downloaded successfully, otherwise ``None``.
+    """
+    font_path = f"./{font_name}"
     if not os.path.exists(font_path):
         try:
+            print(f"Đang tải font {font_name}...")
+            # A timeout keeps start-up from hanging on a stalled connection.
+            r = requests.get(url, allow_redirects=True, timeout=30)
+            # Reject 4xx/5xx responses so an error page is never saved as a
+            # font (it would then be treated as cached on every later run).
+            r.raise_for_status()
+            # Write to a temporary name and rename, so an interrupted write
+            # never leaves a truncated file at the final path.
+            tmp_path = f"{font_path}.part"
+            with open(tmp_path, 'wb') as f:
                 f.write(r.content)
+            os.replace(tmp_path, font_path)
+            print(f"Đã tải xong {font_name}.")
+        except (requests.RequestException, OSError):
+            print(f"Không thể tải {font_name}.")
             return None
     return font_path
+# Font for Chinese text (the original font)
+FONT_CH_PATH = check_and_download_font(
+    "simfang.ttf",
+    "https://github.com/StellarCN/scp_zh/raw/master/fonts/SimFang.ttf"
+)
+# Font for Latin-script / Vietnamese text (new; the author's note says Roboto
+# is used so that diacritics render well).
+# NOTE(review): the file is saved as "Roboto-Regular.ttf" but the URL below
+# points at RobotoSlab-Regular.ttf — confirm which typeface is intended.
+FONT_LATIN_PATH = check_and_download_font(
+    "Roboto-Regular.ttf",
+    "https://github.com/google/fonts/raw/main/apache/robotoslab/RobotoSlab-Regular.ttf"
+)
+# Map each language code to its font path; a value is None when
+# check_and_download_font could not fetch that font.
+FONT_MAP = {
+    'ch': FONT_CH_PATH,
+    'latin': FONT_LATIN_PATH
+}
+# --- HÀM VẼ ĐA NĂNG (GIỮ NGUYÊN LOGIC, CHỈ THÊM THAM SỐ FONT) ---
 def universal_draw(image, raw_data, font_path):
     if image is None: return image
     try:
         font_size = 24
+        # Sử dụng font_path được truyền vào
         font = ImageFont.truetype(font_path, font_size) if font_path else ImageFont.load_default()
     except:
         font = ImageFont.load_default()
     items_to_draw = []
     # Logic tìm box/text
     processed = False
     if isinstance(raw_data, list) and len(raw_data) > 0 and isinstance(raw_data[0], dict):
         data_dict = raw_data[0]
     block_list = ['min', 'max', 'general', 'header', 'footer', 'structure']
     for t in text_list:
         t = t.strip()
+        # Chỉnh sửa nhẹ điều kiện lọc để không bỏ qua các từ ngắn Latin
+        if len(t) < 1: continue
         if t.lower().endswith(('.ttf', '.json', '.pdparams', '.yml', '.log')): continue
         if t.lower() in block_list: continue
         cleaned.append(t)
     return cleaned
+# --- MAIN PREDICT (CẬP NHẬT) ---
 def predict(image, lang_code):
     if image is None: return None, "Chưa có ảnh.", "No Data"
+    # Chọn OCR Engine
+    current_ocr = ocr_engines.get(lang_code)
+    if not current_ocr:
+        return image, "Lỗi: Model ngôn ngữ chưa được tải hoặc không tồn tại.", ""
+    # Chọn Font tương ứng
+    current_font = FONT_MAP.get(lang_code, FONT_LATIN_PATH)
+    try:
         # Chuẩn bị ảnh đầu vào
         original_pil = image.copy() if isinstance(image, Image.Image) else Image.fromarray(image).copy()
         image_np = np.array(image)
+        # 1. OCR (Dùng engine đã chọn)
         raw_result = current_ocr.ocr(image_np)
         # 2. XỬ LÝ ẢNH ĐỂ VẼ
         target_image_for_drawing = original_pil
         if isinstance(raw_result, list) and len(raw_result) > 0 and isinstance(raw_result[0], dict):
             if 'doc_preprocessor_res' in raw_result[0]:
                 proc_res = raw_result[0]['doc_preprocessor_res']
                     numpy_img = proc_res['output_img']
                     target_image_for_drawing = Image.fromarray(numpy_img)
+        # 3. Vẽ lên ảnh (Truyền đúng font)
+        annotated_image = universal_draw(target_image_for_drawing, raw_result, current_font)
         # 4. Xử lý Text
         all_texts = deep_extract_text(raw_result)
         # Debug Info
         debug_str = str(raw_result)[:1000]
+        debug_info = f"Language: {lang_code}\nFont Used: {current_font}\nUsed Image Source: {'Preprocessed' if target_image_for_drawing != original_pil else 'Original'}\nData Preview:\n{debug_str}..."
         return annotated_image, text_output, debug_info
         import traceback
         return image, f"Lỗi: {str(e)}", traceback.format_exc()
+# --- GIAO DIỆN (CẬP NHẬT) ---
 with gr.Blocks(title="PaddleOCR Multi-Language Overlay") as iface:
+    gr.Markdown("## PaddleOCR - High Precision Overlay (Multi-Language)")
     with gr.Row():
         with gr.Column():
             input_img = gr.Image(type="pil", label="Input Image")
+            # Dropdown chọn ngôn ngữ
             lang_dropdown = gr.Dropdown(
+                choices=["ch", "latin"],
+                value="ch",
+                label="Language / Ngôn ngữ",
+                info="ch: Tiếng Trung | latin: Anh, Việt, Pháp, Đức, Tây Ban Nha..."
             )
             submit_btn = gr.Button("RUN OCR", variant="primary")
     submit_btn.click(
         fn=predict,
+        inputs=[input_img, lang_dropdown],
         outputs=[output_img, output_txt, output_debug]
     )