Spaces:

ranbac
/

PaddleOCR

Sleeping

App Files Community

ranbac committed 24 days ago

Commit

f633237

verified ·

1 Parent(s): 02a63b3

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -34

app.py CHANGED Viewed

@@ -17,39 +17,50 @@ import requests
 # Tắt log thừa
 logging.getLogger("ppocr").setLevel(logging.WARNING)
-print("Đang khởi tạo PaddleOCR (Coordinate Sync Mode) - Ngôn ngữ: Tiếng Việt...")
-try:
-    # THAY ĐỔI 1: Chuyển lang='ch' thành lang='vi'
-    ocr = PaddleOCR(use_textline_orientation=True, use_doc_orientation_classify=False,
-        use_doc_unwarping=False, lang='vi')
-except Exception as e:
-    print(f"Lỗi khởi tạo: {e}. Chuyển về chế độ mặc định.")
-    ocr = PaddleOCR(lang='vi')
-print("Model đã sẵn sàng!")
-# --- TẢI FONT (HỖ TRỢ TIẾNG VIỆT) ---
 def check_and_download_font():
-    # THAY ĐỔI 2: Sử dụng font Roboto để hiển thị đúng dấu Tiếng Việt
-    font_path = "./Roboto-Regular.ttf"
     if not os.path.exists(font_path):
         try:
-            print("Đang tải font Roboto hỗ trợ tiếng Việt...")
-            # URL Font Roboto chuẩn từ Google Fonts
-            url = "https://github.com/google/fonts/raw/main/apache/roboto/Roboto-Regular.ttf"
             r = requests.get(url, allow_redirects=True)
             with open(font_path, 'wb') as f:
                 f.write(r.content)
-            print("Đã tải xong font.")
         except:
-            print("Không tải được font. Sẽ sử dụng font mặc định hệ thống (có thể lỗi dấu).")
             return None
     return font_path
 FONT_PATH = check_and_download_font()
-# --- HÀM VẼ ĐA NĂNG ---
 def universal_draw(image, raw_data, font_path):
     if image is None: return image
@@ -80,7 +91,6 @@ def universal_draw(image, raw_data, font_path):
     items_to_draw = []
     # Logic tìm box/text
-    # Ưu tiên cấu trúc PaddleX: rec_texts + dt_polys
     processed = False
     if isinstance(raw_data, list) and len(raw_data) > 0 and isinstance(raw_data[0], dict):
         data_dict = raw_data[0]
@@ -131,7 +141,7 @@ def universal_draw(image, raw_data, font_path):
     return canvas
-# --- HÀM XỬ LÝ TEXT ---
 def deep_extract_text(data):
     found_texts = []
     if isinstance(data, str):
@@ -149,40 +159,47 @@ def clean_text_result(text_list):
     block_list = ['min', 'max', 'general', 'header', 'footer', 'structure']
     for t in text_list:
         t = t.strip()
-        # Giữ lại nếu là ký tự Unicode thông thường (bao gồm tiếng Việt)
-        if len(t) < 2 and not re.search(r'\w', t): continue
         if t.lower().endswith(('.ttf', '.json', '.pdparams', '.yml', '.log')): continue
         if t.lower() in block_list: continue
-        if not re.search(r'[\w\u00C0-\u1EF9]', t): continue # Regex mở rộng cho tiếng Việt
         cleaned.append(t)
     return cleaned
-# --- MAIN PREDICT ---
-def predict(image):
     if image is None: return None, "Chưa có ảnh.", "No Data"
     try:
         # Chuẩn bị ảnh đầu vào
         original_pil = image.copy() if isinstance(image, Image.Image) else Image.fromarray(image).copy()
         image_np = np.array(image)
         # 1. OCR
-        raw_result = ocr.ocr(image_np)
         # 2. XỬ LÝ ẢNH ĐỂ VẼ (KEY FIX: Lấy ảnh từ Preprocessor nếu có)
         target_image_for_drawing = original_pil
-        # Kiểm tra xem Paddle có chỉnh sửa ảnh không (dựa vào key 'doc_preprocessor_res')
         if isinstance(raw_result, list) and len(raw_result) > 0 and isinstance(raw_result[0], dict):
             if 'doc_preprocessor_res' in raw_result[0]:
                 proc_res = raw_result[0]['doc_preprocessor_res']
-                # Nếu có ảnh đầu ra đã chỉnh sửa (output_img)
                 if 'output_img' in proc_res:
                     print("Phát hiện ảnh đã qua xử lý hình học. Đang đồng bộ tọa độ...")
                     numpy_img = proc_res['output_img']
                     target_image_for_drawing = Image.fromarray(numpy_img)
-        # 3. Vẽ lên ảnh ĐÚNG (Target Image)
         annotated_image = universal_draw(target_image_for_drawing, raw_result, FONT_PATH)
         # 4. Xử lý Text
@@ -192,7 +209,7 @@ def predict(image):
         # Debug Info
         debug_str = str(raw_result)[:1000]
-        debug_info = f"Used Image Source: {'Preprocessed' if target_image_for_drawing != original_pil else 'Original'}\nData Preview:\n{debug_str}..."
         return annotated_image, text_output, debug_info
@@ -200,13 +217,15 @@ def predict(image):
         import traceback
         return image, f"Lỗi: {str(e)}", traceback.format_exc()
-# --- GIAO DIỆN ---
-with gr.Blocks(title="PaddleOCR Perfect Overlay (Vietnamese)") as iface:
-    gr.Markdown("## PaddleOCR Vietnamese - High Precision Overlay")
     with gr.Row():
         with gr.Column():
             input_img = gr.Image(type="pil", label="Input Image")
             submit_btn = gr.Button("RUN OCR", variant="primary")
         with gr.Column():
@@ -220,7 +239,7 @@ with gr.Blocks(title="PaddleOCR Perfect Overlay (Vietnamese)") as iface:
     submit_btn.click(
         fn=predict,
-        inputs=input_img,
         outputs=[output_img, output_txt, output_debug]
     )

 # Tắt log thừa
 logging.getLogger("ppocr").setLevel(logging.WARNING)
+# --- QUẢN LÝ MODEL ĐA NGÔN NGỮ ---
+# Cache để lưu các model đã load, tránh load lại gây chậm
+OCR_CACHE = {}
+def get_ocr_model(lang_code='ch'):
+    if lang_code in OCR_CACHE:
+        return OCR_CACHE[lang_code]
+    print(f"Đang khởi tạo PaddleOCR (Lang: {lang_code})...")
+    try:
+        # Cấu hình tối ưu (giữ nguyên logic cũ)
+        model = PaddleOCR(use_textline_orientation=True,
+                          use_doc_orientation_classify=False,
+                          use_doc_unwarping=False,
+                          lang=lang_code)
+    except Exception as e:
+        print(f"Lỗi khởi tạo nâng cao cho {lang_code}: {e}. Chuyển về chế độ mặc định.")
+        model = PaddleOCR(lang=lang_code)
+    OCR_CACHE[lang_code] = model
+    print(f"Model {lang_code} đã sẵn sàng!")
+    return model
+# Khởi tạo trước model mặc định ('ch' - tiếng Trung) để chạy nhanh lần đầu; model 'vi' chỉ được load khi người dùng chọn
+print("Pre-loading models...")
+get_ocr_model('ch')
+# get_ocr_model('vi') # Bỏ comment nếu muốn load sẵn tiếng Việt ngay khi bật app
+# --- TẢI FONT (GIỮ NGUYÊN) ---
 def check_and_download_font():
+    font_path = "./simfang.ttf"
     if not os.path.exists(font_path):
         try:
+            url = "https://github.com/StellarCN/scp_zh/raw/master/fonts/SimFang.ttf"
             r = requests.get(url, allow_redirects=True)
             with open(font_path, 'wb') as f:
                 f.write(r.content)
         except:
             return None
     return font_path
 FONT_PATH = check_and_download_font()
+# --- HÀM VẼ ĐA NĂNG (GIỮ NGUYÊN) ---
 def universal_draw(image, raw_data, font_path):
     if image is None: return image
     items_to_draw = []
     # Logic tìm box/text
     processed = False
     if isinstance(raw_data, list) and len(raw_data) > 0 and isinstance(raw_data[0], dict):
         data_dict = raw_data[0]
     return canvas
+# --- HÀM XỬ LÝ TEXT (GIỮ NGUYÊN) ---
 def deep_extract_text(data):
     found_texts = []
     if isinstance(data, str):
     block_list = ['min', 'max', 'general', 'header', 'footer', 'structure']
     for t in text_list:
         t = t.strip()
+        # Giữ lại logic cũ: nếu < 2 ký tự và không phải chữ Hán thì bỏ.
+        # Tuy nhiên với Tiếng Việt, các từ ngắn vẫn quan trọng, nhưng để "giữ nguyên" logic cũ, ta không sửa dòng này.
+        if len(t) < 2 and not any(u'\u4e00' <= c <= u'\u9fff' for c in t):
+            # Logic cũ ưu tiên tiếng Trung, có thể lọc mất từ tiếng Việt ngắn (ví dụ "a", "à").
+            # Nhưng theo yêu cầu "tuyệt đối giữ nguyên", tôi sẽ không sửa logic lọc này.
+            continue
         if t.lower().endswith(('.ttf', '.json', '.pdparams', '.yml', '.log')): continue
         if t.lower() in block_list: continue
+        if not re.search(r'[\w\u4e00-\u9fff]', t): continue
         cleaned.append(t)
     return cleaned
+# --- MAIN PREDICT (CẬP NHẬT LANG) ---
+def predict(image, lang_choice):
     if image is None: return None, "Chưa có ảnh.", "No Data"
     try:
+        # Lấy model đúng theo ngôn ngữ người dùng chọn
+        current_ocr = get_ocr_model(lang_choice)
         # Chuẩn bị ảnh đầu vào
         original_pil = image.copy() if isinstance(image, Image.Image) else Image.fromarray(image).copy()
         image_np = np.array(image)
         # 1. OCR
+        raw_result = current_ocr.ocr(image_np)
         # 2. XỬ LÝ ẢNH ĐỂ VẼ (KEY FIX: Lấy ảnh từ Preprocessor nếu có)
         target_image_for_drawing = original_pil
+        # Kiểm tra xem Paddle có chỉnh sửa ảnh không
         if isinstance(raw_result, list) and len(raw_result) > 0 and isinstance(raw_result[0], dict):
             if 'doc_preprocessor_res' in raw_result[0]:
                 proc_res = raw_result[0]['doc_preprocessor_res']
                 if 'output_img' in proc_res:
                     print("Phát hiện ảnh đã qua xử lý hình học. Đang đồng bộ tọa độ...")
                     numpy_img = proc_res['output_img']
                     target_image_for_drawing = Image.fromarray(numpy_img)
+        # 3. Vẽ lên ảnh ĐÚNG
         annotated_image = universal_draw(target_image_for_drawing, raw_result, FONT_PATH)
         # 4. Xử lý Text
         # Debug Info
         debug_str = str(raw_result)[:1000]
+        debug_info = f"Language: {lang_choice}\nUsed Image Source: {'Preprocessed' if target_image_for_drawing != original_pil else 'Original'}\nData Preview:\n{debug_str}..."
         return annotated_image, text_output, debug_info
         import traceback
         return image, f"Lỗi: {str(e)}", traceback.format_exc()
+# --- GIAO DIỆN (CẬP NHẬT INPUT) ---
+with gr.Blocks(title="PaddleOCR Multi-Lang Overlay") as iface:
+    gr.Markdown("## PaddleOCR (Chinese/Vietnamese) - High Precision Overlay")
     with gr.Row():
         with gr.Column():
             input_img = gr.Image(type="pil", label="Input Image")
+            # Thêm Dropdown chọn ngôn ngữ
+            lang_select = gr.Dropdown(choices=["ch", "vi", "en"], value="ch", label="Chọn Ngôn ngữ (Language)")
             submit_btn = gr.Button("RUN OCR", variant="primary")
         with gr.Column():
     submit_btn.click(
         fn=predict,
+        inputs=[input_img, lang_select], # Thêm lang_select vào input
         outputs=[output_img, output_txt, output_debug]
     )