Spaces:

ranbac
/

PaddleOCR

Sleeping

App Files Community

ranbac committed 22 days ago

Commit

3c3f016

verified ·

1 Parent(s): f633237

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -50

app.py CHANGED Viewed

@@ -17,35 +17,18 @@ import requests
 # Tắt log thừa
 logging.getLogger("ppocr").setLevel(logging.WARNING)
-# --- QUẢN LÝ MODEL ĐA NGÔN NGỮ ---
-# Cache để lưu các model đã load, tránh load lại gây chậm
-OCR_CACHE = {}
-def get_ocr_model(lang_code='ch'):
-    if lang_code in OCR_CACHE:
-        return OCR_CACHE[lang_code]
-    print(f"Đang khởi tạo PaddleOCR (Lang: {lang_code})...")
-    try:
-        # Cấu hình tối ưu (giữ nguyên logic cũ)
-        model = PaddleOCR(use_textline_orientation=True,
-                          use_doc_orientation_classify=False,
-                          use_doc_unwarping=False,
-                          lang=lang_code)
-    except Exception as e:
-        print(f"Lỗi khởi tạo nâng cao cho {lang_code}: {e}. Chuyển về chế độ mặc định.")
-        model = PaddleOCR(lang=lang_code)
-    OCR_CACHE[lang_code] = model
-    print(f"Model {lang_code} đã sẵn sàng!")
-    return model
-# Khởi tạo trước model mặc định (Trung + Việt) để chạy nhanh lần đầu
-print("Pre-loading models...")
-get_ocr_model('ch')
-# get_ocr_model('vi') # Bỏ comment nếu muốn load sẵn tiếng Việt ngay khi bật app
-# --- TẢI FONT (GIỮ NGUYÊN) ---
 def check_and_download_font():
     font_path = "./simfang.ttf"
     if not os.path.exists(font_path):
@@ -60,7 +43,7 @@ def check_and_download_font():
 FONT_PATH = check_and_download_font()
-# --- HÀM VẼ ĐA NĂNG (GIỮ NGUYÊN) ---
 def universal_draw(image, raw_data, font_path):
     if image is None: return image
@@ -91,6 +74,7 @@ def universal_draw(image, raw_data, font_path):
     items_to_draw = []
     # Logic tìm box/text
     processed = False
     if isinstance(raw_data, list) and len(raw_data) > 0 and isinstance(raw_data[0], dict):
         data_dict = raw_data[0]
@@ -141,7 +125,7 @@ def universal_draw(image, raw_data, font_path):
     return canvas
-# --- HÀM XỬ LÝ TEXT (GIỮ NGUYÊN) ---
 def deep_extract_text(data):
     found_texts = []
     if isinstance(data, str):
@@ -159,47 +143,39 @@ def clean_text_result(text_list):
     block_list = ['min', 'max', 'general', 'header', 'footer', 'structure']
     for t in text_list:
         t = t.strip()
-        # Giữ lại logic cũ: nếu < 2 ký tự và không phải chữ Hán thì bỏ.
-        # Tuy nhiên với Tiếng Việt, các từ ngắn vẫn quan trọng, nhưng để "giữ nguyên" logic cũ, ta không sửa dòng này.
-        if len(t) < 2 and not any(u'\u4e00' <= c <= u'\u9fff' for c in t):
-            # Logic cũ ưu tiên tiếng Trung, có thể lọc mất từ tiếng Việt ngắn (ví dụ "a", "à").
-            # Nhưng theo yêu cầu "tuyệt đối giữ nguyên", tôi sẽ không sửa logic lọc này.
-            continue
         if t.lower().endswith(('.ttf', '.json', '.pdparams', '.yml', '.log')): continue
         if t.lower() in block_list: continue
         if not re.search(r'[\w\u4e00-\u9fff]', t): continue
         cleaned.append(t)
     return cleaned
-# --- MAIN PREDICT (CẬP NHẬT LANG) ---
-def predict(image, lang_choice):
     if image is None: return None, "Chưa có ảnh.", "No Data"
     try:
-        # Lấy model đúng theo ngôn ngữ người dùng chọn
-        current_ocr = get_ocr_model(lang_choice)
         # Chuẩn bị ảnh đầu vào
         original_pil = image.copy() if isinstance(image, Image.Image) else Image.fromarray(image).copy()
         image_np = np.array(image)
         # 1. OCR
-        raw_result = current_ocr.ocr(image_np)
         # 2. XỬ LÝ ẢNH ĐỂ VẼ (KEY FIX: Lấy ảnh từ Preprocessor nếu có)
         target_image_for_drawing = original_pil
-        # Kiểm tra xem Paddle có chỉnh sửa ảnh không
         if isinstance(raw_result, list) and len(raw_result) > 0 and isinstance(raw_result[0], dict):
             if 'doc_preprocessor_res' in raw_result[0]:
                 proc_res = raw_result[0]['doc_preprocessor_res']
                 if 'output_img' in proc_res:
                     print("Phát hiện ảnh đã qua xử lý hình học. Đang đồng bộ tọa độ...")
                     numpy_img = proc_res['output_img']
                     target_image_for_drawing = Image.fromarray(numpy_img)
-        # 3. Vẽ lên ảnh ĐÚNG
         annotated_image = universal_draw(target_image_for_drawing, raw_result, FONT_PATH)
         # 4. Xử lý Text
@@ -209,7 +185,7 @@ def predict(image, lang_choice):
         # Debug Info
         debug_str = str(raw_result)[:1000]
-        debug_info = f"Language: {lang_choice}\nUsed Image Source: {'Preprocessed' if target_image_for_drawing != original_pil else 'Original'}\nData Preview:\n{debug_str}..."
         return annotated_image, text_output, debug_info
@@ -217,15 +193,13 @@ def predict(image, lang_choice):
         import traceback
         return image, f"Lỗi: {str(e)}", traceback.format_exc()
-# --- GIAO DIỆN (CẬP NHẬT INPUT) ---
-with gr.Blocks(title="PaddleOCR Multi-Lang Overlay") as iface:
-    gr.Markdown("## PaddleOCR (Chinese/Vietnamese) - High Precision Overlay")
     with gr.Row():
         with gr.Column():
             input_img = gr.Image(type="pil", label="Input Image")
-            # Thêm Dropdown chọn ngôn ngữ
-            lang_select = gr.Dropdown(choices=["ch", "vi", "en"], value="ch", label="Chọn Ngôn ngữ (Language)")
             submit_btn = gr.Button("RUN OCR", variant="primary")
         with gr.Column():
@@ -239,7 +213,7 @@ with gr.Blocks(title="PaddleOCR Multi-Lang Overlay") as iface:
     submit_btn.click(
         fn=predict,
-        inputs=[input_img, lang_select], # Thêm lang_select vào input
         outputs=[output_img, output_txt, output_debug]
     )

 # Tắt log thừa
 logging.getLogger("ppocr").setLevel(logging.WARNING)
+print("Đang khởi tạo PaddleOCR (Coordinate Sync Mode)...")
+try:
+    ocr = PaddleOCR(use_textline_orientation=True, use_doc_orientation_classify=False,
+        use_doc_unwarping=False, lang='ch')
+except Exception as e:
+    print(f"Lỗi khởi tạo: {e}. Chuyển về chế độ mặc định.")
+    ocr = PaddleOCR(lang='ch')
+print("Model đã sẵn sàng!")
+# --- TẢI FONT ---
 def check_and_download_font():
     font_path = "./simfang.ttf"
     if not os.path.exists(font_path):
 FONT_PATH = check_and_download_font()
+# --- HÀM VẼ ĐA NĂNG ---
 def universal_draw(image, raw_data, font_path):
     if image is None: return image
     items_to_draw = []
     # Logic tìm box/text
+    # Ưu tiên cấu trúc PaddleX: rec_texts + dt_polys
     processed = False
     if isinstance(raw_data, list) and len(raw_data) > 0 and isinstance(raw_data[0], dict):
         data_dict = raw_data[0]
     return canvas
+# --- HÀM XỬ LÝ TEXT ---
 def deep_extract_text(data):
     found_texts = []
     if isinstance(data, str):
     block_list = ['min', 'max', 'general', 'header', 'footer', 'structure']
     for t in text_list:
         t = t.strip()
+        if len(t) < 2 and not any(u'\u4e00' <= c <= u'\u9fff' for c in t): continue
         if t.lower().endswith(('.ttf', '.json', '.pdparams', '.yml', '.log')): continue
         if t.lower() in block_list: continue
         if not re.search(r'[\w\u4e00-\u9fff]', t): continue
         cleaned.append(t)
     return cleaned
+# --- MAIN PREDICT ---
+def predict(image):
     if image is None: return None, "Chưa có ảnh.", "No Data"
     try:
         # Chuẩn bị ảnh đầu vào
         original_pil = image.copy() if isinstance(image, Image.Image) else Image.fromarray(image).copy()
         image_np = np.array(image)
         # 1. OCR
+        raw_result = ocr.ocr(image_np)
         # 2. XỬ LÝ ẢNH ĐỂ VẼ (KEY FIX: Lấy ảnh từ Preprocessor nếu có)
         target_image_for_drawing = original_pil
+        # Kiểm tra xem Paddle có chỉnh sửa ảnh không (dựa vào key 'doc_preprocessor_res')
         if isinstance(raw_result, list) and len(raw_result) > 0 and isinstance(raw_result[0], dict):
             if 'doc_preprocessor_res' in raw_result[0]:
                 proc_res = raw_result[0]['doc_preprocessor_res']
+                # Nếu có ảnh đầu ra đã chỉnh sửa (output_img)
                 if 'output_img' in proc_res:
                     print("Phát hiện ảnh đã qua xử lý hình học. Đang đồng bộ tọa độ...")
                     numpy_img = proc_res['output_img']
                     target_image_for_drawing = Image.fromarray(numpy_img)
+        # 3. Vẽ lên ảnh ĐÚNG (Target Image)
         annotated_image = universal_draw(target_image_for_drawing, raw_result, FONT_PATH)
         # 4. Xử lý Text
         # Debug Info
         debug_str = str(raw_result)[:1000]
+        debug_info = f"Used Image Source: {'Preprocessed' if target_image_for_drawing != original_pil else 'Original'}\nData Preview:\n{debug_str}..."
         return annotated_image, text_output, debug_info
         import traceback
         return image, f"Lỗi: {str(e)}", traceback.format_exc()
+# --- GIAO DIỆN ---
+with gr.Blocks(title="PaddleOCR Perfect Overlay") as iface:
+    gr.Markdown("## PaddleOCR Chinese - High Precision Overlay")
     with gr.Row():
         with gr.Column():
             input_img = gr.Image(type="pil", label="Input Image")
             submit_btn = gr.Button("RUN OCR", variant="primary")
         with gr.Column():
     submit_btn.click(
         fn=predict,
+        inputs=input_img,
         outputs=[output_img, output_txt, output_debug]
     )