Spaces:

ranbac
/

PaddleOCR

Running

App Files Files Community

ranbac committed 22 days ago

Commit

7b9a396

verified ·

1 Parent(s): 755b8bd

Update app.py

Browse files

Files changed (1) hide show

app.py +183 -135

app.py CHANGED Viewed

@@ -1,24 +1,38 @@
 import os
-import cv2
-import json
-import tarfile
-import requests
-import numpy as np
-import gradio as gr
-from PIL import Image, ImageDraw, ImageFont
-from paddleocr import PaddleOCR
# ==========================================
# 1. CONFIGURATION & MODEL DOWNLOAD
# ==========================================
# Environment flags are assigned before the OCR engine is constructed further down.
# NOTE(review): presumably these disable the MKL-DNN backend and quiet native
# logging -- confirm the flag names against the Paddle documentation.
os.environ["FLAGS_use_mkldnn"] = "0"
os.environ["CPP_MIN_LOG_LEVEL"] = "3"
 def check_and_download_font():
     font_path = "./simfang.ttf"
     if not os.path.exists(font_path):
         try:
-            print("Đang tải font SimFang...")
             url = "https://github.com/StellarCN/scp_zh/raw/master/fonts/SimFang.ttf"
             r = requests.get(url, allow_redirects=True)
             with open(font_path, 'wb') as f:
@@ -29,145 +43,179 @@ def check_and_download_font():
# Resolved once at import time; later passed to the drawing helper as the font file to use.
FONT_PATH = check_and_download_font()
def download_model_server(save_dir="./server_models", timeout=60):
    """Download and unpack the detection, recognition and classifier models.

    Each archive is fetched only when its extracted directory is missing.
    Failures are reported on stdout and do not raise (best effort).

    Args:
        save_dir: Directory that receives the extracted model folders.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        dict mapping "det" / "rec" / "cls" to the expected extracted
        directory. A path is returned even when its download failed, so the
        caller's model initialisation is what surfaces the problem.
    """
    urls = {
        "det": "https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_det_server_infer.tar",
        "rec": "https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_rec_server_infer.tar",
        "cls": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar"
    }
    os.makedirs(save_dir, exist_ok=True)
    paths = {}
    for key, url in urls.items():
        filename = url.split("/")[-1]
        full_path = os.path.join(save_dir, filename.replace('.tar', ''))
        paths[key] = full_path
        if os.path.exists(full_path):
            continue
        print(f"Đang tải model {key.upper()} High-Accuracy...")
        tar_path = os.path.join(save_dir, filename)
        try:
            with requests.get(url, stream=True, timeout=timeout) as r:
                # Without this an HTML error page would be saved as the archive.
                r.raise_for_status()
                with open(tar_path, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=1 << 16):
                        if chunk:
                            f.write(chunk)
            with tarfile.open(tar_path) as tar:
                if hasattr(tarfile, "data_filter"):
                    # Extraction filters refuse members that escape save_dir.
                    tar.extractall(path=save_dir, filter="data")
                else:
                    # Older interpreters: reject path-traversal members by hand.
                    root = os.path.realpath(save_dir)
                    for member in tar.getmembers():
                        target = os.path.realpath(os.path.join(save_dir, member.name))
                        if os.path.commonpath([root, target]) != root:
                            raise ValueError(f"unsafe archive member: {member.name}")
                    tar.extractall(path=save_dir)
        except Exception as e:
            print(f"Lỗi tải {filename}: {e}")
        finally:
            # Remove the archive on success and on failure alike, so a partial
            # download is never left behind.
            if os.path.exists(tar_path):
                os.remove(tar_path)
    return paths
print("Đang khởi tạo PaddleOCR...")
try:
    # First choice: the downloaded server models.
    models = download_model_server()
    ocr = PaddleOCR(use_angle_cls=True, lang='ch',
                   det_model_dir=models.get('det'),
                   rec_model_dir=models.get('rec'),
                   cls_model_dir=models.get('cls'),
                   use_textline_orientation=True)
    print("Model Server đã sẵn sàng!")
except Exception as e:
    # Fall back to the library's default models, but report why the server
    # models could not be used. KeyboardInterrupt / SystemExit now propagate.
    print(f"Lỗi tải model server ({e}). Dùng Mobile model.")
    ocr = PaddleOCR(use_angle_cls=True, lang='ch')
# ==========================================
# 2. IMAGE & RESULT PROCESSING (FIXED)
# ==========================================
def get_lines_from_result(result):
    """Normalise an OCR result to a flat list of ``[box, (text, conf)]`` lines.

    Two layouts are accepted:
      * flat   -- ``[line1, line2, ...]`` (returned unchanged);
      * batch  -- ``[[line1, line2, ...]]`` (the first batch entry is returned).

    Args:
        result: Raw OCR output, possibly ``None`` or empty.

    Returns:
        The list of lines; ``[]`` when there is nothing to report, including a
        batch whose first entry is ``None``.
    """
    if not result:
        return []
    first = result[0]
    # A flat result is recognised by its first element looking like a line:
    # a 2-item list whose second item starts with the recognised string.
    looks_like_line = (
        isinstance(first, list)
        and len(first) == 2
        and isinstance(first[1], (tuple, list))
        and len(first[1]) > 0
        and isinstance(first[1][0], str)
    )
    if looks_like_line:
        return result
    # Batch layout; never hand None to callers that iterate the result.
    return [] if first is None else first
def draw_results(image, result, font_path):
    """Draw each recognised line's polygon and text label onto ``image``.

    Args:
        image: PIL image or numpy array; an array is converted to a PIL image.
        result: Raw OCR output, normalised through ``get_lines_from_result``.
        font_path: TrueType font file for labels; falsy selects PIL's default.

    Returns:
        The annotated PIL image.
    """
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    draw = ImageDraw.Draw(image)
    try:
        font = ImageFont.truetype(font_path, 20) if font_path else ImageFont.load_default()
    except Exception:
        # An unreadable or corrupt font file must not abort the visualisation.
        font = ImageFont.load_default()
    # `or []` protects the loop if normalisation yields None.
    for line in get_lines_from_result(result) or []:
        try:
            # Integer pixel coordinates as plain Python ints.
            box = np.array(line[0]).astype(np.int32).tolist()
            txt = line[1][0]
            draw.polygon([tuple(p) for p in box], outline="red", width=2)
            # The label sits 25 px above the first corner of the polygon.
            txt_pos = (box[0][0], box[0][1] - 25)
            bbox = draw.textbbox(txt_pos, f"{txt}", font=font)
            draw.rectangle(bbox, fill="red")
            draw.text(txt_pos, txt, fill="white", font=font)
        except Exception:
            # Best effort: a malformed line is skipped, the rest are still drawn.
            continue
    return image
def format_output(result):
    """Render OCR lines as a Markdown bullet list and a JSON document.

    Args:
        result: Raw OCR output, normalised through ``get_lines_from_result``.

    Returns:
        ``(markdown, json_text)``. When no lines are found the pair is the
        "not found" message and ``"[]"``.
    """
    lines = get_lines_from_result(result)
    if not lines:
        return "Không tìm thấy văn bản.", "[]"

    def _json_default(obj):
        # numpy arrays and scalars expose tolist(); anything else is a real error.
        if hasattr(obj, 'tolist'):
            return obj.tolist()
        raise TypeError(f"not JSON serializable: {type(obj).__name__}")

    # Order top-down by the y coordinate of each box's first corner.
    try:
        ordered = sorted(lines, key=lambda entry: entry[0][0][1])
    except Exception:
        ordered = lines

    md_lines = []
    json_data = []
    for item in ordered:
        try:
            text = item[1][0]
            conf = float(item[1][1])
            box = item[0]
        except (TypeError, IndexError, KeyError, ValueError):
            # Malformed line: skip it and keep the rest.
            continue
        md_lines.append(f"- **{text}** ({conf:.1%})")
        json_data.append({"text": text, "confidence": conf, "box": box})
    return "\n".join(md_lines), json.dumps(
        json_data, ensure_ascii=False, indent=2, default=_json_default
    )
def predict_pipeline(image_file):
    """Run OCR on an image file and build the three UI outputs.

    Args:
        image_file: Path of the uploaded image, or ``None`` when nothing was uploaded.

    Returns:
        ``(annotated_image, markdown, json_text)``; ``(None, "", "")`` when
        ``image_file`` is ``None``.
    """
    if image_file is None:
        return None, "", ""
    # The context manager closes the underlying file handle once decoded.
    with Image.open(image_file) as src:
        img = np.array(src.convert('RGB'))
    # Run OCR on the RGB array.
    result = ocr.ocr(img)
    # Draw on a copy so the array passed to OCR stays untouched.
    vis_img = draw_results(img.copy(), result, FONT_PATH)
    md_out, json_out = format_output(result)
    return vis_img, md_out, json_out
# ==========================================
# 3. GRADIO INTERFACE
# ==========================================
# CSS applied to the page body and the Gradio container.
custom_css = "body, .gradio-container { font-family: 'Noto Sans SC', sans-serif; }"
with gr.Blocks(title="PaddleOCR Pro Fixed", css=custom_css, theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🇨🇳 PaddleOCR Pro (Server Mode - Fixed)")
    with gr.Row():
        # Left column: image upload and the run button.
        with gr.Column(scale=4):
            input_image = gr.Image(type="filepath", label="Tải ảnh lên", height=400)
            submit_btn = gr.Button("CHẠY NHẬN DIỆN", variant="primary")
        # Right column: one tab per output of predict_pipeline.
        with gr.Column(scale=6):
            with gr.Tabs():
                with gr.TabItem("Kết quả"):
                    output_image = gr.Image(type="pil", label="Visualization")
                with gr.TabItem("Markdown"):
                    output_md = gr.Markdown()
                with gr.TabItem("JSON"):
                    output_json = gr.Code(language="json")
    # The output order matches the tuple returned by predict_pipeline.
    submit_btn.click(predict_pipeline, inputs=[input_image], outputs=[output_image, output_md, output_json])
if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    app.launch(server_name="0.0.0.0", server_port=7860)

 import os
# --- SYSTEM CONFIGURATION ---
# These are assigned before paddleocr is imported below.
# NOTE(review): presumably all three *mkldnn* flags disable the MKL-DNN backend
# and the last one quiets native logging -- confirm the names against Paddle docs.
os.environ["FLAGS_use_mkldnn"] = "0"
os.environ["FLAGS_enable_mkldnn"] = "0"
os.environ["DN_ENABLE_MKLDNN"] = "0"
os.environ["CPP_MIN_LOG_LEVEL"] = "3"
+import logging
+import re
+import gradio as gr
+from paddleocr import PaddleOCR
+from PIL import Image, ImageDraw, ImageFont
+import numpy as np
+import requests
# Silence redundant logs from the "ppocr" logger.
logging.getLogger("ppocr").setLevel(logging.WARNING)
print("Đang khởi tạo PaddleOCR (Coordinate Sync Mode)...")
try:
    # Preferred configuration: text-line orientation on, document orientation
    # classification and unwarping off.
    ocr = PaddleOCR(use_textline_orientation=True, use_doc_orientation_classify=False,
        use_doc_unwarping=False, lang='ch')
except Exception as e:
    # If those options are rejected, retry with only the language set.
    print(f"Lỗi khởi tạo: {e}. Chuyển về chế độ mặc định.")
    ocr = PaddleOCR(lang='ch')
print("Model đã sẵn sàng!")
+# --- TẢI FONT ---
 def check_and_download_font():
     font_path = "./simfang.ttf"
     if not os.path.exists(font_path):
         try:
             url = "https://github.com/StellarCN/scp_zh/raw/master/fonts/SimFang.ttf"
             r = requests.get(url, allow_redirects=True)
             with open(font_path, 'wb') as f:
# Resolved once at import time; later passed to the drawing helper as the font file to use.
FONT_PATH = check_and_download_font()
# --- GENERAL-PURPOSE DRAWING FUNCTION ---
def universal_draw(image, raw_data, font_path):
    """Draw every (box, text) pair found in ``raw_data`` onto a copy of ``image``.

    Two result layouts are recognised:
      * a list whose first element is a dict with parallel ``rec_texts`` and
        ``dt_polys`` / ``rec_polys`` / ``dt_boxes`` sequences;
      * anything else, searched recursively for dicts holding a box key and a
        text key, or for ``[box, text]`` / ``[box, (text, ...)]`` pairs.

    Args:
        image: PIL image or numpy array; ``None`` is returned unchanged.
        raw_data: Raw OCR output in one of the layouts above.
        font_path: TrueType font file for labels; falsy selects PIL's default.

    Returns:
        A new annotated PIL image (the input is not modified), or ``None``
        when ``image`` is ``None``.
    """
    if image is None:
        return image
    # Make sure we are working with a PIL image.
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    # Draw on a copy.
    canvas = image.copy()
    draw = ImageDraw.Draw(canvas)
    font_size = 24
    try:
        font = ImageFont.truetype(font_path, font_size) if font_path else ImageFont.load_default()
    except Exception:
        # An unreadable font file must not abort the overlay.
        font = ImageFont.load_default()

    def parse_box(b):
        """Return polygon corner tuples for a point list or an [x1, y1, x2, y2] box, else None."""
        try:
            if hasattr(b, 'tolist'):
                b = b.tolist()
            if len(b) > 0 and isinstance(b[0], (list, tuple)):
                return [tuple(p) for p in b]
            if len(b) == 4 and isinstance(b[0], (int, float)):
                x1, y1, x2, y2 = b
                return [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]
            return None
        except (TypeError, IndexError, ValueError):
            return None

    items_to_draw = []
    # Preferred layout: parallel text and box sequences in the first dict.
    # NOTE(review): dt_polys is preferred over rec_polys here; confirm that both
    # stay index-aligned with rec_texts in the installed PaddleOCR version.
    processed = False
    if isinstance(raw_data, list) and len(raw_data) > 0 and isinstance(raw_data[0], dict):
        data_dict = raw_data[0]
        texts = data_dict.get('rec_texts')
        boxes = data_dict.get('dt_polys', data_dict.get('rec_polys', data_dict.get('dt_boxes')))
        # An ndarray of boxes cannot be truth-tested; normalise it to a list first.
        if hasattr(boxes, 'tolist'):
            boxes = boxes.tolist()
        if texts and boxes and isinstance(texts, list) and isinstance(boxes, list):
            for txt, raw_box in zip(texts, boxes):
                box = parse_box(raw_box)
                if box and txt:
                    items_to_draw.append((box, txt))
            processed = True

    # Fallback: walk the structure looking for anything shaped like a result.
    if not processed:
        def hunt(data):
            """Recursively collect (box, text) pairs from nested dicts and sequences."""
            if isinstance(data, dict):
                box = None
                text = None
                for k in ('points', 'box', 'dt_boxes', 'poly'):
                    if k in data:
                        box = parse_box(data[k])
                        break
                for k in ('transcription', 'text', 'rec_text', 'label'):
                    if k in data:
                        text = data[k]
                        break
                if box and text:
                    items_to_draw.append((box, text))
                    return
                for v in data.values():
                    hunt(v)
            elif isinstance(data, (list, tuple)):
                if len(data) == 2 and isinstance(data[0], list) and len(data[0]) == 4:
                    box = parse_box(data[0])
                    txt_obj = data[1]
                    # (text, score, ...) tuple or a bare string; an empty tuple is ignored.
                    if isinstance(txt_obj, (list, tuple)):
                        text = txt_obj[0] if txt_obj else None
                    else:
                        text = txt_obj
                    if box and isinstance(text, str):
                        items_to_draw.append((box, text))
                        return
                for item in data:
                    hunt(item)
        hunt(raw_data)

    # Draw.
    for box, txt in items_to_draw:
        try:
            # Red outline.
            draw.polygon(box, outline="red", width=3)
            # Label anchored at the first corner of the box.
            txt_x, txt_y = box[0]
            if hasattr(draw, "textbbox"):
                text_bbox = draw.textbbox((txt_x, txt_y), txt, font=font, anchor="lb")
                draw.rectangle(text_bbox, fill="red")
                draw.text((txt_x, txt_y), txt, fill="white", font=font, anchor="lb")
            else:
                draw.text((txt_x, txt_y - font_size), txt, fill="white", font=font)
        except Exception:
            # Best effort: one undrawable item must not lose the others.
            continue
    return canvas
# --- TEXT PROCESSING FUNCTIONS ---
def deep_extract_text(data):
    """Collect every non-blank string reachable from ``data``, depth-first.

    Lists, tuples, dict values and the ``__dict__`` of arbitrary objects are
    traversed. Strings are returned as found (not stripped). A container that
    is already being visited higher up the current path is skipped, so
    self-referencing structures terminate instead of recursing forever.
    Containers merely shared between siblings are still visited each time.

    Args:
        data: Any nested structure.

    Returns:
        list of the strings found, in traversal order.
    """
    found_texts = []
    active = set()  # ids of the containers on the current recursion path

    def walk(node):
        if isinstance(node, str):
            if len(node.strip()) > 0:
                found_texts.append(node)
            return
        if isinstance(node, (list, tuple)):
            children = node
        elif isinstance(node, dict):
            children = node.values()
        elif hasattr(node, '__dict__'):
            children = (node.__dict__,)
        else:
            return
        marker = id(node)
        if marker in active:
            # Cycle: this container is one of our own ancestors.
            return
        active.add(marker)
        try:
            for child in children:
                walk(child)
        finally:
            active.discard(marker)

    walk(data)
    return found_texts
# Words dropped when they make up the whole string (compared case-insensitively).
_NOISE_WORDS = frozenset({'min', 'max', 'general', 'header', 'footer', 'structure'})
# Strings ending in these suffixes are treated as file names, not OCR text.
_NOISE_SUFFIXES = ('.ttf', '.json', '.pdparams', '.yml', '.log')
# At least one word character (or CJK ideograph) must be present.
_HAS_WORD_CHAR = re.compile(r'[\w\u4e00-\u9fff]')


def clean_text_result(text_list):
    """Filter a list of candidate strings down to plausible OCR text.

    Each string is stripped, then dropped when it is:
      * shorter than two characters and contains no CJK ideograph
        (U+4E00..U+9FFF);
      * a file-like name ending in one of the noise suffixes;
      * one of the noise words;
      * free of any word character.
    Non-string entries are skipped.

    Args:
        text_list: Iterable of candidate strings.

    Returns:
        list of the surviving stripped strings, in input order.
    """
    cleaned = []
    for t in text_list:
        if not isinstance(t, str):
            continue
        t = t.strip()
        if len(t) < 2 and not any('\u4e00' <= c <= '\u9fff' for c in t):
            continue
        lowered = t.lower()
        if lowered.endswith(_NOISE_SUFFIXES) or lowered in _NOISE_WORDS:
            continue
        if not _HAS_WORD_CHAR.search(t):
            continue
        cleaned.append(t)
    return cleaned
# --- MAIN PREDICT ---
def predict(image):
    """Run OCR on ``image`` and build the three UI outputs.

    Args:
        image: PIL image or numpy array from the UI, or ``None``.

    Returns:
        ``(annotated_image, text, debug_info)``. With no image the placeholder
        triple is returned; on any exception the input image, an error message
        and the traceback text are returned instead of raising.
    """
    if image is None:
        return None, "Chưa có ảnh.", "No Data"
    try:
        # Prepare the input image.
        original_pil = image.copy() if isinstance(image, Image.Image) else Image.fromarray(image).copy()
        image_np = np.array(image)
        # 1. OCR
        raw_result = ocr.ocr(image_np)
        # 2. Choose the image to draw on: if the result carries a preprocessed
        #    image, the boxes are drawn on that one instead of the upload.
        target_image_for_drawing = original_pil
        # Tracked with a flag: comparing two PIL images with != compares their
        # content, not which object was chosen.
        used_preprocessed = False
        if isinstance(raw_result, list) and len(raw_result) > 0 and isinstance(raw_result[0], dict):
            if 'doc_preprocessor_res' in raw_result[0]:
                proc_res = raw_result[0]['doc_preprocessor_res']
                # Use the preprocessor's output image when one is present.
                if 'output_img' in proc_res:
                    print("Phát hiện ảnh đã qua xử lý hình học. Đang đồng bộ tọa độ...")
                    numpy_img = proc_res['output_img']
                    target_image_for_drawing = Image.fromarray(numpy_img)
                    used_preprocessed = True
        # 3. Draw on the selected image.
        annotated_image = universal_draw(target_image_for_drawing, raw_result, FONT_PATH)
        # 4. Text output.
        all_texts = deep_extract_text(raw_result)
        final_texts = clean_text_result(all_texts)
        text_output = "\n".join(final_texts) if final_texts else "Không tìm thấy văn bản."
        # Debug info: which image was used, plus the first 1000 characters of the raw result.
        debug_str = str(raw_result)[:1000]
        debug_info = f"Used Image Source: {'Preprocessed' if used_preprocessed else 'Original'}\nData Preview:\n{debug_str}..."
        return annotated_image, text_output, debug_info
    except Exception as e:
        import traceback
        return image, f"Lỗi: {str(e)}", traceback.format_exc()
# --- USER INTERFACE ---
with gr.Blocks(title="PaddleOCR Perfect Overlay") as iface:
    gr.Markdown("## PaddleOCR Chinese - High Precision Overlay")
    with gr.Row():
        # Left column: image input and the run button.
        with gr.Column():
            input_img = gr.Image(type="pil", label="Input Image")
            submit_btn = gr.Button("RUN OCR", variant="primary")
        # Right column: one tab per output of predict.
        with gr.Column():
            with gr.Tabs():
                with gr.TabItem("🖼️ Kết quả Khớp Tọa Độ"):
                    output_img = gr.Image(type="pil", label="Overlay Result")
                with gr.TabItem("📝 Văn bản"):
                    output_txt = gr.Textbox(label="Text Content", lines=15)
                with gr.TabItem("🐞 Debug"):
                    output_debug = gr.Textbox(label="Debug Info", lines=15)
    # The output order matches the tuple returned by predict.
    submit_btn.click(
        fn=predict,
        inputs=input_img,
        outputs=[output_img, output_txt, output_debug]
    )
if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    iface.launch(server_name="0.0.0.0", server_port=7860)