Spaces:

ranbac
/

PaddleOCR

Running

App Files Community

ranbac committed 22 days ago

Commit

706aa67

verified ·

1 Parent(s): 4db1152

Update app.py

Browse files

Files changed (1) hide show

app.py +173 -183

app.py CHANGED Viewed

@@ -1,38 +1,25 @@
 import os
-# --- CẤU HÌNH HỆ THỐNG ---
-os.environ["FLAGS_use_mkldnn"] = "1"
-os.environ["FLAGS_enable_mkldnn"] = "1"
-os.environ["DN_ENABLE_MKLDNN"] = "0"
-os.environ["CPP_MIN_LOG_LEVEL"] = "3"
-import logging
-import re
 import gradio as gr
-from paddleocr import PaddleOCR
 from PIL import Image, ImageDraw, ImageFont
-import numpy as np
-import requests
-# Tắt log thừa
-logging.getLogger("ppocr").setLevel(logging.WARNING)
-print("Đang khởi tạo PaddleOCR (Coordinate Sync Mode)...")
-try:
-    ocr = PaddleOCR(use_textline_orientation=True, use_doc_orientation_classify=False,
-        use_doc_unwarping=False, lang='ch')
-except Exception as e:
-    print(f"Lỗi khởi tạo: {e}. Chuyển về chế độ mặc định.")
-    ocr = PaddleOCR(lang='ch')
-print("Model đã sẵn sàng!")
-# --- TẢI FONT ---
 def check_and_download_font():
     font_path = "./simfang.ttf"
     if not os.path.exists(font_path):
         try:
             url = "https://github.com/StellarCN/scp_zh/raw/master/fonts/SimFang.ttf"
             r = requests.get(url, allow_redirects=True)
             with open(font_path, 'wb') as f:
@@ -43,179 +30,182 @@ def check_and_download_font():
 FONT_PATH = check_and_download_font()
-# --- HÀM VẼ ĐA NĂNG ---
-def universal_draw(image, raw_data, font_path):
-    if image is None: return image
-    # Đảm bảo image là PIL
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
-    # Copy để vẽ
-    canvas = image.copy()
-    draw = ImageDraw.Draw(canvas)
     try:
-        font_size = 24
-        font = ImageFont.truetype(font_path, font_size) if font_path else ImageFont.load_default()
     except:
         font = ImageFont.load_default()
-    # Hàm parse box
-    def parse_box(b):
-        try:
-            if hasattr(b, 'tolist'): b = b.tolist()
-            if len(b) > 0 and isinstance(b[0], list): return [tuple(p) for p in b]
-            if len(b) == 4 and isinstance(b[0], (int, float)):
-                 return [(b[0], b[1]), (b[2], b[1]), (b[2], b[3]), (b[0], b[3])]
-            return None
-        except: return None
-    items_to_draw = []
-    # Logic tìm box/text
-    # Ưu tiên cấu trúc PaddleX: rec_texts + dt_polys
-    processed = False
-    if isinstance(raw_data, list) and len(raw_data) > 0 and isinstance(raw_data[0], dict):
-        data_dict = raw_data[0]
-        texts = data_dict.get('rec_texts')
-        boxes = data_dict.get('dt_polys', data_dict.get('rec_polys', data_dict.get('dt_boxes')))
-        if texts and boxes and isinstance(texts, list) and isinstance(boxes, list):
-            for i in range(min(len(texts), len(boxes))):
-                txt = texts[i]
-                box = parse_box(boxes[i])
-                if box and txt: items_to_draw.append((box, txt))
-            processed = True
-    # Fallback Logic
-    if not processed:
-        def hunt(data):
-            if isinstance(data, dict):
-                box = None; text = None
-                for k in ['points', 'box', 'dt_boxes', 'poly']:
-                    if k in data: box = parse_box(data[k]); break
-                for k in ['transcription', 'text', 'rec_text', 'label']:
-                    if k in data: text = data[k]; break
-                if box and text: items_to_draw.append((box, text)); return
-                for v in data.values(): hunt(v)
-            elif isinstance(data, (list, tuple)):
-                if len(data) == 2 and isinstance(data[0], list) and len(data[0]) == 4:
-                    box = parse_box(data[0])
-                    txt_obj = data[1]
-                    text = txt_obj[0] if isinstance(txt_obj, (list, tuple)) else txt_obj
-                    if box and isinstance(text, str): items_to_draw.append((box, text)); return
-                for item in data: hunt(item)
-        hunt(raw_data)
-    # Vẽ
-    for box, txt in items_to_draw:
-        try:
-            # Vẽ khung đỏ
-            draw.polygon(box, outline="red", width=3)
             # Vẽ chữ
-            txt_x, txt_y = box[0]
-            if hasattr(draw, "textbbox"):
-                text_bbox = draw.textbbox((txt_x, txt_y), txt, font=font, anchor="lb")
-                draw.rectangle(text_bbox, fill="red")
-                draw.text((txt_x, txt_y), txt, fill="white", font=font, anchor="lb")
-            else:
-                draw.text((txt_x, txt_y - font_size), txt, fill="white", font=font)
-        except: continue
-    return canvas
-# --- HÀM XỬ LÝ TEXT ---
-def deep_extract_text(data):
-    found_texts = []
-    if isinstance(data, str):
-        if len(data.strip()) > 0: return [data]
-        return []
-    if isinstance(data, (list, tuple)):
-        for item in data: found_texts.extend(deep_extract_text(item))
-    elif isinstance(data, dict):
-        for val in data.values(): found_texts.extend(deep_extract_text(val))
-    elif hasattr(data, '__dict__'): found_texts.extend(deep_extract_text(data.__dict__))
-    return found_texts
-def clean_text_result(text_list):
-    cleaned = []
-    block_list = ['min', 'max', 'general', 'header', 'footer', 'structure']
-    for t in text_list:
-        t = t.strip()
-        if len(t) < 2 and not any(u'\u4e00' <= c <= u'\u9fff' for c in t): continue
-        if t.lower().endswith(('.ttf', '.json', '.pdparams', '.yml', '.log')): continue
-        if t.lower() in block_list: continue
-        if not re.search(r'[\w\u4e00-\u9fff]', t): continue
-        cleaned.append(t)
-    return cleaned
-# --- MAIN PREDICT ---
-def predict(image):
-    if image is None: return None, "Chưa có ảnh.", "No Data"
-    try:
-        # Chuẩn bị ảnh đầu vào
-        original_pil = image.copy() if isinstance(image, Image.Image) else Image.fromarray(image).copy()
-        image_np = np.array(image)
-        # 1. OCR
-        raw_result = ocr.ocr(image_np)
-        # 2. XỬ LÝ ẢNH ĐỂ VẼ (KEY FIX: Lấy ảnh từ Preprocessor nếu có)
-        target_image_for_drawing = original_pil
-        # Kiểm tra xem Paddle có chỉnh sửa ảnh không (dựa vào key 'doc_preprocessor_res')
-        if isinstance(raw_result, list) and len(raw_result) > 0 and isinstance(raw_result[0], dict):
-            if 'doc_preprocessor_res' in raw_result[0]:
-                proc_res = raw_result[0]['doc_preprocessor_res']
-                # Nếu có ảnh đầu ra đã chỉnh sửa (output_img)
-                if 'output_img' in proc_res:
-                    print("Phát hiện ảnh đã qua xử lý hình học. Đang đồng bộ tọa độ...")
-                    numpy_img = proc_res['output_img']
-                    target_image_for_drawing = Image.fromarray(numpy_img)
-        # 3. Vẽ lên ảnh ĐÚNG (Target Image)
-        annotated_image = universal_draw(target_image_for_drawing, raw_result, FONT_PATH)
-        # 4. Xử lý Text
-        all_texts = deep_extract_text(raw_result)
-        final_texts = clean_text_result(all_texts)
-        text_output = "\n".join(final_texts) if final_texts else "Không tìm thấy văn bản."
-        # Debug Info
-        debug_str = str(raw_result)[:1000]
-        debug_info = f"Used Image Source: {'Preprocessed' if target_image_for_drawing != original_pil else 'Original'}\nData Preview:\n{debug_str}..."
-        return annotated_image, text_output, debug_info
-    except Exception as e:
-        import traceback
-        return image, f"Lỗi: {str(e)}", traceback.format_exc()
-# --- GIAO DIỆN ---
-with gr.Blocks(title="PaddleOCR Perfect Overlay") as iface:
-    gr.Markdown("## PaddleOCR Chinese - High Precision Overlay")
     with gr.Row():
-        with gr.Column():
-            input_img = gr.Image(type="pil", label="Input Image")
-            submit_btn = gr.Button("RUN OCR", variant="primary")
-        with gr.Column():
             with gr.Tabs():
-                with gr.TabItem("🖼️ Kết quả Khớp Tọa Độ"):
-                    output_img = gr.Image(type="pil", label="Overlay Result")
-                with gr.TabItem("📝 Văn bản"):
-                    output_txt = gr.Textbox(label="Text Content", lines=15)
-                with gr.TabItem("🐞 Debug"):
-                    output_debug = gr.Textbox(label="Debug Info", lines=15)
     submit_btn.click(
-        fn=predict,
-        inputs=input_img,
-        outputs=[output_img, output_txt, output_debug]
     )
 if __name__ == "__main__":
-    iface.launch(server_name="0.0.0.0", server_port=7860)

 import os
+import cv2
+import json
+import tarfile
+import requests
+import numpy as np
 import gradio as gr
 from PIL import Image, ImageDraw, ImageFont
+from paddleocr import PaddleOCR
+# ==========================================
+# 1. CẤU HÌNH & TẢI MODEL (SERVER VERSION)
+# ==========================================
+os.environ["FLAGS_use_mkldnn"] = "0"
+os.environ["CPP_MIN_LOG_LEVEL"] = "3"
+# Hàm tải font chữ Trung Quốc (Giữ lại từ code cũ của bạn vì rất tốt)
 def check_and_download_font():
     font_path = "./simfang.ttf"
     if not os.path.exists(font_path):
         try:
+            print("Đang tải font SimFang...")
             url = "https://github.com/StellarCN/scp_zh/raw/master/fonts/SimFang.ttf"
             r = requests.get(url, allow_redirects=True)
             with open(font_path, 'wb') as f:
 FONT_PATH = check_and_download_font()
def _extract_tar_safely(tar_path, dest_dir):
    """Extract ``tar_path`` into ``dest_dir``, refusing members that escape it."""
    with tarfile.open(tar_path) as tar:
        if hasattr(tarfile, "data_filter"):
            # Interpreters with extraction filters: let the stdlib reject
            # absolute paths, ".." traversal and special files.
            tar.extractall(path=dest_dir, filter="data")
            return
        # Older interpreters: validate every member path by hand first.
        root = os.path.realpath(dest_dir)
        for member in tar.getmembers():
            target = os.path.realpath(os.path.join(root, member.name))
            if os.path.commonpath([root, target]) != root:
                raise tarfile.TarError(f"Unsafe path in archive: {member.name}")
        tar.extractall(path=dest_dir)


# Download the high-accuracy "server" models
def download_model_server(save_dir="./server_models"):
    """Download and unpack the detection, recognition and classifier models.

    A model is downloaded only when its extracted directory does not exist
    yet under ``save_dir``; otherwise the existing directory is reused.

    Args:
        save_dir: Directory that holds the extracted model folders. Created
            if missing.

    Returns:
        dict mapping ``"det"``, ``"rec"`` and ``"cls"`` to the path of the
        corresponding extracted model directory.

    Raises:
        requests.RequestException: on network failure, timeout or a non-2xx
            HTTP status.
        tarfile.TarError: if an archive is corrupt or contains unsafe paths.
        OSError: on filesystem errors.
    """
    import shutil  # local import: only needed to clean up after a failure

    urls = {
        "det": "https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_det_server_infer.tar",
        "rec": "https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_rec_server_infer.tar",
        "cls": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar",
    }
    os.makedirs(save_dir, exist_ok=True)

    paths = {}
    for key, url in urls.items():
        filename = url.split("/")[-1]
        extract_name = filename.replace('.tar', '')
        full_path = os.path.join(save_dir, extract_name)

        if not os.path.exists(full_path):
            print(f"Đang tải model {key.upper()} High-Accuracy...")
            tar_path = os.path.join(save_dir, filename)
            try:
                with requests.get(url, stream=True, timeout=60) as r:
                    # Without this an HTML error page would be saved as the
                    # ".tar" and only fail later, at extraction time.
                    r.raise_for_status()
                    with open(tar_path, 'wb') as f:
                        for chunk in r.iter_content(chunk_size=1 << 20):
                            if chunk:
                                f.write(chunk)
                _extract_tar_safely(tar_path, save_dir)
            except BaseException:
                # A half-extracted directory would satisfy the existence
                # check above on the next run, so remove it before re-raising.
                shutil.rmtree(full_path, ignore_errors=True)
                raise
            finally:
                if os.path.exists(tar_path):
                    os.remove(tar_path)

        paths[key] = full_path
    return paths
# Initialise the OCR engine, preferring the locally downloaded server models.
print("Đang khởi tạo PaddleOCR Server Mode...")
try:
    models = download_model_server()
    # Only `use_angle_cls` is passed for text-line orientation. The previous
    # call also passed `use_textline_orientation=True`, a second spelling of
    # the same switch.
    # NOTE(review): PaddleOCR 3.x is believed to reject the pair as mutually
    # exclusive, which would silently trigger the fallback below; 2.x ignores
    # the extra keyword. Confirm against the installed version.
    ocr = PaddleOCR(
        use_angle_cls=True,
        lang='ch',
        det_model_dir=models['det'],
        rec_model_dir=models['rec'],
        cls_model_dir=models['cls'],
    )
    print("Model Server đã sẵn sàng!")
except Exception as e:
    # Top-level boundary: any download or initialisation failure falls back to
    # the library's default models so the app can still start.
    print(f"Lỗi tải model server: {e}. Dùng Mobile model.")
    ocr = PaddleOCR(use_angle_cls=True, lang='ch')
+# ==========================================
+# 2. XỬ LÝ HÌNH ẢNH & KẾT QUẢ
+# ==========================================
def draw_results(image, result, font_path):
    """Draw detected text boxes and their "text (confidence)" labels.

    Args:
        image: A PIL image or a NumPy array. Arrays are converted with
            ``Image.fromarray``; a PIL image is drawn on in place.
        result: OCR output indexed as ``result[0]`` -> iterable of
            ``(box, (text, confidence))``, where ``box`` is a sequence of
            ``(x, y)`` points.
        font_path: Path to a TrueType font, or a falsy value to use PIL's
            default font.

    Returns:
        The annotated PIL image.
    """
    # Work on a PIL image so text can be rendered with a TrueType font.
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    draw = ImageDraw.Draw(image)

    try:
        font = ImageFont.truetype(font_path, 20) if font_path else ImageFont.load_default()
    except Exception:
        # Best effort: an unreadable font must not break the visualisation.
        font = ImageFont.load_default()

    if not result or not result[0]:
        return image

    for line in result[0]:
        box = np.array(line[0]).astype(np.int32)
        text = line[1][0]
        conf = line[1][1]

        # Plain Python ints keep PIL independent of NumPy scalar types.
        corners = [(int(x), int(y)) for x, y in box]
        draw.polygon(corners, outline="red", width=2)

        # One label string for both the background and the text, so the red
        # rectangle always matches what is actually written.
        label = f"{text} ({conf:.2f})"
        # Place the label above the first corner, clamped to stay on-canvas.
        label_pos = (corners[0][0], max(corners[0][1] - 25, 0))
        draw.rectangle(draw.textbbox(label_pos, label, font=font), fill="red")
        draw.text(label_pos, label, fill="white", font=font)
    return image
def _jsonable(value):
    """Recursively convert NumPy arrays/scalars and tuples to plain lists/scalars."""
    if hasattr(value, "tolist"):
        return value.tolist()
    if isinstance(value, (list, tuple)):
        return [_jsonable(v) for v in value]
    return value


def format_output(result):
    """Convert an OCR result into a Markdown list and a JSON string.

    Args:
        result: OCR output indexed as ``result[0]`` -> iterable of
            ``(box, (text, confidence))``. ``box`` is a sequence of ``(x, y)``
            points and may be nested lists or a NumPy array.

    Returns:
        ``(markdown, json_text)``. When ``result`` is empty or ``result[0]``
        is empty/None, returns the "no text found" message and ``"[]"``.
    """
    if not result or not result[0]:
        return "Không tìm thấy văn bản.", "[]"

    # Reading order: top-to-bottom by the first corner's Y, then left-to-right
    # by its X so items on the same line come out in a stable order.
    ordered = sorted(
        result[0],
        key=lambda item: (float(item[0][0][1]), float(item[0][0][0])),
    )

    md_lines = []
    json_data = []
    for item in ordered:
        text = item[1][0]
        conf = float(item[1][1])
        md_lines.append(f"- **{text}** (Độ tin cậy: {conf:.1%})")
        json_data.append({
            "text": text,
            "confidence": conf,
            # json.dumps cannot serialise NumPy arrays or scalars directly.
            "box": _jsonable(item[0]),
        })

    md_output = "\n".join(md_lines)
    json_output = json.dumps(json_data, ensure_ascii=False, indent=2)
    return md_output, json_output
+# ==========================================
+# 3. HÀM DỰ ĐOÁN CHÍNH
+# ==========================================
def predict_pipeline(image_file):
    """Run OCR on an image file and build the three UI outputs.

    Args:
        image_file: Path (or file object) accepted by ``Image.open``, or
            ``None`` when no image was supplied.

    Returns:
        ``(annotated_image, markdown_text, json_text)``; ``(None, "", "")``
        when ``image_file`` is ``None``.
    """
    if image_file is None:
        return None, "", ""

    # Load as RGB inside a context manager so the file handle is released as
    # soon as the pixels are copied into the array.
    with Image.open(image_file) as source:
        img = np.array(source.convert('RGB'))

    result = ocr.ocr(img, cls=True)

    # Draw on a copy so the array passed to the OCR engine stays untouched.
    vis_img = draw_results(img.copy(), result, FONT_PATH)
    md_out, json_out = format_output(result)
    return vis_img, md_out, json_out
+# ==========================================
+# 4. GRADIO INTERFACE (custom CSS modelled on the demo version)
+# ==========================================
+custom_css = """
+body, .gradio-container { font-family: "Noto Sans SC", sans-serif; }
+.gradio-container { max-width: 1200px !important; margin: auto; }
+.header-area { text-align: center; margin-bottom: 20px; }
+.header-area h1 { margin-bottom: 5px; color: #2d3748; }
+.notice { background: #f0f9ff; border: 1px solid #bae6fd; padding: 10px; border-radius: 8px; color: #0369a1; font-size: 14px; margin-bottom: 15px; }
+"""
+with gr.Blocks(title="PaddleOCR Pro Local", css=custom_css, theme=gr.themes.Soft()) as app:  # the whole UI is declared inside this context and bound to `app`
+    with gr.Column(elem_classes="header-area"):  # styled by the .header-area rules in custom_css
+        gr.Markdown("# 🇨🇳 PaddleOCR Professional (Local Version)")
+        gr.HTML("<div class='notice'>⚡ Phiên bản Server-Mode: Chạy offline với độ chính xác cao hơn bản Mobile mặc định.</div>")
     with gr.Row():
+        # LEFT column: input
+        with gr.Column(scale=4):
+            input_image = gr.Image(type="filepath", label="Tải ảnh lên", height=400)  # value is handed to predict_pipeline, which opens it with Image.open
+            submit_btn = gr.Button("🚀 CHẠY NHẬN DIỆN", variant="primary", size="lg")
+            gr.Markdown("### 💡 Ghi chú:")
+            gr.Markdown("- Model sẽ tự động tải phiên bản **Server (High Accuracy)** (~200MB) trong lần chạy đầu.")
+            gr.Markdown("- Hỗ trợ tốt cho tài liệu scan, hóa đơn và văn bản tiếng Trung.")
+        # RIGHT column: output (tabbed UI)
+        with gr.Column(scale=6):
             with gr.Tabs():
+                with gr.TabItem("🖼️ Trực quan hóa (Visualization)"):
+                    output_image = gr.Image(type="pil", label="Kết quả")
+                with gr.TabItem("📝 Văn bản (Markdown)"):
+                    output_md = gr.Markdown(label="Nội dung trích xuất")
+                with gr.TabItem("📊 Dữ liệu thô (JSON)"):
+                    output_json = gr.Code(language="json", label="Chi tiết tọa độ & Confidence")
+    # Event handling
     submit_btn.click(
+        fn=predict_pipeline,
+        inputs=[input_image],
+        outputs=[output_image, output_md, output_json]  # same order as predict_pipeline's 3-tuple return
     )
 if __name__ == "__main__":
+    app.launch(server_name="0.0.0.0", server_port=7860)  # bind all interfaces on port 7860