Spaces:

ranbac
/

PaddleOCR

Running

App Files Community

ranbac committed 22 days ago

Commit

fd2f280

verified ·

1 Parent(s): 6a65705

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -108

app.py CHANGED Viewed

@@ -9,12 +9,11 @@ from PIL import Image, ImageDraw, ImageFont
 from paddleocr import PaddleOCR
 # ==========================================
-# 1. CẤU HÌNH & TẢI MODEL (SERVER VERSION)
 # ==========================================
 os.environ["FLAGS_use_mkldnn"] = "0"
 os.environ["CPP_MIN_LOG_LEVEL"] = "3"
-# Hàm tải font chữ Trung Quốc (Giữ lại từ code cũ của bạn vì rất tốt)
 def check_and_download_font():
     font_path = "./simfang.ttf"
     if not os.path.exists(font_path):
@@ -30,14 +29,12 @@ def check_and_download_font():
 FONT_PATH = check_and_download_font()
-# Hàm tải Model Server (Độ chính xác cao)
 def download_model_server(save_dir="./server_models"):
     urls = {
         "det": "https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_det_server_infer.tar",
         "rec": "https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_rec_server_infer.tar",
         "cls": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar"
     }
     paths = {}
     if not os.path.exists(save_dir): os.makedirs(save_dir)
@@ -45,42 +42,51 @@ def download_model_server(save_dir="./server_models"):
         filename = url.split("/")[-1]
         extract_name = filename.replace('.tar', '')
         full_path = os.path.join(save_dir, extract_name)
         if not os.path.exists(full_path):
             print(f"Đang tải model {key.upper()} High-Accuracy...")
             tar_path = os.path.join(save_dir, filename)
-            r = requests.get(url, stream=True)
-            with open(tar_path, 'wb') as f:
-                for chunk in r.iter_content(chunk_size=1024):
-                    if chunk: f.write(chunk)
-            with tarfile.open(tar_path) as tar:
-                tar.extractall(path=save_dir)
-            os.remove(tar_path)
         paths[key] = full_path
     return paths
-# Khởi tạo OCR
-print("Đang khởi tạo PaddleOCR Server Mode...")
 try:
     models = download_model_server()
-    ocr = PaddleOCR(
-        use_angle_cls=True, lang='ch',
-        det_model_dir=models['det'],
-        rec_model_dir=models['rec'],
-        cls_model_dir=models['cls'],
-        use_textline_orientation=True
-    )
     print("Model Server đã sẵn sàng!")
-except Exception as e:
-    print(f"Lỗi tải model server: {e}. Dùng Mobile model.")
     ocr = PaddleOCR(use_angle_cls=True, lang='ch')
 # ==========================================
-# 2. XỬ LÝ HÌNH ẢNH & KẾT QUẢ
 # ==========================================
 def draw_results(image, result, font_path):
-    # Convert sang PIL để vẽ đẹp hơn
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
     draw = ImageDraw.Draw(image)
@@ -90,122 +96,78 @@ def draw_results(image, result, font_path):
     except:
         font = ImageFont.load_default()
-    if result and result[0]:
-        for line in result[0]:
             box = np.array(line[0]).astype(np.int32)
             txt = line[1][0]
             conf = line[1][1]
-            # Vẽ box
             tuples = [tuple(p) for p in box]
             draw.polygon(tuples, outline="red", width=2)
-            # Vẽ nền chữ
             txt_pos = (box[0][0], box[0][1] - 25)
-            bbox = draw.textbbox(txt_pos, f"{txt} ({conf:.2f})", font=font)
             draw.rectangle(bbox, fill="red")
-            # Vẽ chữ
             draw.text(txt_pos, txt, fill="white", font=font)
     return image
 def format_output(result):
-    """Chuyển đổi kết quả sang Markdown và JSON sạch"""
-    if not result or not result[0]:
-        return "Không tìm thấy văn bản.", "[]"
-    # Tạo Markdown
     md_lines = []
     json_data = []
-    # Sắp xếp theo trục Y để tạo dòng văn bản tự nhiên
-    boxes = sorted(result[0], key=lambda x: x[0][0][1])
-    for item in boxes:
-        text = item[1][0]
-        conf = float(item[1][1])
-        box = item[0]
-        md_lines.append(f"- **{text}** (Độ tin cậy: {conf:.1%})")
-        json_data.append({
-            "text": text,
-            "confidence": conf,
-            "box": box
-        })
-    md_output = "\n".join(md_lines)
-    json_output = json.dumps(json_data, ensure_ascii=False, indent=2)
-    return md_output, json_output
-# ==========================================
-# 3. HÀM DỰ ĐOÁN CHÍNH
-# ==========================================
 def predict_pipeline(image_file):
-    if image_file is None:
-        return None, "", ""
-    # Đọc ảnh
     img = np.array(Image.open(image_file).convert('RGB'))
-    # OCR
-    result = ocr.ocr(img)
-    # 1. Vẽ Visualization
     vis_img = draw_results(img.copy(), result, FONT_PATH)
-    # 2. Format dữ liệu
     md_out, json_out = format_output(result)
     return vis_img, md_out, json_out
 # ==========================================
-# 4. GIAO DIỆN GRADIO (Custom CSS giống bản Demo)
 # ==========================================
-custom_css = """
-body, .gradio-container { font-family: "Noto Sans SC", sans-serif; }
-.gradio-container { max-width: 1200px !important; margin: auto; }
-.header-area { text-align: center; margin-bottom: 20px; }
-.header-area h1 { margin-bottom: 5px; color: #2d3748; }
-.notice { background: #f0f9ff; border: 1px solid #bae6fd; padding: 10px; border-radius: 8px; color: #0369a1; font-size: 14px; margin-bottom: 15px; }
-"""
-with gr.Blocks(title="PaddleOCR Pro Local", css=custom_css, theme=gr.themes.Soft()) as app:
-    with gr.Column(elem_classes="header-area"):
-        gr.Markdown("# 🇨🇳 PaddleOCR Professional (Local Version)")
-        gr.HTML("<div class='notice'>⚡ Phiên bản Server-Mode: Chạy offline với độ chính xác cao hơn bản Mobile mặc định.</div>")
     with gr.Row():
-        # Cột TRÁI: Input
         with gr.Column(scale=4):
             input_image = gr.Image(type="filepath", label="Tải ảnh lên", height=400)
-            submit_btn = gr.Button("🚀 CHẠY NHẬN DIỆN", variant="primary", size="lg")
-            gr.Markdown("### 💡 Ghi chú:")
-            gr.Markdown("- Model sẽ tự động tải phiên bản **Server (High Accuracy)** (~200MB) trong lần chạy đầu.")
-            gr.Markdown("- Hỗ trợ tốt cho tài liệu scan, hóa đơn và văn bản tiếng Trung.")
-        # Cột PHẢI: Output (Tabbed UI)
         with gr.Column(scale=6):
             with gr.Tabs():
-                with gr.TabItem("🖼️ Trực quan hóa (Visualization)"):
-                    output_image = gr.Image(type="pil", label="Kết quả")
-                with gr.TabItem("📝 Văn bản (Markdown)"):
-                    output_md = gr.Markdown(label="Nội dung trích xuất")
-                with gr.TabItem("📊 Dữ liệu thô (JSON)"):
-                    output_json = gr.Code(language="json", label="Chi tiết tọa độ & Confidence")
-    # Xử lý sự kiện
-    submit_btn.click(
-        fn=predict_pipeline,
-        inputs=[input_image],
-        outputs=[output_image, output_md, output_json]
-    )
 if __name__ == "__main__":
     app.launch(server_name="0.0.0.0", server_port=7860)

 from paddleocr import PaddleOCR
 # ==========================================
+# 1. CẤU HÌNH & TẢI MODEL
 # ==========================================
 os.environ["FLAGS_use_mkldnn"] = "0"
 os.environ["CPP_MIN_LOG_LEVEL"] = "3"
 def check_and_download_font():
     font_path = "./simfang.ttf"
     if not os.path.exists(font_path):
 FONT_PATH = check_and_download_font()
 def download_model_server(save_dir="./server_models"):
     urls = {
         "det": "https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_det_server_infer.tar",
         "rec": "https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_rec_server_infer.tar",
         "cls": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar"
     }
     paths = {}
     if not os.path.exists(save_dir): os.makedirs(save_dir)
         filename = url.split("/")[-1]
         extract_name = filename.replace('.tar', '')
         full_path = os.path.join(save_dir, extract_name)
         if not os.path.exists(full_path):
             print(f"Đang tải model {key.upper()} High-Accuracy...")
             tar_path = os.path.join(save_dir, filename)
+            try:
+                r = requests.get(url, stream=True)
+                with open(tar_path, 'wb') as f:
+                    for chunk in r.iter_content(chunk_size=1024):
+                        if chunk: f.write(chunk)
+                with tarfile.open(tar_path) as tar:
+                    tar.extractall(path=save_dir)
+                os.remove(tar_path)
+            except Exception as e:
+                print(f"Lỗi tải {filename}: {e}")
         paths[key] = full_path
     return paths
+print("Đang khởi tạo PaddleOCR...")
 try:
     models = download_model_server()
+    ocr = PaddleOCR(use_angle_cls=True, lang='ch',
+                   det_model_dir=models.get('det'),
+                   rec_model_dir=models.get('rec'),
+                   cls_model_dir=models.get('cls'),
+                   use_textline_orientation=True)
     print("Model Server đã sẵn sàng!")
+except:
+    print("Lỗi tải model server. Dùng Mobile model.")
     ocr = PaddleOCR(use_angle_cls=True, lang='ch')
 # ==========================================
+# 2. XỬ LÝ HÌNH ẢNH & KẾT QUẢ (ĐÃ FIX)
 # ==========================================
+def get_lines_from_result(result):
+    """Hàm phụ trợ để chuẩn hóa đầu ra của PaddleOCR"""
+    if not result: return []
+    # Nếu là list phẳng [Line1, Line2] (cấu trúc mới)
+    if isinstance(result[0], list) and len(result[0]) == 2 and \
+       isinstance(result[0][1], (tuple, list)) and \
+       isinstance(result[0][1][0], str):
+        return result
+    # Nếu là batch [[Line1, Line2]] (cấu trúc cũ)
+    return result[0]
 def draw_results(image, result, font_path):
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
     draw = ImageDraw.Draw(image)
     except:
         font = ImageFont.load_default()
+    lines = get_lines_from_result(result)
+    for line in lines:
+        try:
             box = np.array(line[0]).astype(np.int32)
             txt = line[1][0]
             conf = line[1][1]
             tuples = [tuple(p) for p in box]
             draw.polygon(tuples, outline="red", width=2)
             txt_pos = (box[0][0], box[0][1] - 25)
+            bbox = draw.textbbox(txt_pos, f"{txt}", font=font)
             draw.rectangle(bbox, fill="red")
             draw.text(txt_pos, txt, fill="white", font=font)
+        except: continue
     return image
 def format_output(result):
+    lines = get_lines_from_result(result)
+    if not lines: return "Không tìm thấy văn bản.", "[]"
     md_lines = []
     json_data = []
+    # Sort top-down
+    try: sorted_lines = sorted(lines, key=lambda x: x[0][0][1])
+    except: sorted_lines = lines
+    for item in sorted_lines:
+        try:
+            text = item[1][0]
+            conf = float(item[1][1])
+            box = item[0]
+            md_lines.append(f"- **{text}** ({conf:.1%})")
+            json_data.append({"text": text, "confidence": conf, "box": box})
+        except: continue
+    return "\n".join(md_lines), json.dumps(json_data, ensure_ascii=False, indent=2)
 def predict_pipeline(image_file):
+    if image_file is None: return None, "", ""
     img = np.array(Image.open(image_file).convert('RGB'))
+    # Gọi OCR (cls=True giúp nhận diện chiều văn bản tốt hơn)
+    result = ocr.ocr(img, cls=True)
     vis_img = draw_results(img.copy(), result, FONT_PATH)
     md_out, json_out = format_output(result)
     return vis_img, md_out, json_out
 # ==========================================
+# 3. GIAO DIỆN GRADIO
 # ==========================================
+custom_css = "body, .gradio-container { font-family: 'Noto Sans SC', sans-serif; }"
+with gr.Blocks(title="PaddleOCR Pro Fixed", css=custom_css, theme=gr.themes.Soft()) as app:
+    gr.Markdown("# 🇨🇳 PaddleOCR Pro (Server Mode - Fixed)")
     with gr.Row():
         with gr.Column(scale=4):
             input_image = gr.Image(type="filepath", label="Tải ảnh lên", height=400)
+            submit_btn = gr.Button("CHẠY NHẬN DIỆN", variant="primary")
         with gr.Column(scale=6):
             with gr.Tabs():
+                with gr.TabItem("Kết quả"):
+                    output_image = gr.Image(type="pil", label="Visualization")
+                with gr.TabItem("Markdown"):
+                    output_md = gr.Markdown()
+                with gr.TabItem("JSON"):
+                    output_json = gr.Code(language="json")
+    submit_btn.click(predict_pipeline, inputs=[input_image], outputs=[output_image, output_md, output_json])
 if __name__ == "__main__":
     app.launch(server_name="0.0.0.0", server_port=7860)