Harry Pham
init project
ea9cf0f
# app.py ← place at the project root
import json
import shutil
import tempfile
from pathlib import Path

import cv2
import gradio as gr
import numpy as np
from PIL import Image

# The Gradio handler in this file calls run_pipeline(); the imported name
# must match that call exactly.
from src.inference import run_pipeline
# Auto-download the model weights if they are not present locally.
import os

# Single source of truth for the weights file name: it is used for the
# existence check, for the download, and later as the pipeline checkpoint.
CHECKPOINT = "best.pt"

if not os.path.exists(CHECKPOINT):
    # Imported lazily so huggingface_hub is only required when a download
    # is actually needed.
    from huggingface_hub import hf_hub_download

    hf_hub_download(
        repo_id="phamha/drawing-model-weights",
        filename=CHECKPOINT,
        local_dir=".",
    )
# ── Gradio handler function ──────────────────────────────
def process(image: "Image.Image | None"):
    """Run detection + OCR on an uploaded drawing and format the output for the UI.

    Args:
        image: PIL image supplied by the Gradio image input, or ``None`` when
            nothing was uploaded.

    Returns:
        A 3-tuple ``(visualization, json_text, ocr_text)``:

        * ``visualization`` - the pipeline's annotated image converted to RGB,
          or ``None`` when there is no input or an error occurred.
        * ``json_text`` - the detections as indented JSON, or ``"{}"`` when
          there is no input or an error occurred.
        * ``ocr_text`` - a readable listing of every object with OCR content,
          or a status/error message.
    """
    if image is None:
        return None, "{}", "Chưa có ảnh."

    # JPEG cannot store alpha or palette data; Pillow raises on such modes,
    # so normalise anything other than plain RGB / grayscale first.
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")

    # The pipeline works on files, so the upload is staged in a scratch
    # directory. The context manager removes it on every exit path; the
    # visualization is read back into memory before the directory goes away.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = str(Path(tmp_dir) / "input.jpg")
        try:
            image.save(tmp_path, quality=95)
            result, vis_path = run_pipeline(
                image_path=tmp_path,
                output_dir=tmp_dir,
                checkpoint=CHECKPOINT,
                conf=0.3,
            )
        except Exception as e:  # UI boundary: report the failure in the text box
            return None, "{}", f"Lỗi: {e}"
        vis_bgr = cv2.imread(vis_path)

    # cv2.imread signals an unreadable/missing file by returning None.
    if vis_bgr is None:
        return None, "{}", "Lỗi: không đọc được ảnh kết quả."
    vis_rgb = cv2.cvtColor(vis_bgr, cv2.COLOR_BGR2RGB)

    json_str = json.dumps(_public_fields(result), ensure_ascii=False, indent=2)
    ocr_text = _format_ocr(result["objects"])
    return vis_rgb, json_str, ocr_text


# Per-object keys copied into the JSON shown in the UI; any other key on a
# detection (the original comment mentions crop_path) is left out for brevity.
_OBJECT_FIELDS = ("id", "class", "confidence", "bbox", "ocr_content")


def _public_fields(result: dict) -> dict:
    """Return a copy of *result* keeping only the image name and the UI-facing object fields."""
    return {
        "image": result["image"],
        "objects": [
            {key: obj[key] for key in _OBJECT_FIELDS}
            for obj in result["objects"]
        ],
    }


def _format_ocr(objects) -> str:
    """Render every object that has OCR content as a separator-framed text section."""
    sep = "─" * 44
    parts = []
    for obj in objects:
        content = obj["ocr_content"]
        if not content:
            continue
        if isinstance(content, dict):  # table results keep their text under "text"
            content = content.get("text", "")
        parts.append(
            f"{sep}\n"
            f"[{obj['class']} #{obj['id']}] "
            f"conf={obj['confidence']}\n"
            f"{sep}\n{content}"
        )
    return "\n\n".join(parts) if parts else "Không có vùng Note/Table."
# ── Gradio interface ──────────────────────────────────────
# Page layout: a title, a row with the upload + button next to the annotated
# result, then a row with the JSON output next to the OCR text.
with gr.Blocks(title="Engineering Drawing Analyzer") as demo:
    gr.Markdown("""
# 🔧 Engineering Drawing Analyzer
Tự động phát hiện và trích xuất **PartDrawing · Note · Table** từ bản vẽ kỹ thuật.
""")
    with gr.Row():
        with gr.Column(scale=1):
            inp_image = gr.Image(
                type="pil",
                label="Upload bản vẽ kỹ thuật",
            )
            btn = gr.Button("🔍 Detect & OCR", variant="primary")
        with gr.Column(scale=1):
            out_image = gr.Image(label="Kết quả detection")
    with gr.Row():
        with gr.Column(scale=1):
            out_json = gr.Code(
                language="json",
                label="JSON output",
                lines=20,
            )
        with gr.Column(scale=1):
            out_ocr = gr.Textbox(
                label="OCR content (Note & Table)",
                lines=20,
                max_lines=40,
            )
    # The button feeds the uploaded image to process() and spreads its three
    # return values over the three output components.
    btn.click(
        fn=process,
        inputs=[inp_image],
        outputs=[out_image, out_json, out_ocr],
    )
    # Sample images are optional: only register the ones that exist on disk
    # so a missing file cannot break app start-up. Add more names here.
    example_rows = [[name] for name in ("test.jpg",) if Path(name).is_file()]
    if example_rows:
        gr.Examples(
            examples=example_rows,
            inputs=[inp_image],
        )
if __name__ == "__main__":
    # share=True requests a temporary public link; the local server is
    # bound to port 7860.
    launch_options = {"share": True, "server_port": 7860}
    demo.launch(**launch_options)