Spaces:

minhvh
/

ocr

Sleeping

File size: 3,523 Bytes

import gradio as gr
from paddleocr import PaddleOCR
from PIL import Image, ImageDraw
from typing import Dict

# Initialize the OCR engine once at import time; `inference` below reuses
# this single module-level instance for every call.
# NOTE(review): the model names select the PP-OCRv5 "mobile" detection and
# recognition models; document orientation classification and unwarping are
# switched off while text-line orientation is switched on — confirm the exact
# semantics of these flags against the PaddleOCR documentation.
ocr = PaddleOCR(
    text_detection_model_name="PP-OCRv5_mobile_det",
    text_recognition_model_name="PP-OCRv5_mobile_rec",
    use_doc_orientation_classify=False,
    use_doc_unwarping=False,
    use_textline_orientation=True
)

def replace_text(img: Image.Image, regions: list, new_text="NEW",
                 text_color="white", box_color=None, font=None) -> Image.Image:
    """Cover every region's bounding box and write replacement text over it.

    The image is modified in place and the same object is returned.

    Args:
        img: Image to draw on.
        regions: Iterable of dicts; each may carry a "bbox" dict with the
            keys "x1", "y1", "x2" and "y2". Entries without a (non-empty)
            "bbox" are skipped.
        new_text: Text written at the top-left corner of every box.
        text_color: Fill colour used for the text.
        box_color: Optional outline colour; when truthy, a 2-pixel outline
            is drawn around each box after the text.
        font: Optional font object forwarded to the text call.

    Returns:
        The same image instance that was passed in.
    """
    canvas = ImageDraw.Draw(img)
    # Lazily pull the "bbox" entry of each region, in order.
    candidate_boxes = (entry.get("bbox", {}) for entry in regions)

    for box in candidate_boxes:
        if box:
            corners = [box["x1"], box["y1"], box["x2"], box["y2"]]
            left, top = corners[0], corners[1]

            # Black out the original content first ...
            canvas.rectangle(corners, fill=(0, 0, 0))
            # ... then write the replacement text on top of it.
            canvas.text((left, top), new_text, fill=text_color, font=font)

            # Optionally outline the region.
            if box_color:
                canvas.rectangle(corners, outline=box_color, width=2)

    return img

def draw_ocr(image: Image.Image, text_regions: list, box_color="red", text_color="yellow") -> Image.Image:
    """Outline every detected text region on the image.

    The image is modified in place and the same object is returned.

    Args:
        image: Image to draw on.
        text_regions: Iterable of dicts; each may carry a "bbox" dict with
            the keys "x1", "y1", "x2" and "y2". Entries without a
            (non-empty) "bbox" are skipped.
        box_color: Outline colour of the 2-pixel rectangle.
        text_color: Currently unused (no label is drawn); kept so existing
            callers that pass it keep working.

    Returns:
        The same image instance that was passed in.
    """
    draw = ImageDraw.Draw(image)
    for region in text_regions:
        bbox = region.get("bbox", {})
        if not bbox:
            continue
        draw.rectangle(
            [bbox["x1"], bbox["y1"], bbox["x2"], bbox["y2"]],
            outline=box_color,
            width=2,
        )
    return image

def extract_json(result_json: Dict, include_bbox: bool = False,
                 min_confidence: float = 0.3) -> Dict:
    """Summarise a raw OCR result dictionary.

    Args:
        result_json: Mapping whose "res" entry holds the parallel sequences
            "rec_texts", "rec_scores" and, optionally, "rec_boxes" (each box
            indexable as x1, y1, x2, y2). Missing entries are treated as
            empty.
        include_bbox: When true, the result also contains "text_regions",
            one entry per accepted text that has a box with at least four
            values. Accepted texts without a usable box are left out of
            "text_regions" but still counted in the text summary.
        min_confidence: A text is accepted only when its score is strictly
            greater than this value.

    Returns:
        A dict with:
          * "extracted_text": accepted texts, stripped, joined by newlines;
          * "text_count": number of accepted texts;
          * "avg_confidence": mean of *all* scores (including rejected
            ones), rounded to 2 decimals, or 0 when there are no scores;
          * "text_regions" (only when ``include_bbox`` is true): list of
            dicts with "text" (as recognised, not stripped), "bbox" and
            "confidence" (rounded to 3 decimals).
    """
    data = result_json.get("res", {})
    texts = data.get("rec_texts", [])
    scores = data.get("rec_scores", [])
    boxes = data.get("rec_boxes", []) if include_bbox else []

    clean_texts = []
    text_regions = []
    for i, (t, s) in enumerate(zip(texts, scores)):
        # Skip empty / whitespace-only texts and low-confidence hits.
        if not (t and t.strip() and s > min_confidence):
            continue
        clean_texts.append(t.strip())

        # Regions are only built when boxes were requested; the previous
        # implementation dereferenced a missing box here and raised
        # TypeError whenever include_bbox was False.
        if not include_bbox:
            continue
        if i >= len(boxes) or len(boxes[i]) < 4:
            continue

        b = boxes[i]
        text_regions.append({
            "text": t,
            "bbox": {
                "x1": int(b[0]),
                "y1": int(b[1]),
                "x2": int(b[2]),
                "y2": int(b[3]),
                "width": int(b[2] - b[0]),
                "height": int(b[3] - b[1]),
            },
            "confidence": round(float(s), 3),
        })

    summary = {
        "extracted_text": "\n".join(clean_texts),
        "text_count": len(clean_texts),
        "avg_confidence": round(sum(scores) / len(scores), 2) if scores else 0,
    }
    if include_bbox:
        summary["text_regions"] = text_regions
    return summary

def inference(img, lang=None):
    """Run OCR on an image file and return the annotated image and its text.

    Args:
        img: Path of the image file to process (the interface passes a
            file path). A falsy value (e.g. ``None`` when nothing was
            uploaded) yields an empty result instead of an error.
        lang: Unused; kept so existing callers that pass it keep working.

    Returns:
        A ``(image, text)`` tuple: the RGB image with detected regions
        outlined, and the extracted text. ``(None, "")`` when no image was
        supplied; the un-annotated image and ``""`` when the OCR engine
        returns no results.
    """
    if not img:
        return None, ""

    results = ocr.predict(img)

    # Close the underlying file as soon as the pixel data has been copied.
    with Image.open(img) as source:
        pil_img = source.convert("RGB")

    if not results:
        return pil_img, ""

    data = extract_json(results[0].json, include_bbox=True)
    annotated = draw_ocr(pil_img, data.get("text_regions", []))
    return annotated, data.get("extracted_text")

# ===== Gradio interface =====
# Title and description text passed to the interface below.
title = "OCR"
description = """
Support Chinese, Japanese, Korean.
"""

# One image input (handed to `inference` as a file path) and two outputs:
# the annotated image and the extracted text.
demo = gr.Interface(
    fn=inference,
    inputs=[gr.Image(type="filepath", label="Upload ảnh")],
    outputs=[gr.Image(type="pil", label="Output"), gr.Textbox(label="Text")],
    title=title,
    description=description
)


# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()