# Gradio demo: run PaddleOCR on an uploaded image, outline the detected text
# regions, and show the extracted text.
import gradio as gr
from paddleocr import PaddleOCR
from PIL import Image, ImageDraw
from typing import Dict

# Initialize the OCR engine once at import time; inference() reuses this
# single module-level instance for every request.
ocr = PaddleOCR(
    text_detection_model_name="PP-OCRv5_mobile_det",
    text_recognition_model_name="PP-OCRv5_mobile_rec",
    use_doc_orientation_classify=False,
    use_doc_unwarping=False,
    use_textline_orientation=True
)

def replace_text(img: Image.Image, regions: list, new_text="NEW",
                 text_color="white", box_color=None, font=None) -> Image.Image:
    """Paint over each region's bounding box and write ``new_text`` in its place.

    The image is modified in place and the same object is returned.

    Args:
        img: Image to draw on.
        regions: Iterable of dicts; each may carry a ``"bbox"`` dict with
            ``x1``, ``y1``, ``x2``, ``y2`` keys. Regions whose bbox is missing
            or empty are skipped.
        new_text: Text drawn at the top-left corner of every box.
        text_color: Fill colour for the text.
        box_color: When truthy, an outline of this colour (2 px wide) is drawn
            around the box after the text.
        font: Font forwarded to ``ImageDraw.text``.

    Returns:
        The same ``img`` object, after drawing.
    """
    canvas = ImageDraw.Draw(img)

    for entry in regions:
        coords = entry.get("bbox", {})
        if coords:
            left, top = coords["x1"], coords["y1"]
            right, bottom = coords["x2"], coords["y2"]
            rect = [left, top, right, bottom]

            # Black out the original text, then write the replacement on top.
            canvas.rectangle(rect, fill=(0, 0, 0))
            canvas.text((left, top), new_text, fill=text_color, font=font)

            # Optional outline around the region.
            if box_color:
                canvas.rectangle(rect, outline=box_color, width=2)

    return img

def draw_ocr(image: Image.Image, text_regions: list, box_color="red", text_color="yellow") -> Image.Image:
    """Outline every detected text region on ``image``.

    The image is modified in place and the same object is returned.

    Args:
        image: Image to draw on.
        text_regions: Iterable of dicts; each may carry a ``"bbox"`` dict with
            ``x1``, ``y1``, ``x2``, ``y2`` keys. Regions whose bbox is missing
            or empty are skipped.
        box_color: Outline colour of the rectangles (2 px wide).
        text_color: Currently unused — no text labels are drawn. Kept so
            existing callers that pass it keep working.

    Returns:
        The same ``image`` object, after drawing.
    """
    draw = ImageDraw.Draw(image)
    for region in text_regions:
        bbox = region.get("bbox", {})
        if not bbox:
            continue
        draw.rectangle(
            [bbox["x1"], bbox["y1"], bbox["x2"], bbox["y2"]],
            outline=box_color,
            width=2,
        )
    return image

def extract_json(result_json: Dict, include_bbox: bool = False,
                 min_confidence: float = 0.3) -> Dict:
    """Summarize an OCR result dict into plain text plus optional regions.

    Reads ``rec_texts``, ``rec_scores`` and (when ``include_bbox`` is true)
    ``rec_boxes`` from ``result_json["res"]``. A recognition is kept when its
    text is non-blank and its score is strictly greater than
    ``min_confidence``.

    Args:
        result_json: Mapping with a ``"res"`` entry holding the OCR lists.
            Missing keys are treated as empty.
        include_bbox: When true, the result also contains ``"text_regions"``.
        min_confidence: Score threshold (exclusive) for keeping a recognition.

    Returns:
        A dict with:
          * ``extracted_text``: kept texts, stripped, joined by newlines.
          * ``text_count``: number of kept texts.
          * ``avg_confidence``: mean of *all* raw scores (not only the kept
            ones), rounded to 2 decimals; ``0`` when there are no scores.
          * ``text_regions`` (only if ``include_bbox``): one entry per kept
            text that has a box with at least four coordinates, holding the
            original (unstripped) text, an integer bbox with derived
            ``width``/``height``, and the score rounded to 3 decimals.
    """
    data = result_json.get("res", {})
    texts = data.get("rec_texts", [])
    scores = data.get("rec_scores", [])

    # Keep the original index so each kept text can be matched to its box.
    kept = [
        (i, t, s)
        for i, (t, s) in enumerate(zip(texts, scores))
        if t and t.strip() and s > min_confidence
    ]
    clean_texts = [t.strip() for _, t, _ in kept]

    result = {
        "extracted_text": "\n".join(clean_texts),
        "text_count": len(clean_texts),
        "avg_confidence": round(sum(scores) / len(scores), 2) if scores else 0,
    }

    # Regions are built only on request; building them unconditionally would
    # index into a missing box when include_bbox is False.
    if include_bbox:
        boxes = data.get("rec_boxes", [])
        text_regions = []
        for i, t, s in kept:
            # Skip texts with no usable box (missing or fewer than 4 values).
            if i >= len(boxes) or len(boxes[i]) < 4:
                continue
            b = boxes[i]
            text_regions.append({
                "text": t,
                "bbox": {
                    "x1": int(b[0]),
                    "y1": int(b[1]),
                    "x2": int(b[2]),
                    "y2": int(b[3]),
                    "width": int(b[2] - b[0]),
                    "height": int(b[3] - b[1]),
                },
                "confidence": round(float(s), 3),
            })
        result["text_regions"] = text_regions

    return result

def inference(img, lang=None):
    """Run OCR on an image file and return the annotated image and its text.

    Args:
        img: Path of the image file to process. A falsy value (e.g. ``None``
            when nothing was uploaded) short-circuits to an empty result.
        lang: Unused; kept so existing callers that pass it keep working.

    Returns:
        A ``(image, text)`` tuple: an RGB PIL image with the detected regions
        outlined, and the extracted text joined by newlines. Returns
        ``(None, "")`` when no image was supplied.
    """
    if not img:
        return None, ""

    res = ocr.predict(img)
    data = extract_json(res[0].json, include_bbox=True)

    # convert() returns an independent image, so the file handle can be
    # released as soon as the conversion is done.
    with Image.open(img) as source:
        pil_img = source.convert("RGB")

    im_show = draw_ocr(pil_img, data.get("text_regions", []))
    return im_show, data.get("extracted_text")

# ===== Gradio interface =====
title = "OCR"
# NOTE(review): the description advertises Chinese/Japanese/Korean, but no
# language is passed when the OCR engine is created and inference() ignores
# its `lang` argument — confirm the configured models cover these languages.
description = """
Support Chinese, Japanese, Korean.
"""

# Single-function UI: the uploaded image reaches inference() as a file path,
# and its two return values fill the image and text outputs in that order.
demo = gr.Interface(
    fn=inference,
    inputs=[gr.Image(type="filepath", label="Upload ảnh")],
    outputs=[gr.Image(type="pil", label="Output"), gr.Textbox(label="Text")],
    title=title,
    description=description
)


if __name__ == "__main__":
    # Start the web app only when this file is executed as a script.
    demo.launch()