File size: 3,523 Bytes
05e0c4b 1b20983 6855d5b 6a009c3 6855d5b 1dd1f8c 63089ba 1dd1f8c c3a69eb 1dd1f8c 63089ba 1dd1f8c 63089ba 1dd1f8c 6855d5b 63089ba 6855d5b 6a009c3 63089ba 6a009c3 63089ba 6a009c3 27397d7 6a009c3 6855d5b 63089ba 6a009c3 6855d5b 27397d7 05e0c4b 63089ba 2b207f2 7913abc | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 | import gradio as gr
from paddleocr import PaddleOCR
from PIL import Image, ImageDraw
from typing import Dict
# Initialize the OCR engine once at import time; inference() reuses this
# module-level instance for every request.
ocr = PaddleOCR(
    # Document-level preprocessing steps are switched off.
    use_doc_orientation_classify=False,
    use_doc_unwarping=False,
    # Per-textline orientation handling stays enabled.
    use_textline_orientation=True,
    # PP-OCRv5 mobile models for detection and recognition.
    text_detection_model_name="PP-OCRv5_mobile_det",
    text_recognition_model_name="PP-OCRv5_mobile_rec",
)
def replace_text(img: Image.Image, regions: list, new_text="NEW",
                 text_color="white", box_color=None, font=None) -> Image.Image:
    """Paint over each region's bounding box and write ``new_text`` on top.

    The image is modified in place and the same object is returned.

    Args:
        img: Image to draw on.
        regions: Dicts that may carry a ``"bbox"`` dict with ``x1``, ``y1``,
            ``x2`` and ``y2`` entries; regions with a missing or empty
            ``"bbox"`` are skipped.
        new_text: Text written at the top-left corner of every box.
        text_color: Fill colour for the text.
        box_color: When truthy, an outline of this colour (width 2) is drawn
            around each box.
        font: Font passed through to ``ImageDraw.text``.

    Returns:
        ``img`` itself, after drawing.
    """
    canvas = ImageDraw.Draw(img)
    candidate_boxes = (entry.get("bbox", {}) for entry in regions)
    for box in candidate_boxes:
        if box:
            left, top, right, bottom = (
                box[key] for key in ("x1", "y1", "x2", "y2")
            )
            corners = [left, top, right, bottom]
            # Black out the original text first.
            canvas.rectangle(corners, fill=(0, 0, 0))
            # Then write the replacement text at the box origin.
            canvas.text((left, top), new_text, fill=text_color, font=font)
            # Optional outline around the replaced area.
            if box_color:
                canvas.rectangle(corners, outline=box_color, width=2)
    return img
def draw_ocr(image: Image.Image, text_regions: list, box_color="red", text_color="yellow") -> Image.Image:
    """Outline every text region that has a bounding box.

    The image is modified in place and the same object is returned.

    Args:
        image: Image to draw on.
        text_regions: Dicts that may carry a ``"bbox"`` dict with ``x1``,
            ``y1``, ``x2`` and ``y2`` entries; regions with a missing or
            empty ``"bbox"`` are skipped.
        box_color: Outline colour of the rectangles (width 2).
        text_color: Kept for interface compatibility. It is currently unused
            because this function draws boxes only, not text labels.

    Returns:
        ``image`` itself, after drawing.
    """
    draw = ImageDraw.Draw(image)
    for region in text_regions:
        bbox = region.get("bbox", {})
        if not bbox:
            continue
        draw.rectangle(
            [bbox["x1"], bbox["y1"], bbox["x2"], bbox["y2"]],
            outline=box_color,
            width=2,
        )
    return image
def extract_json(result_json: Dict, include_bbox: bool = False,
                 min_confidence: float = 0.3) -> Dict:
    """Summarise an OCR result dictionary into plain text and statistics.

    Reads ``rec_texts``, ``rec_scores`` and (optionally) ``rec_boxes`` from
    ``result_json["res"]``. A text is kept when it is non-blank and its score
    is strictly greater than ``min_confidence``.

    Args:
        result_json: Result dictionary; missing keys are treated as empty.
        include_bbox: When true, the result also contains ``"text_regions"``,
            one entry per kept text that has a box with at least four values
            (read as ``x1, y1, x2, y2``). Kept texts without a usable box
            still count towards the text output but get no region entry.
        min_confidence: Exclusive lower bound on the recognition score.

    Returns:
        A dict with ``extracted_text`` (kept texts, stripped and joined by
        newlines), ``text_count``, ``avg_confidence`` (mean of *all* scores,
        including filtered ones, rounded to 2 places; 0 when there are none)
        and, only when ``include_bbox`` is true, ``text_regions``.
    """
    data = result_json.get("res", {})
    texts = data.get("rec_texts", [])
    scores = data.get("rec_scores", [])
    boxes = data.get("rec_boxes", []) if include_bbox else []

    clean_texts = []
    text_regions = []
    for index, (text, score) in enumerate(zip(texts, scores)):
        if not (text and text.strip() and score > min_confidence):
            continue
        clean_texts.append(text.strip())

        # Regions are only built on request; touching boxes otherwise used to
        # raise TypeError because there is no box to index.
        if not include_bbox:
            continue
        if index >= len(boxes) or len(boxes[index]) < 4:
            continue

        box = boxes[index]
        text_regions.append(
            {
                "text": text,
                "bbox": {
                    "x1": int(box[0]),
                    "y1": int(box[1]),
                    "x2": int(box[2]),
                    "y2": int(box[3]),
                    "width": int(box[2] - box[0]),
                    "height": int(box[3] - box[1]),
                },
                "confidence": round(float(score), 3),
            }
        )

    summary = {
        "extracted_text": "\n".join(clean_texts),
        "text_count": len(clean_texts),
        "avg_confidence": round(sum(scores) / len(scores), 2) if scores else 0,
    }
    if include_bbox:
        summary["text_regions"] = text_regions
    return summary
def inference(img, lang=None):
    """Run OCR on an image file and return the annotated image and its text.

    Used as the Gradio callback, which passes the uploaded image as a file
    path.

    Args:
        img: Path of the image to process. A missing value (``None`` or an
            empty string, e.g. when nothing was uploaded) short-circuits.
        lang: Unused; kept so existing callers that pass it keep working.

    Returns:
        A ``(image, text)`` tuple: the RGB image with detected regions
        outlined, and the extracted text. ``(None, "")`` when no image was
        given; the unannotated image and ``""`` when the OCR engine returns
        no result.
    """
    # Nothing uploaded: return empty outputs instead of failing in the engine.
    if not img:
        return None, ""

    results = ocr.predict(img)

    # Convert inside a context manager so the source file handle is released.
    with Image.open(img) as source:
        pil_img = source.convert("RGB")

    if not results:
        return pil_img, ""

    data = extract_json(results[0].json, include_bbox=True)
    annotated = draw_ocr(pil_img, data.get("text_regions", []))
    return annotated, data.get("extracted_text")
# ===== Gradio interface =====
title = "OCR"
description = "\nSupport Chinese, Japanese, Korean.\n"

# One image upload in (handed to inference() as a file path); the annotated
# image and the extracted text out.
demo = gr.Interface(
    fn=inference,
    title=title,
    description=description,
    inputs=[gr.Image(label="Upload ảnh", type="filepath")],
    outputs=[
        gr.Image(label="Output", type="pil"),
        gr.Textbox(label="Text"),
    ],
)

if __name__ == "__main__":
    demo.launch()
|