# ocr / app.py
# minhvh's picture
# Update app.py
# c3a69eb verified
import gradio as gr
from paddleocr import PaddleOCR
from PIL import Image, ImageDraw
from typing import Dict
# Initialize OCR: one module-level PaddleOCR instance, built at import time.
_OCR_OPTIONS = {
    # Detection / recognition model names.
    "text_detection_model_name": "PP-OCRv5_mobile_det",
    "text_recognition_model_name": "PP-OCRv5_mobile_rec",
    # Document-level orientation classification and unwarping are disabled;
    # per-text-line orientation handling is enabled.
    "use_doc_orientation_classify": False,
    "use_doc_unwarping": False,
    "use_textline_orientation": True,
}
ocr = PaddleOCR(**_OCR_OPTIONS)
def replace_text(img: Image.Image, regions: list, new_text="NEW",
                 text_color="white", box_color=None, font=None) -> Image.Image:
    """Cover each region's bounding box with black and write new text on it.

    Drawing happens directly on ``img``; the same object is returned.

    Args:
        img: Image to draw on.
        regions: Dicts that may hold a ``"bbox"`` mapping with ``x1``, ``y1``,
            ``x2`` and ``y2`` entries. Regions with a missing or empty
            ``"bbox"`` are skipped.
        new_text: Text written at the top-left corner of every box.
        text_color: Fill used for the text.
        box_color: When truthy, a 2-pixel outline in this colour is drawn
            around each box after the text.
        font: Font forwarded to the text drawing call.

    Returns:
        ``img`` after all regions have been processed.
    """
    canvas = ImageDraw.Draw(img)
    for entry in regions:
        box = entry.get("bbox", {})
        if box:
            corners = [box["x1"], box["y1"], box["x2"], box["y2"]]
            top_left = (corners[0], corners[1])
            # Background: a solid black rectangle over the box area.
            canvas.rectangle(corners, fill=(0, 0, 0))
            # Replacement text, anchored at the box's top-left corner.
            canvas.text(top_left, new_text, fill=text_color, font=font)
            # Optional outline around the region.
            if box_color:
                canvas.rectangle(corners, outline=box_color, width=2)
    return img
def draw_ocr(image: Image.Image, text_regions: list, box_color="red", text_color="yellow") -> Image.Image:
    """Outline every text region's bounding box on ``image``.

    Drawing happens directly on ``image``; the same object is returned.
    Only rectangles are drawn; no text or confidence labels are rendered.

    Args:
        image: Image to draw on.
        text_regions: Dicts that may hold a ``"bbox"`` mapping with ``x1``,
            ``y1``, ``x2`` and ``y2`` entries. Regions with a missing or
            empty ``"bbox"`` are skipped.
        box_color: Outline colour of the 2-pixel rectangle.
        text_color: Not used by the current implementation; retained so
            existing callers that pass it keep working.

    Returns:
        ``image`` after all outlines have been drawn.
    """
    draw = ImageDraw.Draw(image)
    for region in text_regions:
        bbox = region.get("bbox")
        if not bbox:
            continue
        draw.rectangle(
            [bbox["x1"], bbox["y1"], bbox["x2"], bbox["y2"]],
            outline=box_color,
            width=2,
        )
    return image
def extract_json(result_json: Dict, include_bbox: bool = False,
                 min_confidence: float = 0.3) -> Dict:
    """Summarize an OCR result dict into text, counts and optional regions.

    Reads ``rec_texts``, ``rec_scores`` and (when requested) ``rec_boxes``
    from ``result_json["res"]``. A line is kept when its text is non-blank
    and its score is strictly greater than ``min_confidence``.

    Args:
        result_json: Result mapping; missing keys are treated as empty.
        include_bbox: When true, the output also carries ``"text_regions"``.
        min_confidence: Score threshold a line must exceed to be kept.

    Returns:
        A dict with:
          * ``"extracted_text"``: kept texts, stripped, joined by newlines.
          * ``"text_count"``: number of kept texts.
          * ``"avg_confidence"``: mean of *all* scores (including lines that
            were filtered out), rounded to 2 places; ``0`` if there are none.
          * ``"text_regions"`` (only if ``include_bbox``): one entry per kept
            line that has a box with at least four values, holding the
            unstripped text, the box corners plus width/height as ints, and
            the score rounded to 3 places.
    """
    data = result_json.get("res", {})
    texts = data.get("rec_texts", [])
    scores = data.get("rec_scores", [])

    # Indices of the lines that survive the blank-text / confidence filter.
    kept = [
        i for i, (t, s) in enumerate(zip(texts, scores))
        if t and t.strip() and s > min_confidence
    ]
    clean_texts = [texts[i].strip() for i in kept]

    summary = {
        "extracted_text": "\n".join(clean_texts),
        "text_count": len(clean_texts),
        "avg_confidence": round(sum(scores) / len(scores), 2) if scores else 0,
    }

    # Regions are built only on request. Previously they were always built
    # and indexed a missing box, raising TypeError when include_bbox was False.
    if not include_bbox:
        return summary

    boxes = data.get("rec_boxes", [])
    text_regions = []
    for i in kept:
        # Skip lines that have no usable box.
        if i >= len(boxes) or len(boxes[i]) < 4:
            continue
        # NOTE(review): assumes the first four values are x1, y1, x2, y2.
        x1, y1, x2, y2 = boxes[i][:4]
        text_regions.append({
            "text": texts[i],
            "bbox": {
                "x1": int(x1),
                "y1": int(y1),
                "x2": int(x2),
                "y2": int(y2),
                "width": int(x2 - x1),
                "height": int(y2 - y1),
            },
            "confidence": round(float(scores[i]), 3),
        })
    summary["text_regions"] = text_regions
    return summary
def inference(img, lang=None):
    """Run OCR on an image file and return the annotated image and its text.

    Args:
        img: Path of the image file to process. A falsy value (``None`` or
            an empty string, e.g. nothing was uploaded) yields an empty result.
        lang: Not used by the current implementation; retained so existing
            callers that pass it keep working.

    Returns:
        A ``(image, text)`` tuple: the RGB image with detected regions
        outlined, and the extracted text. ``(None, "")`` when ``img`` is
        falsy; the unannotated image and ``""`` when the OCR call returns
        no results.
    """
    if not img:
        return None, ""

    # Close the underlying file as soon as the RGB copy exists.
    with Image.open(img) as source:
        pil_img = source.convert("RGB")

    results = ocr.predict(img)
    if not results:
        return pil_img, ""

    data = extract_json(results[0].json, include_bbox=True)
    annotated = draw_ocr(pil_img, data.get("text_regions", []))
    return annotated, data.get("extracted_text")
# ===== Gradio interface =====
title = "OCR"
description = """
Support Chinese, Japanese, Korean.
"""

# Input: an uploaded image, delivered to `inference` as a file path.
_image_input = gr.Image(type="filepath", label="Upload ảnh")

# Outputs: an image and a textbox, matching the two values `inference` returns.
_result_outputs = [
    gr.Image(type="pil", label="Output"),
    gr.Textbox(label="Text"),
]

demo = gr.Interface(
    fn=inference,
    inputs=[_image_input],
    outputs=_result_outputs,
    title=title,
    description=description,
)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()