| import gradio as gr |
| from paddleocr import PaddleOCR |
| from PIL import Image, ImageDraw |
| from typing import Dict |
|
|
| |
# Shared OCR pipeline, constructed once when the module is imported and
# reused by every call to `inference`.
ocr = PaddleOCR(
    # Optional stages, going by the keyword names: document-level orientation
    # classification and unwarping are switched off, text-line orientation
    # handling is switched on.
    use_textline_orientation=True,
    use_doc_unwarping=False,
    use_doc_orientation_classify=False,
    # Detection / recognition model identifiers handed to PaddleOCR.
    text_recognition_model_name="PP-OCRv5_mobile_rec",
    text_detection_model_name="PP-OCRv5_mobile_det",
)
|
|
def replace_text(img: Image.Image, regions: list, new_text="NEW",
                 text_color="white", box_color=None, font=None) -> Image.Image:
    """Paint over each region's bounding box and write ``new_text`` on top.

    The image is modified in place and the same object is returned.

    Args:
        img: Image to draw on.
        regions: Iterable of mappings; each may carry a ``"bbox"`` mapping
            with ``"x1"``, ``"y1"``, ``"x2"`` and ``"y2"`` entries. Regions
            whose ``"bbox"`` is missing or empty are skipped.
        new_text: Text drawn at the top-left corner of every box.
        text_color: Fill colour used for the text.
        box_color: When truthy, a 2-pixel outline in this colour is drawn
            around each box after the text.
        font: Forwarded unchanged to ``ImageDraw.text``.

    Returns:
        ``img`` itself, after drawing.
    """
    canvas = ImageDraw.Draw(img)

    for region in regions:
        box = region.get("bbox", {})
        if box:
            corners = [box["x1"], box["y1"], box["x2"], box["y2"]]

            # Blank out the original content with a solid black rectangle.
            canvas.rectangle(corners, fill=(0, 0, 0))

            # Write the replacement text anchored at the box's top-left corner.
            canvas.text((corners[0], corners[1]), new_text, fill=text_color, font=font)

            # Optional outline, drawn last so it sits on top of the fill.
            if box_color:
                canvas.rectangle(corners, outline=box_color, width=2)

    return img
|
|
def draw_ocr(image: Image.Image, text_regions: list, box_color="red", text_color="yellow") -> Image.Image:
    """Outline every text region's bounding box on ``image``.

    The image is modified in place and the same object is returned.

    Args:
        image: Image to draw on.
        text_regions: Iterable of mappings; each may carry a ``"bbox"``
            mapping with ``"x1"``, ``"y1"``, ``"x2"`` and ``"y2"`` entries.
            Regions whose ``"bbox"`` is missing or empty are skipped.
        box_color: Outline colour of the 2-pixel rectangle.
        text_color: Currently unused; no text is drawn by this function.
            Kept so existing callers that pass it keep working.

    Returns:
        ``image`` itself, after drawing.
    """
    draw = ImageDraw.Draw(image)
    for region in text_regions:
        bbox = region.get("bbox", {})
        if not bbox:
            continue
        draw.rectangle(
            [bbox["x1"], bbox["y1"], bbox["x2"], bbox["y2"]],
            outline=box_color,
            width=2,
        )
    return image
|
|
def extract_json(result_json: Dict, include_bbox: bool = False,
                 min_confidence: float = 0.3) -> Dict:
    """Summarise an OCR result dictionary.

    Reads ``rec_texts``, ``rec_scores`` and (optionally) ``rec_boxes`` from
    ``result_json["res"]``. A text is accepted when it is non-blank and its
    score is strictly greater than ``min_confidence``.

    Args:
        result_json: Mapping with a ``"res"`` entry holding the parallel
            ``rec_texts`` / ``rec_scores`` / ``rec_boxes`` sequences. Missing
            entries are treated as empty.
        include_bbox: When true, the result also contains ``"text_regions"``
            with one entry per accepted text that has a box of at least four
            values at the same index. Accepted texts without such a box are
            left out of ``"text_regions"`` but still counted in the summary.
        min_confidence: Exclusive lower bound on the score of accepted texts.

    Returns:
        A dict with ``"extracted_text"`` (accepted texts, stripped and joined
        by newlines), ``"text_count"`` (number of accepted texts) and
        ``"avg_confidence"`` (mean of *all* scores, including rejected ones,
        rounded to 2 places; ``0`` when there are no scores), plus
        ``"text_regions"`` when ``include_bbox`` is true.
    """
    data = result_json.get("res", {})
    texts = data.get("rec_texts", [])
    scores = data.get("rec_scores", [])
    boxes = data.get("rec_boxes", []) if include_bbox else []

    clean_texts = []
    text_regions = []
    for i, (text, score) in enumerate(zip(texts, scores)):
        # Text is checked before the score so a blank entry never triggers
        # the score comparison.
        if not (text and text.strip() and score > min_confidence):
            continue
        clean_texts.append(text.strip())

        # Regions are only built on request; without this guard a missing
        # box would be indexed as ``None`` and raise ``TypeError``.
        if not include_bbox:
            continue
        if i >= len(boxes) or len(boxes[i]) < 4:
            continue

        box = boxes[i]
        text_regions.append({
            # The region keeps the text exactly as recognised (not stripped).
            "text": text,
            "bbox": {
                "x1": int(box[0]),
                "y1": int(box[1]),
                "x2": int(box[2]),
                "y2": int(box[3]),
                "width": int(box[2] - box[0]),
                "height": int(box[3] - box[1]),
            },
            "confidence": round(float(score), 3),
        })

    summary = {
        "extracted_text": "\n".join(clean_texts),
        "text_count": len(clean_texts),
        "avg_confidence": round(sum(scores) / len(scores), 2) if scores else 0,
    }
    if include_bbox:
        summary["text_regions"] = text_regions
    return summary
|
|
def inference(img, lang=None):
    """Run OCR on an image file and return the annotated image and its text.

    Args:
        img: Path of the image to process. A falsy value (``None`` or an
            empty string) yields an empty result instead of an error.
        lang: Accepted but not used by this function.

    Returns:
        A ``(image, text)`` tuple: the RGB image with the detected regions
        outlined, and the extracted text. ``(None, "")`` when no image was
        supplied.
    """
    if not img:
        return None, ""

    results = ocr.predict(img)
    # Guard against an empty result list rather than indexing blindly.
    data = extract_json(results[0].json, include_bbox=True) if results else {}

    # Close the file handle as soon as the pixel data has been converted;
    # the converted image does not depend on the open file.
    with Image.open(img) as source:
        pil_img = source.convert("RGB")

    im_show = draw_ocr(pil_img, data.get("text_regions", []))
    return im_show, data.get("extracted_text", "")
|
|
| |
# Heading and blurb displayed by the Gradio interface.
title = "OCR"
description = """
Support Chinese, Japanese, Korean.
"""

# One image path in; annotated image and extracted text out, via `inference`.
demo = gr.Interface(
    inputs=[gr.Image(label="Upload ảnh", type="filepath")],
    outputs=[
        gr.Image(label="Output", type="pil"),
        gr.Textbox(label="Text"),
    ],
    fn=inference,
    description=description,
    title=title,
)


if __name__ == "__main__":
    # Launch the web UI only when this file is executed as a script.
    demo.launch()
|
|