Update app.py
Browse files
app.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from paddleocr import PaddleOCR
|
| 3 |
-
from PIL import Image
|
| 4 |
-
from typing import
|
| 5 |
-
|
| 6 |
|
|
|
|
| 7 |
ocr = PaddleOCR(
|
| 8 |
text_detection_model_name="PP-OCRv5_mobile_det",
|
| 9 |
text_recognition_model_name="PP-OCRv5_mobile_rec",
|
|
@@ -14,78 +14,56 @@ ocr = PaddleOCR(
|
|
| 14 |
|
| 15 |
def draw_ocr(image: Image.Image, text_regions: list, box_color="red", text_color="yellow") -> Image.Image:
    """Annotate *image* with one box and one caption per text region.

    Each region is read with ``.get`` for the keys ``"bbox"``, ``"text"`` and
    ``"confidence"``; regions whose ``"bbox"`` is missing or empty are skipped.
    The drawing happens directly on *image*, and that same object is returned.

    Args:
        image: Image to draw on (modified in place).
        text_regions: Region mappings; ``"bbox"`` must provide ``x1``, ``y1``,
            ``x2`` and ``y2``.
        box_color: Outline colour of the rectangles.
        text_color: Fill colour of the captions.

    Returns:
        The annotated *image*.
    """
    canvas = ImageDraw.Draw(image)
    for entry in text_regions:
        box = entry.get("bbox", {})
        if not box:
            # Nothing to draw without coordinates.
            continue

        caption = entry.get("text", "")
        score = entry.get("confidence", 0)
        left, top = box["x1"], box["y1"]
        right, bottom = box["x2"], box["y2"]

        canvas.rectangle([left, top, right, bottom], outline=box_color, width=2)
        # Put the caption 12 px above the box, clamped to the top edge.
        canvas.text((left, max(top - 12, 0)), f"{caption} ({score})", fill=text_color)
    return image
|
| 31 |
|
| 32 |
-
|
| 33 |
def extract_json(result_json: Dict, include_bbox: bool = False) -> Dict:
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
rec_scores = ocr_data.get('rec_scores', [])
|
| 38 |
-
rec_boxes = ocr_data.get('rec_boxes', []) if include_bbox else []
|
| 39 |
|
| 40 |
clean_texts = []
|
| 41 |
text_regions = []
|
| 42 |
|
| 43 |
for i, (text, score) in enumerate(zip(rec_texts, rec_scores)):
|
| 44 |
-
if text and text.strip() and score > 0.3:
|
| 45 |
clean_text = text.strip()
|
| 46 |
clean_texts.append(clean_text)
|
| 47 |
-
|
| 48 |
if include_bbox and i < len(rec_boxes):
|
| 49 |
box = rec_boxes[i]
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
"height": int(y2 - y1)
|
| 61 |
-
},
|
| 62 |
-
"confidence": round(float(score), 3)
|
| 63 |
-
})
|
| 64 |
-
|
| 65 |
-
# Create output
|
| 66 |
-
ouput = {
|
| 67 |
"extracted_text": " ".join(clean_texts),
|
| 68 |
"text_count": len(clean_texts),
|
| 69 |
"avg_confidence": round(sum(rec_scores) / len(rec_scores), 2) if rec_scores else 0
|
| 70 |
}
|
| 71 |
-
|
| 72 |
-
# Add bounding boxes if requested
|
| 73 |
if include_bbox:
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
return ouput
|
| 77 |
-
|
| 78 |
|
|
|
|
| 79 |
|
| 80 |
-
def inference(img, lang):
    """Run OCR on one image and return the annotated image plus its text.

    Args:
        img: Image source handed to both ``ocr.predict`` and ``Image.open``
            (presumably a file path supplied by the UI; confirm against the
            Interface inputs).
        lang: Accepted for interface compatibility; not used by this function.

    Returns:
        A ``(annotated_image, extracted_text)`` tuple. When *img* is ``None``
        (nothing was submitted) the result is ``(None, "")``.
    """
    if img is None:
        # Nothing to recognise; avoid failing inside the OCR engine or Pillow.
        return None, ""

    result = ocr.predict(img)
    res = result[0]

    # The original passed the undefined name ``true`` here, which raised
    # NameError on every call; the Python boolean literal is ``True``.
    data = extract_json(res, True)

    # convert() returns a fully loaded copy, so the file can be closed at once.
    with Image.open(img) as source:
        image = source.convert("RGB")
    im_show = draw_ocr(image, data.get("text_regions", []))

    return im_show, data.get('extracted_text')
|
| 89 |
|
| 90 |
# ===== Giao diện Gradio =====
|
| 91 |
title = "PaddleOCR Multi-language (v3.x)"
|
|
@@ -96,18 +74,12 @@ PaddleOCR 3.x hỗ trợ Chinese, Japanese, Korean.
|
|
| 96 |
- Xuất ảnh kết quả và văn bản nhận diện.
|
| 97 |
"""
|
| 98 |
|
| 99 |
-
|
| 100 |
demo = gr.Interface(
|
| 101 |
fn=inference,
|
| 102 |
-
inputs=[
|
| 103 |
-
|
| 104 |
-
],
|
| 105 |
-
outputs=[
|
| 106 |
-
gr.Image(type="pil", label="Ảnh kết quả"),
|
| 107 |
-
gr.Textbox(label="Văn bản nhận diện")
|
| 108 |
-
],
|
| 109 |
title=title,
|
| 110 |
-
description=description
|
| 111 |
)
|
| 112 |
|
| 113 |
if __name__ == "__main__":
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from paddleocr import PaddleOCR
|
| 3 |
+
from PIL import Image, ImageDraw
|
| 4 |
+
from typing import Dict
|
|
|
|
| 5 |
|
| 6 |
+
# Khởi tạo OCR
|
| 7 |
ocr = PaddleOCR(
|
| 8 |
text_detection_model_name="PP-OCRv5_mobile_det",
|
| 9 |
text_recognition_model_name="PP-OCRv5_mobile_rec",
|
|
|
|
| 14 |
|
| 15 |
def draw_ocr(image: Image.Image, text_regions: list, box_color="red", text_color="yellow") -> Image.Image:
    """Annotate *image* with one box and one caption per text region.

    Each region is read with ``.get`` for the keys ``"bbox"``, ``"text"`` and
    ``"confidence"``; regions whose ``"bbox"`` is missing or empty are skipped.
    The drawing happens directly on *image*, and that same object is returned.

    Args:
        image: Image to draw on (modified in place).
        text_regions: Region mappings; ``"bbox"`` must provide ``x1``, ``y1``,
            ``x2`` and ``y2``.
        box_color: Outline colour of the rectangles.
        text_color: Fill colour of the captions.

    Returns:
        The annotated *image*.
    """
    canvas = ImageDraw.Draw(image)
    for entry in text_regions:
        box = entry.get("bbox", {})
        if not box:
            # Nothing to draw without coordinates.
            continue

        caption = entry.get("text", "")
        score = entry.get("confidence", 0)
        left, top = box["x1"], box["y1"]
        right, bottom = box["x2"], box["y2"]

        canvas.rectangle([left, top, right, bottom], outline=box_color, width=2)
        # Put the caption 12 px above the box, clamped to the top edge.
        canvas.text((left, max(top - 12, 0)), f"{caption} ({score})", fill=text_color)
    return image
|
| 27 |
|
|
|
|
| 28 |
def _is_ocr_line(item) -> bool:
    """Return True when *item* is shaped like ``[box, (text, score)]``."""
    try:
        return len(item) == 2 and len(item[1]) == 2 and isinstance(item[1][0], str)
    except (TypeError, KeyError, IndexError):
        return False


def _split_ocr_result(result_json):
    """Return ``(texts, scores, boxes)`` lists from a supported result layout."""
    payload = result_json
    # OCR calls return one entry per input image; unwrap the first (only)
    # entry unless the sequence already is a list of recognised lines.
    if isinstance(payload, (list, tuple)) and payload and not _is_ocr_line(payload[0]):
        payload = payload[0]
    if payload is None:
        # A ``None`` entry means no text was found for that image.
        return [], [], []

    if callable(getattr(payload, "get", None)):
        # Mapping layout, optionally nested under a "res" key.
        nested = payload.get("res")
        data = nested if callable(getattr(nested, "get", None)) else payload
        # ``is None`` instead of truthiness: values may be arrays whose truth
        # value is ambiguous.
        texts, scores, boxes = (
            [] if data.get(key) is None else list(data.get(key))
            for key in ("rec_texts", "rec_scores", "rec_boxes")
        )
        return texts, scores, boxes

    lines = [line for line in payload if _is_ocr_line(line)]
    return (
        [line[1][0] for line in lines],
        [line[1][1] for line in lines],
        [line[0] for line in lines],
    )


def _box_corners(box):
    """Return integer ``(x1, y1, x2, y2)`` for a flat box or a point polygon."""
    values = list(box)
    if len(values) == 4 and not hasattr(values[0], "__len__"):
        # Flat [x1, y1, x2, y2] box.
        x1, y1, x2, y2 = (int(value) for value in values)
        return x1, y1, x2, y2
    # Polygon of (x, y) points: use its axis-aligned extent so that x1 <= x2
    # and y1 <= y2 even when the polygon is rotated.
    xs = [int(point[0]) for point in values]
    ys = [int(point[1]) for point in values]
    return min(xs), min(ys), max(xs), max(ys)


def extract_json(result_json: Dict, include_bbox: bool = False,
                 min_confidence: float = 0.3) -> Dict:
    """Condense an OCR result into a small summary dictionary.

    Accepted layouts for *result_json*:

    * a mapping with ``rec_texts`` / ``rec_scores`` / ``rec_boxes`` entries,
      optionally nested under a ``"res"`` key;
    * a sequence of ``[box, (text, score)]`` lines;
    * a one-entry-per-image sequence wrapping either of the above (only the
      first entry is used), including a ``None`` entry for "no text found".

    Args:
        result_json: OCR result in one of the layouts above.
        include_bbox: When true, add a ``"text_regions"`` list with one entry
            per kept text.
        min_confidence: A text is kept only if its score is strictly greater
            than this value.

    Returns:
        A dict with ``"extracted_text"`` (kept texts joined by spaces),
        ``"text_count"``, ``"avg_confidence"`` (mean over *all* scores, kept or
        not, rounded to 2 places; ``0`` when there are none) and, if requested,
        ``"text_regions"``.
    """
    rec_texts, rec_scores, rec_boxes = _split_ocr_result(result_json)
    if not include_bbox:
        rec_boxes = []

    clean_texts = []
    text_regions = []

    for i, (text, score) in enumerate(zip(rec_texts, rec_scores)):
        clean_text = text.strip() if text else ""
        if not clean_text or not (score > min_confidence):
            continue
        clean_texts.append(clean_text)

        if i >= len(rec_boxes):
            continue
        box = rec_boxes[i]
        if box is None or len(box) == 0:
            continue
        x1, y1, x2, y2 = _box_corners(box)
        text_regions.append({
            "text": clean_text,
            "bbox": {"x1": x1, "y1": y1, "x2": x2, "y2": y2,
                     "width": x2 - x1, "height": y2 - y1},
            "confidence": round(float(score), 3),
        })

    output = {
        "extracted_text": " ".join(clean_texts),
        "text_count": len(clean_texts),
        "avg_confidence": round(float(sum(rec_scores)) / len(rec_scores), 2) if rec_scores else 0,
    }

    if include_bbox:
        output["text_regions"] = text_regions

    return output
|
| 60 |
|
| 61 |
+
def inference(img, lang=None):
    """Run OCR on an uploaded image and return the annotated image and text.

    Args:
        img: Path of the image file to process, or ``None`` when nothing was
            uploaded.
        lang: Accepted for interface compatibility; not used by this function.

    Returns:
        A ``(annotated_image, extracted_text)`` tuple; ``(None, "")`` when
        *img* is ``None``.
    """
    if img is None:
        # Nothing to recognise; avoid failing inside Pillow or the OCR engine.
        return None, ""

    # convert() returns a fully loaded copy, so the file can be closed at once.
    with Image.open(img) as source:
        image = source.convert("RGB")

    # Give the OCR engine the file path rather than the PIL object: its
    # documented inputs are a path or an ndarray, not a PIL image. The decoded
    # image above is only used for drawing.
    result = ocr.predict(img)  # PaddleOCR 3.x
    data = extract_json(result, include_bbox=True)
    im_show = draw_ocr(image, data.get("text_regions", []))
    return im_show, data.get("extracted_text")
|
|
|
|
| 67 |
|
| 68 |
# ===== Giao diện Gradio =====
|
| 69 |
title = "PaddleOCR Multi-language (v3.x)"
|
|
|
|
| 74 |
- Xuất ảnh kết quả và văn bản nhận diện.
|
| 75 |
"""
|
| 76 |
|
|
|
|
| 77 |
# Gradio UI: one uploaded image (delivered to `inference` as a file path) in;
# the annotated image and the recognised text out.
demo = gr.Interface(
    fn=inference,
    inputs=[
        gr.Image(type="filepath", label="Upload ảnh"),
    ],
    outputs=[
        gr.Image(type="pil", label="Ảnh kết quả"),
        gr.Textbox(label="Văn bản nhận diện"),
    ],
    title=title,
    description=description,
)
|
| 84 |
|
| 85 |
if __name__ == "__main__":
|