Spaces:

minhvh
/

ocr

Sleeping

App Files Files Community

ocr / app.py

minhvh

Update app.py

c3a69eb verified 9 months ago

raw

history blame contribute delete

3.52 kB

	import gradio as gr
	from paddleocr import PaddleOCR
	from PIL import Image, ImageDraw
	from typing import Dict

	# Initialize the OCR engine.
	# The options are gathered in one mapping so the pipeline configuration can
	# be read at a glance; they are forwarded to PaddleOCR unchanged.
	_OCR_OPTIONS = {
	    "text_detection_model_name": "PP-OCRv5_mobile_det",
	    "text_recognition_model_name": "PP-OCRv5_mobile_rec",
	    "use_doc_orientation_classify": False,
	    "use_doc_unwarping": False,
	    "use_textline_orientation": True,
	}
	ocr = PaddleOCR(**_OCR_OPTIONS)

	def replace_text(img: Image.Image, regions: list, new_text="NEW",
	                 text_color="white", box_color=None, font=None,
	                 bg_color=(0, 0, 0)) -> Image.Image:
	    """Paint over each region's bounding box and write replacement text on it.

	    The image is modified in place and the same object is returned.

	    Args:
	        img: Image to draw on.
	        regions: Dicts that may carry a "bbox" mapping with the keys
	            "x1", "y1", "x2" and "y2". Regions whose "bbox" is missing or
	            empty are skipped.
	        new_text: Text written at the top-left corner of every box.
	        text_color: Fill colour of the replacement text.
	        box_color: When truthy, an outline of this colour (2 px wide) is
	            drawn around each box after the text.
	        font: Font passed to ``ImageDraw.text``; ``None`` uses the default.
	        bg_color: Fill colour used to cover the original content. Defaults
	            to black, which was previously hard-coded.

	    Returns:
	        The same ``img`` object, after drawing.
	    """
	    draw = ImageDraw.Draw(img)

	    for region in regions:
	        bbox = region.get("bbox", {})
	        if not bbox:
	            continue
	        box = [bbox["x1"], bbox["y1"], bbox["x2"], bbox["y2"]]

	        # Cover the original content with a solid background.
	        draw.rectangle(box, fill=bg_color)

	        # Write the replacement text at the box's top-left corner.
	        draw.text((box[0], box[1]), new_text, fill=text_color, font=font)

	        # Optionally outline the region; drawn last so it sits above the text.
	        if box_color:
	            draw.rectangle(box, outline=box_color, width=2)

	    return img

	def draw_ocr(image: Image.Image, text_regions: list, box_color="red", text_color="yellow") -> Image.Image:
	    """Outline every region that has a bounding box on ``image``.

	    The image is modified in place and the same object is returned.

	    Args:
	        image: Image to draw on.
	        text_regions: Dicts that may carry a "bbox" mapping with the keys
	            "x1", "y1", "x2" and "y2". Regions whose "bbox" is missing or
	            empty are skipped.
	        box_color: Outline colour of the rectangles (2 px wide).
	        text_color: Currently unused because no text label is drawn; kept
	            so existing callers that pass it keep working.

	    Returns:
	        The same ``image`` object, after drawing.
	    """
	    draw = ImageDraw.Draw(image)
	    for region in text_regions:
	        bbox = region.get("bbox", {})
	        if not bbox:
	            continue
	        draw.rectangle(
	            [bbox["x1"], bbox["y1"], bbox["x2"], bbox["y2"]],
	            outline=box_color,
	            width=2,
	        )
	    return image

	def extract_json(result_json: Dict, include_bbox: bool = False, min_score: float = 0.3) -> Dict:
	    """Summarize an OCR result dictionary.

	    Reads "rec_texts", "rec_scores" and (optionally) "rec_boxes" from
	    ``result_json["res"]``. A text is kept when it is non-blank and its
	    score is strictly greater than ``min_score``.

	    Args:
	        result_json: Mapping with a "res" entry holding the recognition
	            lists. Missing keys are treated as empty.
	        include_bbox: When true, the result also contains "text_regions".
	        min_score: Exclusive lower bound on the score of kept texts.

	    Returns:
	        A dict with:
	        - "extracted_text": kept texts, stripped, joined by newlines.
	        - "text_count": number of kept texts.
	        - "avg_confidence": mean of ALL scores (including those of texts
	          that were filtered out), rounded to 2 decimals; 0 if no scores.
	        - "text_regions" (only when ``include_bbox``): one entry per kept
	          text that has a box with at least 4 values, holding the
	          unstripped text, integer box coordinates and size, and the score
	          rounded to 3 decimals.
	    """
	    data = result_json.get("res", {})
	    texts = data.get("rec_texts", [])
	    scores = data.get("rec_scores", [])

	    # Keep the original index so each text can be matched to its box later.
	    kept = [
	        (i, t, s)
	        for i, (t, s) in enumerate(zip(texts, scores))
	        if t and t.strip() and s > min_score
	    ]
	    clean_texts = [t.strip() for _, t, _ in kept]

	    summary = {
	        "extracted_text": "\n".join(clean_texts),
	        "text_count": len(clean_texts),
	        "avg_confidence": round(sum(scores) / len(scores), 2) if scores else 0,
	    }
	    if not include_bbox:
	        # Regions are only built on request; building them without boxes
	        # previously raised TypeError by subscripting None.
	        return summary

	    boxes = data.get("rec_boxes", [])
	    text_regions = []
	    for i, t, s in kept:
	        # Skip texts that have no usable box.
	        if i >= len(boxes) or len(boxes[i]) < 4:
	            continue
	        b = boxes[i]
	        text_regions.append({
	            "text": t,
	            "bbox": {
	                "x1": int(b[0]),
	                "y1": int(b[1]),
	                "x2": int(b[2]),
	                "y2": int(b[3]),
	                "width": int(b[2] - b[0]),
	                "height": int(b[3] - b[1]),
	            },
	            "confidence": round(float(s), 3),
	        })

	    summary["text_regions"] = text_regions
	    return summary

	def inference(img, lang=None):
	    """Run OCR on an image file and return the annotated image and its text.

	    Args:
	        img: Path of the image file to process (the Gradio input is declared
	            with ``type="filepath"``), or ``None`` when nothing was uploaded.
	        lang: Unused; kept so existing callers that pass it keep working.

	    Returns:
	        A ``(image, text)`` tuple: the RGB image with detected regions
	        outlined and the extracted text. Returns ``(None, "")`` when ``img``
	        is ``None``, and the unannotated image with ``""`` when the OCR
	        engine returns no result.
	    """
	    if img is None:
	        return None, ""

	    res = ocr.predict(img)

	    # convert() returns an independent copy, so the source file handle can
	    # be released as soon as the conversion is done.
	    with Image.open(img) as src:
	        pil_img = src.convert("RGB")

	    if not res:
	        return pil_img, ""

	    data = extract_json(res[0].json, include_bbox=True)
	    im_show = draw_ocr(pil_img, data.get("text_regions", []))
	    return im_show, data.get("extracted_text")

	# ===== Gradio interface =====
	title = "OCR"
	description = """
	Support Chinese, Japanese, Korean.
	"""

	# One uploaded image in; the annotated image and the recognized text out.
	image_input = gr.Image(type="filepath", label="Upload ảnh")
	annotated_output = gr.Image(type="pil", label="Output")
	text_output = gr.Textbox(label="Text")

	demo = gr.Interface(
	    fn=inference,
	    inputs=[image_input],
	    outputs=[annotated_output, text_output],
	    title=title,
	    description=description,
	)


	# Start the web app only when the file is run as a script.
	if __name__ == "__main__":
	    demo.launch()