Spaces:

AnAdiga
/

GenAI_STEM_TAMIDS

Sleeping

GenAI_STEM_TAMIDS / backend /routes /regions_detect.py

Aniruddh

clean new branch

8608e55 6 months ago

3.31 kB

	"""
	regions_detect.py

	This module provides functionality to detect text inside a specific region
	of an uploaded image using OpenCV and EasyOCR.

	Key functionalities:
	- Extract a specified rectangular region from an image.
	- Perform OCR (Optical Character Recognition) on the cropped region.
	- Return detected text boxes along with their coordinates.
	- Return the cropped region as a base64-encoded image.
	"""

	import base64

	import cv2
	import easyocr
	import numpy as np
	from fastapi import APIRouter, File, Form, HTTPException, UploadFile

	# Router that collects this module's endpoints; the `/region-detect` route
	# defined further down is registered on it.
	router = APIRouter()

	# Module-level EasyOCR reader configured for English ('en'). It is created
	# once at import time and reused by every call to `detect_text_in_region`.
	# NOTE(review): constructing the reader presumably loads the OCR models, so
	# importing this module may be slow — confirm against the EasyOCR docs.
	reader = easyocr.Reader(['en'])


	def detect_text_in_region(img, region):
	    """
	    Detect text within a rectangular region of an image.

	    Steps:
	        1. Clip the requested region to the image bounds and crop it.
	        2. Run EasyOCR on the cropped region.
	        3. Translate each detection back into original-image coordinates.
	        4. Encode the cropped region as a base64 JPEG string.

	    Args:
	        img (numpy.ndarray): The original OpenCV image.
	        region (tuple): A tuple (x, y, w, h) specifying the top-left
	            coordinates, width, and height of the region. Parts of the
	            region that fall outside the image are ignored.

	    Returns:
	        tuple:
	            - text_boxes (list of dict): Each dict contains:
	                - id (int): 1-based box index
	                - x1, y1 (int): Top-left coordinates
	                - x2, y2 (int): Bottom-right coordinates
	                - text (str): Detected text
	            - crop_base64 (str): Base64-encoded JPEG of the cropped region.

	    Raises:
	        ValueError: If `img` is None or empty, or if the region does not
	            overlap the image at all.
	        RuntimeError: If the cropped region cannot be JPEG-encoded.
	    """
	    if img is None or img.size == 0:
	        raise ValueError("img must be a non-empty image array")

	    x, y, w, h = region
	    img_h, img_w = img.shape[:2]

	    # Clip the region to the image. Without this, a negative x or y would be
	    # treated by NumPy slicing as an offset from the end of the axis, which
	    # silently crops the wrong pixels and shifts every reported box.
	    x0, y0 = max(0, x), max(0, y)
	    x1, y1 = min(img_w, x + w), min(img_h, y + h)
	    if x0 >= x1 or y0 >= y1:
	        raise ValueError(
	            f"region {tuple(region)} does not overlap the "
	            f"{img_w}x{img_h} image"
	        )

	    crop = img[y0:y1, x0:x1]

	    text_boxes = []
	    for index, (bbox, text, _prob) in enumerate(reader.readtext(crop), start=1):
	        # bbox holds four corner points. Taking min/max over all of them
	        # gives an axis-aligned box that still encloses skewed detections
	        # and guarantees x1 <= x2 and y1 <= y2.
	        xs = [point[0] for point in bbox]
	        ys = [point[1] for point in bbox]
	        text_boxes.append({
	            "id": index,
	            # Offsets use the clipped origin so the coordinates refer to
	            # the original image, not the crop.
	            "x1": int(min(xs) + x0),
	            "y1": int(min(ys) + y0),
	            "x2": int(max(xs) + x0),
	            "y2": int(max(ys) + y0),
	            "text": text,
	        })

	    # Convert cropped region to a base64 string; imencode reports failure
	    # through its first return value rather than by raising.
	    encoded_ok, buffer = cv2.imencode(".jpg", crop)
	    if not encoded_ok:
	        raise RuntimeError("failed to JPEG-encode the cropped region")
	    crop_base64 = base64.b64encode(buffer).decode("utf-8")

	    return text_boxes, crop_base64


	@router.post("/region-detect")
	async def detect_in_region(
	    file: UploadFile = File(...),
	    x: int = Form(...),
	    y: int = Form(...),
	    w: int = Form(...),
	    h: int = Form(...)
	):
	    """
	    FastAPI endpoint to detect text within a user-specified region of an
	    uploaded image.

	    Steps:
	        1. Accepts an image file and region coordinates (x, y, w, h).
	        2. Decodes the image into an OpenCV array.
	        3. Validates the region against the decoded image size.
	        4. Calls `detect_text_in_region` to extract text and crop the region.

	    Returns:
	        dict:
	            - detections: Detected text boxes with coordinates and text.
	            - cropped_image: The cropped region as a JPEG data URL.

	    Raises:
	        HTTPException: 400 if the upload is empty, cannot be decoded as an
	            image, or the region is not a positive-size rectangle starting
	            inside the image.
	    """
	    image_bytes = await file.read()
	    if not image_bytes:
	        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

	    nparr = np.frombuffer(image_bytes, np.uint8)
	    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
	    # imdecode signals undecodable data by returning None instead of raising;
	    # without this check the failure would surface later as a 500.
	    if img is None:
	        raise HTTPException(
	            status_code=400,
	            detail="Uploaded file could not be decoded as an image.",
	        )

	    img_h, img_w = img.shape[:2]
	    if w <= 0 or h <= 0 or x < 0 or y < 0 or x >= img_w or y >= img_h:
	        raise HTTPException(
	            status_code=400,
	            detail=(
	                f"Region (x={x}, y={y}, w={w}, h={h}) must have positive "
	                f"size and start inside the {img_w}x{img_h} image."
	            ),
	        )

	    # NOTE(review): this call runs synchronously inside an async handler;
	    # if OCR latency becomes a problem, consider moving it to a worker thread.
	    detections, crop_base64 = detect_text_in_region(img, (x, y, w, h))
	    return {
	        "detections": detections,
	        "cropped_image": f"data:image/jpeg;base64,{crop_base64}"
	    }