# easyocr-phi3 / ocr_utils.py
# (Hugging Face upload metadata: uploaded by credent007, commit 24d4193,
#  "Upload 5 files" — kept here as a comment so the module parses.)
import easyocr
import asyncio
import numpy as np
# Initialize the EasyOCR reader once at module import so the (slow) model
# load happens a single time per process. Languages: Hindi ('hi') and
# English ('en'); gpu=False forces CPU inference.
reader = easyocr.Reader(['hi', 'en'], gpu=False)
print('instance of reader ocr is created ')
def process_ocr_output(results):
    """
    Convert raw EasyOCR results into a list of JSON-serializable dicts.

    Parameters
    ----------
    results : iterable of (bbox, text, confidence) tuples as returned by
        ``easyocr.Reader.readtext``; ``bbox`` is four ``[x, y]`` corner points
        (possibly numpy scalars).

    Returns
    -------
    list[dict]
        One dict per detection with keys ``"bbox"`` (four ``[int, int]``
        corners), ``"text"`` (str) and ``"confidence"`` (float).
    """
    invoice_data = []
    for bbox, text, conf in results:
        # Cast every coordinate to plain int so the structure is JSON
        # serializable (EasyOCR may return numpy integer types).
        # NOTE: the original wrapped each dict in str(), which defeated the
        # stated goal of returning dictionaries and broke downstream
        # consumers — removed, along with stray debug prints.
        invoice_data.append({
            "bbox": [[int(pt[0]), int(pt[1])] for pt in bbox],
            "text": text,
            "confidence": float(conf),
        })
    return invoice_data
async def ocr_image(image: np.ndarray):
    """
    Run EasyOCR on an image in a worker thread.

    ``reader.readtext`` is CPU-bound, so it is dispatched to the default
    executor to keep the (FastAPI) event loop responsive.

    Parameters
    ----------
    image : np.ndarray
        Image array in a format EasyOCR accepts (e.g. H x W x 3 uint8).

    Returns
    -------
    list
        The *raw* EasyOCR result: (bbox, text, confidence) tuples.
        Post-processing is left to the caller — the original converted the
        results here AND again in process_pdf_page, which corrupted both
        the joined text (``res[1]`` indexed into a string) and the
        structured output (re-unpacking stringified dicts raises).
    """
    # get_running_loop() is the supported way to obtain the loop from
    # inside a coroutine; get_event_loop() is deprecated in this context.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, reader.readtext, image)
async def process_pdf_page(page):
    """
    Render a PyMuPDF page to an image, OCR it, and return a summary dict.

    Parameters
    ----------
    page : fitz.Page
        A PyMuPDF page object (provides ``get_pixmap()`` and ``number``).

    Returns
    -------
    dict
        Keys: ``page_number`` (1-based), ``ocr_details`` (JSON-ready
        detections from process_ocr_output), ``raw_text`` (all detected
        text joined with spaces), ``llm_analysis`` (placeholder string).
    """
    pix = page.get_pixmap()
    # pix.samples is a flat byte buffer; reshape to (height, width, channels).
    img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
    if pix.n == 4:  # drop the alpha channel: RGBA -> RGB
        img = img[:, :, :3]
    # Run the CPU-bound OCR off the event loop. readtext is called directly
    # here (instead of via ocr_image) so this function always operates on
    # raw (bbox, text, confidence) tuples — the original piped the already
    # stringified output of ocr_image into res[1] and process_ocr_output,
    # which produced garbage text and raised on unpacking.
    loop = asyncio.get_running_loop()
    raw_results = await loop.run_in_executor(None, reader.readtext, img)
    # 1. Clean concatenated string for the LLM prompt.
    full_text = " ".join(res[1] for res in raw_results)
    # 2. Detailed JSON-serializable structure for the API response.
    structured_ocr = process_ocr_output(raw_results)
    # Optional: If you want to call LLM here
    # llm_result = await call_llm(full_text)
    return {
        "page_number": page.number + 1,  # page.number is 0-based
        "ocr_details": structured_ocr,
        "raw_text": full_text,
        "llm_analysis": "llm_result_placeholder",
    }