# easyocr-phi3 / ocr_utils.py
# (Hugging Face upload metadata: uploaded by credent007, commit 24d4193,
#  "Upload 5 files" — kept here as a comment so the module parses.)
import easyocr
import asyncio
import numpy as np
# Initialize the EasyOCR reader once at module import so the (slow) model
# load happens a single time per process. Languages: Hindi ('hi') and
# English ('en'); gpu=False forces CPU inference.
reader = easyocr.Reader(['hi', 'en'], gpu=False)
print('instance of reader ocr is created ')
def process_ocr_output(results):
    """
    Convert raw EasyOCR results into a list of JSON-serializable dicts.

    Parameters
    ----------
    results : iterable of (bbox, text, confidence) tuples as returned by
        ``easyocr.Reader.readtext``; ``bbox`` is four ``[x, y]`` corner points
        (possibly numpy scalars).

    Returns
    -------
    list[dict]
        One dict per detection with keys ``"bbox"`` (four ``[int, int]``
        corners), ``"text"`` (str) and ``"confidence"`` (float).
    """
    invoice_data = []
    for bbox, text, conf in results:
        # Cast every coordinate to plain int so the structure is JSON
        # serializable (EasyOCR may return numpy integer types).
        # NOTE: the original wrapped each dict in str(), which defeated the
        # stated goal of returning dictionaries and broke downstream
        # consumers — removed, along with stray debug prints.
        invoice_data.append({
            "bbox": [[int(pt[0]), int(pt[1])] for pt in bbox],
            "text": text,
            "confidence": float(conf),
        })
    return invoice_data
async def ocr_image(image: np.ndarray):
    """
    Run EasyOCR on an image in a worker thread.

    ``reader.readtext`` is CPU-bound, so it is dispatched to the default
    executor to keep the (FastAPI) event loop responsive.

    Parameters
    ----------
    image : np.ndarray
        Image array in a format EasyOCR accepts (e.g. H x W x 3 uint8).

    Returns
    -------
    list
        The *raw* EasyOCR result: (bbox, text, confidence) tuples.
        Post-processing is left to the caller — the original converted the
        results here AND again in process_pdf_page, which corrupted both
        the joined text (``res[1]`` indexed into a string) and the
        structured output (re-unpacking stringified dicts raises).
    """
    # get_running_loop() is the supported way to obtain the loop from
    # inside a coroutine; get_event_loop() is deprecated in this context.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, reader.readtext, image)
async def process_pdf_page(page):
    """
    Render a PyMuPDF page to an image, OCR it, and return a summary dict.

    Parameters
    ----------
    page : fitz.Page
        A PyMuPDF page object (provides ``get_pixmap()`` and ``number``).

    Returns
    -------
    dict
        Keys: ``page_number`` (1-based), ``ocr_details`` (JSON-ready
        detections from process_ocr_output), ``raw_text`` (all detected
        text joined with spaces), ``llm_analysis`` (placeholder string).
    """
    pix = page.get_pixmap()
    # pix.samples is a flat byte buffer; reshape to (height, width, channels).
    img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
    if pix.n == 4:  # drop the alpha channel: RGBA -> RGB
        img = img[:, :, :3]
    # Run the CPU-bound OCR off the event loop. readtext is called directly
    # here (instead of via ocr_image) so this function always operates on
    # raw (bbox, text, confidence) tuples — the original piped the already
    # stringified output of ocr_image into res[1] and process_ocr_output,
    # which produced garbage text and raised on unpacking.
    loop = asyncio.get_running_loop()
    raw_results = await loop.run_in_executor(None, reader.readtext, img)
    # 1. Clean concatenated string for the LLM prompt.
    full_text = " ".join(res[1] for res in raw_results)
    # 2. Detailed JSON-serializable structure for the API response.
    structured_ocr = process_ocr_output(raw_results)
    # Optional: If you want to call LLM here
    # llm_result = await call_llm(full_text)
    return {
        "page_number": page.number + 1,  # page.number is 0-based
        "ocr_details": structured_ocr,
        "raw_text": full_text,
        "llm_analysis": "llm_result_placeholder",
    }