import easyocr
import asyncio
import numpy as np

# Initialize the reader once at module level: loading the detection and
# recognition models is expensive, so a single shared Reader instance
# serves every request. gpu=False forces CPU inference.
# Languages: Hindi ('hi') and English ('en').
reader = easyocr.Reader(['hi', 'en'], gpu=False)
print('instance of reader ocr is created ')
def process_ocr_output(results):
    """
    Convert raw EasyOCR output into a list of JSON-serializable dicts.

    Parameters
    ----------
    results : iterable of (bbox, text, confidence) tuples as returned by
        ``easyocr.Reader.readtext``; each bbox is four [x, y] corner
        points, possibly as numpy scalars.

    Returns
    -------
    list[dict]
        One dict per detection: ``bbox`` (four ``[int, int]`` corners),
        ``text`` (recognized string) and ``confidence`` (plain float).
    """
    invoice_data = []
    for bbox, text, conf in results:
        # Cast numpy scalar coords/confidence to builtin int/float so the
        # structure is directly JSON-serializable. NOTE: the original code
        # wrapped each dict in str(...), which defeated serializability and
        # broke downstream consumers that iterate the dicts — fixed here.
        invoice_data.append({
            "bbox": [[int(pt[0]), int(pt[1])] for pt in bbox],
            "text": text,
            "confidence": float(conf),
        })
    return invoice_data
async def ocr_image(image: np.ndarray):
    """
    Run EasyOCR on an image without blocking the event loop.

    Parameters
    ----------
    image : np.ndarray
        HxWxC uint8 image array.

    Returns
    -------
    list
        Raw EasyOCR results: (bbox, text, confidence) tuples, exactly as
        returned by ``reader.readtext``. Callers (e.g. process_pdf_page)
        expect this raw tuple form and do their own post-processing.
    """
    # get_running_loop() is the correct call inside a coroutine;
    # get_event_loop() is deprecated here since Python 3.10.
    loop = asyncio.get_running_loop()
    # readtext is CPU-bound, so run it in the default thread pool to keep
    # the FastAPI event loop responsive.
    results = await loop.run_in_executor(None, reader.readtext, image)
    # Return the RAW results: the original version pre-processed (and
    # stringified) them here, which made process_pdf_page's res[1] lookup
    # and second process_ocr_output() call crash.
    return results
async def process_pdf_page(page):
    """
    Render a PDF page to an image, OCR it, and assemble the page result.
    """
    pix = page.get_pixmap()
    # PyMuPDF pixmap samples -> (height, width, channels) uint8 array.
    img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
        pix.height, pix.width, pix.n
    )
    # Drop the alpha channel when present (RGBA -> RGB).
    if pix.n == 4:
        img = img[:, :, :3]

    raw_results = await ocr_image(img)

    # Plain-text concatenation of recognized strings for LLM consumption.
    full_text = " ".join([res[1] for res in raw_results])
    # Detailed, serializable per-detection structure for the response.
    structured_ocr = process_ocr_output(raw_results)

    # llm_result = await call_llm(full_text)  # optional LLM hook

    return {
        "page_number": page.number + 1,  # page.number is 0-based
        "ocr_details": structured_ocr,
        "raw_text": full_text,
        "llm_analysis": "llm_result_placeholder",
    }