import asyncio

import easyocr
import numpy as np

# Initialize the EasyOCR reader once at module level so the expensive
# model load happens a single time per process, not once per request.
reader = easyocr.Reader(['hi', 'en'], gpu=False)
print('instance of reader ocr is created ')


def process_ocr_output(results):
    """Convert raw EasyOCR results into JSON-serializable dictionaries.

    Args:
        results: EasyOCR ``readtext`` output — an iterable of
            ``(bbox, text, confidence)`` tuples, where ``bbox`` is four
            ``[x, y]`` corner points (possibly numpy scalars).

    Returns:
        list[dict]: one dict per detection with ``bbox`` (list of
        ``[int, int]`` points), ``text`` (str) and ``confidence`` (float).
    """
    invoice_data = []
    for bbox, text, conf in results:
        # Cast numpy scalars to plain ints/floats so the structure is
        # JSON-safe. BUG FIX: the original wrapped each dict in str(),
        # which turned the "structured" output into opaque strings.
        invoice_data.append({
            "bbox": [[int(pt[0]), int(pt[1])] for pt in bbox],
            "text": text,
            "confidence": float(conf),
        })
    return invoice_data


async def ocr_image(image: np.ndarray):
    """Run OCR on *image* without blocking the FastAPI event loop.

    EasyOCR's ``readtext`` is CPU-bound, so it is dispatched to the
    default thread-pool executor.

    Returns:
        The raw EasyOCR list of ``(bbox, text, confidence)`` tuples,
        left unprocessed so callers can derive both plain text and
        structured output from it. (BUG FIX: the original pre-processed
        the results here AND again in ``process_pdf_page``, which would
        crash on the second pass.)
    """
    # get_running_loop() is the modern, deprecation-safe replacement
    # for get_event_loop() inside a coroutine.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, reader.readtext, image)


async def process_pdf_page(page):
    """Render a PDF page to an image, OCR it, and package the result.

    Args:
        page: a PyMuPDF ``Page`` object.

    Returns:
        dict with 1-based ``page_number``, structured ``ocr_details``,
        the space-joined ``raw_text``, and an ``llm_analysis``
        placeholder.
    """
    pix = page.get_pixmap()
    # Convert the PyMuPDF pixmap's raw sample buffer to an HxWxC array.
    img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
        pix.height, pix.width, pix.n
    )
    if pix.n == 4:
        # Drop the alpha channel: RGBA -> RGB.
        img = img[:, :, :3]

    # Raw (bbox, text, confidence) tuples straight from EasyOCR.
    raw_results = await ocr_image(img)

    # 1. Clean string for the LLM. BUG FIX: the original joined res[1]
    #    over already-stringified results, which picked out a single
    #    character per detection instead of the recognized text.
    full_text = " ".join(res[1] for res in raw_results)

    # 2. Detailed JSON structure for the response.
    structured_ocr = process_ocr_output(raw_results)

    # Optional: llm_result = await call_llm(full_text)

    return {
        "page_number": page.number + 1,  # page.number is 0-based
        "ocr_details": structured_ocr,
        "raw_text": full_text,
        "llm_analysis": "llm_result_placeholder",
    }