File size: 1,994 Bytes
24d4193
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import easyocr
import asyncio
import numpy as np

# Initialize reader once at module level
# Shared EasyOCR reader for Hindi ('hi') and English ('en'), CPU-only
# (gpu=False). Built a single time at import because model loading is
# expensive; all request handlers reuse this instance.
reader = easyocr.Reader(['hi', 'en'], gpu=False)
print('instance of reader ocr is created ')
def process_ocr_output(results):
    """
    Convert raw EasyOCR detections into JSON-serializable dictionaries.

    Args:
        results: Iterable of ``(bbox, text, confidence)`` tuples as returned
            by ``easyocr.Reader.readtext``, where ``bbox`` is four
            ``[x, y]`` corner points.

    Returns:
        list[dict]: One dict per detection with keys ``"bbox"`` (list of
        ``[int, int]`` corner points), ``"text"`` (str) and
        ``"confidence"`` (float).
    """
    # Previous version wrapped each dict in str(), which produced
    # Python-repr strings instead of the JSON-serializable dicts the
    # docstring promised; debug prints removed as well.
    return [
        {
            # Corner coordinates may be numpy scalars; cast to plain ints
            # so the structure serializes cleanly to JSON.
            "bbox": [[int(pt[0]), int(pt[1])] for pt in bbox],
            "text": text,
            "confidence": float(conf),
        }
        for bbox, text, conf in results
    ]

async def ocr_image(image: np.ndarray):
    """
    Run EasyOCR on an image without blocking the event loop.

    Args:
        image: Image array (H x W x C uint8) accepted by ``reader.readtext``.

    Returns:
        The raw EasyOCR result: a list of ``(bbox, text, confidence)``
        tuples. Callers decide how to post-process (e.g. via
        ``process_ocr_output``).
    """
    # readtext is CPU-bound, so run it in the default thread-pool executor
    # to keep the FastAPI event loop responsive. get_running_loop() is the
    # correct call inside a coroutine (get_event_loop is deprecated here).
    loop = asyncio.get_running_loop()
    # Return the raw detections. The previous version pre-applied
    # process_ocr_output, but the caller also indexed res[1] for text and
    # re-ran process_ocr_output, which corrupted the text and crashed —
    # post-processing belongs to the caller.
    return await loop.run_in_executor(None, reader.readtext, image)

async def process_pdf_page(page):
    """
    Render a PDF page to an image, run OCR, and assemble the page result.

    Args:
        page: A PyMuPDF page object (must provide ``get_pixmap()`` and a
            0-based ``number`` attribute) — TODO confirm against caller.

    Returns:
        dict with keys ``"page_number"`` (1-based), ``"ocr_details"``
        (structured OCR dicts from ``process_ocr_output``), ``"raw_text"``
        (all detected text joined by spaces) and ``"llm_analysis"``
        (placeholder string).
    """
    pix = page.get_pixmap()
    # Pixmap samples are a flat byte buffer; reshape into H x W x C.
    img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)

    if pix.n == 4:
        # Drop the alpha channel: OCR expects RGB.
        img = img[:, :, :3]

    # Run the CPU-bound readtext in the default executor so the event loop
    # stays responsive. We fetch the RAW (bbox, text, confidence) tuples
    # here: the previous version called ocr_image, which had already
    # applied process_ocr_output, so the text join below read res[1] from
    # strings and the second process_ocr_output call crashed unpacking
    # non-tuple input.
    loop = asyncio.get_running_loop()
    raw_results = await loop.run_in_executor(None, reader.readtext, img)

    # 1. Plain-text view of the page for downstream LLM consumption.
    full_text = " ".join(text for _, text, _ in raw_results)

    # 2. Detailed JSON-friendly structure for the API response.
    structured_ocr = process_ocr_output(raw_results)

    # Optional: If you want to call LLM here
    # llm_result = await call_llm(full_text)

    return {
        "page_number": page.number + 1,  # page.number is 0-based
        "ocr_details": structured_ocr,
        "raw_text": full_text,
        "llm_analysis": "llm_result_placeholder",
    }