# File size: 2,366 Bytes
# 6034171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# File: main.py
import os
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
import uvicorn
from llm_processor import load_llm_model, generate_json_from_text
from ocr_processor import extract_text_from_image

# Performance-related environment settings, applied in one step.
# NOTE(review): these are assigned after the imports above have executed; if
# an imported library reads them at import time the values may arrive too
# late — confirm, and move this above those imports if so.
os.environ.update(
    {
        "OMP_NUM_THREADS": "1",
        "TOKENIZERS_PARALLELISM": "false",
    }
)

# Application object on which the routes below are registered
app = FastAPI(
    title="Invoice Processing API",
    description=(
        "A single endpoint to process an invoice image and return "
        "both raw text and structured JSON."
    ),
)

@app.on_event("startup")
def startup_event() -> None:
    """Load models once when the server starts.

    Registered as a "startup" handler on ``app``; its only action is to call
    ``load_llm_model()`` imported from ``llm_processor``.

    NOTE(review): recent FastAPI releases document ``on_event`` as deprecated
    in favour of lifespan handlers — confirm against the installed version.
    """
    load_llm_model()

@app.get("/", summary="Health Check")
def read_root():
    """Report that the service is up.

    Returns:
        A one-key mapping carrying a fixed status message.
    """
    health_payload = {"status": "API is running"}
    return health_payload

@app.post("/process_invoice/", summary="Process Invoice to Text & JSON")
async def process_invoice_endpoint(file: UploadFile = File(...)):
    """
    Accepts an image file and returns both the extracted OCR text and the structured JSON data.

    Args:
        file: The uploaded file. Its declared content type must start with
            ``image/``.

    Returns:
        A JSON response with two keys: ``extracted_text`` (the output of
        ``extract_text_from_image``) and ``structured_json`` (the output of
        ``generate_json_from_text``, or an ``error`` object when no text was
        detected).

    Raises:
        HTTPException: 400 when the upload does not declare an image content
            type (including when no content type is declared at all); 500
            when reading the upload, OCR, or JSON generation raises.
    """
    # The content type is supplied by the client and may be absent (None).
    # Treat a missing value as "not an image" so the caller gets the intended
    # 400 rather than an AttributeError surfacing as a 500.
    content_type = file.content_type or ""
    if not content_type.startswith("image/"):
        raise HTTPException(status_code=400, detail="Only image files are supported (e.g., PNG, JPEG).")

    try:
        image_bytes = await file.read()

        # Step 1: Extract text from the image using the OCR processor.
        # NOTE(review): this call and the LLM call below are synchronous
        # inside an async handler, so they appear to hold the event loop while
        # they run — consider a threadpool if the helpers are thread-safe.
        raw_text = extract_text_from_image(image_bytes)

        # Nothing usable came back from OCR: answer with the raw text and an
        # error marker instead of invoking the LLM.
        if not raw_text or "No text detected" in raw_text:
            return JSONResponse(content={
                "extracted_text": raw_text,
                "structured_json": {"error": "No text could be extracted from the image."}
            })

        # Step 2: Generate structured JSON from the extracted text
        json_data = generate_json_from_text(raw_text)

        # Step 3: Combine both results into a single response
        combined_response = {
            "extracted_text": raw_text,
            "structured_json": json_data
        }

        return JSONResponse(content=combined_response)

    except HTTPException:
        # An HTTP error raised deliberately further down must keep its own
        # status code instead of being rewrapped as a generic 500.
        raise
    except Exception as e:
        # Chain the cause so the original traceback stays available in logs.
        raise HTTPException(status_code=500, detail=f"An error occurred: {e}") from e

if __name__ == "__main__":
    # Serve the app directly; auto-reload stays off (production setting).
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=8000,
        reload=False,
    )