Spaces:
Sleeping
Sleeping
# File: main.py
import os

# Performance-related environment settings (a single OpenMP thread, tokenizer
# parallelism off). They are assigned BEFORE the third-party and project
# imports below because libraries that consult these variables may read them
# only once, while being imported; assigning them afterwards can be silently
# ignored.
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

from fastapi import FastAPI, File, UploadFile, HTTPException  # noqa: E402
from fastapi.responses import JSONResponse  # noqa: E402
import uvicorn  # noqa: E402

from llm_processor import load_llm_model, generate_json_from_text  # noqa: E402
from ocr_processor import extract_text_from_image  # noqa: E402

# Create the FastAPI app
app = FastAPI(
    title="Invoice Processing API",
    description="A single endpoint to process an invoice image and return both raw text and structured JSON.",
)
def startup_event():
    """Load the LLM by calling ``load_llm_model`` once.

    Intended to run a single time when the server starts.

    NOTE(review): no startup-hook registration is visible on this function
    in this excerpt -- confirm it is actually wired to the app.
    """
    load_llm_model()
def read_root():
    """Health check: report that the API process is up.

    Returns:
        A dict with the single key ``"status"``.

    NOTE(review): no route decorator is visible on this function in this
    excerpt -- confirm it is registered on the app.
    """
    health_payload = dict(status="API is running")
    return health_payload
async def process_invoice_endpoint(file: UploadFile = File(...)):
    """Run OCR on an uploaded invoice image and derive structured JSON from it.

    Args:
        file: The uploaded file. Its declared content type must start with
            ``image/``.

    Returns:
        A ``JSONResponse`` whose body has two keys: ``extracted_text`` (the
        output of ``extract_text_from_image``) and ``structured_json`` (the
        output of ``generate_json_from_text``, or an ``{"error": ...}``
        object when OCR yielded no usable text).

    Raises:
        HTTPException: status 400 when the upload is not declared as an
            image (including when no content type is present); status 500
            when reading the file, OCR, or JSON generation raises.

    NOTE(review): no route decorator is visible on this function in this
    excerpt -- confirm it is registered on the app.
    """
    # Validate file type. content_type may be None (e.g. when the upload part
    # carries no Content-Type header); treat that as "not an image" so the
    # client gets the intended 400 instead of an AttributeError-driven 500.
    declared_type = file.content_type or ""
    if not declared_type.startswith("image/"):
        raise HTTPException(
            status_code=400,
            detail="Only image files are supported (e.g., PNG, JPEG).",
        )

    try:
        image_bytes = await file.read()

        # Step 1: Extract text from the image using the OCR processor
        raw_text = extract_text_from_image(image_bytes)

        # An empty result or the "No text detected" marker both mean there is
        # nothing to hand to the LLM; answer early with an error payload.
        if not raw_text or "No text detected" in raw_text:
            return JSONResponse(content={
                "extracted_text": raw_text,
                "structured_json": {"error": "No text could be extracted from the image."},
            })

        # Step 2: Generate structured JSON from the extracted text
        json_data = generate_json_from_text(raw_text)

        # Step 3: Combine both results into a single response
        combined_response = {
            "extracted_text": raw_text,
            "structured_json": json_data,
        }
        return JSONResponse(content=combined_response)
    except HTTPException:
        # Preserve a deliberate HTTP error (and its status code) rather than
        # rewrapping it as a generic 500 below.
        raise
    except Exception as e:
        # Chain the cause so the original traceback stays available in logs.
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}") from e
if __name__ == "__main__":
    # Serve the app directly when this file is executed as a script.
    # Auto-reload stays off: it is disabled for production.
    server_options = {"host": "0.0.0.0", "port": 8000, "reload": False}
    uvicorn.run("main:app", **server_options)