# app.py
"""FastAPI application exposing OCR endpoints for images and PDF documents."""

import io
import os
from typing import List

import uvicorn
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from PIL import Image
from pydantic import BaseModel

from image_processor import (
    enhance_image_fast,
    extract_text_from_image_fast,
    process_pdf_in_parallel,
)

app = FastAPI(
    title="High-Speed OCR API",
    description="An API to extract text from images and PDFs, optimized for speed.",
    version="5.0.0-hf-final",
)

# CORS middleware to allow requests from any origin.
# NOTE(review): wildcard origins are combined with allow_credentials=True.
# The FastAPI CORS documentation advises listing origins explicitly when
# credentials are allowed -- confirm this is intentional before tightening it,
# since existing clients may rely on the current behaviour.
origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# Pydantic models for structured responses
class ImageOCRResponse(BaseModel):
    """Response body of ``POST /ocr-image``."""

    filename: str
    text: str


class PageResult(BaseModel):
    """Text extracted from a single PDF page."""

    page_number: int
    text: str


class PDFOCRResponse(BaseModel):
    """Response body of ``POST /ocr-pdf``."""

    filename: str
    total_pages: int
    results: List[PageResult]


def _ocr_image_bytes(contents: bytes) -> str:
    """Decode raw image bytes, enhance the image and return its OCR text.

    Kept as a plain synchronous function so the endpoint can hand the whole
    blocking pipeline to a worker thread in one call.
    """
    # The context manager releases the decoder's resources once OCR is done.
    with Image.open(io.BytesIO(contents)) as image:
        enhanced_image = enhance_image_fast(image)
        return extract_text_from_image_fast(enhanced_image)


# API endpoints
@app.get("/", tags=["General"])
def read_root():
    """Return a short welcome message pointing at the interactive docs."""
    return {"message": "Welcome to the High-Speed OCR API. See /docs for documentation."}


@app.post("/ocr-image", response_model=ImageOCRResponse, tags=["OCR"])
async def ocr_image_endpoint(file: UploadFile = File(...)):
    """Extract text from an uploaded image.

    Raises:
        HTTPException: 400 when the upload is not declared as ``image/*``
            (including uploads with no content type at all); 500 when
            reading or processing the image fails.
    """
    # content_type may be None when the client omits the part's Content-Type;
    # treat that as "not an image" instead of crashing on None.startswith.
    if not (file.content_type or "").startswith("image/"):
        raise HTTPException(status_code=400, detail="File must be an image.")

    try:
        contents = await file.read()
        # Decoding and OCR are blocking; run them off the event loop so other
        # requests keep being served in the meantime.
        text = await run_in_threadpool(_ocr_image_bytes, contents)
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error processing image OCR: {str(e)}"
        ) from e
    return {"filename": file.filename, "text": text}


@app.post("/ocr-pdf", response_model=PDFOCRResponse, tags=["OCR"])
async def ocr_pdf_endpoint(file: UploadFile = File(...)):
    """Extract text from every page of an uploaded PDF.

    Raises:
        HTTPException: 400 when the upload is not declared as
            ``application/pdf``; 500 when reading or processing the PDF fails.
    """
    if file.content_type != "application/pdf":
        raise HTTPException(status_code=400, detail="File must be a PDF.")

    try:
        contents = await file.read()
        # The call blocks until the whole PDF is processed; run it in a worker
        # thread so the event loop stays responsive.
        results = await run_in_threadpool(process_pdf_in_parallel, contents)
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error processing PDF: {str(e)}"
        ) from e
    return {
        "filename": file.filename,
        "total_pages": len(results),
        "results": results,
    }


# The `if __name__ == "__main__":` block has been completely removed.
# The platform will import the `app` object and run it.