"""HTTP OCR service: extract text from an uploaded image or PDF."""

import io
import logging

from fastapi import FastAPI, UploadFile, File
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import JSONResponse
from pdf2image import convert_from_bytes
from PIL import Image
import pytesseract

logger = logging.getLogger(__name__)

# File extensions accepted by the /ocr endpoint (compared case-insensitively).
ALLOWED_EXTENSIONS = (".jpg", ".jpeg", ".png", ".pdf")

# Language spec handed to pytesseract via its ``lang`` argument.
OCR_LANGUAGES = "hin+eng"

app = FastAPI()


def _run_ocr(contents: bytes, is_pdf: bool) -> str:
    """Return the raw OCR text for an uploaded file's bytes.

    Args:
        contents: Raw bytes of the uploaded file.
        is_pdf: True to rasterize ``contents`` as a PDF and OCR every page;
            False to decode ``contents`` as a single image.

    Returns:
        The recognized text. For PDFs, per-page texts are separated by a
        blank line.

    Raises:
        Exception: Whatever pdf2image, Pillow, or pytesseract raise on
            undecodable input or a failing OCR run; the caller handles it.
    """
    if is_pdf:
        pages = convert_from_bytes(contents)
        return "\n\n".join(
            pytesseract.image_to_string(page, lang=OCR_LANGUAGES)
            for page in pages
        )

    # Context manager ensures the decoded image is closed after OCR.
    with Image.open(io.BytesIO(contents)) as image:
        return pytesseract.image_to_string(image, lang=OCR_LANGUAGES)


@app.post("/ocr")
async def extract_text(file: UploadFile = File(...)):
    """Run OCR on an uploaded JPG, PNG, or PDF and return the text.

    Args:
        file: The uploaded file. Its filename extension selects the
            processing path (PDF vs. single image).

    Returns:
        ``{"text": ...}`` on success (a placeholder message when no text was
        recognized); a 400 ``JSONResponse`` for an unsupported extension,
        missing filename, or empty upload; a 500 ``JSONResponse`` when
        decoding or OCR fails.
    """
    # The filename may be absent on some uploads; treat that as unsupported
    # instead of crashing on ``None.lower()``.
    filename = (file.filename or "").lower()
    if not filename.endswith(ALLOWED_EXTENSIONS):
        return JSONResponse(
            content={"error": "❌ Unsupported file format! Please upload JPG, PNG, or PDF."},
            status_code=400,
        )

    contents = await file.read()
    if not contents:
        # An empty body is a client error, not a server-side OCR failure.
        return JSONResponse(
            content={"error": "❌ Uploaded file is empty."},
            status_code=400,
        )

    try:
        # Rasterizing and OCR are blocking and CPU-heavy; run them in a
        # worker thread so the event loop keeps serving other requests.
        extracted_text = await run_in_threadpool(
            _run_ocr, contents, filename.endswith(".pdf")
        )
    except Exception as e:
        # Top-level request boundary: log the traceback, then report.
        logger.exception("OCR failed for upload %r", filename)
        # NOTE(review): "details" exposes the raw exception text to the
        # client; kept for backward compatibility with existing consumers.
        return JSONResponse(
            content={"error": "🚫 Failed to process file", "details": str(e)},
            status_code=500,
        )

    return {"text": extracted_text.strip() or "⚠️ No text found."}