File size: 1,302 Bytes
1540402 1db1a4f 1540402 1db1a4f 1540402 6298ba6 1db1a4f 6298ba6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
from pdf2image import convert_from_bytes
from PIL import Image
import pytesseract
import io
# Application instance; the route decorator below (`@app.post`) registers
# the OCR endpoint on it.
app = FastAPI()
def _run_ocr(contents: bytes, is_pdf: bool) -> str:
    """Run OCR (Hindi + English) over the raw bytes of an uploaded file.

    A PDF is converted to one image per page and the per-page texts are
    joined with a blank line; any other input is opened as a single image.
    Exceptions from the underlying libraries propagate to the caller.
    """
    if is_pdf:
        pages = convert_from_bytes(contents)
        return "\n\n".join(
            pytesseract.image_to_string(page, lang="hin+eng") for page in pages
        )
    # The context manager closes the image once OCR has finished with it.
    with Image.open(io.BytesIO(contents)) as image:
        return pytesseract.image_to_string(image, lang="hin+eng")


@app.post("/ocr")
async def extract_text(file: UploadFile = File(...)):
    """Extract text from an uploaded JPG, PNG, or PDF file.

    Args:
        file: The uploaded file; its type is decided from the filename
            extension (case-insensitive).

    Returns:
        ``{"text": ...}`` with the stripped OCR output (or a placeholder
        message when nothing was recognised) on success; a 400
        ``JSONResponse`` when the extension is missing or unsupported; a 500
        ``JSONResponse`` carrying the exception message when processing
        fails.
    """
    # Local import keeps this block self-contained; fastapi is already a
    # dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    # The filename may be absent on an upload; treat that as "no extension"
    # so the request is rejected with a 400 instead of crashing on None.
    filename = (file.filename or "").lower()
    allowed_ext = (".jpg", ".jpeg", ".png", ".pdf")
    if not filename.endswith(allowed_ext):
        return JSONResponse(
            content={"error": "❌ Unsupported file format! Please upload JPG, PNG, or PDF."},
            status_code=400,
        )

    contents = await file.read()
    try:
        # OCR and PDF conversion are blocking, CPU-heavy calls; run them in a
        # worker thread so the event loop can keep serving other requests.
        extracted_text = await run_in_threadpool(
            _run_ocr, contents, filename.endswith(".pdf")
        )
    except Exception as e:
        # Boundary handler: any processing failure is reported to the client
        # as a 500 rather than surfacing as an unhandled server error.
        return JSONResponse(
            content={"error": "🚫 Failed to process file", "details": str(e)},
            status_code=500,
        )
    return {"text": extracted_text.strip() or "⚠️ No text found."}
|