File size: 1,302 Bytes
1540402
1db1a4f
1540402
 
1db1a4f
1540402
 
 
 
 
 
6298ba6
 
 
 
 
 
 
 
 
1db1a4f
6298ba6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import asyncio
import io

import pytesseract
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import JSONResponse
from pdf2image import convert_from_bytes
from PIL import Image

# Application instance; the route decorator below registers the /ocr endpoint on it.
app = FastAPI()

def _run_ocr(contents: bytes, is_pdf: bool) -> str:
    """Run Tesseract (``hin+eng``) over raw upload bytes and return the text.

    This is a plain blocking function so that the endpoint can hand it to a
    worker thread instead of running it on the event loop.

    Args:
        contents: Raw bytes of the uploaded file.
        is_pdf: When true, ``contents`` is rendered page by page with
            ``convert_from_bytes``; otherwise it is opened as a single image.

    Returns:
        The recognised text. For PDFs, the per-page texts are joined with a
        blank line between pages.
    """
    if is_pdf:
        pages = convert_from_bytes(contents)
        return "\n\n".join(
            pytesseract.image_to_string(page, lang="hin+eng") for page in pages
        )

    # Context manager guarantees the image handle is released even if OCR fails.
    with Image.open(io.BytesIO(contents)) as image:
        return pytesseract.image_to_string(image, lang="hin+eng")


@app.post("/ocr")
async def extract_text(file: UploadFile = File(...)):
    """Extract text from an uploaded JPG, PNG, or PDF file.

    Args:
        file: The uploaded file. Its type is decided by the filename
            extension (case-insensitive), not by inspecting the content.

    Returns:
        ``{"text": ...}`` with the stripped OCR output (or a placeholder
        message when nothing was recognised) on success; a ``JSONResponse``
        with status 400 when the extension is not supported or the upload
        carries no filename; a ``JSONResponse`` with status 500 and the
        exception message under ``"details"`` when processing fails.
    """
    # The filename may be absent on some multipart uploads; treat that as an
    # unsupported file instead of crashing on ``None.lower()``.
    filename = (file.filename or "").lower()
    allowed_ext = (".jpg", ".jpeg", ".png", ".pdf")

    if not filename.endswith(allowed_ext):
        return JSONResponse(
            content={"error": "❌ Unsupported file format! Please upload JPG, PNG, or PDF."},
            status_code=400
        )

    contents = await file.read()

    try:
        # PDF rasterisation and Tesseract are blocking calls; run them in the
        # default thread pool so other requests keep being served meanwhile.
        loop = asyncio.get_running_loop()
        extracted_text = await loop.run_in_executor(
            None, _run_ocr, contents, filename.endswith(".pdf")
        )
        return {"text": extracted_text.strip() or "⚠️ No text found."}

    except Exception as e:
        # Endpoint boundary: report any processing failure as a 500 payload.
        return JSONResponse(
            content={"error": "🚫 Failed to process file", "details": str(e)},
            status_code=500
        )