| from fastapi import FastAPI, UploadFile, File | |
| from fastapi.responses import JSONResponse | |
| from pdf2image import convert_from_bytes | |
| from PIL import Image | |
| import pytesseract | |
| import io | |
# FastAPI application instance created at import time for this module.
| app = FastAPI() | |
# NOTE(review): no route decorator (e.g. ``@app.post(...)``) is visible on this
# handler in the reviewed source; confirm it is registered on ``app`` somewhere.

# File extensions (lower-case) the endpoint accepts.
_ALLOWED_EXTENSIONS = (".jpg", ".jpeg", ".png", ".pdf")
# Tesseract language packs used for recognition (Hindi + English).
_OCR_LANGUAGES = "hin+eng"


def _ocr_document(contents: bytes, is_pdf: bool) -> str:
    """Run OCR over raw upload bytes and return the recognised text, stripped.

    This is synchronous, blocking work (image decoding plus Tesseract calls),
    so it is kept separate from the async handler and run in a worker thread.

    Args:
        contents: Raw bytes of the uploaded file.
        is_pdf: True to rasterise the bytes as a PDF and OCR every page;
            False to open them as a single image.

    Returns:
        The recognised text with leading/trailing whitespace removed. PDF
        pages are separated by a blank line. May be the empty string.

    Raises:
        Exception: Whatever pdf2image, Pillow or pytesseract raise for
            unreadable input or a failing OCR run; the caller handles it.
    """
    if is_pdf:
        pages = convert_from_bytes(contents)
        page_texts = [
            pytesseract.image_to_string(page, lang=_OCR_LANGUAGES)
            for page in pages
        ]
        return "\n\n".join(page_texts).strip()

    # Context manager releases the decoder's resources once OCR is done.
    with Image.open(io.BytesIO(contents)) as image:
        return pytesseract.image_to_string(image, lang=_OCR_LANGUAGES).strip()


async def extract_text(file: UploadFile = File(...)):
    """Extract text from an uploaded JPG/PNG image or PDF using OCR.

    Args:
        file: The uploaded file. Its name must end in one of
            ``.jpg``, ``.jpeg``, ``.png`` or ``.pdf`` (case-insensitive).

    Returns:
        ``{"text": ...}`` on success, where the value is the recognised text
        or a "no text found" message when OCR produced nothing.
        A ``JSONResponse`` with status 400 and an ``"error"`` key when the
        file name is missing, has an unsupported extension, or the upload is
        empty. A ``JSONResponse`` with status 500 and ``"error"`` /
        ``"details"`` keys when processing the file fails.
    """
    # Local import keeps this block self-contained (asyncio is stdlib).
    import asyncio

    # Uploads may arrive without a filename; treat that as unsupported
    # instead of crashing on ``None.lower()``.
    filename = (file.filename or "").lower()
    if not filename.endswith(_ALLOWED_EXTENSIONS):
        return JSONResponse(
            content={"error": "❌ Unsupported file format! Please upload JPG, PNG, or PDF."},
            status_code=400,
        )

    contents = await file.read()
    if not contents:
        # An empty body is a client error, not a server-side OCR failure.
        return JSONResponse(
            content={"error": "❌ Uploaded file is empty."},
            status_code=400,
        )

    try:
        # OCR is blocking; run it in the default thread pool so the event
        # loop stays free to serve other requests in the meantime.
        loop = asyncio.get_running_loop()
        extracted_text = await loop.run_in_executor(
            None, _ocr_document, contents, filename.endswith(".pdf")
        )
    except Exception as e:  # request boundary: report any failure as a 500
        return JSONResponse(
            content={"error": "🚫 Failed to process file", "details": str(e)},
            status_code=500,
        )

    return {"text": extracted_text or "⚠️ No text found."}