ImageToOCRPdf / app.py
arasuezofis's picture
Update app.py
46467f1 verified
from io import BytesIO
from pathlib import PurePosixPath
from urllib.parse import quote

import fitz # PyMuPDF
import pytesseract
from fastapi import FastAPI, UploadFile, File
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import JSONResponse, StreamingResponse
from PIL import Image
# Application object; the route decorators in this module register their handlers on it.
app = FastAPI()
@app.get("/")
def home():
    """Return a static description of the service.

    The payload names the OCR endpoint, the HTTP method it expects, the
    accepted upload formats and the kind of document that is returned.
    """
    service_info = dict(
        message="OCR API is running",
        endpoint="/ocr",
        method="POST",
        upload="PNG, JPG, JPEG, PDF",
        output="Searchable PDF",
    )
    return service_info
def ocr_image_to_pdf(image_bytes: bytes):
    """Run OCR on an encoded image and return the result as PDF data.

    Args:
        image_bytes: The encoded image file contents (as uploaded).

    Returns:
        Whatever ``pytesseract.image_to_pdf_or_hocr`` produces for
        ``extension='pdf'``, passed back unchanged.
    """
    # Pillow decodes from an in-memory buffer, so nothing is written to disk here.
    source_image = Image.open(BytesIO(image_bytes))
    return pytesseract.image_to_pdf_or_hocr(source_image, extension='pdf')
def ocr_pdf_to_searchable(pdf_bytes: bytes, dpi: int = 300) -> bytes:
    """Rebuild a PDF as a searchable PDF by OCR-ing a rendering of every page.

    Each page of the input is rasterized, passed through Tesseract, and the
    resulting single-page OCR PDFs are concatenated in page order. The output
    is therefore built from the page renderings, not from the original page
    objects.

    Args:
        pdf_bytes: Contents of the source PDF file.
        dpi: Resolution used when rendering each page to an image before OCR.

    Returns:
        The bytes of the assembled PDF.
    """
    # Context managers make sure every PyMuPDF document is closed, including
    # when OCR fails part-way through a multi-page file.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as original_pdf, \
            fitz.open() as ocr_output:
        for page in original_pdf:
            pix = page.get_pixmap(dpi=dpi)
            # pytesseract accepts a PIL image (or a file path), not raw encoded
            # bytes, so the PNG rendering has to be decoded first.
            with Image.open(BytesIO(pix.tobytes("png"))) as page_image:
                ocr_pdf_page = pytesseract.image_to_pdf_or_hocr(
                    page_image, extension="pdf"
                )
            with fitz.open(stream=ocr_pdf_page, filetype="pdf") as page_pdf:
                ocr_output.insert_pdf(page_pdf)
        return ocr_output.tobytes()
def _searchable_pdf_disposition(original_name: str) -> str:
    """Build the Content-Disposition value for the OCR result download."""
    # Keep only the final path component and drop its extension so that
    # "scan.png" becomes "searchable_scan.pdf" rather than "searchable_scan.png.pdf".
    stem = PurePosixPath(original_name.replace("\\", "/")).stem or "document"
    download_name = f"searchable_{stem}.pdf"
    encoded = quote(download_name)
    if encoded != download_name:
        # Spaces, quotes or non-ASCII characters: use the RFC 5987 encoded form
        # instead of placing them raw in the header.
        return f"attachment; filename*=UTF-8''{encoded}"
    return f'attachment; filename="{download_name}"'


@app.post("/ocr")
async def upload_and_ocr(file: UploadFile = File(...)):
    """Accept an uploaded image or PDF and respond with a searchable PDF.

    The converter is chosen from the upload's file extension
    (case-insensitive): ``.png``/``.jpg``/``.jpeg`` go through
    ``ocr_image_to_pdf`` and ``.pdf`` goes through ``ocr_pdf_to_searchable``.

    Args:
        file: The uploaded file (multipart form field ``file``).

    Returns:
        A PDF attachment response on success, or a JSON body
        ``{"error": "Unsupported file type"}`` with HTTP status 415 when the
        extension is not one of the supported ones.
    """
    # The filename is optional in a multipart upload; treat a missing name as
    # an unsupported type instead of failing on None.
    original_name = file.filename or ""
    lowered = original_name.lower()

    if lowered.endswith((".png", ".jpg", ".jpeg")):
        convert = ocr_image_to_pdf
    elif lowered.endswith(".pdf"):
        convert = ocr_pdf_to_searchable
    else:
        # Rejected before reading the body so unsupported uploads cost nothing.
        return JSONResponse(
            status_code=415,
            content={"error": "Unsupported file type"},
        )

    file_bytes = await file.read()
    # OCR is blocking work; run it in the thread pool so this coroutine does
    # not stall the event loop for other requests.
    pdf_bytes = await run_in_threadpool(convert, file_bytes)

    return StreamingResponse(
        BytesIO(pdf_bytes),
        media_type="application/pdf",
        headers={"Content-Disposition": _searchable_pdf_disposition(original_name)},
    )