Vocal-Eyes-Fast_api

Sleeping

App Files Files Community

Vocal-Eyes-Fast_api / app.py

ABDRauf

fixed errors

67434ff verified 27 days ago

raw

history blame contribute delete

3.52 kB

	from transformers import BlipProcessor, BlipForConditionalGeneration
	import io
	from PIL import Image
	from fastapi import FastAPI, File, UploadFile, HTTPException
	from fastapi.responses import JSONResponse
	import uvicorn
	import torch

	# ── Load BLIP Model ────────────────────────────────────────────────────────
	# The processor and the model are both created once, when this module is
	# imported, from the same pretrained checkpoint. Keeping the checkpoint id in
	# a single constant prevents the two from drifting apart if it is changed.
	_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
	processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
	model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)

	def image_to_speech(
	    image: Image.Image,
	    *,
	    max_length: int = 90,
	    num_beams: int = 8,
	    repetition_penalty: float = 1.2,
	    length_penalty: float = 1.0,
	) -> str:
	    """Generate a text caption for *image* using the module-level BLIP model.

	    Despite its name, this function returns text only; no audio is produced
	    here.

	    Args:
	        image: PIL image to describe.
	        max_length: Cap on the generated sequence length; the default allows
	            longer, more detailed sentences.
	        num_beams: Beam-search width; more beams aim for better captions at
	            the cost of more computation.
	        repetition_penalty: Penalty intended to discourage repeated words.
	        length_penalty: Length weighting used when ranking beams; 1.0 keeps
	            the caption length balanced.

	    Returns:
	        The decoded caption with special tokens stripped.
	    """
	    # The processor converts the PIL image into PyTorch tensors ("pt").
	    inputs = processor(image, return_tensors="pt")

	    generated = model.generate(
	        **inputs,
	        max_length=max_length,
	        num_beams=num_beams,
	        repetition_penalty=repetition_penalty,
	        length_penalty=length_penalty,
	        early_stopping=True,
	    )

	    # Only one image was submitted, so the first sequence is the caption.
	    return processor.decode(generated[0], skip_special_tokens=True)

	# ── FastAPI app ────────────────────────────────────────────────────────────
	# Application metadata (title, description, version) is gathered in one
	# mapping and then expanded into the FastAPI constructor.
	_api_metadata = {
	    "title": "VocalEyes API",
	    "description": "Converts an uploaded image into a short scene description via BLIP.",
	    "version": "1.0.0",
	}
	app = FastAPI(**_api_metadata)

	@app.get("/")
	def root():
	    """Report that the service is up and point clients at /predict."""
	    status_payload = {
	        "status": "ok",
	        "message": "VocalEyes API is running. POST an image to /predict",
	    }
	    return status_payload

	@app.post("/predict")
	async def predict(file: UploadFile = File(...)):
	    """Caption an uploaded image and return the caption as JSON.

	    Args:
	        file: Uploaded image; its declared content type must be JPEG, PNG,
	            WEBP, or BMP.

	    Returns:
	        A JSON response of the form ``{"description": <caption>}``.

	    Raises:
	        HTTPException: 415 when the declared content type is not accepted,
	            400 when the bytes cannot be decoded as an image, and 500 when
	            caption generation fails.
	    """
	    # Imported here so the handler carries its own dependency.
	    from fastapi.concurrency import run_in_threadpool

	    # ── Validate content type ──────────────────────────────────────────────
	    # This is only the type declared by the client; the real check is whether
	    # PIL can decode the payload below.
	    allowed_types = ("image/jpeg", "image/png", "image/webp", "image/bmp")
	    if file.content_type not in allowed_types:
	        raise HTTPException(
	            status_code=415,
	            detail=f"Unsupported file type '{file.content_type}'. Send JPEG, PNG, WEBP, or BMP.",
	        )

	    # ── Read & preprocess ──────────────────────────────────────────────────
	    try:
	        raw = await file.read()
	        # convert("RGB") normalises the colour mode before captioning.
	        image = Image.open(io.BytesIO(raw)).convert("RGB")
	    except Exception as e:
	        # Chain the cause so the original traceback is preserved.
	        raise HTTPException(status_code=400, detail=f"Could not read image: {e}") from e

	    # ── Run pipeline ───────────────────────────────────────────────────────
	    try:
	        # Captioning is blocking, CPU-bound work. Calling it directly inside
	        # this coroutine would stall the event loop (and every other request)
	        # until it finishes, so it is handed to the worker threadpool.
	        # NOTE(review): captions for different requests may now be generated
	        # concurrently; confirm the model tolerates that, or gate with a lock.
	        caption = await run_in_threadpool(image_to_speech, image)
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=f"Pipeline error: {e}") from e

	    # Return the clean description
	    return JSONResponse({"description": caption})

	# ── Entry point ────────────────────────────────────────────────────────────
	# Runs only when this file is executed directly as a script.
	if __name__ == "__main__":
	    # The application is referenced by the import string "app:app", so this
	    # file must be saved as app.py for that reference to resolve.
	    uvicorn.run(
	        "app:app",
	        host="0.0.0.0",
	        port=7860,
	    )