Spaces:

ma4389
/

Image_To_text_

Sleeping

Image_To_text_ / app.py

Update app.py

29dcc5e verified 7 months ago

1.16 kB

	import torch
	from transformers import AutoProcessor, AutoModelForVision2Seq
	from PIL import Image
	import gradio as gr

	# Run on the GPU when torch can see one, otherwise stay on the CPU.
	device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

	# Load the processor and the captioning model from the same checkpoint,
	# then move the model onto the selected device.
	_checkpoint = "Salesforce/blip-image-captioning-large"
	processor = AutoProcessor.from_pretrained(_checkpoint)
	model = AutoModelForVision2Seq.from_pretrained(_checkpoint)
	model = model.to(device)

	# Inference function
	def generate_caption(image):
	    """Return a text caption for ``image`` generated by the loaded model.

	    Args:
	        image: The PIL image delivered by the Gradio image input, or
	            ``None`` when the form was submitted without an image.

	    Returns:
	        The decoded caption string. Failures never raise; instead a
	        string starting with ``"Error: "`` is returned so the message
	        appears in the text output.
	    """
	    # An empty submission arrives as None; answer with a readable message
	    # instead of leaking "'NoneType' object has no attribute 'convert'".
	    if image is None:
	        return "Error: no image provided. Please upload an image first."
	    try:
	        # Normalise the colour mode (e.g. RGBA or greyscale) to RGB.
	        rgb_image = image.convert("RGB")
	        # No gradients are needed for generation.
	        with torch.inference_mode():
	            inputs = processor(images=rgb_image, return_tensors="pt").to(device)
	            output = model.generate(**inputs)
	            caption = processor.decode(output[0], skip_special_tokens=True)
	        return caption
	    except Exception as e:
	        # UI boundary: report any failure as text rather than failing the request.
	        return f"Error: {e}"

	# Gradio UI: a single image input (handed to the callback as a PIL image)
	# and a plain-text output showing the caption.
	_image_input = gr.Image(type="pil")
	interface = gr.Interface(
	    title="🖼️ Image to Text Captioning",
	    description="Upload an image and get a caption using BLIP (Salesforce/blip-image-captioning-large).",
	    fn=generate_caption,
	    inputs=_image_input,
	    outputs="text",
	)

	# Start the app only when this file is executed directly as a script.
	if __name__ == "__main__":
	    interface.launch(share=True)