Spaces:

josephtran04
/

image-captioning

Sleeping

image-captioning / app.py

Update app.py

a666c6e verified 7 months ago

1.44 kB

	# This script creates a simple web application using Gradio to generate captions for images using the BLIP model from Hugging Face's Transformers library.
	# Import necessary libraries
	import gradio as gr
	import numpy as np
	from PIL import Image
	from transformers import AutoProcessor, BlipForConditionalGeneration

	# Load the pretrained processor and model from a single checkpoint id so the
	# preprocessing and the weights cannot be pointed at different checkpoints.
	MODEL_ID = "Salesforce/blip-image-captioning-base"
	processor = AutoProcessor.from_pretrained(MODEL_ID)
	model = BlipForConditionalGeneration.from_pretrained(MODEL_ID)

	# Define the function to process the image and generate a caption
	def caption_image(input_image: np.ndarray) -> str:
	    """Generate a caption for one image with the module-level BLIP model.

	    Args:
	        input_image: Image pixels as a NumPy array. ``None`` is also
	            tolerated as input and rejected with a readable error, since
	            the UI can submit the form without an image selected.

	    Returns:
	        The text decoded from the first generated sequence, with special
	        tokens removed.

	    Raises:
	        gr.Error: If ``input_image`` is ``None``.
	    """
	    # Without this guard, Image.fromarray(None) fails with an opaque
	    # AttributeError; gr.Error surfaces an actionable message instead.
	    if input_image is None:
	        raise gr.Error("Please upload an image before requesting a caption.")

	    # Convert numpy array to PIL Image and normalise the mode to RGB
	    raw_image = Image.fromarray(input_image).convert('RGB')

	    # Process the image together with the text prompt that conditions
	    # the generation
	    text = "An image of"
	    inputs = processor(images=raw_image, text=text, return_tensors="pt")

	    # Generate a caption for the image
	    outputs = model.generate(**inputs, max_length=100)

	    # Decode the generated tokens of the first sequence to text
	    caption = processor.decode(outputs[0], skip_special_tokens=True)

	    return caption

	# Create a Gradio interface
	# One image input, one text output, both wired to caption_image.
	iface = gr.Interface(
	    title="Image Captioning",
	    description="This is a simple web app for generating captions for images using BLIP model from Salesforce.",
	    fn=caption_image,
	    inputs=gr.Image(),
	    outputs="text",
	)

	# Start the Gradio app
	iface.launch()