Spaces:

ysuneu
/

Assignment_for_deeplearning

Build error

App Files Files Community

Assignment_for_deeplearning / app.py

ysuneu

Update app.py

a3cc323 verified 9 months ago

raw

history blame contribute delete

1.97 kB

	import streamlit as st
	from PIL import Image
	from transformers import pipeline

	@st.cache_resource
	def _load_caption_pipeline():
	    """Build the image-captioning pipeline once and reuse it across reruns.

	    Streamlit re-executes the whole script on every interaction; caching the
	    pipeline as a resource avoids reloading the model weights each time.
	    """
	    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


	def generate_image_caption(image):
	    """Generate a caption for the given image using a pre-trained model.

	    Args:
	        image: The image to describe, passed straight to the
	            ``image-to-text`` pipeline (``main`` supplies an RGB PIL image).

	    Returns:
	        The ``generated_text`` field of the first pipeline result.
	    """
	    img2caption = _load_caption_pipeline()

	    # The pipeline returns a list of results; only the first caption is used.
	    result = img2caption(image)
	    return result[0]['generated_text']

	@st.cache_resource
	def _load_story_pipeline():
	    """Build the story-generation pipeline once and reuse it across reruns.

	    Streamlit re-executes the whole script on every interaction; caching the
	    pipeline as a resource avoids reloading the model weights each time.
	    """
	    return pipeline("text-generation", model="pranavpsv/genre-story-generator-v2")


	def text2story(text: str) -> str:
	    """Expand a short text prompt into a story of at most 100 words.

	    Args:
	        text: Prompt handed to the text-generation pipeline.

	    Returns:
	        The generated text. If it is longer than 100 whitespace-separated
	        words it is cut to the first 100 words and closed with a period,
	        unless the cut already ends on sentence punctuation.
	    """
	    text_to_story_model = _load_story_pipeline()
	    story_text = text_to_story_model(text, max_new_tokens=150)[0]['generated_text']

	    words = story_text.split()
	    if len(words) > 100:
	        story_text = ' '.join(words[:100])
	        # Avoid doubled punctuation ("end..") when the last kept word
	        # already terminates a sentence.
	        if not story_text.endswith(('.', '!', '?')):
	            story_text += '.'
	    return story_text

	@st.cache_resource
	def _load_speech_pipeline():
	    """Build the text-to-speech pipeline once and reuse it across reruns.

	    Streamlit re-executes the whole script on every interaction; caching the
	    pipeline as a resource avoids reloading the model weights each time.
	    """
	    return pipeline("text-to-speech", model="facebook/mms-tts-eng")


	def text2speech(text: str):
	    """Convert text to speech using a pre-trained model.

	    Args:
	        text: The text to synthesize.

	    Returns:
	        The raw pipeline output. ``main`` reads its ``"audio"`` and
	        ``"sampling_rate"`` entries to play the result.
	    """
	    speech_pipe = _load_speech_pipeline()
	    return speech_pipe(text)

	def main():
	    """Render the Streamlit page and run the image -> caption -> story -> audio flow.

	    Shows a title and an uploader; when no file has been uploaded yet a
	    warning is displayed, otherwise the three processing stages run in
	    order and each result is written to the page.
	    """
	    # Page header
	    st.title("Storyteller on Hugging Face")
	    st.write("Welcome to the image to story audio app!")

	    upload = st.file_uploader(
	        "Upload an image (jpg, jpeg, png)",
	        type=["jpg", "jpeg", "png"],
	    )

	    # Nothing uploaded yet: prompt the user and stop here.
	    if upload is None:
	        st.warning("⚠️ Please upload an image file")
	        return

	    picture = Image.open(upload).convert("RGB")
	    st.image(picture, caption="Uploaded Image", use_column_width=True)

	    # Stage 1: image -> caption
	    st.text('Processing img2text...')
	    caption = generate_image_caption(picture)
	    st.write(caption)

	    # Stage 2: caption -> story
	    st.text('Processing text2story...')
	    narrative = text2story(caption)
	    st.write("Generated Story:", narrative)

	    # Stage 3: story -> speech
	    st.text('Processing story2speech...')
	    speech = text2speech(narrative)
	    st.audio(speech["audio"], sample_rate=speech["sampling_rate"])

	# Launch the app only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()