Spaces:

CR7CAD
/

Assignment1

Sleeping

App Files Files Community

Assignment1 / app.py

CR7CAD

Update app.py

1e8cc2c verified 10 months ago

raw

history blame

2.64 kB

	import streamlit as st
	from transformers import pipeline
	from PIL import Image
	import io
	from gtts import gTTS
	import time

	# The browser-tab title and the on-page heading use the same text.
	APP_TITLE = "Kids Story Generator"
	st.set_page_config(page_title=APP_TITLE)

	# Heading, then a one-line invitation to upload a picture.
	st.title(APP_TITLE)
	st.write("Upload a picture and let's create a magical story!")

	# Model setup: the loader is wrapped in st.cache_resource.
	@st.cache_resource
	def load_models():
	    """Build the two Hugging Face pipelines used by the app.

	    Returns:
	        A ``(image_to_text, story_generator)`` tuple: an "image-to-text"
	        pipeline backed by ``microsoft/git-base-coco`` and a
	        "text-generation" pipeline backed by ``gpt2``.
	    """
	    return (
	        pipeline("image-to-text", model="microsoft/git-base-coco"),
	        pipeline("text-generation", model="gpt2"),
	    )

	# Module-level handles used by generate_caption() and generate_story().
	image_to_text, story_generator = load_models()

	def generate_caption(image):
	    """Return the caption text produced for ``image``.

	    Runs the module-level ``image_to_text`` pipeline on the image and
	    returns the ``'generated_text'`` field of its first result.
	    """
	    first_result = image_to_text(image)[0]
	    return first_result['generated_text']

	def generate_story(caption):
	    """Write a short story that opens with the image caption.

	    The caption is embedded in a "Once upon a time, ..." prompt and continued
	    by the module-level ``story_generator`` pipeline. The returned text
	    includes the prompt and is capped at 100 whitespace-separated words.

	    Args:
	        caption: Text describing the picture.

	    Returns:
	        The story text, always ending in '.', '!' or '?'.
	    """
	    word_limit = 100
	    prompt = f"Once upon a time, {caption} "

	    # max_length is measured in tokens and counts the prompt as well; it is
	    # not a character or word budget, so the word cap below is still needed.
	    generated = story_generator(prompt, max_length=100, do_sample=True)
	    story = generated[0]['generated_text']

	    # Leave the text as generated unless it is over the limit; only then is
	    # it rebuilt from its first `word_limit` words joined by single spaces.
	    words = story.split()
	    if len(words) > word_limit:
	        story = " ".join(words[:word_limit])

	    # Generation can stop mid-sentence at the token limit, so close the
	    # story whether or not it was trimmed. Trailing whitespace is dropped
	    # first so the full stop sits directly after the last word.
	    story = story.rstrip()
	    if not story.endswith(('.', '!', '?')):
	        story += '.'

	    return story

	def text_to_speech(text):
	    """Convert ``text`` to English speech with gTTS and save it to disk.

	    Returns:
	        The name of the file that was written, ``"story_audio.mp3"``.
	    """
	    output_path = "story_audio.mp3"
	    gTTS(text=text, lang='en', slow=False).save(output_path)
	    return output_path

	def _present_story(picture):
	    """Caption ``picture``, then show the resulting story and its audio."""
	    with st.spinner("Generating your story..."):
	        # The caption comes first; the story is built from it.
	        caption = generate_caption(picture)
	        st.write("Image caption:", caption)

	        # Story text, with its word count in the heading.
	        story = generate_story(caption)
	        st.write(f"### Your Story ({len(story.split())} words)")
	        st.write(story)

	        # The audio is synthesized before its heading is written.
	        audio_file = text_to_speech(story)
	        st.write("### Listen to your story")
	        st.audio(audio_file)


	# Upload widget restricted to jpg / jpeg / png files.
	uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

	if uploaded_file is not None:
	    # Preview the picture, then offer the button that starts generation.
	    image = Image.open(uploaded_file)
	    st.image(image, caption='Uploaded Image', use_column_width=True)

	    if st.button("Generate Story"):
	        _present_story(image)

	# Footer
	st.markdown("---")
	st.write("Created for ISOM5240 Assignment")