Assignment1 / app.py
CR7CAD's picture
Update app.py
8fe6281 verified
raw
history blame
3.8 kB
# Imports
import streamlit as st
from transformers import pipeline
from PIL import Image
import torch
from gtts import gTTS
import os
import tempfile
# Image captioning helper
def img2text(image):
    """Return a generated caption for *image*.

    Builds an "image-to-text" pipeline for the
    ``sooh-j/blip-image-captioning-base`` model, runs it on *image*, and
    returns the ``"generated_text"`` field of the first result.
    """
    captioner = pipeline("image-to-text", model="sooh-j/blip-image-captioning-base")
    predictions = captioner(image)
    # Only the first prediction is used.
    return predictions[0]["generated_text"]
def _trim_to_sentence_end(story_text, min_length=100, sentence_count=3):
    """Cut *story_text* right after a sentence boundary.

    A boundary is the last character of a run of '.', '?' or '!', so an
    ellipsis ("...") or "?!" counts as a single sentence ending instead of
    several. Only boundaries at index >= *min_length* are considered. The
    text is cut after the *sentence_count*-th such boundary, or after the
    last one when there are fewer. If no boundary qualifies, the text is
    returned unchanged.
    """
    terminators = frozenset(".?!")
    boundaries = [
        index
        for index, char in enumerate(story_text)
        if char in terminators
        and index >= min_length
        # Skip every character of a punctuation run except its last one.
        and story_text[index + 1:index + 2] not in terminators
    ]
    if not boundaries:
        return story_text
    cut = boundaries[min(sentence_count, len(boundaries)) - 1]
    return story_text[:cut + 1]


# Improved text-to-story function with natural ending
def text2story(text):
    """Generate a short children's story from the caption *text*.

    Builds a "text-generation" pipeline for
    ``TinyLlama/TinyLlama-1.1B-Chat-v1.0``, prompts it with instructions that
    end in "Once upon a time, ", and samples one sequence (``max_length=300``,
    ``temperature=0.7``). The instruction part of the prompt is stripped from
    the output so the returned story starts with "Once upon a time, ", and
    the story is trimmed to end on a sentence boundary where one is found.

    Returns:
        The story as a string.
    """
    generator = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    story_opening = "Once upon a time, "
    prompt = (
        f"Write a short children's story based on this: {text}. "
        "The story should have a clear beginning, middle, and end. "
        "Keep it under 150 words. " + story_opening
    )
    # Generate a longer text than needed so a complete story is likely present
    story_result = generator(
        prompt,
        max_length=300,
        num_return_sequences=1,
        temperature=0.7,
        do_sample=True,
    )
    # The generated text echoes the prompt; keep only the story opening.
    story_text = story_result[0]['generated_text'].replace(prompt, story_opening)
    return _trim_to_sentence_end(story_text)
# Updated text-to-audio function using gTTS instead of transformers
def text2audio(story_text):
    """Convert *story_text* to speech and return the MP3 data as bytes.

    The audio is synthesized with gTTS (``lang='en'``, ``slow=False``),
    saved to a temporary ``.mp3`` file, and read back. The temporary file is
    removed even when synthesis or reading fails; any exception raised along
    the way propagates to the caller.
    """
    # Reserve a unique path. The handle is closed on leaving the `with`
    # so the file can be reopened by name; delete=False keeps it on disk.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
        temp_filename = temp_file.name
    try:
        # Use gTTS to convert text to speech
        tts = gTTS(text=story_text, lang='en', slow=False)
        tts.save(temp_filename)
        # Read the audio file back into memory
        with open(temp_filename, 'rb') as audio_file:
            return audio_file.read()
    finally:
        # Always clean up, otherwise a failed conversion leaks the temp file.
        os.unlink(temp_filename)
# Basic Streamlit interface: upload an image, caption it, turn the caption
# into a story, and play the story as audio.
st.title("Image to Audio Story")
uploaded_file = st.file_uploader("Upload an image")

if uploaded_file is not None:
    # The uploader is not restricted to image types, so check that Pillow
    # can parse the upload before displaying it or running the models.
    try:
        image = Image.open(uploaded_file)
    except OSError as e:
        # Pillow reports unreadable/unrecognized image data as an OSError.
        st.error(f"Could not read the uploaded file as an image: {e}")
    else:
        # Display image
        st.image(uploaded_file, caption="Uploaded Image")

        # Image to Text
        with st.spinner("Generating caption..."):
            caption = img2text(image)
            st.write(f"Caption: {caption}")

        # Text to Story
        with st.spinner("Creating story..."):
            story = text2story(caption)
            st.write(f"Story: {story}")

        # Text to Audio
        with st.spinner("Generating audio..."):
            try:
                audio_bytes = text2audio(story)
                # Play audio
                st.audio(audio_bytes, format='audio/mp3')
            except Exception as e:
                # Top-level UI boundary: report the failure instead of crashing.
                st.error(f"Error generating or playing audio: {e}")
                st.write("Make sure gTTS is installed with: pip install gTTS")