Spaces:

CR7CAD
/

Assignment1

Sleeping

App Files Files Community

Assignment1 / app.py

CR7CAD

Update app.py

862568a verified 10 months ago

raw

history blame

5.2 kB

	# Imports
	import streamlit as st
	from transformers import pipeline
	from PIL import Image
	import torch
	import os
	import tempfile
	import sys
	import subprocess

	# Import gTTS; when the package is missing, install it with pip using the
	# interpreter that is running this app, then import it again.
	try:
	    from gtts import gTTS
	except ImportError:
	    st.warning("Installing required package: gTTS...")
	    # sys.executable targets the current interpreter's environment.
	    pip_install_cmd = [sys.executable, "-m", "pip", "install", "gTTS"]
	    # check_call raises CalledProcessError if pip exits with a non-zero status.
	    subprocess.check_call(pip_install_cmd)
	    from gtts import gTTS
	    st.success("gTTS installed successfully!")

	# Image captioning step
	def img2text(image):
	    """Return a caption for ``image``.

	    Builds an "image-to-text" pipeline for the
	    ``sooh-j/blip-image-captioning-base`` model, runs it on ``image`` and
	    returns the ``generated_text`` field of the first result.

	    Args:
	        image: The image handed straight to the pipeline; the caller in this
	            file passes a PIL image.

	    Returns:
	        The generated caption text.
	    """
	    # NOTE: the pipeline is constructed on every call.
	    captioner = pipeline("image-to-text", model="sooh-j/blip-image-captioning-base")
	    first_result = captioner(image)[0]
	    return first_result["generated_text"]

	# Story generation step, trimmed so the story stops at the end of a sentence
	def text2story(text):
	    """Generate a short children's story from ``text``.

	    A text-generation pipeline (``TinyLlama/TinyLlama-1.1B-Chat-v1.0``) is
	    prompted with ``text``; any echo of the prompt in the output is replaced
	    by the opening "Once upon a time, ". The result is then cut at a sentence
	    ending ('.', '?' or '!') located at character index 100 or later:

	    * more than two such endings -> cut after the third one;
	    * one or two such endings -> cut after the last one;
	    * none -> the text is returned uncut.

	    Args:
	        text: Description the story is based on (the image caption in this
	            file).

	    Returns:
	        The story text, possibly truncated as described above.
	    """
	    prompt = f"Write a short children's story based on this: {text}. The story should have a clear beginning, middle, and end. Keep it under 150 words. Once upon a time, "
	    generator = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")

	    # Sampling is enabled, so repeated calls may produce different stories.
	    outputs = generator(
	        prompt,
	        max_length=300,
	        num_return_sequences=1,
	        temperature=0.7,
	        do_sample=True,
	    )

	    # Swap the echoed prompt for just the story opening.
	    story_text = outputs[0]['generated_text'].replace(prompt, "Once upon a time, ")

	    # Sentence-ending positions at or beyond the minimum story length,
	    # collected in text order.
	    min_story_length = 100
	    late_endings = [
	        idx
	        for idx, ch in enumerate(story_text)
	        if ch in ".?!" and idx >= min_story_length
	    ]

	    # No usable ending: return the text as generated.
	    if not late_endings:
	        return story_text

	    # Prefer the third qualifying ending; with fewer available, use the last.
	    cut_index = late_endings[2] if len(late_endings) > 2 else late_endings[-1]
	    return story_text[:cut_index + 1]

	# Text-to-speech step using gTTS
	def text2audio(story_text):
	    """Convert ``story_text`` to speech with gTTS and return the MP3 bytes.

	    The audio is written to a temporary ``.mp3`` file, read back into memory,
	    and the temporary file is always removed, even when synthesis or saving
	    fails.

	    Args:
	        story_text: The text to be spoken (English, normal speed).

	    Returns:
	        The contents of the generated MP3 file as ``bytes``.

	    Raises:
	        Exception: Whatever gTTS or the file operations raise is propagated
	            to the caller after the temporary file has been cleaned up.
	    """
	    # Reserve a file name; the handle is closed before gTTS saves to that
	    # path (presumably so saving by name also works where an open temp file
	    # cannot be reopened -- confirm if platform matters).
	    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
	        temp_filename = temp_file.name

	    try:
	        # Use gTTS to convert text to speech
	        tts = gTTS(text=story_text, lang='en', slow=False)
	        tts.save(temp_filename)

	        # Read the audio file
	        with open(temp_filename, 'rb') as audio_file:
	            return audio_file.read()
	    finally:
	        # delete=False means nothing else removes the file, so clean up here
	        # on both the success and the failure path.
	        os.unlink(temp_filename)

	# Basic Streamlit interface: upload an image, caption it, turn the caption
	# into a story, then play the story as audio.
	st.title("Image to Audio Story")
	uploaded_file = st.file_uploader("Upload an image")

	if uploaded_file is not None:
	    # Display image
	    st.image(uploaded_file, caption="Uploaded Image")

	    # Convert to PIL Image
	    image = Image.open(uploaded_file)

	    # Image to Text
	    with st.spinner("Generating caption..."):
	        caption = img2text(image)
	        st.write(f"Caption: {caption}")

	    # Text to Story
	    with st.spinner("Creating story..."):
	        story = text2story(caption)
	        st.write(f"Story: {story}")

	    # Text to Audio
	    with st.spinner("Generating audio..."):
	        try:
	            audio_bytes = text2audio(story)

	            # Play audio
	            st.audio(audio_bytes, format='audio/mp3')
	        except Exception as e:
	            st.error(f"Error generating or playing audio: {e}")
	            st.info("If you're having issues with gTTS, you might need to manually install it with: pip install gTTS")

	            # Fallback to a simple TTS if gTTS fails
	            try:
	                st.write("Attempting fallback to pyttsx3...")
	                # Imported lazily: only needed when the gTTS path has failed.
	                import pyttsx3
	                engine = pyttsx3.init()

	                # Reserve a temporary file name for the fallback audio
	                with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_wav:
	                    temp_wav_filename = temp_wav.name

	                try:
	                    # Generate and save speech
	                    engine.save_to_file(story, temp_wav_filename)
	                    engine.runAndWait()

	                    # Read the audio file
	                    with open(temp_wav_filename, 'rb') as audio_file:
	                        fallback_audio = audio_file.read()
	                finally:
	                    # Remove the temp file whether or not synthesis worked,
	                    # so a failed fallback does not leave it behind.
	                    os.unlink(temp_wav_filename)

	                st.audio(fallback_audio, format='audio/wav')
	            except Exception as fallback_error:
	                # Catch Exception rather than everything, so interpreter-exit
	                # signals still propagate, and show why the fallback failed.
	                st.error("Both TTS methods failed. Please install gTTS manually.")
	                st.error(f"Fallback error: {fallback_error}")