# 111 / app.py
# LCNada's picture
# Update app.py
# f3ae4e1 verified
import streamlit as st
from PIL import Image
from transformers import pipeline
def generate_caption(image_file):
    """Describe an image using the BLIP image-captioning pipeline.

    Args:
        image_file: Anything ``PIL.Image.open`` accepts.

    Returns:
        The ``generated_text`` field of the first result returned by the
        ``image-to-text`` pipeline.
    """
    picture = Image.open(image_file)

    # NOTE(review): the pipeline is constructed on every call, so the model is
    # set up again for each caption request; consider caching it.
    captioner = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base",
    )

    return captioner(picture)[0]["generated_text"]
def generate_story(caption):
    """Generate a children's fairy tale from an image caption with GPT-2.

    The instruction prompt is used only to steer generation and is not part
    of the returned text. If the first pass yields fewer than 100 words, the
    model is asked to continue the text produced so far (rather than to start
    again from the bare prompt) for a bounded number of rounds.

    Args:
        caption: Short description of the image, embedded in the prompt.

    Returns:
        The generated story with surrounding whitespace stripped. It may be
        shorter than 100 words if the model stops early in every round.
    """
    min_words = 100
    # 3 rounds x 256 new tokens plus the prompt stays below GPT-2's
    # 1024-token context window.
    max_rounds = 3
    new_tokens_per_round = 256

    # NOTE(review): the pipeline is constructed on every call; consider
    # caching it so the model is not set up again for each story.
    story_generator = pipeline("text-generation", model="gpt2")
    prompt = (
        f"Based on the following image caption: '{caption}', "
        "generate a complete fairy tale story for children with at least 100 words. "
    )

    story = ""
    for _ in range(max_rounds):
        # Feeding back the story so far makes each round a continuation.
        # return_full_text=False keeps the prompt out of the returned text, so
        # it is neither shown to the user nor counted toward the word target.
        result = story_generator(
            prompt + story,
            max_new_tokens=new_tokens_per_round,
            num_return_sequences=1,
            return_full_text=False,
        )
        continuation = result[0]["generated_text"]
        if not continuation.strip():
            # The model produced nothing new; further rounds would not help.
            break
        story += continuation
        if len(story.split()) >= min_words:
            break

    return story.strip()
def text_to_speech(text, output_file="output.mp3"):
    """Render ``text`` as English speech with gTTS and save it to a file.

    Args:
        text: The text to be spoken.
        output_file: Destination path for the saved audio; defaults to
            ``"output.mp3"``.

    Returns:
        ``output_file``, unchanged, so the caller can hand it to a player.
    """
    # Imported inside the function, so gtts is only loaded when speech is
    # actually requested.
    import gtts

    speech = gtts.gTTS(text=text, lang="en")
    speech.save(output_file)
    return output_file
def main():
    """Run the Streamlit page.

    Flow: the user uploads a picture, the picture is captioned, a story is
    generated from the caption, and the story is converted to audio and
    offered for playback.
    """
    st.title("CREATE YOUR STORY FOR CHILDREN!")
    st.write("Upload a picture. We create a story and read it for you.")

    uploaded_file = st.file_uploader("Choose a picture:", type=["png", "jpg", "jpeg"])
    if uploaded_file is None:
        # Nothing to process until the user has chosen a file.
        return

    image = Image.open(uploaded_file)
    # NOTE(review): recent Streamlit releases deprecate `use_column_width` in
    # favour of `use_container_width` - confirm against the pinned version
    # before changing it.
    st.image(image, caption="Picture you choose", use_column_width=True)

    # Image captioning
    with st.spinner("Generating..."):
        caption = generate_caption(uploaded_file)
    st.write("Picture description:", caption)

    # Story generation (spinner text previously read "Fenerating...")
    with st.spinner("Generating..."):
        story = generate_story(caption)
    st.write("Your story:")
    st.write(story)

    # Text-to-speech
    with st.spinner("Ready to read..."):
        audio_file = text_to_speech(story)
    st.audio(audio_file, format="audio/mp3")


if __name__ == "__main__":
    main()