Spaces:

EmmaL1
/

Story

Sleeping

App Files Files Community

EmmaL1 committed on Mar 7, 2025

Commit

c63d328

verified ·

1 Parent(s): a4d3132

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -16

app.py CHANGED Viewed

@@ -1,48 +1,62 @@
 # import part
 import streamlit as st
 from transformers import pipeline
-from PIL import Image
 # function part
-# image to text
 def img2text(img):
     image_to_text_model = pipeline("image-to-text",
                                    model="Salesforce/blip-image-captioning-base")
     text = image_to_text_model(img)[0]["generated_text"]
     return text
-    st.write(text)
-# text to story
 def text2story(text):
     text_generation_model = pipeline("text-generation",
                                    model="meta-llama/Meta-Llama-3-8B")
     story_text = "Once upon a time in a land far, far away"
-    generated_story = story_text(story_text,
                             max_length=100,
                             num_return_sequences=1)
     return generated_story
-    st.write(generated_story)
-# text to audio
-def text2audio(story_text)
     text_to_speech_model = pipeline("text-to-speech", model="facebook/mms-tts-eng")
     text_to_speak = """
     Hi everyone, I'm going to start with a story now！
     """
     speech_output = text_to_speech_model(text_to_speak)
-    print("Text-to-Speech Output:", speech_output)
-from IPython.display import Audio
-    print(text_to_speak)
-    st.audio(speech_output['audio'],
-             sample_rate=speech_output['sampling_rate'])
 # main part
 st.set_page_config(page_title="Your Image to Audio Story",
                     page_icon="*")
 st.header("Turn Your Image to Audio Story")
 uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "png", "jpeg"])
 if uploaded_file is not None:
     image = Image.open(uploaded_file).convert("RGB")
-    st.image(image, caption="Uploaded Image", use_column_width=True)

 # import part
 import streamlit as st
 from transformers import pipeline
 # function part
+# image2text
 def img2text(img):
     """Produce a caption for an image.

     Builds a Hugging Face ``image-to-text`` pipeline backed by
     ``Salesforce/blip-image-captioning-base``, runs it on ``img`` and
     returns the ``generated_text`` entry of the first result.
     """
     captioner = pipeline(
         "image-to-text",
         model="Salesforce/blip-image-captioning-base",
     )
     first_result = captioner(img)[0]
     return first_result["generated_text"]
+# text2story
 def text2story(text, text_generation_model=None):
     """Generate a short story seeded by ``text``.

     Args:
         text: Scene description (for example an image caption) the story
             should build on. When it is empty or ``None`` the prompt is
             just the default fairy-tale opening.
         text_generation_model: Optional preloaded callable that is invoked
             as ``model(prompt, max_length=..., num_return_sequences=...)``.
             When omitted, a ``meta-llama/Meta-Llama-3-8B`` text-generation
             pipeline is created for this call.

     Returns:
         Whatever the model call returns, unchanged. NOTE(review): for a
         Hugging Face text-generation pipeline this is expected to be a
         list of dicts carrying ``"generated_text"`` - confirm against the
         installed transformers version.
     """
     opening = "Once upon a time in a land far, far away"
     scene = text.strip() if isinstance(text, str) else ""
     # Previously the prompt was always the fixed opening, so the caption
     # passed in never influenced the story.
     prompt = f"{scene}. {opening}" if scene else opening

     if text_generation_model is None:
         text_generation_model = pipeline("text-generation",
                                          model="meta-llama/Meta-Llama-3-8B")
     return text_generation_model(prompt,
                                  max_length=100,
                                  num_return_sequences=1)
+# text2audio
def text2audio(story_text, text_to_speech_model=None):
    """Convert a story into speech.

    Args:
        story_text: The text to read aloud. When it is empty, whitespace
            only, or not a string, a fixed greeting is spoken instead.
        text_to_speech_model: Optional preloaded callable invoked as
            ``model(text)``. When omitted, a ``facebook/mms-tts-eng``
            text-to-speech pipeline is created for this call.

    Returns:
        The model output, unchanged. The script reads its ``'audio'`` and
        ``'sampling_rate'`` entries when playing the result.
    """
    # Same runtime text as the original triple-quoted greeting.
    greeting = "\n    Hi everyone, I'm going to start with a story now！\n    "

    # Previously the greeting was always synthesized and ``story_text`` was
    # never used, so the generated story was never heard.
    if isinstance(story_text, str) and story_text.strip():
        text_to_speak = story_text
    else:
        text_to_speak = greeting

    if text_to_speech_model is None:
        text_to_speech_model = pipeline("text-to-speech", model="facebook/mms-tts-eng")
    return text_to_speech_model(text_to_speak)
 # main part
 # Local stdlib imports: needed to hand the upload to the captioner as a
 # real file on disk and to clean it up afterwards.
 import os
 import tempfile

 st.set_page_config(page_title="Your Image to Audio Story",
                    page_icon="*")
 st.header("Turn Your Image to Audio Story")
 uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "png", "jpeg"])
 if uploaded_file is not None:
     # ``Image`` is not imported in this file, so the upload is shown
     # directly instead of going through ``Image.open``.
     st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

     # stage 1: caption the image
     st.text('Processing img2text...')
     # ``uploaded_file.name`` is only a file name, not a path that exists on
     # disk, so write the bytes to a temporary file the pipeline can open.
     suffix = os.path.splitext(uploaded_file.name)[1]
     with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
         tmp.write(uploaded_file.getvalue())
     try:
         scenario = img2text(tmp.name)
     finally:
         os.remove(tmp.name)
     st.write(scenario)

     # stage 2: turn the caption into a story
     st.text('Generating a story...')
     # text2story returns the raw pipeline output; take the generated text
     # of the single requested sequence.
     story = text2story(scenario)[0]["generated_text"]
     st.write(story)

     # stage 3: synthesize speech for the story
     st.text('Generating audio data...')
     audio_data = text2audio(story)

     # The button lives inside the upload branch because ``audio_data`` only
     # exists once an image has been processed.
     if st.button("Play Audio"):
         st.audio(audio_data['audio'],
                  format="audio/wav",
                  start_time=0,
                  sample_rate=audio_data['sampling_rate'])
         # Only play the bundled sample clip when it is actually present.
         if os.path.exists("kids_playing_audio.wav"):
             st.audio("kids_playing_audio.wav")