Spaces:

DWD1211
/

LAB2

Sleeping

App Files Community

DWD1211 committed on Apr 28, 2025

Commit

a526533

verified ·

1 Parent(s): a71e738

Create app.py

Browse files

Files changed (1) hide show

app.py +71 -0

app.py ADDED Viewed

	@@ -0,0 +1,71 @@

+import streamlit as st
+from transformers import pipeline
+# --- Load models once at the beginning ---
+image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
+text_to_story_model = pipeline("text-generation", model="pranavpsv/genre-story-generator-v2")
+story_to_audio_model = pipeline("text-to-speech", model="facebook/mms-tts-eng")
+# --- Define Functions ---
+# img2text
+def img2text(image_path):
+    text = image_to_text_model(image_path)[0]["generated_text"]
+    return text
+# text2story
+def text2story(text):
+    story_text = text_to_story_model(text, max_new_tokens=150)[0]['generated_text']
+    words = story_text.split()
+    if len(words) > 100:
+        story_text = ' '.join(words[:100]) + '.'
+    return story_text
+# text2audio
+def text2audio(story_text):
+    speech_data = story_to_audio_model(story_text)
+    return speech_data
+# play_audio
+def play_audio(audio_data):
+    audio_buffer = io.BytesIO()
+    sf.write(audio_buffer, audio_data['audio'], audio_data['sampling_rate'], format='WAV')
+    audio_buffer.seek(0)
+    st.audio(audio_buffer, format='audio/wav')
+# --- Streamlit App ---
+st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
+st.header("Turn Your Image to Audio Story")
+uploaded_file = st.file_uploader("Select an Image...")
+if uploaded_file is not None:
+    bytes_data = uploaded_file.getvalue()
+    with open(uploaded_file.name, "wb") as file:
+        file.write(bytes_data)
+    st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)
+    # Stage 1: Image to Text
+    if "scenario" not in st.session_state:
+        with st.spinner('Processing image to text...'):
+            st.session_state.scenario = img2text(uploaded_file.name)
+    st.subheader("Image Description:")
+    st.write(st.session_state.scenario)
+    # Stage 2: Text to Story
+    if "story" not in st.session_state:
+        with st.spinner('Generating a story...'):
+            st.session_state.story = text2story(st.session_state.scenario)
+    st.subheader("Generated Story:")
+    st.write(st.session_state.story)
+    # Stage 3: Story to Audio
+    if "audio_data" not in st.session_state:
+        with st.spinner('Generating audio narration...'):
+            st.session_state.audio_data = text2audio(st.session_state.story)
+    # Play Audio Button
+    if st.button("Play Audio"):
+        if "audio_data" in st.session_state:
+            play_audio(st.session_state.audio_data)
+        else:
+            st.warning("Please generate the audio first.")