Spaces:

ysuneu
/

Assignment_for_deeplearning

Build error

App Files Files Community

ysuneu committed on Apr 25, 2025

Commit

61cb4f3

verified ·

1 Parent(s): a0a7ccb

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -32

app.py CHANGED Viewed

@@ -1,36 +1,54 @@
 import streamlit as st
 from PIL import Image
-import time
 from transformers import pipeline
-# Load models once at startup
-@st.cache_resource
-def load_models():
-    caption_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
-    text_model = pipeline("text-generation", model="distilbert/distilgpt2")
-    speech_model = pipeline("text-to-speech", model="facebook/mms-tts-eng")
-    return caption_model, text_model, speech_model
-caption, generator, speech = load_models()
-# App title
-st.title("APP on Hugging Face")
-st.write("Welcome to the app!")
-uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
-if uploaded_image is not None:
-    image = Image.open(uploaded_image)
-    st.image(image, caption="Uploaded Image")
-    with st.spinner("Generating caption..."):
-        caption_result = caption(image)
-        st.write("Image Caption:", caption_result[0]['generated_text'])
-    with st.spinner("Generating story..."):
-        story = generator(caption_result[0]['generated_text'], max_length=100)
-        st.write("Generated Story:", story[0]['generated_text'])
-    with st.spinner("Generating speech..."):
-        speech_output = speech(story[0]['generated_text'])
-        st.audio(speech_output["audio"], sample_rate=speech_output["sampling_rate"])

 import streamlit as st
 from PIL import Image
 from transformers import pipeline
+@st.cache_resource
+def _load_caption_pipeline():
+    """Build the BLIP image-captioning pipeline once and reuse it across reruns."""
+    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
+def generate_image_caption(image):
+    """Generate a caption for the given image using a pre-trained model.
+
+    Args:
+        image: The image to caption, passed straight to the image-to-text pipeline.
+    Returns:
+        The ``generated_text`` field of the first pipeline result.
+    """
+    # Streamlit re-executes the script on every interaction, so the pipeline
+    # comes from a cached loader instead of being rebuilt on each call.
+    img2caption = _load_caption_pipeline()
+    result = img2caption(image)
+    return result[0]['generated_text']
+@st.cache_resource
+def _load_story_pipeline():
+    """Build the story text-generation pipeline once and reuse it across reruns."""
+    return pipeline("text-generation", model="pranavpsv/genre-story-generator-v2")
+def text2story(text):
+    """Generate a story from the input text using a pre-trained model.
+
+    Args:
+        text: Prompt text handed to the text-generation pipeline.
+    Returns:
+        The ``generated_text`` field of the first pipeline result.
+    """
+    # Reuse the cached pipeline; constructing it per call reloads the model
+    # every time the Streamlit script reruns.
+    pipe = _load_story_pipeline()
+    story_text = pipe(text)[0]['generated_text']
+    return story_text
+@st.cache_resource
+def _load_speech_pipeline():
+    """Build the text-to-speech pipeline once and reuse it across reruns."""
+    return pipeline("text-to-speech", model="facebook/mms-tts-eng")
+def text2speech(text):
+    """Convert text to speech using a pre-trained model.
+
+    Args:
+        text: The text to synthesize.
+    Returns:
+        The raw pipeline output; the caller reads its ``"audio"`` and
+        ``"sampling_rate"`` entries.
+    """
+    # Reuse the cached pipeline; constructing it per call reloads the model
+    # every time the Streamlit script reruns.
+    speech_pipe = _load_speech_pipeline()
+    speech_output = speech_pipe(text)
+    return speech_output
+def main():
+    """Render the Streamlit page: upload an image, caption it, turn the
+    caption into a story, and play the story back as audio."""
+    # App title
+    st.title("Streamlit Demo on Hugging Face")
+    st.write("Welcome to the image to story audio app!")
+    uploaded_image = st.file_uploader("Upload an image(jpg, jpeg, png)", type=["jpg", "jpeg", "png"])
+    if uploaded_image is None:
+        # No upload yet. This is not an "unsupported file" condition: the
+        # uploader's `type` list already restricts the accepted extensions.
+        st.info("Please upload an image to get started.")
+        return
+    try:
+        image = Image.open(uploaded_image).convert("RGB")
+    except OSError:
+        # Pillow reports unreadable or corrupt image data via OSError
+        # (UnidentifiedImageError is a subclass), so a bad upload shows an
+        # error instead of crashing the app.
+        st.error("⚠️ Could not read the uploaded file as an image.")
+        return
+    # NOTE(review): `use_column_width` is deprecated in recent Streamlit
+    # releases; confirm the pinned version before switching parameters.
+    st.image(image, caption="Uploaded Image", use_column_width=True)
+    # Stage 1: Image to Text
+    st.text('Processing img2text...')
+    image_caption = generate_image_caption(image)
+    st.write(image_caption)
+    # Stage 2: Text to Story
+    st.text('Processing text2story...')
+    story = text2story(image_caption)
+    st.write("Generated Story:", story)
+    # Stage 3: Story to Speech
+    st.text('Processing story2speech...')
+    speech_output = text2speech(story)
+    st.audio(speech_output["audio"], sample_rate=speech_output["sampling_rate"])
+# Run the app only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()