ysuneu commited on
Commit
de2e2f3
·
verified ·
1 Parent(s): 8286021

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -0
app.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ import time
4
+ from transformers import pipeline
5
+
6
# Build every pipeline a single time; st.cache_resource keeps the loaded
# models alive across Streamlit reruns instead of reloading per interaction.
@st.cache_resource
def load_models():
    """Create and cache the three inference pipelines used by the app.

    Returns:
        tuple: (image-captioning pipeline, text-generation pipeline,
        text-to-speech pipeline), in that order.
    """
    models = (
        pipeline("image-to-text", model="Salesforce/blip-image-captioning-base"),
        pipeline("text-generation", model="distilbert/distilgpt2"),
        pipeline("text-to-speech", model="facebook/mms-tts-eng"),
    )
    return models
13
+
14
caption, generator, speech = load_models()

# App title and greeting.
st.title("Streamlit Demo on Hugging Face")
st.write("Welcome to the app!")

uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])

if uploaded_image is not None:
    image = Image.open(uploaded_image)
    st.image(image, caption="Uploaded Image")

    with st.spinner("Generating caption..."):
        # Generation limits must go through generate_kwargs: recent
        # transformers versions deprecate/reject a bare `max_length` kwarg
        # on image-to-text pipelines.
        caption_result = caption(image, generate_kwargs={"max_new_tokens": 30})
        caption_text = caption_result[0]['generated_text']
        st.write("Image Caption:", caption_text)

    with st.spinner("Generating story..."):
        # max_new_tokens bounds only the generated continuation;
        # max_length would include the prompt and can truncate (or warn)
        # when the caption is long.
        story = generator(caption_text, max_new_tokens=100)
        story_text = story[0]['generated_text']
        st.write("Generated Story:", story_text)

    with st.spinner("Generating speech..."):
        speech_output = speech(story_text)
        # NOTE(review): mms-tts pipelines return audio shaped (1, n_samples);
        # st.audio wants a 1-D array, so squeeze — confirm against the
        # installed transformers version (squeeze is a no-op if already 1-D).
        st.audio(
            speech_output["audio"].squeeze(),
            sample_rate=speech_output["sampling_rate"],
        )