ysuneu committed on
Commit
61cb4f3
·
verified ·
1 Parent(s): a0a7ccb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -32
app.py CHANGED
@@ -1,36 +1,54 @@
1
  import streamlit as st
2
  from PIL import Image
3
- import time
4
  from transformers import pipeline
5
 
6
- # Load models once at startup
7
- @st.cache_resource
8
- def load_models():
9
- caption_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
10
- text_model = pipeline("text-generation", model="distilbert/distilgpt2")
11
- speech_model = pipeline("text-to-speech", model="facebook/mms-tts-eng")
12
- return caption_model, text_model, speech_model
13
-
14
- caption, generator, speech = load_models()
15
-
16
- # App title
17
- st.title("APP on Hugging Face")
18
- st.write("Welcome to the app!")
19
-
20
- uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
21
-
22
- if uploaded_image is not None:
23
- image = Image.open(uploaded_image)
24
- st.image(image, caption="Uploaded Image")
25
-
26
- with st.spinner("Generating caption..."):
27
- caption_result = caption(image)
28
- st.write("Image Caption:", caption_result[0]['generated_text'])
29
-
30
- with st.spinner("Generating story..."):
31
- story = generator(caption_result[0]['generated_text'], max_length=100)
32
- st.write("Generated Story:", story[0]['generated_text'])
33
-
34
- with st.spinner("Generating speech..."):
35
- speech_output = speech(story[0]['generated_text'])
36
- st.audio(speech_output["audio"], sample_rate=speech_output["sampling_rate"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  from PIL import Image
 
3
  from transformers import pipeline
4
 
5
@st.cache_resource
def _load_caption_pipeline():
    """Build the image-captioning pipeline once and reuse it on later calls."""
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


def generate_image_caption(image):
    """Generate a caption for the given image using a pre-trained model.

    Args:
        image: The image to caption. The caller in this file passes a PIL
            image converted to RGB.

    Returns:
        The ``generated_text`` field of the first pipeline result.
    """
    # Streamlit re-executes the script on each interaction, so the pipeline
    # is fetched from a cached loader rather than being rebuilt per call.
    img2caption = _load_caption_pipeline()

    # Generate caption
    result = img2caption(image)
    return result[0]['generated_text']
12
+
13
@st.cache_resource
def _load_story_pipeline():
    """Build the story text-generation pipeline once and reuse it on later calls."""
    return pipeline("text-generation", model="pranavpsv/genre-story-generator-v2")


def text2story(text):
    """Generate a story based on the input text using a pre-trained model.

    Args:
        text: Prompt text handed to the text-generation pipeline. The caller
            in this file passes the image caption.

    Returns:
        The ``generated_text`` field of the first pipeline result.
    """
    # Cached loader: avoids re-instantiating the model on every Streamlit rerun.
    pipe = _load_story_pipeline()
    story_text = pipe(text)[0]['generated_text']
    return story_text
18
+
19
@st.cache_resource
def _load_speech_pipeline():
    """Build the text-to-speech pipeline once and reuse it on later calls."""
    return pipeline("text-to-speech", model="facebook/mms-tts-eng")


def text2speech(text):
    """Convert text to speech using a pre-trained model.

    Args:
        text: The text to synthesize.

    Returns:
        The raw pipeline output. The caller in this file reads its
        ``"audio"`` and ``"sampling_rate"`` entries.
    """
    # Cached loader: avoids re-instantiating the model on every Streamlit rerun.
    speech_pipe = _load_speech_pipeline()
    speech_output = speech_pipe(text)
    return speech_output
24
+
25
def main():
    """Render the page and run the image -> caption -> story -> speech flow.

    Nothing is processed until a file has been uploaded. If the uploaded file
    cannot be decoded as an image, a warning is shown and the run stops.
    """
    # App title
    st.title("Streamlit Demo on Hugging Face")
    st.write("Welcome to the image to story audio app!")

    uploaded_image = st.file_uploader("Upload an image(jpg, jpeg, png)", type=["jpg", "jpeg", "png"])

    # No upload yet is the normal initial state, not an error, so do not
    # report an unsupported file type here.
    if uploaded_image is None:
        return

    try:
        image = Image.open(uploaded_image).convert("RGB")
    except OSError:
        # Pillow signals undecodable or truncated image data with OSError
        # (UnidentifiedImageError is a subclass); this is the real
        # "unsupported file" case.
        st.warning("⚠️ Unsupported file type")
        return

    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Stage 1: Image to Text
    st.text('Processing img2text...')
    image_caption = generate_image_caption(image)
    st.write(image_caption)

    # Stage 2: Text to Story
    st.text('Processing text2story...')
    story = text2story(image_caption)
    st.write("Generated Story:", story)

    # Stage 3: Story to Speech
    st.text('Processing story2speech...')
    speech_output = text2speech(story)
    st.audio(speech_output["audio"], sample_rate=speech_output["sampling_rate"])


if __name__ == "__main__":
    main()