TLH01 committed on
Commit
2af5d0c
·
verified ·
1 Parent(s): bea354b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -9
app.py CHANGED
@@ -1,8 +1,9 @@
1
  import streamlit as st
2
  from transformers import pipeline
3
  from PIL import Image
 
4
 
5
- # Stage 1: Image to Caption
6
  @st.cache_resource
7
  def load_image_caption_model():
8
  return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
@@ -12,21 +13,21 @@ def generate_caption(image):
12
  result = caption_model(image)
13
  return result[0]['generated_text']
14
 
15
- # Stage 2: Caption to Story
16
  @st.cache_resource
17
  def load_story_generator():
18
- return pipeline("text-generation", model="pranavpsv/gpt2-genre-story-generator", max_length=120)
19
 
20
  def text2story(description):
21
  story_gen = load_story_generator()
22
- prompt = f"A short and fun children's story about {description}."
23
  story = story_gen(prompt)[0]['generated_text']
24
  return story
25
 
26
- # Stage 3: Story to Speech
27
  @st.cache_resource
28
  def load_tts():
29
- return pipeline("text-to-speech", model="facebook/fastspeech2-en-ljspeech")
30
 
31
  def story_to_audio(story_text):
32
  tts = load_tts()
@@ -46,16 +47,16 @@ def main():
46
  image = Image.open(uploaded_image).convert("RGB")
47
  st.image(image, caption="Uploaded Image", use_column_width=True)
48
 
49
- with st.spinner("Step 1: Generating description..."):
50
  caption = generate_caption(image)
51
  st.success(f"Caption: {caption}")
52
 
53
- with st.spinner("Step 2: Generating a short story..."):
54
  story = text2story(caption)
55
  st.success("Here's your story:")
56
  st.write(story)
57
 
58
- with st.spinner("Step 3: Converting story to audio..."):
59
  audio, sample_rate = story_to_audio(story)
60
  st.audio(audio, format="audio/wav", sample_rate=sample_rate)
61
 
 
1
  import streamlit as st
2
  from transformers import pipeline
3
  from PIL import Image
4
+ import io
5
 
6
# Stage 1: Image to Text (Captioning)
@st.cache_resource
def load_image_caption_model():
    """Build (once per session, via Streamlit's resource cache) the BLIP
    image-captioning pipeline used to describe the uploaded picture."""
    caption_pipeline = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base",
    )
    return caption_pipeline
 
13
  result = caption_model(image)
14
  return result[0]['generated_text']
15
 
16
# Stage 2: Text to Story (Children-friendly)
@st.cache_resource
def load_story_generator():
    """Build (once per session, via Streamlit's resource cache) the FLAN-T5
    text2text pipeline that rewrites a caption into a short story.

    max_length=100 caps the generated story length in tokens.
    """
    story_pipeline = pipeline(
        "text2text-generation",
        model="google/flan-t5-base",
        max_length=100,
    )
    return story_pipeline
20
 
21
def text2story(description):
    """Turn an image caption into a short children's story.

    Parameters
    ----------
    description : str
        The caption text produced by the image-captioning stage.

    Returns
    -------
    str
        The generated story text from the FLAN-T5 pipeline.
    """
    generator = load_story_generator()
    prompt = f"Generate a short and imaginative children's story about: {description}"
    outputs = generator(prompt)
    first_result = outputs[0]
    return first_result['generated_text']
26
 
27
# Stage 3: Story to Speech (Lightweight & Compatible)
@st.cache_resource
def load_tts():
    """Build (once per session, via Streamlit's resource cache) the Bark-small
    text-to-speech pipeline used to voice the generated story."""
    tts_pipeline = pipeline("text-to-speech", model="suno/bark-small")
    return tts_pipeline
31
 
32
  def story_to_audio(story_text):
33
  tts = load_tts()
 
47
  image = Image.open(uploaded_image).convert("RGB")
48
  st.image(image, caption="Uploaded Image", use_column_width=True)
49
 
50
+ with st.spinner("Generating description..."):
51
  caption = generate_caption(image)
52
  st.success(f"Caption: {caption}")
53
 
54
+ with st.spinner("Generating story from caption..."):
55
  story = text2story(caption)
56
  st.success("Here's your story:")
57
  st.write(story)
58
 
59
+ with st.spinner("Converting story to audio..."):
60
  audio, sample_rate = story_to_audio(story)
61
  st.audio(audio, format="audio/wav", sample_rate=sample_rate)
62