EmmaL1 committed on
Commit
c63d328
·
verified ·
1 Parent(s): a4d3132

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -16
app.py CHANGED
@@ -1,48 +1,62 @@
1
  # import part
2
  import streamlit as st
3
  from transformers import pipeline
4
- from PIL import Image
5
 
6
  # function part
7
- # image to text
8
  def img2text(img):
9
  image_to_text_model = pipeline("image-to-text",
10
  model="Salesforce/blip-image-captioning-base")
11
  text = image_to_text_model(img)[0]["generated_text"]
12
  return text
13
- st.write(text)
14
 
15
- # text to story
16
  def text2story(text):
17
  text_generation_model = pipeline("text-generation",
18
  model="meta-llama/Meta-Llama-3-8B")
19
  story_text = "Once upon a time in a land far, far away"
20
- generated_story = story_text(story_text,
21
  max_length=100,
22
  num_return_sequences=1)
23
-
24
  return generated_story
25
- st.write(generated_story)
26
 
27
- # text to audio
28
- def text2audio(story_text)
29
  text_to_speech_model = pipeline("text-to-speech", model="facebook/mms-tts-eng")
30
  text_to_speak = """
31
  Hi everyone, I'm going to start with a story now!
32
  """
33
  speech_output = text_to_speech_model(text_to_speak)
34
- print("Text-to-Speech Output:", speech_output)
35
-
36
- from IPython.display import Audio
37
- print(text_to_speak)
38
- st.audio(speech_output['audio'],
39
- sample_rate=speech_output['sampling_rate'])
40
 
41
  # main part
42
  st.set_page_config(page_title="Your Image to Audio Story",
43
  page_icon="*")
44
  st.header("Turn Your Image to Audio Story")
45
  uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "png", "jpeg"])
 
46
  if uploaded_file is not None:
47
  image = Image.open(uploaded_file).convert("RGB")
48
- st.image(image, caption="Uploaded Image", use_column_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# import part
import streamlit as st
from PIL import Image
from transformers import pipeline
4
 
5
  # function part
6
# image2text
def img2text(img):
    """Caption *img* with a BLIP image-captioning model.

    Args:
        img: An image accepted by the HF image-to-text pipeline
             (PIL image, file path, or URL).

    Returns:
        The generated caption string.
    """
    captioner = pipeline("image-to-text",
                         model="Salesforce/blip-image-captioning-base")
    # The pipeline yields a list of dicts; take the first caption.
    caption = captioner(img)[0]["generated_text"]
    return caption
 
12
 
13
# text2story
def text2story(text):
    """Expand an image caption into a short story.

    Args:
        text: The image caption produced by ``img2text``.

    Returns:
        The generated story as a plain string.
    """
    text_generation_model = pipeline("text-generation",
                                     model="meta-llama/Meta-Llama-3-8B")
    # Bug fix: the original ignored `text` entirely and always generated
    # from a hard-coded prompt. Seed the generator with the caption so the
    # story actually relates to the uploaded image.
    prompt = f"Once upon a time in a land far, far away, {text}"
    generated = text_generation_model(prompt,
                                      max_length=100,
                                      num_return_sequences=1)
    # The pipeline returns a list of dicts; extract the story text so
    # callers get a displayable / speakable string, not the raw structure.
    return generated[0]["generated_text"]
 
22
 
23
# text2audio
def text2audio(story_text):
    """Synthesize speech for *story_text*.

    Args:
        story_text: The story string to read aloud.

    Returns:
        The TTS pipeline output — a dict with 'audio' (waveform array)
        and 'sampling_rate' keys.
    """
    text_to_speech_model = pipeline("text-to-speech", model="facebook/mms-tts-eng")
    # Bug fix: the original ignored `story_text` and always synthesized a
    # fixed announcement ("Hi everyone, ..."), so the story was never spoken.
    speech_output = text_to_speech_model(story_text)
    return speech_output
 
 
 
 
 
31
 
32
# main part
st.set_page_config(page_title="Your Image to Audio Story",
                   page_icon="*")
st.header("Turn Your Image to Audio Story")
uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "png", "jpeg"])

if uploaded_file is not None:
    image = Image.open(uploaded_file).convert("RGB")
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # stage 1: caption the image.
    # Bug fix: pass the loaded PIL image, not `uploaded_file.name` —
    # the client-side file name is not a path that exists on the server.
    st.text('Processing img2text...')
    scenario = img2text(image)
    st.write(scenario)

    # stage 2: turn the caption into a story.
    # Bug fix: the result was assigned back to `scenario` while the code
    # below read an undefined name `story` (NameError at runtime).
    st.text('Generating a story...')
    story = text2story(scenario)
    st.write(story)

    # stage 3: synthesize audio for the story.
    st.text('Generating audio data...')
    audio_data = text2audio(story)

    if st.button("Play Audio"):
        # Bug fix: the original referenced `speech_output`, which is local
        # to text2audio and undefined here — use the returned `audio_data`.
        st.audio(audio_data['audio'],
                 format="audio/wav",
                 start_time=0,
                 sample_rate=audio_data['sampling_rate'])
        # NOTE(review): this extra clip looks like leftover test audio —
        # confirm "kids_playing_audio.wav" is bundled with the app.
        st.audio("kids_playing_audio.wav")