Bondya committed on
Commit
38da107
·
verified ·
1 Parent(s): bc98b06

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -9
app.py CHANGED
@@ -1,29 +1,37 @@
 
1
  import streamlit as st
2
  from transformers import pipeline
3
  from gtts import gTTS
4
  import tempfile
5
  import os
6
 
 
 
7
  def img2text(img_path):
 
8
  captioner = pipeline(
9
  "image-to-text",
10
- model="nlpconnect/vit-gpt2-image-captioning" # 修正模型名称
11
  )
12
  result = captioner(img_path)
13
  return result[0]["generated_text"]
14
 
 
15
  def text2story(scenario):
 
16
  generator = pipeline(
17
  "text-generation",
18
- model="gpt2", # 修正模型名称
19
- max_length=200,
20
- num_return_sequences=1
21
  )
22
  prompt = f"Create a children's story based on: {scenario}"
23
  story = generator(prompt)[0]["generated_text"]
24
  return story
25
 
 
26
  def text2audio(story_text):
 
27
  tts = gTTS(text=story_text, lang="en")
28
  audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
29
  tts.save(audio_file.name)
@@ -47,22 +55,26 @@ def main():
47
  f.write(uploaded_file.getvalue())
48
 
49
  st.image(uploaded_file)
50
-
 
51
  with st.status("🖼️ Processing image..."):
52
  scenario = img2text(temp_img)
53
  st.write("Image Caption:", scenario)
54
-
 
55
  with st.status("📖 Generating story..."):
56
  story = text2story(scenario)
57
  st.subheader("Story")
58
  st.write(story)
59
-
 
60
  with st.status("🔊 Converting audio..."):
61
  audio_path = text2audio(story)
62
-
63
  if st.button("▶️ Play Audio Story"):
64
  st.audio(audio_path, format="audio/mp3")
65
-
 
66
  os.unlink(temp_img)
67
  os.unlink(audio_path)
68
 
 
1
+
2
  import streamlit as st
3
  from transformers import pipeline
4
  from gtts import gTTS
5
  import tempfile
6
  import os
7
 
8
+ # function part
9
+ # img2text
10
@st.cache_resource
def _load_captioner():
    # Loading the captioning model is expensive; cache it so Streamlit
    # reuses one instance across reruns instead of reloading every call.
    return pipeline(
        "image-to-text",
        model="nlpconnect/vit-gpt2-image-captioning",  # relatively fast and accurate
    )


def img2text(img_path):
    """Generate a text caption for an image.

    Args:
        img_path: Path to the image file to caption.

    Returns:
        The caption string produced by the image-to-text model.
    """
    result = _load_captioner()(img_path)
    return result[0]["generated_text"]
18
 
19
+ # text2story
20
@st.cache_resource
def _load_story_generator():
    # Loading GPT-2 is expensive; cache the pipeline so Streamlit reuses
    # one instance across reruns instead of reloading on every call.
    return pipeline("text-generation", model="gpt2")  # relatively small but fast


def text2story(scenario):
    """Generate a short children's story from an image caption.

    Args:
        scenario: Caption text describing the image.

    Returns:
        The generated story text with the instruction prompt removed.
    """
    generator = _load_story_generator()
    prompt = f"Create a children's story based on: {scenario}"
    output = generator(
        prompt,
        max_length=200,          # maximum story length (in tokens)
        num_return_sequences=1,  # number of variants to generate
    )
    story = output[0]["generated_text"]
    # GPT-2 echoes the prompt at the start of its output; strip it so the
    # user sees only the story itself.
    if story.startswith(prompt):
        story = story[len(prompt):].lstrip()
    return story
31
 
32
+ # text2audio
33
  def text2audio(story_text):
34
+ # Audio file creation
35
  tts = gTTS(text=story_text, lang="en")
36
  audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
37
  tts.save(audio_file.name)
 
55
  f.write(uploaded_file.getvalue())
56
 
57
  st.image(uploaded_file)
58
+
59
+ #Stage 1: Image to Text
60
  with st.status("🖼️ Processing image..."):
61
  scenario = img2text(temp_img)
62
  st.write("Image Caption:", scenario)
63
+
64
+ #Stage 2: Text to Story
65
  with st.status("📖 Generating story..."):
66
  story = text2story(scenario)
67
  st.subheader("Story")
68
  st.write(story)
69
+
70
+ #Stage 3: Story to Audio data
71
  with st.status("🔊 Converting audio..."):
72
  audio_path = text2audio(story)
73
+ # Play button
74
  if st.button("▶️ Play Audio Story"):
75
  st.audio(audio_path, format="audio/mp3")
76
+
77
+ # Cleanup
78
  os.unlink(temp_img)
79
  os.unlink(audio_path)
80