Bondya committed on
Commit
2d5370d
·
verified ·
1 Parent(s): 7ab5226

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -21
app.py CHANGED
@@ -1,28 +1,36 @@
1
 
2
  from transformers import pipeline
3
  import streamlit as st
4
- #from gtts import gTTS
 
 
5
 
6
  # function part
7
  # img2text
8
- def img2text(img):
9
- image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
10
- text = image_to_text_model(img)[0]["generated_text"]
11
- return text
 
 
 
12
 
13
  # text2story
14
  def text2story(scenario):
15
- generator = pipeline("text-generation", model="deepseek-ai/DeepSeek-V3")
16
- story = generator(
17
- f"Create a children's story based on: {scenario}",
18
- max_length=150,
19
- num_return_sequences=1
 
20
  )
21
- return story[0]["generated_text"]
 
 
22
 
23
  # text2audio
24
  def text2audio(story_text):
25
- tts = gTTS(text=story_text, lang="en", slow=False)
26
  audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
27
  tts.save(audio_file.name)
28
  return audio_file.name
@@ -34,18 +42,17 @@ st.set_page_config(page_title="Your Image to Audio Story",
34
  st.header("Turn Your Image to Audio Story")
35
  uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "png", "jpeg"])
36
 
37
- if uploaded_file is not None:
38
- print(uploaded_file)
39
- bytes_data = uploaded_file.getvalue()
40
- with open(uploaded_file.name, "wb") as file:
41
- file.write(bytes_data)
42
- st.image(uploaded_file, caption="Uploaded Image",
43
- use_column_width=True)
44
 
45
  #Stage 1: Image to Text
46
  st.text('Processing img2text...')
47
- scenario = img2text(uploaded_file.name)
48
- st.write(scenario)
49
 
50
  #Stage 2: Text to Story
51
  st.text('Generating a story...')
@@ -58,6 +65,9 @@ if uploaded_file is not None:
58
  audio_data =text2audio(story)
59
  st.subheader("Audio Story")
60
  st.audio(audio_path, format="audio/mp3")
 
 
 
61
  # Play button
62
  if st.button("Play Audio"):
63
  st.audio(audio_data['audio'],
 
1
 
2
import functools
import os
import tempfile

import streamlit as st
from gtts import gTTS
from transformers import pipeline
7
 
8
  # function part
9
  # img2text
10
def img2text(img_path):
    """Generate a short caption for the image at *img_path*.

    Args:
        img_path: Path to an image file readable by PIL.

    Returns:
        The caption string produced by the BLIP captioning model.
    """
    # The pipeline used to be rebuilt on every call, re-loading the whole
    # model for each uploaded image; fetch the cached instance instead.
    captioner = _get_captioner()
    result = captioner(img_path)
    # The pipeline returns a list with one dict per input image.
    return result[0]["generated_text"]


@functools.lru_cache(maxsize=1)
def _get_captioner():
    """Build (once) and return the BLIP image-captioning pipeline."""
    return pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base",
    )
17
 
18
  # text2story
19
def text2story(scenario):
    """Expand an image caption into a short children's story.

    Args:
        scenario: Caption text describing the uploaded image.

    Returns:
        The generated text (the model's output includes the prompt,
        as `generated_text` always does for text-generation pipelines).
    """
    # gpt2-medium is a stock hub model: trust_remote_code=True was
    # unnecessary and would execute arbitrary repository code if the
    # model id were ever swapped — removed.  The pipeline is also cached
    # so the model is loaded only once per process.
    generator = _get_story_generator()
    prompt = f"Create a children's story based on: {scenario}"
    # Generation kwargs belong on the call, not the constructor.
    outputs = generator(
        prompt,
        max_length=150,  # NOTE: max_length counts the prompt tokens too
        num_return_sequences=1,
    )
    return outputs[0]["generated_text"]


@functools.lru_cache(maxsize=1)
def _get_story_generator():
    """Build (once) and return the GPT-2 text-generation pipeline."""
    return pipeline("text-generation", model="gpt2-medium")
30
 
31
  # text2audio
32
def text2audio(story_text):
    """Synthesize *story_text* to speech and write it to a temp MP3 file.

    Args:
        story_text: The story to read aloud (English).

    Returns:
        Filesystem path of the generated .mp3 file.  The caller is
        responsible for deleting it when done.
    """
    tts = gTTS(text=story_text, lang="en")
    # delete=False keeps the file on disk after the handle goes away, but
    # the original never closed the handle: that leaks a descriptor per
    # call and, on Windows, would make tts.save() fail because the file
    # is still held open.  Close it before writing.
    audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    audio_file.close()
    tts.save(audio_file.name)
    return audio_file.name
 
42
  st.header("Turn Your Image to Audio Story")
43
  uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "png", "jpeg"])
44
 
45
+ if uploaded_file:
46
+ temp_img = os.path.join(tempfile.gettempdir(), uploaded_file.name)
47
+ with open(temp_img, "wb") as f:
48
+ f.write(uploaded_file.getvalue())
49
+
50
+ st.image(uploaded_file)
 
51
 
52
  #Stage 1: Image to Text
53
  st.text('Processing img2text...')
54
+ scenario = img2text(temp_img)
55
+ st.write("Image Caption:", scenario)
56
 
57
  #Stage 2: Text to Story
58
  st.text('Generating a story...')
 
65
  audio_data =text2audio(story)
66
  st.subheader("Audio Story")
67
  st.audio(audio_path, format="audio/mp3")
68
+
69
+ os.unlink(temp_img)
70
+ os.unlink(audio_path)
71
  # Play button
72
  if st.button("Play Audio"):
73
  st.audio(audio_data['audio'],