Spaces:

Bondya
/

testasd1

Build error

App Files Files Community

Bondya committed on Mar 9, 2025

Commit

38da107

verified ·

1 Parent(s): bc98b06

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -9

app.py CHANGED Viewed

@@ -1,29 +1,37 @@
 import streamlit as st
 from transformers import pipeline
 from gtts import gTTS
 import tempfile
 import os
 def img2text(img_path):
     captioner = pipeline(
         "image-to-text",
-        model="nlpconnect/vit-gpt2-image-captioning"  # 修正模型名称
     )
     result = captioner(img_path)
     return result[0]["generated_text"]
 def text2story(scenario):
     generator = pipeline(
         "text-generation",
-        model="gpt2",  # 修正模型名称
-        max_length=200,
-        num_return_sequences=1
     )
     prompt = f"Create a children's story based on: {scenario}"
     story = generator(prompt)[0]["generated_text"]
     return story
 def text2audio(story_text):
     tts = gTTS(text=story_text, lang="en")
     audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
     tts.save(audio_file.name)
@@ -47,22 +55,26 @@ def main():
             f.write(uploaded_file.getvalue())
         st.image(uploaded_file)
         with st.status("🖼️ Processing image..."):
             scenario = img2text(temp_img)
             st.write("Image Caption:", scenario)
         with st.status("📖 Generating story..."):
             story = text2story(scenario)
             st.subheader("Story")
             st.write(story)
         with st.status("🔊 Converting audio..."):
             audio_path = text2audio(story)
         if st.button("▶️ Play Audio Story"):
             st.audio(audio_path, format="audio/mp3")
         os.unlink(temp_img)
         os.unlink(audio_path)

 import streamlit as st
 from transformers import pipeline
 from gtts import gTTS
 import tempfile
 import os
+# function part
+# img2text
 def img2text(img_path):
+    # Image captioning model
     captioner = pipeline(
         "image-to-text",
+        model="nlpconnect/vit-gpt2-image-captioning"  #This model is relatively fast and accurate
     )
     result = captioner(img_path)
     return result[0]["generated_text"]
+# text2story
 def text2story(scenario):
+    # Story generator config
     generator = pipeline(
         "text-generation",
+        model="gpt2",  #Relatively small but fast
+        max_length=200, # Maximum story length
+        num_return_sequences=1 #Number of variants to generate
     )
     prompt = f"Create a children's story based on: {scenario}"
     story = generator(prompt)[0]["generated_text"]
     return story
+# text2audio
 def text2audio(story_text):
+    # Audio file creation
     tts = gTTS(text=story_text, lang="en")
     audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
     tts.save(audio_file.name)
             f.write(uploaded_file.getvalue())
         st.image(uploaded_file)
+        #Stage 1: Image to Text
         with st.status("🖼️ Processing image..."):
             scenario = img2text(temp_img)
             st.write("Image Caption:", scenario)
+        #Stage 2: Text to Story
         with st.status("📖 Generating story..."):
             story = text2story(scenario)
             st.subheader("Story")
             st.write(story)
+        #Stage 3: Story to Audio data
         with st.status("🔊 Converting audio..."):
             audio_path = text2audio(story)
+        # Play button
         if st.button("▶️ Play Audio Story"):
             st.audio(audio_path, format="audio/mp3")
+        # Cleanup
         os.unlink(temp_img)
         os.unlink(audio_path)