Spaces:

TLH01
/

Individualssignment

Build error

App Files Files Community

TLH01 committed on May 1, 2025

Commit

a7bd32c

verified ·

1 Parent(s): 5e5ea3c

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -46

app.py CHANGED Viewed

@@ -2,16 +2,15 @@ import streamlit as st
 from transformers import pipeline
 from PIL import Image
 import tempfile
-import numpy as np
 import torch
-import soundfile as sf
 # ======================
 # Stage 1: Image Captioning
 # ======================
 @st.cache_resource
 def load_image_captioner():
-    """Load BLIP model for image caption generation"""
     return pipeline(
         "image-to-text",
         model="Salesforce/blip-image-captioning-base",
@@ -19,7 +18,6 @@ def load_image_captioner():
     )
 def generate_caption(_pipeline, image):
-    """Generate English description from image"""
     try:
         result = _pipeline(image, max_new_tokens=50)
         return result[0]['generated_text']
@@ -32,89 +30,76 @@ def generate_caption(_pipeline, image):
 # ======================
 @st.cache_resource
 def load_story_generator():
-    """Load fine-tuned story generator"""
     return pipeline(
         "text-generation",
-        model="pranavpsv/gpt2-genre-story-generator",
         device="cuda" if torch.cuda.is_available() else "cpu"
     )
-def generate_story(_pipeline, keywords):
-    """Generate children's story based on keywords"""
-    prompt = f"""Generate a children's story (60-80 words) about: {keywords}
-    Requirements:
-    - Use simple English
-    - Include magical elements
-    - Happy ending
-    Story:"""
     try:
-        story = _pipeline(
-            prompt,
-            max_length=200,
-            temperature=0.7
-        )[0]['generated_text']
         return story.replace(prompt, "").strip()
     except Exception as e:
         st.error(f"Story generation failed: {str(e)}")
         return None
 # ======================
-# Stage 3: Text-to-Speech
 # ======================
 @st.cache_resource
 def load_tts():
-    """Load TTS model for audio generation"""
-    return pipeline(
-        "text-to-speech",
-        model="facebook/mms-tts-eng",
-        device="cuda" if torch.cuda.is_available() else "cpu"
-    )
-def text_to_speech(_pipeline, text):
-    """Convert text to speech audio"""
     try:
-        audio = _pipeline(text)
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
-            sf.write(f.name, audio["audio"], audio["sampling_rate"])
             return f.name
     except Exception as e:
         st.error(f"Audio generation failed: {str(e)}")
         return None
-# Main App
 def main():
     st.set_page_config(page_title="Magic Story Generator", layout="wide")
     st.title("🧚 Magic Story Generator")
-    uploaded_image = st.file_uploader("Upload a photo", type=["jpg", "png"])
     if not uploaded_image:
         return
     image = Image.open(uploaded_image)
-    st.image(image, use_container_width=True)  # Fixed deprecated parameter
-    # Process stages
-    with st.spinner("Processing..."):
         caption_pipe = load_image_captioner()
         story_pipe = load_story_generator()
-        tts_pipe = load_tts()
-        # Stage 1
         caption = generate_caption(caption_pipe, image)
         if caption:
             st.success(f"Image description: {caption}")
-            # Stage 2
             story = generate_story(story_pipe, caption)
             if story:
-                st.subheader("Your Story")
-                st.markdown(f'<div class="story-box">{story}</div>', unsafe_allow_html=True)
-                # Stage 3
-                audio_path = text_to_speech(tts_pipe, story)
                 if audio_path:
                     st.audio(audio_path, format="audio/wav")
 if __name__ == "__main__":
-    main()

 from transformers import pipeline
 from PIL import Image
 import tempfile
 import torch
+from TTS.api import TTS  # Coqui TTS
+import os
 # ======================
 # Stage 1: Image Captioning
 # ======================
 @st.cache_resource
 def load_image_captioner():
     return pipeline(
         "image-to-text",
         model="Salesforce/blip-image-captioning-base",
     )
 def generate_caption(_pipeline, image):
     try:
         result = _pipeline(image, max_new_tokens=50)
         return result[0]['generated_text']
 # ======================
 @st.cache_resource
 def load_story_generator():
     return pipeline(
         "text-generation",
+        model="pranavpsv/gpt2-genre-story-generator",  # can be swapped for a stronger model
         device="cuda" if torch.cuda.is_available() else "cpu"
     )
+def generate_story(_pipeline, caption):
+    prompt = f"""You are a children's storyteller. Based on the following image description: "{caption}", write a short children's story (80 words max).
+The story should:
+- Use simple and friendly language
+- Be related to the content of the image
+- Include a magical or fun twist
+- End happily
+Story:"""
     try:
+        story = _pipeline(prompt, max_length=200, temperature=0.7)[0]['generated_text']
         return story.replace(prompt, "").strip()
     except Exception as e:
         st.error(f"Story generation failed: {str(e)}")
         return None
 # ======================
+# Stage 3: Text-to-Speech using Coqui TTS
 # ======================
 @st.cache_resource
 def load_tts():
+    return TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=torch.cuda.is_available())
+def text_to_speech(tts_model, story_text):
     try:
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
+            tts_model.tts_to_file(text=story_text, file_path=f.name)
             return f.name
     except Exception as e:
         st.error(f"Audio generation failed: {str(e)}")
         return None
+# ======================
+# Main Streamlit App
+# ======================
 def main():
     st.set_page_config(page_title="Magic Story Generator", layout="wide")
     st.title("🧚 Magic Story Generator")
+    uploaded_image = st.file_uploader("Upload a photo", type=["jpg", "jpeg", "png"])
     if not uploaded_image:
         return
     image = Image.open(uploaded_image)
+    st.image(image, use_container_width=True)
+    with st.spinner("Processing your magical story..."):
         caption_pipe = load_image_captioner()
         story_pipe = load_story_generator()
+        tts_model = load_tts()
         caption = generate_caption(caption_pipe, image)
         if caption:
             st.success(f"Image description: {caption}")
             story = generate_story(story_pipe, caption)
             if story:
+                st.subheader("Your Magical Story")
+                st.markdown(story)
+                audio_path = text_to_speech(tts_model, story)
                 if audio_path:
                     st.audio(audio_path, format="audio/wav")
 if __name__ == "__main__":
+    main()