Spaces:

TLH01
/

Individualssignment

Build error

App Files Files Community

TLH01 committed on May 1, 2025

Commit

5e5ea3c

verified ·

1 Parent(s): 0311f5b

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -74

app.py CHANGED Viewed

@@ -1,39 +1,38 @@
-"""
-儿童故事生成器 (Children's Story Generator)
-功能：上传图片 → 生成描述 → 创作故事 → 语音朗读
-"""
-# ============ 导入模块 ============
 import streamlit as st
 from PIL import Image
 import tempfile
-from transformers import pipeline
 import torch
-import os
-# ============ 第一阶段：图片描述生成 ============
-@st.cache_resource  # 缓存模型避免重复加载
 def load_image_captioner():
-    """加载图片描述模型（BLIP模型）"""
     return pipeline(
         "image-to-text",
         model="Salesforce/blip-image-captioning-base",
-        device="cuda" if torch.cuda.is_available() else "cpu"  # 自动检测GPU
     )
 def generate_caption(_pipeline, image):
-    """生成图片英文描述"""
     try:
-        result = _pipeline(image, max_new_tokens=50)  # 限制生成长度
         return result[0]['generated_text']
     except Exception as e:
-        st.error(f"生成描述失败: {str(e)}")
         return None
-# ============ 第二阶段：故事创作 ============
 @st.cache_resource
 def load_story_generator():
-    """加载儿童故事生成模型"""
     return pipeline(
         "text-generation",
         model="pranavpsv/gpt2-genre-story-generator",
@@ -41,10 +40,10 @@ def load_story_generator():
     )
 def generate_story(_pipeline, keywords):
-    """根据关键词生成儿童故事"""
-    prompt = f"""Generate a children's story (60-80 words) in English about: {keywords}
     Requirements:
-    - Use simple words
     - Include magical elements
     - Happy ending
     Story:"""
@@ -53,17 +52,19 @@ def generate_story(_pipeline, keywords):
         story = _pipeline(
             prompt,
             max_length=200,
-            temperature=0.7  # 控制创意程度
         )[0]['generated_text']
         return story.replace(prompt, "").strip()
     except Exception as e:
-        st.error(f"生成故事失败: {str(e)}")
         return None
-# ============ 第三阶段：语音合成 ============
 @st.cache_resource
 def load_tts():
-    """加载文本转语音模型"""
     return pipeline(
         "text-to-speech",
         model="facebook/mms-tts-eng",
@@ -71,74 +72,49 @@ def load_tts():
     )
 def text_to_speech(_pipeline, text):
-    """将文本转为语音"""
     try:
         audio = _pipeline(text)
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
-            import soundfile as sf
-            sf.write(f.name, audio["audio"].squeeze().numpy(), audio["sampling_rate"])
             return f.name
     except Exception as e:
-        st.error(f"语音生成失败: {str(e)}")
         return None
-# ============ 主界面 ============
 def main():
-    # 界面设置
-    st.set_page_config(
-        page_title="魔法故事生成器",
-        page_icon="🧚",
-        layout="wide"
-    )
-    # 儿童风格CSS
-    st.markdown("""
-    <style>
-    .main { background-color: #FFF5E6 }
-    h1 { color: #FF6B6B; font-family: 'Comic Sans MS' }
-    .stButton>button { background-color: #4CAF50; border-radius: 20px }
-    </style>
-    """, unsafe_allow_html=True)
-    st.title("🧚 魔法故事生成器")
-    st.write("上传小朋友的照片，AI会生成专属故事并朗读！")
-    # 图片上传
-    uploaded_file = st.file_uploader("选择照片", type=["jpg", "png"])
-    if not uploaded_file:
-        st.info("请先上传照片")
         return
-    image = Image.open(uploaded_file)
-    st.image(image, use_column_width=True)
-    # 加载模型
-    with st.spinner("正在准备魔法..."):
         caption_pipe = load_image_captioner()
         story_pipe = load_story_generator()
         tts_pipe = load_tts()
-    # 第一阶段
-    with st.spinner("正在分析图片..."):
         caption = generate_caption(caption_pipe, image)
         if caption:
-            st.success(f"图片描述: {caption}")
-    # 第二阶段
-    if caption:
-        with st.spinner("正在创作故事..."):
             story = generate_story(story_pipe, caption)
             if story:
-                st.subheader("你的故事")
-                st.markdown(f'<div style="background-color:#FFF0F5; padding:20px; border-radius:15px">{story}</div>', unsafe_allow_html=True)
-                # 第三阶段
-                with st.spinner("正在生成语音..."):
-                    audio_path = text_to_speech(tts_pipe, story)
-                    if audio_path:
-                        st.audio(audio_path, format="audio/wav")
 if __name__ == "__main__":
-    os.environ["HF_HUB_CACHE"] = "/tmp/huggingface"  # 设置缓存路径
     main()

 import streamlit as st
+from transformers import pipeline
 from PIL import Image
 import tempfile
+import numpy as np
 import torch
+import soundfile as sf
+# ======================
+# Stage 1: Image Captioning
+# ======================
+@st.cache_resource
 def load_image_captioner():
+    """Load BLIP model for image caption generation"""
     return pipeline(
         "image-to-text",
         model="Salesforce/blip-image-captioning-base",
+        device="cuda" if torch.cuda.is_available() else "cpu"
     )
 def generate_caption(_pipeline, image):
+    """Generate English description from image"""
     try:
+        result = _pipeline(image, max_new_tokens=50)
         return result[0]['generated_text']
     except Exception as e:
+        st.error(f"Caption generation failed: {str(e)}")
         return None
+# ======================
+# Stage 2: Story Generation
+# ======================
 @st.cache_resource
 def load_story_generator():
+    """Load fine-tuned story generator"""
     return pipeline(
         "text-generation",
         model="pranavpsv/gpt2-genre-story-generator",
     )
 def generate_story(_pipeline, keywords):
+    """Generate children's story based on keywords"""
+    prompt = f"""Generate a children's story (60-80 words) about: {keywords}
     Requirements:
+    - Use simple English
     - Include magical elements
     - Happy ending
     Story:"""
         story = _pipeline(
             prompt,
             max_length=200,
+            temperature=0.7
         )[0]['generated_text']
         return story.replace(prompt, "").strip()
     except Exception as e:
+        st.error(f"Story generation failed: {str(e)}")
         return None
+# ======================
+# Stage 3: Text-to-Speech
+# ======================
 @st.cache_resource
 def load_tts():
+    """Load TTS model for audio generation"""
     return pipeline(
         "text-to-speech",
         model="facebook/mms-tts-eng",
     )
 def text_to_speech(_pipeline, text):
+    """Convert text to speech audio"""
     try:
         audio = _pipeline(text)
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
+            sf.write(f.name, audio["audio"], audio["sampling_rate"])
             return f.name
     except Exception as e:
+        st.error(f"Audio generation failed: {str(e)}")
         return None
+# Main App
 def main():
+    st.set_page_config(page_title="Magic Story Generator", layout="wide")
+    st.title("🧚 Magic Story Generator")
+    uploaded_image = st.file_uploader("Upload a photo", type=["jpg", "png"])
+    if not uploaded_image:
         return
+    image = Image.open(uploaded_image)
+    st.image(image, use_container_width=True)  # Fixed deprecated parameter
+    # Process stages
+    with st.spinner("Processing..."):
         caption_pipe = load_image_captioner()
         story_pipe = load_story_generator()
         tts_pipe = load_tts()
+        # Stage 1
         caption = generate_caption(caption_pipe, image)
         if caption:
+            st.success(f"Image description: {caption}")
+            # Stage 2
             story = generate_story(story_pipe, caption)
             if story:
+                st.subheader("Your Story")
+                st.markdown(f'<div class="story-box">{story}</div>', unsafe_allow_html=True)
+                # Stage 3
+                audio_path = text_to_speech(tts_pipe, story)
+                if audio_path:
+                    st.audio(audio_path, format="audio/wav")
 if __name__ == "__main__":
+    # Entry point: build and run the page when executed as a script.
     main()