TLH01 committed on
Commit
870428e
·
verified ·
1 Parent(s): bed9467

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -73
app.py CHANGED
@@ -7,93 +7,86 @@ import torch
7
  from gtts import gTTS
8
  import io
9
 
10
- # ======================
11
- # Stage 1: Image Captioning
12
- # ======================
13
- def image_to_caption(uploaded_image):
14
- processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
15
- model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
16
 
17
- try:
18
- img = Image.open(uploaded_image).convert("RGB")
19
- inputs = processor(
20
- images=img,
21
- return_tensors="pt",
22
- padding=True,
23
- truncation=True,
24
- max_length=30
25
- )
26
- outputs = model.generate(**inputs)
27
- return processor.decode(outputs[0], skip_special_tokens=True)
28
- except:
29
- return "a happy scene with children" # Fallback caption
30
-
31
- # ======================
32
- # Stage 2: Story Generation
33
- # ======================
34
- def generate_story(caption):
35
- tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
36
- model = GPT2LMHeadModel.from_pretrained("gpt2")
37
 
38
- prompt = f"""Create a children's story (3-6 years old) about {caption} with:
39
- 1. Friendly animals
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  2. Happy ending
41
  3. 50-100 words
42
  Story:"""
43
 
44
- try:
45
- inputs = tokenizer(prompt, return_tensors="pt")
46
- outputs = model.generate(
47
- inputs.input_ids,
48
- max_length=300,
49
- num_return_sequences=1,
50
- no_repeat_ngram_size=2,
51
- early_stopping=True
52
- )
53
- story = tokenizer.decode(outputs[0], skip_special_tokens=True)
54
- return story.replace(prompt, "").strip()[:500] # Length control
55
- except:
56
- return """Once upon a time, there was a friendly bear who loved playing with children.
57
- They had wonderful adventures every day, always ending with big hugs and happy smiles!"""
58
 
59
- # ======================
60
- # Stage 3: Text-to-Speech
61
- # ======================
62
- def create_audio(story_text):
63
- try:
64
- tts = gTTS(text=story_text[:500], lang='en', slow=False)
65
- audio_buffer = io.BytesIO()
66
- tts.write_to_fp(audio_buffer)
67
- audio_buffer.seek(0)
68
- return audio_buffer
69
- except:
70
- return None # Silent fallback
71
 
72
- # ======================
73
- # Main Application
74
- # ======================
75
  def main():
76
- st.title("🎈 Children's Story Maker")
77
 
78
- uploaded_file = st.file_uploader("Upload a child's photo", type=["jpg", "png"])
 
 
 
79
 
80
  if uploaded_file:
81
- img = Image.open(uploaded_file)
82
- st.image(img, use_column_width=True)
83
 
84
  # Processing pipeline
85
- caption = image_to_caption(uploaded_file)
86
- story = generate_story(caption)
87
-
88
- st.subheader("Generated Story")
89
- st.write(story)
90
-
91
- if audio_data := create_audio(story):
92
- st.audio(audio_data, format="audio/mp3")
93
- st.download_button("Download Audio",
94
- data=audio_data,
95
- file_name="story.mp3",
96
- mime="audio/mp3")
 
 
 
 
 
 
 
97
 
98
  if __name__ == "__main__":
99
  main()
 
7
  from gtts import gTTS
8
  import io
9
 
10
# Pre-load models during app initialization
@st.cache_resource
def load_models():
    """Load the captioning and story-generation models once per session.

    Returns:
        tuple: (BLIP processor, BLIP caption model, GPT-2 tokenizer,
        GPT-2 language model). ``st.cache_resource`` caches the result,
        so Streamlit reruns reuse the same loaded objects instead of
        re-downloading/re-instantiating them.
    """
    # Image captioning model (repo id shared by processor and model)
    blip_id = "Salesforce/blip-image-captioning-base"
    caption_processor = BlipProcessor.from_pretrained(blip_id)
    caption_model = BlipForConditionalGeneration.from_pretrained(blip_id)

    # Story generation model
    story_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    story_model = GPT2LMHeadModel.from_pretrained("gpt2")

    return caption_processor, caption_model, story_tokenizer, story_model
22
+
23
def generate_caption(uploaded_image, processor, model):
    """Produce a short natural-language caption for an uploaded image.

    Args:
        uploaded_image: A file-like object (e.g. a Streamlit
            ``UploadedFile``) or path accepted by ``PIL.Image.open``.
        processor: BLIP processor turning the image into model tensors.
        model: BLIP conditional-generation model.

    Returns:
        str: The decoded caption with special tokens stripped.
    """
    # Rewind the stream in case a previous consumer (e.g. st.image)
    # already read it; Image.open would otherwise see EOF.
    if hasattr(uploaded_image, "seek"):
        uploaded_image.seek(0)
    img = Image.open(uploaded_image).convert("RGB")
    # padding/truncation are text-tokenizer options; for an image-only
    # call they do nothing, so they are dropped here.
    inputs = processor(images=img, return_tensors="pt")
    outputs = model.generate(**inputs)
    return processor.decode(outputs[0], skip_special_tokens=True)
33
+
34
def create_story(caption, tokenizer, model):
    """Generate a short children's story seeded by an image caption.

    Args:
        caption: Scene description inserted into the prompt.
        tokenizer: GPT-2 tokenizer.
        model: GPT-2 language model.

    Returns:
        str: The generated continuation only (prompt removed), stripped
        of surrounding whitespace.
    """
    prompt = f"""Create a children's story about {caption} with:
1. Friendly characters
2. Happy ending
3. 50-100 words
Story:"""

    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        inputs.input_ids,
        max_length=300,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        # GPT-2 defines no pad token; reuse EOS to avoid the
        # "Setting pad_token_id" warning on every generation.
        pad_token_id=tokenizer.eos_token_id,
    )
    # Strip the prompt by token count rather than string replace():
    # decode() may normalize whitespace, in which case replace() would
    # silently fail and leak the prompt into the displayed story.
    prompt_len = inputs.input_ids.shape[1]
    story_ids = outputs[0][prompt_len:]
    return tokenizer.decode(story_ids, skip_special_tokens=True).strip()
 
 
 
 
 
50
 
51
def text_to_audio(text):
    """Synthesize the given text as spoken English audio.

    Args:
        text: The story text to read aloud.

    Returns:
        io.BytesIO: An in-memory MP3 stream, rewound to position 0 so
        it is ready for ``st.audio`` or a download button.
    """
    speech = gTTS(text=text, lang='en')
    buffer = io.BytesIO()
    speech.write_to_fp(buffer)
    buffer.seek(0)
    return buffer
 
 
 
 
 
 
57
 
 
 
 
58
def main():
    """Streamlit entry point: photo upload -> caption -> story -> audio."""
    st.title("Children's Story Generator")

    # Load models once at startup (cached across reruns by load_models)
    img_processor, img_model, text_tokenizer, text_model = load_models()

    uploaded_file = st.file_uploader("Upload a photo", type=["jpg", "png", "jpeg"])

    if uploaded_file:
        # Display image with corrected parameter
        st.image(uploaded_file, use_container_width=True)

        # NOTE(review): st.image may advance the upload buffer; rewind
        # before handing the same object to PIL so Image.open sees the
        # full stream — confirm against the Streamlit version in use.
        uploaded_file.seek(0)

        # Processing pipeline
        with st.spinner("Analyzing image..."):
            caption = generate_caption(uploaded_file, img_processor, img_model)
            st.subheader("Image Analysis")
            st.write(f"Detected scene: {caption}")

        with st.spinner("Writing story..."):
            story = create_story(caption, text_tokenizer, text_model)
            st.subheader("Generated Story")
            st.write(story)

        with st.spinner("Creating audio..."):
            audio = text_to_audio(story)
            st.audio(audio, format="audio/mp3")
            st.download_button(
                "Download Audio",
                data=audio,
                file_name="story.mp3",
                mime="audio/mp3",
            )


if __name__ == "__main__":
    main()