TLH01 committed on
Commit
bed9467
·
verified ·
1 Parent(s): cf274d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -116
app.py CHANGED
@@ -1,145 +1,99 @@
 
1
  import streamlit as st
2
  from PIL import Image
3
- import requests
4
  from transformers import BlipProcessor, BlipForConditionalGeneration
5
  from transformers import GPT2Tokenizer, GPT2LMHeadModel
6
  import torch
 
7
  import io
8
- import soundfile as sf
9
- from speechbrain.pretrained import Tacotron2
10
- from speechbrain.pretrained import HIFIGAN
11
 
12
- # Stage 1: Image to Keyword/Caption
13
- def image_to_keyword(uploaded_image):
 
 
 
 
 
14
  try:
15
- # Load model
16
- processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
17
- model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
18
-
19
- # Process image
20
- raw_image = Image.open(uploaded_image).convert('RGB')
21
- inputs = processor(raw_image, return_tensors="pt")
22
-
23
- # Generate caption
24
- out = model.generate(**inputs)
25
- caption = processor.decode(out[0], skip_special_tokens=True)
26
-
27
- return caption
28
- except Exception as e:
29
- st.error(f"Error in image captioning: {str(e)}")
30
- return None
31
 
32
- # Stage 2: Keyword to Story
33
- def keyword_to_story(keyword):
 
 
 
 
 
 
 
 
 
 
 
34
  try:
35
- # Load model
36
- tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
37
- model = GPT2LMHeadModel.from_pretrained("gpt2")
38
-
39
- # Create prompt
40
- prompt = f"Write a short story between 50-100 words based on: {keyword}\n\nStory:"
41
-
42
- # Generate story
43
  inputs = tokenizer(prompt, return_tensors="pt")
44
  outputs = model.generate(
45
  inputs.input_ids,
46
- max_length=200,
47
  num_return_sequences=1,
48
  no_repeat_ngram_size=2,
49
  early_stopping=True
50
  )
51
-
52
  story = tokenizer.decode(outputs[0], skip_special_tokens=True)
53
-
54
- # Clean up the story (remove prompt if it appears)
55
- story = story.replace(prompt, "").strip()
56
-
57
- # Ensure story length is between 50-100 words
58
- words = story.split()
59
- if len(words) > 100:
60
- story = " ".join(words[:100])
61
- elif len(words) < 50:
62
- # If too short, try again with higher temperature
63
- outputs = model.generate(
64
- inputs.input_ids,
65
- max_length=200,
66
- num_return_sequences=1,
67
- no_repeat_ngram_size=2,
68
- do_sample=True,
69
- temperature=0.9,
70
- early_stopping=True
71
- )
72
- story = tokenizer.decode(outputs[0], skip_special_tokens=True)
73
- story = story.replace(prompt, "").strip()
74
-
75
- return story
76
- except Exception as e:
77
- st.error(f"Error in story generation: {str(e)}")
78
- return None
79
 
80
- # Stage 3: Story to Audio
81
- def story_to_audio(story_text):
 
 
82
  try:
83
- # Initialize TTS
84
- tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmp_tts")
85
- hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmp_vocoder")
86
-
87
- # Generate mel spectrogram and waveform
88
- mel_output, mel_length, alignment = tacotron2.encode_text(story_text)
89
- waveforms = hifi_gan.decode_batch(mel_output)
90
-
91
- # Convert to bytes
92
- audio_bytes = io.BytesIO()
93
- sf.write(audio_bytes, waveforms.squeeze(1).cpu().numpy(), 22050, format='WAV')
94
- audio_bytes.seek(0)
95
-
96
- return audio_bytes
97
- except Exception as e:
98
- st.error(f"Error in audio generation: {str(e)}")
99
- return None
100
 
101
- # Main App Function
 
 
102
  def main():
103
- st.title("Image to Story Generator")
104
- st.write("Upload an image to generate a story and audio narration")
105
 
106
- # File uploader
107
- uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
108
 
109
- if uploaded_file is not None:
110
- # Display image
111
- image = Image.open(uploaded_file)
112
- st.image(image, caption='Uploaded Image', use_column_width=True)
 
 
 
113
 
114
- # Stage 1: Image to Keyword
115
- st.write("Generating caption from image...")
116
- caption = image_to_keyword(uploaded_file)
117
 
118
- if caption:
119
- st.success(f"Generated Caption: {caption}")
120
-
121
- # Stage 2: Keyword to Story
122
- st.write("Generating story from caption...")
123
- story = keyword_to_story(caption)
124
-
125
- if story:
126
- st.subheader("Generated Story")
127
- st.write(story)
128
-
129
- # Stage 3: Story to Audio
130
- st.write("Converting story to audio...")
131
- audio_bytes = story_to_audio(story)
132
-
133
- if audio_bytes:
134
- st.audio(audio_bytes, format='audio/wav')
135
-
136
- # Download button for audio
137
- st.download_button(
138
- label="Download Audio",
139
- data=audio_bytes,
140
- file_name="generated_story.wav",
141
- mime="audio/wav"
142
- )
143
 
144
  if __name__ == "__main__":
145
  main()
 
1
+ # app.py
2
  import streamlit as st
3
  from PIL import Image
 
4
  from transformers import BlipProcessor, BlipForConditionalGeneration
5
  from transformers import GPT2Tokenizer, GPT2LMHeadModel
6
  import torch
7
+ from gtts import gTTS
8
  import io
 
 
 
9
 
10
# ======================
# Stage 1: Image Captioning
# ======================
@st.cache_resource
def _load_caption_model():
    """Load and cache the BLIP captioning model once per session.

    Without caching, both processor and model were re-downloaded/re-built on
    every uploaded image, dominating the request latency.
    """
    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    return processor, model


def image_to_caption(uploaded_image):
    """Generate a short English caption for an uploaded image.

    Args:
        uploaded_image: File-like object (e.g. Streamlit UploadedFile)
            readable by PIL.

    Returns:
        str: BLIP-generated caption, or a fixed fallback caption if
        anything in the pipeline fails (keeps the app flow alive).
    """
    processor, model = _load_caption_model()
    try:
        img = Image.open(uploaded_image).convert("RGB")
        # padding/truncation/max_length are text-tokenizer options; for an
        # image-only call they do nothing useful, so pass only the image.
        inputs = processor(images=img, return_tensors="pt")
        with torch.no_grad():  # inference only — skip autograd bookkeeping
            outputs = model.generate(**inputs)
        return processor.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # Surface the failure instead of swallowing it with a bare except,
        # but keep the original best-effort fallback caption.
        st.warning(f"Captioning failed ({e}); using fallback caption.")
        return "a happy scene with children"  # Fallback caption
 
 
 
 
30
 
31
# ======================
# Stage 2: Story Generation
# ======================
@st.cache_resource
def _load_story_model():
    """Load and cache the GPT-2 tokenizer and model once per session."""
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    model = GPT2LMHeadModel.from_pretrained("gpt2")
    return tokenizer, model


def generate_story(caption):
    """Generate a short children's story about *caption* with GPT-2.

    Args:
        caption: Image caption used as the story topic.

    Returns:
        str: Generated story, trimmed to at most 500 characters, or a
        canned fallback story if generation fails.
    """
    tokenizer, model = _load_story_model()

    prompt = f"""Create a children's story (3-6 years old) about {caption} with:
1. Friendly animals
2. Happy ending
3. 50-100 words
Story:"""

    try:
        inputs = tokenizer(prompt, return_tensors="pt")
        with torch.no_grad():
            outputs = model.generate(
                inputs.input_ids,
                # max_new_tokens bounds only the generated text; the old
                # max_length=300 counted the prompt too, so a long caption
                # silently shrank the story budget.
                max_new_tokens=200,
                num_return_sequences=1,
                no_repeat_ngram_size=2,
                # GPT-2 has no pad token; without this, generate() warns and
                # falls back anyway — make the choice explicit.
                pad_token_id=tokenizer.eos_token_id,
            )
        # Decode only the newly generated tokens. String-replacing the prompt
        # breaks whenever decoding re-spaces the prompt text even slightly.
        new_tokens = outputs[0][inputs.input_ids.shape[1]:]
        story = tokenizer.decode(new_tokens, skip_special_tokens=True)
        return story.strip()[:500]  # Length control
    except Exception as e:
        st.warning(f"Story generation failed ({e}); using fallback story.")
        return """Once upon a time, there was a friendly bear who loved playing with children.
They had wonderful adventures every day, always ending with big hugs and happy smiles!"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
# ======================
# Stage 3: Text-to-Speech
# ======================
def create_audio(story_text):
    """Convert story text to spoken MP3 audio via Google TTS.

    Args:
        story_text: Story to narrate; only the first 500 characters are
            synthesized to keep the request small.

    Returns:
        io.BytesIO | None: MP3 buffer rewound to position 0, or None when
        synthesis fails (the caller skips the audio widgets on None).
    """
    try:
        tts = gTTS(text=story_text[:500], lang='en', slow=False)
        audio_buffer = io.BytesIO()
        tts.write_to_fp(audio_buffer)
        audio_buffer.seek(0)  # rewind so readers see the whole payload
        return audio_buffer
    except Exception as e:
        # Surface the failure instead of the original fully-silent bare
        # except; the None contract for callers is unchanged.
        st.warning(f"Audio generation failed: {e}")
        return None
 
 
 
 
 
 
 
 
 
 
71
 
72
# ======================
# Main Application
# ======================
def main():
    """Streamlit entry point: image upload -> caption -> story -> audio."""
    st.title("🎈 Children's Story Maker")

    # Accept .jpeg too — the old list took .jpg but rejected the identical
    # .jpeg extension.
    uploaded_file = st.file_uploader("Upload a child's photo", type=["jpg", "jpeg", "png"])

    if uploaded_file:
        img = Image.open(uploaded_file)
        st.image(img, use_column_width=True)

        # Processing pipeline — spinners give feedback during slow model calls.
        with st.spinner("Looking at the picture..."):
            caption = image_to_caption(uploaded_file)
        with st.spinner("Writing the story..."):
            story = generate_story(caption)

        st.subheader("Generated Story")
        st.write(story)

        if audio_data := create_audio(story):
            # Materialize the bytes once: handing the same BytesIO to both
            # widgets risks the second reader seeing a stream at EOF.
            audio_bytes = audio_data.getvalue()
            st.audio(audio_bytes, format="audio/mp3")
            st.download_button("Download Audio",
                               data=audio_bytes,
                               file_name="story.mp3",
                               mime="audio/mp3")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
# Script entry point: launch the Streamlit app when run directly.
if __name__ == "__main__":
    main()