testtest

Sleeping

App Files Community

TLH01 committed on May 2, 2025

Commit

1394a8a

verified ·

1 Parent(s): 796dba0

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -51

app.py CHANGED Viewed

@@ -1,92 +1,73 @@
-# app.py
 import streamlit as st
 from PIL import Image
 from transformers import BlipProcessor, BlipForConditionalGeneration
 from transformers import GPT2Tokenizer, GPT2LMHeadModel
-import torch
 from gtts import gTTS
 import io
-# Pre-load models during app initialization
 @st.cache_resource
 def load_models():
-    # Image captioning model
     img_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
     img_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
-    # Story generation model
     text_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
     text_model = GPT2LMHeadModel.from_pretrained("gpt2")
     return img_processor, img_model, text_tokenizer, text_model
-def generate_caption(uploaded_image, processor, model):
-    img = Image.open(uploaded_image).convert("RGB")
-    inputs = processor(
-        images=img,
-        return_tensors="pt",
-        padding=True,
-        truncation=True
-    )
     outputs = model.generate(**inputs)
     return processor.decode(outputs[0], skip_special_tokens=True)
-def create_story(caption, tokenizer, model):
-    prompt = f"""Create a children's story about {caption} with:
-1. Friendly characters
-2. Happy ending
-3. 50-100 words
-Story:"""
-    inputs = tokenizer(prompt, return_tensors="pt")
     outputs = model.generate(
         inputs.input_ids,
         max_length=300,
         num_return_sequences=1,
-        no_repeat_ngram_size=2,
-        early_stopping=True
     )
-    return tokenizer.decode(outputs[0], skip_special_tokens=True).replace(prompt, "").strip()
-def text_to_audio(text):
     audio_buffer = io.BytesIO()
-    tts = gTTS(text=text, lang='en')
     tts.write_to_fp(audio_buffer)
     audio_buffer.seek(0)
     return audio_buffer
 def main():
-    st.title("Children's Story Generator")
-    # Load models once at startup
     img_processor, img_model, text_tokenizer, text_model = load_models()
-    uploaded_file = st.file_uploader("Upload a photo", type=["jpg", "png", "jpeg"])
     if uploaded_file:
-        # Display image with corrected parameter
         st.image(uploaded_file, use_container_width=True)
-        # Processing pipeline
-        with st.spinner("Analyzing image..."):
-            caption = generate_caption(uploaded_file, img_processor, img_model)
-            st.subheader("Image Analysis")
-            st.write(f"Detected scene: {caption}")
-        with st.spinner("Writing story..."):
-            story = create_story(caption, text_tokenizer, text_model)
-            st.subheader("Generated Story")
-            st.write(story)
-        with st.spinner("Creating audio..."):
-            audio = text_to_audio(story)
-            st.audio(audio, format="audio/mp3")
-            st.download_button(
-                "Download Audio",
-                data=audio,
-                file_name="story.mp3",
-                mime="audio/mp3"
-            )
 if __name__ == "__main__":
     main()

 import streamlit as st
 from PIL import Image
 from transformers import BlipProcessor, BlipForConditionalGeneration
 from transformers import GPT2Tokenizer, GPT2LMHeadModel
 from gtts import gTTS
 import io
+# Model loading with cache
 @st.cache_resource
 def load_models():
     img_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
     img_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
     text_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
     text_model = GPT2LMHeadModel.from_pretrained("gpt2")
     return img_processor, img_model, text_tokenizer, text_model
+def process_image(uploaded_file, processor, model):
+    img = Image.open(uploaded_file).convert('RGB')
+    inputs = processor(images=img, return_tensors="pt", padding=True)
     outputs = model.generate(**inputs)
     return processor.decode(outputs[0], skip_special_tokens=True)
+def generate_story(caption, tokenizer, model):
+    prompt = f"Create a children's story about {caption} with animals:"
+    inputs = tokenizer(prompt, return_tensors="pt", max_length=100, truncation=True)
     outputs = model.generate(
         inputs.input_ids,
         max_length=300,
         num_return_sequences=1,
+        temperature=0.7
     )
+    return tokenizer.decode(outputs[0], skip_special_tokens=True).replace(prompt, "")
+def text_to_speech(text):
     audio_buffer = io.BytesIO()
+    tts = gTTS(text=text[:300], lang='en')
     tts.write_to_fp(audio_buffer)
     audio_buffer.seek(0)
     return audio_buffer
 def main():
+    st.title("Children's Story Maker")
     img_processor, img_model, text_tokenizer, text_model = load_models()
+    uploaded_file = st.file_uploader("Upload photo (JPG/PNG)", type=["jpg", "png", "jpeg"])
     if uploaded_file:
         st.image(uploaded_file, use_container_width=True)
+        with st.status("Processing Pipeline", expanded=True):
+            # Stage 1: Image Analysis
+            st.write("🖼️ Analyzing image...")
+            caption = process_image(uploaded_file, img_processor, img_model)
+            # Stage 2: Story Generation
+            st.write("📖 Creating story...")
+            story = generate_story(caption, text_tokenizer, text_model)
+            # Stage 3: Audio Conversion
+            st.write("🔊 Generating audio...")
+            audio = text_to_speech(story)
+        st.subheader("Results")
+        st.write(f"**Caption:** {caption}")
+        st.write(f"**Story:** {story}")
+        st.audio(audio, format="audio/mp3")
+        # Download buttons
+        st.download_button("Download Story", story, "story.txt")
+        st.download_button("Download Audio", audio.getvalue(), "story.mp3")
 if __name__ == "__main__":
     main()