TLH01 committed on
Commit
6a5a7a4
·
verified ·
1 Parent(s): 1394a8a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -46
app.py CHANGED
@@ -5,69 +5,48 @@ from transformers import GPT2Tokenizer, GPT2LMHeadModel
5
  from gtts import gTTS
6
  import io
7
 
8
# Cached model setup — the heavy pretrained downloads happen once per session
@st.cache_resource
def load_models():
    """Load and cache the BLIP captioning pair and the GPT-2 text pair.

    Returns:
        Tuple of (BLIP processor, BLIP model, GPT-2 tokenizer, GPT-2 model).
    """
    caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    story_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    story_model = GPT2LMHeadModel.from_pretrained("gpt2")
    return caption_processor, caption_model, story_tokenizer, story_model
-
17
def process_image(uploaded_file, processor, model):
    """Caption an uploaded image file with the BLIP model.

    Args:
        uploaded_file: File-like object accepted by PIL's Image.open.
        processor: BLIP processor used to build tensors and decode output.
        model: BLIP conditional-generation model.

    Returns:
        The decoded caption string.
    """
    rgb_image = Image.open(uploaded_file).convert('RGB')
    model_inputs = processor(images=rgb_image, return_tensors="pt", padding=True)
    generated_ids = model.generate(**model_inputs)
    return processor.decode(generated_ids[0], skip_special_tokens=True)
22
-
23
def generate_story(caption, tokenizer, model):
    """Generate a short children's story seeded by an image caption.

    Args:
        caption: Scene description used to build the prompt.
        tokenizer: GPT-2 tokenizer.
        model: GPT-2 language model.

    Returns:
        The generated continuation with the prompt text stripped off.
    """
    prompt = f"Create a children's story about {caption} with animals:"
    inputs = tokenizer(prompt, return_tensors="pt", max_length=100, truncation=True)
    outputs = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,  # avoid the missing-attention-mask warning
        max_length=300,
        num_return_sequences=1,
        do_sample=True,  # BUG FIX: without sampling, `temperature` is silently ignored by greedy search
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,  # GPT-2 defines no pad token by default
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True).replace(prompt, "")
33
-
34
def text_to_speech(text):
    """Synthesize (up to the first 300 chars of) *text* as English MP3 audio.

    Returns:
        io.BytesIO buffer rewound to the start of the audio data.
    """
    speech = gTTS(text=text[:300], lang='en')
    buffer = io.BytesIO()
    speech.write_to_fp(buffer)
    buffer.seek(0)
    return buffer
40
 
41
def main():
    """Streamlit entry point: upload photo -> caption -> story -> narrated audio."""
    st.title("Children's Story Maker")

    img_processor, img_model, text_tokenizer, text_model = load_models()

    uploaded_file = st.file_uploader("Upload photo (JPG/PNG)", type=["jpg", "png", "jpeg"])

    if not uploaded_file:
        return  # guard clause: nothing to do until the user uploads a photo

    st.image(uploaded_file, use_container_width=True)

    with st.status("Processing Pipeline", expanded=True):
        # Stage 1: Image Analysis
        st.write("🖼️ Analyzing image...")
        caption = process_image(uploaded_file, img_processor, img_model)

        # Stage 2: Story Generation
        st.write("📖 Creating story...")
        story = generate_story(caption, text_tokenizer, text_model)

        # Stage 3: Audio Conversion
        st.write("🔊 Generating audio...")
        audio = text_to_speech(story)

    st.subheader("Results")
    st.write(f"**Caption:** {caption}")
    st.write(f"**Story:** {story}")
    st.audio(audio, format="audio/mp3")

    # Download buttons
    st.download_button("Download Story", story, "story.txt")
    st.download_button("Download Audio", audio.getvalue(), "story.mp3")
71
 
72
# Run the Streamlit app when executed as a script.
if __name__ == "__main__":
    main()
 
5
  from gtts import gTTS
6
  import io
7
 
 
8
@st.cache_resource
def load_models():
    """Load the BLIP captioning pair and the GPT-2 pair, cached for the session.

    Returns:
        Tuple of (BLIP processor, BLIP model, GPT-2 tokenizer, GPT-2 model).
    """
    blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
    return blip_processor, blip_model, gpt2_tokenizer, gpt2_model
 
 
 
 
 
 
 
 
16
 
17
def main():
    """Streamlit entry point: caption an uploaded image, spin it into a story, narrate it."""
    st.title("Stable Story Maker")

    img_processor, img_model, text_tokenizer, text_model = load_models()

    uploaded_file = st.file_uploader("Upload Image", type=["jpg", "png"])

    if uploaded_file:
        st.image(uploaded_file, use_container_width=True)

        with st.status("Processing"):
            # Stage 1: caption the image with BLIP
            img = Image.open(uploaded_file).convert("RGB")
            inputs = img_processor(images=img, return_tensors="pt")
            caption = img_processor.decode(img_model.generate(**inputs)[0], skip_special_tokens=True)

            # Stage 2: continue the caption into a story with GPT-2
            prompt = f"Children's story about {caption}:"
            inputs = text_tokenizer(prompt, return_tensors="pt")
            story = text_tokenizer.decode(
                text_model.generate(
                    inputs.input_ids,
                    attention_mask=inputs.attention_mask,  # BUG FIX: avoid missing-attention-mask warning
                    max_length=200,
                    pad_token_id=text_tokenizer.eos_token_id,  # GPT-2 defines no pad token by default
                )[0],
                skip_special_tokens=True
            ).replace(prompt, "")

            # Stage 3: narrate the story; gTTS rejects empty text, so fall
            # back to the caption when the model produced nothing usable
            speech_text = (story.strip() or caption)[:250]
            tts = gTTS(text=speech_text, lang='en')
            audio = io.BytesIO()
            tts.write_to_fp(audio)
            audio.seek(0)

        st.write(f"**Caption:** {caption}")
        st.write(f"**Story:** {story}")
        st.audio(audio, format="audio/mp3")
 
 
 
 
50
 
51
# Run the Streamlit app when executed as a script.
if __name__ == "__main__":
    main()