Spaces:

CR7CAD
/

Assignment1

Sleeping

App Files Files Community

CR7CAD committed on Mar 9

Commit

15c1038

verified ·

1 Parent(s): fc13d66

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -16

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import torch
 import os
 import tempfile
 import time
 # Use Streamlit's caching mechanisms to optimize model loading
 @st.cache_resource
@@ -79,11 +80,25 @@ def text2audio(story_text):
     # If we got here, no TTS method worked
     raise Exception("No text-to-speech capability available")
 # Simple image-to-text function using cached model
 @st.cache_data
-def img2text(image):
-    """Convert image to text with caching"""
-    result = img2text_model(image)
     return result[0]["generated_text"]
 # Helper function to count words
@@ -185,14 +200,8 @@ def text2story(text):
     # If no good ending is found, return as is
     return story_text
-# Basic Streamlit interface
-st.title("Image to Audio Story")
-# Add processing status indicator
-status_container = st.empty()
-# Initialize session state for tracking progress
-if 'progress' not in st.session_state:
     st.session_state.progress = {
         'caption_generated': False,
         'story_generated': False,
@@ -203,11 +212,14 @@ if 'progress' not in st.session_state:
         'audio_format': None
     }
-# File uploader
-uploaded_file = st.file_uploader("Upload an image", on_change=lambda: reset_progress())
-# Function to reset progress when a new file is uploaded
-def reset_progress():
     st.session_state.progress = {
         'caption_generated': False,
         'story_generated': False,
@@ -218,6 +230,9 @@ def reset_progress():
         'audio_format': None
     }
 # Process the image if uploaded
 if uploaded_file is not None:
     # Display image
@@ -226,10 +241,13 @@ if uploaded_file is not None:
     # Convert to PIL Image
     image = Image.open(uploaded_file)
     # Image to Text (if not already done)
     if not st.session_state.progress['caption_generated']:
         status_container.info("Generating caption...")
-        st.session_state.progress['caption'] = img2text(image)
         st.session_state.progress['caption_generated'] = True
     st.write(f"Caption: {st.session_state.progress['caption']}")

 import os
 import tempfile
 import time
+import numpy as np
 # Use Streamlit's caching mechanisms to optimize model loading
 @st.cache_resource
     # If we got here, no TTS method worked
     raise Exception("No text-to-speech capability available")
+# Convert PIL Image to bytes for hashing in cache
def get_image_bytes(pil_img):
    """Serialize an image to JPEG-encoded bytes.

    The returned ``bytes`` value is used in place of the image object when
    calling the cached captioning function, because raw bytes can be hashed
    for the cache key.

    Args:
        pil_img: Image object exposing ``mode``, ``convert`` and ``save``
            (in this app, the result of ``Image.open``).

    Returns:
        bytes: The JPEG-encoded image data.
    """
    import io

    # JPEG has no alpha channel or palette support, so saving e.g. an RGBA,
    # LA or P image (typical for PNG/GIF uploads) raises OSError. Convert
    # only those modes; modes JPEG can already encode are left untouched.
    if pil_img.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
        pil_img = pil_img.convert("RGB")

    buf = io.BytesIO()
    pil_img.save(buf, format='JPEG')
    return buf.getvalue()
 # Simple image-to-text function using cached model
 @st.cache_data
def img2text(image_bytes):
    """Produce a caption for an image supplied as encoded bytes.

    The image is passed as bytes rather than as an image object for caching
    compatibility; it is decoded back into a PIL image here before being
    handed to the captioning model.

    Args:
        image_bytes: Encoded image data readable by ``PIL.Image.open``.

    Returns:
        The ``"generated_text"`` field of the first result returned by
        ``img2text_model``.
    """
    import io
    from PIL import Image

    # Decode the bytes into an image the model can consume
    decoded_image = Image.open(io.BytesIO(image_bytes))

    # Run the captioning model and keep only its first prediction
    first_prediction = img2text_model(decoded_image)[0]
    return first_prediction["generated_text"]
 # Helper function to count words
     # If no good ending is found, return as is
     return story_text
+# Function to reset progress when a new file is uploaded
+def reset_progress():
     st.session_state.progress = {
         'caption_generated': False,
         'story_generated': False,
         'audio_format': None
     }
+# Basic Streamlit interface
+st.title("Image to Audio Story")
+# Add processing status indicator
+status_container = st.empty()
+# Initialize session state for tracking progress
+if 'progress' not in st.session_state:
     st.session_state.progress = {
         'caption_generated': False,
         'story_generated': False,
         'audio_format': None
     }
+# File uploader
+uploaded_file = st.file_uploader("Upload an image", on_change=reset_progress)
 # Process the image if uploaded
 if uploaded_file is not None:
     # Display image
     # Convert to PIL Image
     image = Image.open(uploaded_file)
+    # Convert image to bytes for caching compatibility
+    image_bytes = get_image_bytes(image)
     # Image to Text (if not already done)
     if not st.session_state.progress['caption_generated']:
         status_container.info("Generating caption...")
+        st.session_state.progress['caption'] = img2text(image_bytes)
         st.session_state.progress['caption_generated'] = True
     st.write(f"Caption: {st.session_state.progress['caption']}")