Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,6 +6,7 @@ import torch
|
|
| 6 |
import os
|
| 7 |
import tempfile
|
| 8 |
import time
|
|
|
|
| 9 |
|
| 10 |
# Use Streamlit's caching mechanisms to optimize model loading
|
| 11 |
@st.cache_resource
|
|
@@ -79,11 +80,25 @@ def text2audio(story_text):
|
|
| 79 |
# If we got here, no TTS method worked
|
| 80 |
raise Exception("No text-to-speech capability available")
|
| 81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
# Simple image-to-text function using cached model
|
| 83 |
@st.cache_data
|
| 84 |
-
def img2text(
|
| 85 |
-
"""Convert image to text with caching"""
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
return result[0]["generated_text"]
|
| 88 |
|
| 89 |
# Helper function to count words
|
|
@@ -185,14 +200,8 @@ def text2story(text):
|
|
| 185 |
# If no good ending is found, return as is
|
| 186 |
return story_text
|
| 187 |
|
| 188 |
-
#
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
# Add processing status indicator
|
| 192 |
-
status_container = st.empty()
|
| 193 |
-
|
| 194 |
-
# Initialize session state for tracking progress
|
| 195 |
-
if 'progress' not in st.session_state:
|
| 196 |
st.session_state.progress = {
|
| 197 |
'caption_generated': False,
|
| 198 |
'story_generated': False,
|
|
@@ -203,11 +212,14 @@ if 'progress' not in st.session_state:
|
|
| 203 |
'audio_format': None
|
| 204 |
}
|
| 205 |
|
| 206 |
-
#
|
| 207 |
-
|
| 208 |
|
| 209 |
-
#
|
| 210 |
-
|
|
|
|
|
|
|
|
|
|
| 211 |
st.session_state.progress = {
|
| 212 |
'caption_generated': False,
|
| 213 |
'story_generated': False,
|
|
@@ -218,6 +230,9 @@ def reset_progress():
|
|
| 218 |
'audio_format': None
|
| 219 |
}
|
| 220 |
|
|
|
|
|
|
|
|
|
|
| 221 |
# Process the image if uploaded
|
| 222 |
if uploaded_file is not None:
|
| 223 |
# Display image
|
|
@@ -226,10 +241,13 @@ if uploaded_file is not None:
|
|
| 226 |
# Convert to PIL Image
|
| 227 |
image = Image.open(uploaded_file)
|
| 228 |
|
|
|
|
|
|
|
|
|
|
| 229 |
# Image to Text (if not already done)
|
| 230 |
if not st.session_state.progress['caption_generated']:
|
| 231 |
status_container.info("Generating caption...")
|
| 232 |
-
st.session_state.progress['caption'] = img2text(
|
| 233 |
st.session_state.progress['caption_generated'] = True
|
| 234 |
|
| 235 |
st.write(f"Caption: {st.session_state.progress['caption']}")
|
|
|
|
| 6 |
import os
|
| 7 |
import tempfile
|
| 8 |
import time
|
| 9 |
+
import numpy as np
|
| 10 |
|
| 11 |
# Use Streamlit's caching mechanisms to optimize model loading
|
| 12 |
@st.cache_resource
|
|
|
|
| 80 |
# If we got here, no TTS method worked
|
| 81 |
raise Exception("No text-to-speech capability available")
|
| 82 |
|
| 83 |
+
# Convert PIL Image to bytes for hashing in cache
|
| 84 |
+
def get_image_bytes(pil_img):
    """Serialize a PIL image to JPEG-encoded bytes.

    The resulting bytes stand in for the image object when calling the
    cached captioning function, which takes raw bytes as its argument.

    Args:
        pil_img: The image to encode. It must expose ``mode``,
            ``convert`` and ``save`` the way PIL images do.

    Returns:
        bytes: The JPEG-encoded image data.
    """
    import io

    # JPEG cannot store alpha channels or palettes, so saving e.g. an RGBA
    # or P-mode PNG upload directly raises OSError. Normalise anything
    # outside the documented JPEG-writable modes to RGB first.
    if pil_img.mode not in ("L", "RGB", "CMYK"):
        pil_img = pil_img.convert("RGB")

    buf = io.BytesIO()
    pil_img.save(buf, format='JPEG')
    return buf.getvalue()
|
| 90 |
+
|
| 91 |
# Simple image-to-text function using cached model
|
| 92 |
@st.cache_data
def img2text(image_bytes):
    """Generate a caption for an encoded image.

    The function takes raw bytes rather than a PIL image for caching
    compatibility with the ``st.cache_data`` decorator applied above.

    Args:
        image_bytes: Encoded image data that PIL can open.

    Returns:
        The ``"generated_text"`` entry of the first result returned by
        ``img2text_model``.
    """
    # Imports are kept local to the function, as in the original.
    from io import BytesIO
    from PIL import Image

    # Rebuild the PIL image from the byte payload before inference.
    decoded_image = Image.open(BytesIO(image_bytes))

    # img2text_model is defined elsewhere in the file; only the first
    # prediction is used.
    predictions = img2text_model(decoded_image)
    first_prediction = predictions[0]
    return first_prediction["generated_text"]
|
| 103 |
|
| 104 |
# Helper function to count words
|
|
|
|
| 200 |
# If no good ending is found, return as is
|
| 201 |
return story_text
|
| 202 |
|
| 203 |
+
# Function to reset progress when a new file is uploaded
|
| 204 |
+
def reset_progress():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
st.session_state.progress = {
|
| 206 |
'caption_generated': False,
|
| 207 |
'story_generated': False,
|
|
|
|
| 212 |
'audio_format': None
|
| 213 |
}
|
| 214 |
|
| 215 |
+
# Basic Streamlit interface
|
| 216 |
+
st.title("Image to Audio Story")
|
| 217 |
|
| 218 |
+
# Add processing status indicator
|
| 219 |
+
status_container = st.empty()
|
| 220 |
+
|
| 221 |
+
# Initialize session state for tracking progress
|
| 222 |
+
if 'progress' not in st.session_state:
|
| 223 |
st.session_state.progress = {
|
| 224 |
'caption_generated': False,
|
| 225 |
'story_generated': False,
|
|
|
|
| 230 |
'audio_format': None
|
| 231 |
}
|
| 232 |
|
| 233 |
+
# File uploader
|
| 234 |
+
uploaded_file = st.file_uploader("Upload an image", on_change=reset_progress)
|
| 235 |
+
|
| 236 |
# Process the image if uploaded
|
| 237 |
if uploaded_file is not None:
|
| 238 |
# Display image
|
|
|
|
| 241 |
# Convert to PIL Image
|
| 242 |
image = Image.open(uploaded_file)
|
| 243 |
|
| 244 |
+
# Convert image to bytes for caching compatibility
|
| 245 |
+
image_bytes = get_image_bytes(image)
|
| 246 |
+
|
| 247 |
# Image to Text (if not already done)
|
| 248 |
if not st.session_state.progress['caption_generated']:
|
| 249 |
status_container.info("Generating caption...")
|
| 250 |
+
st.session_state.progress['caption'] = img2text(image_bytes)
|
| 251 |
st.session_state.progress['caption_generated'] = True
|
| 252 |
|
| 253 |
st.write(f"Caption: {st.session_state.progress['caption']}")
|