Spaces:
Sleeping
Sleeping
File size: 4,565 Bytes
b713a83 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 | import streamlit as st
import tempfile
import os
import time
from agent import VoiceToImageAgent
# Page configuration
_PAGE_OPTIONS = {
    "page_title": "Voice into Imagination",
    "page_icon": "🎙️",
    "layout": "wide",
}
st.set_page_config(**_PAGE_OPTIONS)

# Custom CSS for refined chat style and bottom bar.
# Three rules: pin the chat input near the bottom of the viewport, hide the
# element container that holds the #button-after marker, and hide
# Streamlit's status widget.
_CUSTOM_CSS = """
<style>
/* Fix input at bottom */
.stChatInput {
position: fixed;
bottom: 3rem;
z-index: 1000;
}
/* Hide some Streamlit elements for cleaner look */
.element-container:has(#button-after) {
display: none;
}
/* Status Container Styling */
div[data-testid="stStatusWidget"] {
visibility: hidden;
}
</style>
"""
# unsafe_allow_html is required for the raw <style> tag to reach the page.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

st.title("🎙️ Voice into Imagination")
# Session-state defaults: the agent, the chat history, the persistent logs,
# and the counter used to reset the audio input.
# Each value is a zero-argument factory that is called only when its key is
# absent from session state, so existing values (including the agent) are
# never rebuilt or overwritten.
_STATE_FACTORIES = {
    "agent": VoiceToImageAgent,
    "messages": list,
    "logs": list,
    "audio_key_count": int,  # int() == 0
}
for _state_key, _make_default in _STATE_FACTORIES.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()

# Module-level shortcut used by the processing code further down.
agent = st.session_state.agent
# Sidebar for Logs
def _format_log_entry(entry):
    """Return the caption text shown in the sidebar for one stored log entry.

    Entries that already start with an "ERROR:" tag are shown unchanged, so
    failures are not mislabelled as "INFO: ERROR: ...". Every other entry
    gets the "INFO: " prefix.
    """
    text = str(entry)
    if text.startswith("ERROR:"):
        return text
    return f"INFO: {text}"


def _render_logs(target):
    """Redraw every entry of ``st.session_state.logs`` inside *target*.

    *target* is the placeholder created in the sidebar. Opening a new
    container on it replaces whatever the placeholder showed before, so the
    list is redrawn in place rather than appended to.
    """
    with target.container():
        for entry in st.session_state.logs:
            st.caption(_format_log_entry(entry))


with st.sidebar:
    st.title("🛠️ System Logs")
    # One placeholder holds the whole log list so it can be refreshed while
    # the script is still running.
    log_placeholder = st.empty()
    # Display all previous logs
    _render_logs(log_placeholder)


def log_message(message):
    """Append *message* to the persistent log and refresh the sidebar view.

    Args:
        message: Text to record. It is stored as given in
            ``st.session_state.logs`` and formatted only when displayed.
    """
    st.session_state.logs.append(message)
    _render_logs(log_placeholder)
# Display chat messages
# User turns are always rendered as text. Any other turn is rendered as its
# image when it carries an "image_url", and as text otherwise. The prompt
# stored alongside an image is deliberately not shown, to keep the UI clean.
for entry in st.session_state.messages:
    role = entry["role"]
    shows_image = role != "user" and "image_url" in entry
    with st.chat_message(role):
        if shows_image:
            st.image(entry["image_url"], width="stretch")
        else:
            st.markdown(entry["content"])
# Bottom Input Area
# A container holds the custom status line and, below it, the audio input.
bottom_container = st.container()
with bottom_container:
    # 1. Status area, rewritten in place as the pipeline advances.
    status_placeholder = st.empty()

    # 2. Audio input.
    # The widget key embeds a counter; incrementing the counter gives the
    # recorder a new key on the next run, which resets/clears it.
    audio_key = f"audio_{st.session_state.audio_key_count}"
    audio_value = st.audio_input("Recorder", key=audio_key)

    if audio_value:
        # Set only after the chat history has been updated; gates the rerun.
        turn_completed = False
        # Assigned before anything can fail so the cleanup in `finally`
        # always knows whether a temporary file has to be removed.
        audio_path = None

        with st.spinner("Processing..."):
            try:
                # Save the recording to a temporary file. delete=False keeps
                # the file on disk after the `with` closes it, so it can be
                # handed to the agent by path; it is removed in `finally`.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
                    audio_path = f.name
                    f.write(audio_value.getvalue())

                # STATUS: Transcribing
                status_placeholder.info("🎙️ Transcribing voice...")
                log_message("Audio received. Transcribing...")
                transcript = agent.transcribe(audio_path)

                if not str(transcript or "").strip():
                    # Nothing usable came back: do not spend a prompt and an
                    # image generation on empty text. The recorder is left
                    # as is so the user can simply record again.
                    status_placeholder.warning(
                        "⚠️ No speech detected. Please record again."
                    )
                    log_message("Empty transcript. Skipping image generation.")
                else:
                    # STATUS: Show the transcript near the input
                    status_placeholder.success(f"🗣️ You said: \"{transcript}\"")
                    log_message(f"Transcript: {transcript}")

                    # Pause so the transcript stays readable before the
                    # simulated "automatic send".
                    time.sleep(2)

                    # STATUS: Generating
                    status_placeholder.info("🎨 Generating image...")
                    log_message("Generating image prompt...")
                    prompt = agent.text_to_prompt(transcript)
                    log_message(f"Prompt: {prompt}")
                    log_message("Generating image...")
                    image_url = agent.generate_image(prompt)
                    log_message("Image generated successfully.")

                    # Clear Status
                    status_placeholder.empty()

                    # Update Chat History
                    st.session_state.messages.append(
                        {"role": "user", "content": transcript}
                    )
                    st.session_state.messages.append(
                        {"role": "assistant", "content": prompt, "image_url": image_url}
                    )
                    turn_completed = True
            except Exception as e:
                # Top-level UI boundary: report the failure instead of
                # crashing the page. Clear the status first so a stale
                # "Transcribing..." / "Generating..." line is not left
                # next to the error.
                status_placeholder.empty()
                st.error(f"An error occurred: {e}")
                log_message(f"ERROR: {e}")
            finally:
                if audio_path is not None:
                    try:
                        os.remove(audio_path)
                    except FileNotFoundError:
                        # Already gone; nothing left to clean up.
                        pass

        if turn_completed:
            # Increment key to reset audio input
            st.session_state.audio_key_count += 1
            # Rerun to update the view. Kept outside the try/except so the
            # handler above only ever deals with pipeline failures.
            st.rerun()
|