Spaces:

Krish-05
/

fast_rep_voice

Paused

App Files Files Community

Krish-05 committed on Jul 25, 2025

Commit

bd7abe9

verified ·

1 Parent(s): 7caf3da

Update streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +315 -339

streamlit_app.py CHANGED Viewed

@@ -5,384 +5,360 @@ import time
 import logging
 import numpy as np
 import sys
-import io
-import soundfile as sf
-import queue
-import pkg_resources # Import pkg_resources for version checking
-from streamlit_webrtc import webrtc_streamer, WebRtcMode, RTCConfiguration
-import av # Required for audio frames processing
-from streamlit.components.v1 import html # Import html for custom JS
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
-# --- Configuration ---
-FASTAPI_HOST = "localhost"
-FASTAPI_PORT = 7860
-FASTAPI_OLLAMA_URL = f"http://{FASTAPI_HOST}:{FASTAPI_PORT}/ask"
-FASTAPI_STT_URL = f"http://{FASTAPI_HOST}:{FASTAPI_PORT}/transcribe/"
-# --- Package Version Verification (for debugging/info) ---
-logger.info("--- Checking installed package versions at runtime ---")
-try:
-    st_version = pkg_resources.get_distribution("streamlit").version
-    logger.info(f"Streamlit version: {st_version}")
-except pkg_resources.DistributionNotFound:
-    logger.warning("Streamlit not found at runtime.")
 try:
-    requests_version = pkg_resources.get_distribution("requests").version
-    logger.info(f"Requests version: {requests_version}")
-except pkg_resources.DistributionNotFound:
-    logger.warning("Requests not found at runtime.")
-try:
-    webrtc_version = pkg_resources.get_distribution("streamlit-webrtc").version
-    logger.info(f"streamlit-webrtc version: {webrtc_version}")
-except pkg_resources.DistributionNotFound:
-    logger.warning("streamlit-webrtc not found at runtime.")
-try:
-    # CORRECTED: Use pkg_resources consistently
-    transformers_version = pkg_resources.get_distribution("transformers").version
-    logger.info(f"transformers version: {transformers_version}")
-except pkg_resources.DistributionNotFound:
-    logger.warning("transformers not found (expected for current app logic).")
 except Exception as e:
-    logger.error(f"Error getting transformers version: {e}")
-logger.info("--- Finished checking package versions ---")
-# --- Streamlit Page Setup ---
 st.set_page_config(page_title="Ollama AI Assistant", page_icon="🤖", layout="wide")
-# --- Session State Initialization ---
 if 'chat_history' not in st.session_state:
     st.session_state.chat_history = [
         {"role": "assistant", "message": "Hello! How can I assist you today?"}
     ]
-if 'microphone_active' not in st.session_state:
-    st.session_state.microphone_active = False
 if 'transcribed_text' not in st.session_state:
-    st.session_state.transcribed_text = ""
-if 'audio_buffer' not in st.session_state:
-    st.session_state.audio_buffer = []
-# --- App Header & Chat Display ---
 st.title("🤖 Ollama AI Assistant")
-st.caption("Start chatting with our AI assistant. Type your message below or use the speaker icon.")
-st.markdown("---")
 for chat in st.session_state.chat_history:
     with st.chat_message(chat["role"], avatar="🤖" if chat["role"] == "assistant" else "👤"):
         st.write(chat["message"])
-# --- WebRTC Streamer (Always Rendered, but audio processing is conditional) ---
-webrtc_ctx = webrtc_streamer(
-    key="microphone_input_permanent", # Use a fixed key
-    mode=WebRtcMode.SENDONLY,
-    rtc_configuration=RTCConfiguration({"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}),
-    audio_receiver_size=2048, # A larger buffer for hold-to-speak
-    media_stream_constraints={"video": False, "audio": True}
-)
-# Function to transcribe and update text area
-def transcribe_and_update_text_area(audio_data_list, sample_rate=48000):
-    if audio_data_list:
-        try:
-            combined_audio = np.concatenate(audio_data_list)
-            byte_io = io.BytesIO()
-            sf.write(byte_io, combined_audio, sample_rate, format='WAV')
-            byte_io.seek(0)
-            files = {"audio_file": ("audio.wav", byte_io.getvalue(), "audio/wav")}
-            logger.info(f"Sending {len(combined_audio)} samples (at {sample_rate} Hz) to FastAPI STT endpoint.")
-            with st.spinner("Transcribing audio..."):
-                response = requests.post(FASTAPI_STT_URL, files=files)
-            response.raise_for_status()
-            transcribed_data = response.json()
-            st.session_state.transcribed_text = transcribed_data.get("transcribed_text", "Could not transcribe.")
-            logger.info(f"Transcribed text received: {st.session_state.transcribed_text[:100]}...")
         except requests.exceptions.ConnectionError:
-            st.session_state.transcribed_text = (f"Error: Could not connect to the STT server. "
-                                                  f"Please ensure it is running at {FASTAPI_STT_URL}.")
-            logger.error(f"ConnectionError to STT FastAPI at {FASTAPI_STT_URL}")
         except requests.exceptions.RequestException as e:
             error_details = e.response.text if e.response is not None else str(e)
-            st.session_state.transcribed_text = (f"An error occurred during STT request. "
-                                                  f"Details: {error_details}")
-            logger.error(f"Request error to STT FastAPI: {e}", exc_info=True)
         except Exception as e:
-            st.session_state.transcribed_text = f"An unexpected error occurred during STT: {e}"
-            logger.exception("An unexpected error occurred during STT transcription.")
-        finally:
-            st.session_state.audio_buffer = []
-            st.session_state.microphone_active = False
-            st.rerun()
-    else:
-        logger.info("No audio data to transcribe.")
-        st.session_state.microphone_active = False
         st.rerun()
-# --- Custom JavaScript for Hold-to-Speak Button ---
-SPEAKER_BUTTON_HTML = """
-<style>
-.speaker-button-container {
-    display: flex;
-    align-items: center;
-    justify-content: flex-end;
-    padding-top: 10px;
-}
-.speaker-button {
-    background-color: #4CAF50;
-    color: white;
-    padding: 10px 15px;
-    border: none;
-    border-radius: 5px;
-    font-size: 16px;
-    cursor: pointer;
-    transition: background-color 0.3s ease;
-    display: flex;
-    align-items: center;
-    gap: 8px;
-}
-.speaker-button:hover {
-    background-color: #45a049;
-}
-.speaker-button:active {
-    background-color: #3e8e41;
-}
-.speaker-button.active {
-    background-color: #f44336;
-}
-.speaker-button.active:hover {
-    background-color: #da190b;
-}
-@keyframes pulse {
-    0% { box-shadow: 0 0 0 0 rgba(244, 67, 54, 0.7); }
-    70% { box-shadow: 0 0 0 10px rgba(244, 67, 54, 0); }
-    100% { box-shadow: 0 0 0 0 rgba(244, 67, 54, 0); }
-}
-.speaker-button.active {
-    animation: pulse 1.5s infinite;
-}
-</style>
-<div class="speaker-button-container">
-    <button id="speakerButton" class="speaker-button">
-        <i class="fa fa-microphone" style="font-size:24px"></i>
-        <span id="buttonText">Hold to Speak</span>
-    </button>
-</div>
-<script>
-    const speakerButton = document.getElementById('speakerButton');
-    const buttonText = document.getElementById('buttonText');
-    let isRecording = false;
-    function sendMessageToStreamlit(action) {
-        window.parent.postMessage({
-            streamlit: true,
-            type: 'FROM_IFRAME',
-            data: { action: action }
-        }, '*');
-    }
-    window.addEventListener('message', event => {
-        if (event.data.type === 'streamlit:setComponentValue' && event.data.key === 'speaker_button_state') {
-            const state = event.data.value;
-            if (state.active === true && !isRecording) {
-                speakerButton.classList.add('active');
-                buttonText.textContent = 'Recording... Release to Transcribe';
-                isRecording = true;
-            } else if (state.active === false && isRecording) {
-                speakerButton.classList.remove('active');
-                buttonText.textContent = 'Hold to Speak';
-                isRecording = false;
-            }
-        }
-    });
-    speakerButton.addEventListener('mousedown', () => {
-        if (!isRecording) {
-            sendMessageToStreamlit('start_recording');
-            speakerButton.classList.add('active');
-            buttonText.textContent = 'Recording... Release to Transcribe';
-            isRecording = true;
-        }
-    });
-    speakerButton.addEventListener('mouseup', () => {
-        if (isRecording) {
-            sendMessageToStreamlit('stop_recording');
-            speakerButton.classList.remove('active');
-            buttonText.textContent = 'Processing...';
-            isRecording = false;
-        }
-    });
-    speakerButton.addEventListener('mouseleave', () => {
-        if (isRecording) {
-            sendMessageToStreamlit('stop_recording');
-            speakerButton.classList.remove('active');
-            buttonText.textContent = 'Processing...';
-            isRecording = false;
-        }
-    });
-    speakerButton.addEventListener('contextmenu', e => e.preventDefault());
-    speakerButton.addEventListener('touchstart', (e) => {
-        e.preventDefault();
-        if (!isRecording) {
-            sendMessageToStreamlit('start_recording');
-            speakerButton.classList.add('active');
-            buttonText.textContent = 'Recording... Release to Transcribe';
-            isRecording = true;
-        }
-    }, { passive: false });
-    speakerButton.addEventListener('touchend', (e) => {
-        e.preventDefault();
-        if (isRecording) {
-            sendMessageToStreamlit('stop_recording');
-            speakerButton.classList.remove('active');
-            buttonText.textContent = 'Processing...';
-            isRecording = false;
-        }
-    }, { passive: false });
-    speakerButton.addEventListener('touchcancel', (e) => {
-        e.preventDefault();
-        if (isRecording) {
-            sendMessageToStreamlit('stop_recording');
-            speakerButton.classList.remove('active');
-            buttonText.textContent = 'Processing...';
-            isRecording = false;
-        }
-    }, { passive: false });
-</script>
-<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
-"""
-# --- Input Area ---
-col1, col2 = st.columns([0.8, 0.2])
-with col1:
-    with st.form("chat_form", clear_on_submit=True):
-        user_prompt = st.text_area(
-            "Type your message here...",
-            height=100,
-            placeholder="e.g., Explain quantum computing in simple terms.",
-            label_visibility="collapsed",
-            key="user_input_text_area",
-            value=st.session_state.transcribed_text
-        )
-        # Clear transcribed text if user starts typing in the text_area
-        if user_prompt != st.session_state.transcribed_text and st.session_state.transcribed_text != "":
-            st.session_state.transcribed_text = ""
-            logger.info("Transcribed text cleared because user started typing/editing.")
-        submitted = st.form_submit_button("Send")
-        if submitted:
-            st.session_state.microphone_active = False # Ensure mic is off on send
-            if user_prompt:
-                logger.info(f"User submitted prompt: {user_prompt[:100]}...")
-                st.session_state.chat_history.append({"role": "user", "message": user_prompt})
-                st.session_state.transcribed_text = ""
-                with st.chat_message("assistant", avatar="🤖"):
-                    response_placeholder = st.empty()
-                    response_placeholder.write("Thinking...")
-                full_response = ""
-                byte_buffer = b""
-                try:
-                    payload = {"text": user_prompt}
-                    headers = {"Content-Type": "application/json"}
-                    with requests.post(FASTAPI_OLLAMA_URL, json=payload, headers=headers, stream=True) as response:
-                        response.raise_for_status()
-                        for chunk in response.iter_content(chunk_size=1):
-                            if chunk:
-                                byte_buffer += chunk
-                                try:
-                                    decoded_text = byte_buffer.decode("utf-8", errors="strict")
-                                    full_response += decoded_text
-                                    response_placeholder.markdown(full_response + "▌")
-                                    byte_buffer = b""
-                                except UnicodeDecodeError:
-                                    pass
-                                except Exception as e:
-                                    full_response += chunk.decode("utf-8", errors="replace")
-                                    response_placeholder.markdown(full_response + "▌")
-                                    byte_buffer = b""
-                        if byte_buffer:
-                            full_response += byte_buffer.decode("utf-8", errors="replace")
-                        response_placeholder.markdown(full_response)
-                except requests.exceptions.ConnectionError:
-                    full_response = (f"Error: Could not connect to the FastAPI server. "
-                                     f"Please ensure it is running at {FASTAPI_OLLAMA_URL}.")
-                    response_placeholder.error(full_response)
-                    logger.error(f"Connection error to FastAPI LLM at {FASTAPI_OLLAMA_URL}", exc_info=True)
-                except requests.exceptions.RequestException as e:
-                    error_details = e.response.text if e.response is not None else str(e)
-                    status_code = e.response.status_code if e.response is not None else "N/A"
-                    full_response = (f"An error occurred during the request to FastAPI. "
-                                     f"Status code: {status_code}\nDetails: {error_details}")
-                    response_placeholder.error(full_response)
-                    logger.error(f"Request error to FastAPI LLM: {e}", exc_info=True)
-                except Exception as e:
-                    full_response = f"An unexpected error occurred: {e}"
-                    response_placeholder.error(full_response)
-                    logger.exception("An unexpected error occurred during LLM processing.")
-                st.session_state.chat_history.append({"role": "assistant", "message": full_response})
-                st.rerun()
-            else:
-                st.warning("Please enter a prompt before clicking 'Send'.")
-with col2:
-    speaker_button_event = html(SPEAKER_BUTTON_HTML, height=70, scrolling=False)
-    # Communicate current recording state to the JavaScript component
-    st.write(f"<script>window.parent.postMessage({{ type: 'streamlit:setComponentValue', key: 'speaker_button_state', value: {{ active: {str(st.session_state.microphone_active).lower()} }} }}, '*');</script>", unsafe_allow_html=True)
-    # Process messages from the custom JavaScript button
-    if isinstance(speaker_button_event, dict) and "action" in speaker_button_event: # CORRECTED LINE
-        if speaker_button_event["action"] == "start_recording":
-            if webrtc_ctx.state.playing and not st.session_state.microphone_active:
-                st.session_state.microphone_active = True
-                st.session_state.audio_buffer = []
-                st.session_state.transcribed_text = ""
-                logger.info("JS: Start recording signal received. Microphone active.")
-                st.rerun()
-            elif not webrtc_ctx.state.playing:
-                st.warning("Please allow microphone access in your browser.")
-                logger.warning("JS: Start recording signal received, but WebRTC context is not playing.")
-            elif st.session_state.microphone_active:
-                logger.info("JS: Start recording signal received, but microphone already active.")
-        elif speaker_button_event["action"] == "stop_recording":
-            if st.session_state.microphone_active:
-                logger.info("JS: Stop recording signal received. Transcribing...")
-                transcribe_and_update_text_area(st.session_state.audio_buffer)
-            else:
-                logger.info("JS: Stop recording signal received, but microphone was not active.")
-# --- Real-time audio buffering from webrtc_ctx ---
-if webrtc_ctx.state.playing and st.session_state.microphone_active:
-    try:
-        audio_frames = webrtc_ctx.audio_receiver.get_frames(timeout=0.01)
-        for frame in audio_frames:
-            audio_array = frame.to_ndarray(format="flt").flatten()
-            st.session_state.audio_buffer.append(audio_array)
-    except queue.Empty:
-        pass
-    except Exception as e:
-        logger.error(f"Error getting audio frames from webrtc_ctx: {e}", exc_info=True)
 # --- Footer ---
 st.markdown("---")
-st.caption("Powered by Ollama, Hugging Face (STT), FastAPI, and Streamlit.")

 import logging
 import numpy as np
 import sys
+import io # New: For handling audio bytes
+from pydub import AudioSegment # New: For converting audio formats (requires ffmpeg)
+from streamlit_webrtc import WebRtcMode, webrtc_streamer, AudioProcessorBase, ClientSettings # New: For microphone access
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
+# --- Debugging: show installed package versions in the Streamlit sidebar ---
 try:
+    import pkg_resources  # imported inside the try so an ImportError is reported by the outer handler below
+    st.sidebar.write(f"Streamlit version: {pkg_resources.get_distribution('streamlit').version}")
+    st.sidebar.write(f"Requests version: {pkg_resources.get_distribution('requests').version}")
+    try:
+        webrtc_version = pkg_resources.get_distribution("streamlit-webrtc").version
+        st.sidebar.write(f"streamlit-webrtc version: {webrtc_version}")
+    except pkg_resources.DistributionNotFound:
+        st.sidebar.write("streamlit-webrtc not found (expected for current app logic).")  # NOTE(review): streamlit_webrtc is imported at module level, so "expected" looks inaccurate - confirm wording
+    except Exception as e:
+        st.sidebar.write(f"Could not get streamlit-webrtc version: {e}")
+    try:
+        # Check for faster-whisper (pydub is checked separately in the next try block)
+        fw_version = pkg_resources.get_distribution("faster-whisper").version
+        st.sidebar.write(f"faster-whisper version: {fw_version}")
+    except pkg_resources.DistributionNotFound:
+        st.sidebar.write("faster-whisper not found (expected for current app logic).")
+    except Exception as e:
+        st.sidebar.write(f"Could not get faster-whisper version: {e}")
+    try:
+        pd_version = pkg_resources.get_distribution("pydub").version
+        st.sidebar.write(f"pydub version: {pd_version}")
+    except pkg_resources.DistributionNotFound:
+        st.sidebar.write("pydub not found (expected for current app logic).")  # NOTE(review): pydub is imported at module level, so "expected" looks inaccurate - confirm wording
+    except Exception as e:
+        st.sidebar.write(f"Could not get pydub version: {e}")
+    # The transformers version check from the previous revision is intentionally not repeated here.
 except Exception as e:
+    st.sidebar.write(f"Could not get package versions: {e}")  # best-effort: a failure here only affects the debug panel
+# --- End Debugging Section ---
+# Configuration for the FastAPI backend (both endpoints share one host and port)
+FASTAPI_HOST = "localhost"
+FASTAPI_PORT = 7860
+FASTAPI_LLM_URL = f"http://{FASTAPI_HOST}:{FASTAPI_PORT}/ask" # Endpoint the chat prompt is POSTed to
+FASTAPI_STT_URL = f"http://{FASTAPI_HOST}:{FASTAPI_PORT}/transcribe_audio" # Endpoint for speech-to-text requests
+# Set Streamlit page configuration. NOTE(review): this runs after the st.sidebar.write calls above; confirm the deployed Streamlit version does not require set_page_config to be the first Streamlit command.
 st.set_page_config(page_title="Ollama AI Assistant", page_icon="🤖", layout="wide")
+# --- Session state for chat history ---
+# Seed the chat history with an assistant greeting the first time this session runs
 if 'chat_history' not in st.session_state:
     st.session_state.chat_history = [
         {"role": "assistant", "message": "Hello! How can I assist you today?"}
     ]
+    logger.info("Chat history initialized.")
+# --- Session state for STT and WebRTC ---
+# transcribed_text pre-fills the prompt box; webrtc_state drives the microphone recording lifecycle
 if 'transcribed_text' not in st.session_state:
+    st.session_state.transcribed_text = "" # Stores the last transcribed text
+if 'webrtc_state' not in st.session_state:
+    st.session_state.webrtc_state = "idle" # One of: idle, listening, processing_audio
+# --- Custom Audio Processor for VAD and Audio Buffering ---
+class VADAudioProcessor(AudioProcessorBase):
+    """
+    Processes audio frames from WebRTC. It buffers audio and
+    implements a simple volume-based Voice Activity Detection (VAD).
+    """
+    def __init__(self):
+        self.audio_buffer = io.BytesIO()
+        self.silent_frames_count = 0
+        self.voice_detected = False
+        self.frame_rate = 16000 # Standard for WebRTC audio
+        self.samples_width = 2 # 16-bit audio (2 bytes per sample)
+        self.threshold = 500 # Adjust this based on environment noise and microphone sensitivity
+        self.max_silent_frames = 30 # Stop after N silent frames (~0.3 seconds at 10ms/frame)
+        self.total_frames_processed = 0
+        logger.info("VADAudioProcessor initialized.")
+    def _calculate_volume(self, audio_chunk: bytes) -> float:
+        """Calculate RMS (Root Mean Square) volume of an audio chunk."""
+        # Convert bytes to a numpy array of 16-bit integers
+        audio_array = np.frombuffer(audio_chunk, dtype=np.int16)
+        if audio_array.size == 0:
+            return 0.0
+        # Calculate RMS
+        rms = np.sqrt(np.mean(audio_array**2))
+        return rms
+    def process(self, audio_chunk: bytes) -> bytes:
+        """
+        Processes each incoming audio chunk from the microphone.
+        """
+        # Write the raw audio chunk to the buffer
+        self.audio_buffer.write(audio_chunk)
+        self.total_frames_processed += 1
+        # Perform simple VAD
+        volume = self._calculate_volume(audio_chunk)
+        # logger.debug(f"Audio chunk received, volume: {volume:.2f}") # Use debug for less verbose logging
+        if volume > self.threshold:
+            self.voice_detected = True
+            self.silent_frames_count = 0 # Reset silence count on voice detection
+            # logger.debug("Voice detected!")
+        elif self.voice_detected: # Only count silence if voice was previously detected
+            self.silent_frames_count += 1
+            # logger.debug(f"Silence detected. Silent frames: {self.silent_frames_count}")
+        # This processor simply collects data. The stopping logic is handled
+        # by the Streamlit app's main loop reacting to this processor's state.
+        return audio_chunk # Return the chunk (pass-through)
+# --- App Header ---
 st.title("🤖 Ollama AI Assistant")
+st.caption("Start chatting with our AI assistant. Type your message or use the microphone.")
+# --- Chat Display ---
+st.markdown("---") # Separator for visual clarity
 for chat in st.session_state.chat_history:
+    # Use Streamlit's chat_message container for distinct roles
     with st.chat_message(chat["role"], avatar="🤖" if chat["role"] == "assistant" else "👤"):
         st.write(chat["message"])
+# --- Input Area ---
+# Use a form to handle user input and submission
+with st.form("chat_form", clear_on_submit=True):
+    # Store the user's prompt in session state so it can be pre-filled by STT
+    user_prompt_key = "user_input_text_area" # A unique key for the text area
+    user_prompt = st.text_area(
+        "Type your message here...",
+        height=100,
+        placeholder="e.g., Explain quantum computing in simple terms.",
+        label_visibility="collapsed", # Hide the default label for a cleaner look
+        key=user_prompt_key,
+        value=st.session_state.transcribed_text # Pre-fill with transcribed text from STT
+    )
+    col1, col2 = st.columns([1, 1])
+    with col1:
+        submitted = st.form_submit_button("Send")
+    with col2:
+        # Microphone button logic
+        record_button_label = "Stop Listening" if st.session_state.webrtc_state == "listening" else "Start Listening"
+        microphone_button = st.form_submit_button(record_button_label, key="microphone_button")
+    # Handle microphone button press to control WebRTC state
+    if microphone_button:
+        if st.session_state.webrtc_state == "idle":
+            # Transition to 'listening' state
+            st.session_state.webrtc_state = "listening"
+            st.session_state.transcribed_text = "" # Clear any previous transcription
+            st.info("Listening... Tap 'Stop Listening' or wait for silence to auto-stop.")
+            st.rerun() # Rerun to activate the WebRTC streamer
+        elif st.session_state.webrtc_state == "listening":
+            # User manually clicked 'Stop Listening', transition to 'processing_audio'
+            st.session_state.webrtc_state = "processing_audio"
+            st.info("Stopping recording and processing audio...")
+            st.rerun() # Rerun to trigger audio processing
+    # Process the prompt when the 'Send' button is submitted and prompt is not empty
+    if submitted and user_prompt:
+        logger.info(f"User submitted prompt: {user_prompt[:50]}...") # Log the submitted prompt
+        # Add user's message to chat history immediately
+        st.session_state.chat_history.append({"role": "user", "message": user_prompt})
+        st.session_state.transcribed_text = "" # Clear transcribed text after it's sent to LLM
+        # Display a "Thinking..." message while waiting for the AI response
+        with st.chat_message("assistant", avatar="🤖"):
+            response_placeholder = st.empty() # Create an empty placeholder for streaming content
+            response_placeholder.write("Thinking...") # Initial message
+            logger.info("Displaying 'Thinking...' message.")
+        full_response = "" # Initialize an empty string to build the full response
+        byte_buffer = b"" # Initialize a buffer for incomplete UTF-8 characters for streaming
+        try:
+            # Prepare the request payload for FastAPI LLM endpoint
+            payload = {"text": user_prompt}
+            headers = {"Content-Type": "application/json"}
+            logger.info(f"Sending LLM request to FastAPI at {FASTAPI_LLM_URL}")
+            # Make a streaming POST request to the FastAPI endpoint
+            with requests.post(FASTAPI_LLM_URL, json=payload, headers=headers, stream=True) as response:
+                logger.info(f"Received LLM response from FastAPI with status code: {response.status_code}")
+                response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
+                # Iterate over the response content as it streams (byte by byte)
+                for chunk in response.iter_content(chunk_size=1):
+                    if chunk: # Filter out potential empty keep-alive chunks
+                        byte_buffer += chunk # Append new bytes to the buffer
+                        try:
+                            # Attempt to decode the entire buffer using 'strict' error handling
+                            decoded_text = byte_buffer.decode("utf-8", errors="strict")
+                            full_response += decoded_text
+                            response_placeholder.markdown(full_response + "▌") # Update display, add cursor
+                            byte_buffer = b"" # Clear the buffer if decoding was successful
+                        except UnicodeDecodeError:
+                            # This is expected if a multi-byte character is split across chunks.
+                            # Do nothing, just wait for the next chunk to complete the character.
+                            pass
+                        except Exception as e:
+                            # Catch any other unexpected decoding errors
+                            logger.error(f"Error decoding stream chunk: {e} - Raw bytes: {chunk}")
+                            try:
+                                full_response += chunk.decode("utf-8", errors="replace")
+                            except Exception as decode_err:
+                                logger.error(f"Failed to decode even with replace errors: {decode_err}")
+                                full_response += "[Decoding Error]" # Indicate a severe decoding issue
+                            response_placeholder.markdown(full_response + "▌")
+                            byte_buffer = b"" # Clear buffer to try and recover
+            # After the loop, if there are any remaining bytes in the buffer, try to decode them
+            if byte_buffer:
+                try:
+                    full_response += byte_buffer.decode("utf-8", errors="replace")
+                    logger.warning("Remaining bytes in buffer decoded with replacement.")
+                except Exception as e:
+                    logger.error(f"Failed to decode final buffer bytes: {e}")
+                    full_response += "[Final Decoding Error]"
+            response_placeholder.markdown(full_response) # Final update without cursor
+            logger.info("Streaming complete. Full LLM response received.")
         except requests.exceptions.ConnectionError:
+            # Handle cases where Streamlit cannot connect to FastAPI
+            full_response = (f"Error: Could not connect to the FastAPI server. "
+                             f"Please ensure it is running at {FASTAPI_LLM_URL}.")
+            response_placeholder.error(full_response) # Display error in the placeholder
+            logger.error(f"ConnectionError: Could not connect to FastAPI at {FASTAPI_LLM_URL}")
         except requests.exceptions.RequestException as e:
+            # Handle other request-related errors (e.g., HTTP errors from raise_for_status)
             error_details = e.response.text if e.response is not None else str(e)
+            status_code = e.response.status_code if e.response is not None else "N/A"
+            full_response = (f"An error occurred during the request to FastAPI. "
+                             f"Status code: {status_code}\nDetails: {error_details}")
+            response_placeholder.error(full_response) # Display error in the placeholder
+            logger.error(f"Request error to FastAPI: {e}", exc_info=True)
         except Exception as e:
+            # Catch any other unexpected errors during the request or processing
+            full_response = f"An unexpected error occurred: {e}"
+            response_placeholder.error(full_response) # Display error in the placeholder
+            logger.exception("An unexpected error occurred during API request.") # Logs traceback
+        # After the streaming is complete (or an error occurred), add the final response
+        # to the chat history. This ensures it persists across reruns.
+        st.session_state.chat_history.append({"role": "assistant", "message": full_response})
+        logger.info("Final LLM response added to chat history.")
+        # Rerun the app to display the updated chat history with the final response
         st.rerun()
+    elif submitted and not user_prompt:
+        # Warn user if no prompt is entered for the 'Send' button
+        st.warning("Please enter a prompt before clicking 'Send'.")
+        logger.warning("User attempted to send an empty text prompt.")
+# --- WebRTC Streamer for Microphone Input ---
+webrtc_ctx = None
+if st.session_state.webrtc_state in ["listening", "processing_audio"]:
+    logger.info(f"Initiating webrtc_streamer with state: {st.session_state.webrtc_state}")
+    webrtc_ctx = webrtc_streamer(
+        key="ollama-audio-input", # Unique key for this component
+        mode=WebRtcMode.SENDONLY, # Only send audio from browser to Python
+        audio_html_attrs={
+            "autoPlay": "true",
+            "controls": "",
+            "muted": "muted", # Mute local playback to avoid echo
+        },
+        # Use our custom processor to handle audio frames and VAD
+        in_audio_frames_processor_factory=VADAudioProcessor,
+        client_settings=ClientSettings(
+            rtc_configuration={"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}, # STUN server for NAT traversal
+            media_stream_constraints={"audio": True, "video": False}, # Only request audio stream
+        ),
+    )
+    # Display status messages while recording
+    if webrtc_ctx.state.playing and st.session_state.webrtc_state == "listening":
+        st.info("Microphone active. Speak clearly now...")
+    elif not webrtc_ctx.state.playing and st.session_state.webrtc_state == "listening":
+        st.warning("Waiting for microphone permissions... Please grant access if prompted.")
+    # Check VAD status from the audio processor
+    if webrtc_ctx.audio_processor:
+        processor: VADAudioProcessor = webrtc_ctx.audio_processor
+        # If voice was detected, and now prolonged silence is detected
+        if processor.voice_detected and processor.silent_frames_count >= processor.max_silent_frames:
+            logger.info("VAD detected prolonged silence. Transitioning to processing audio.")
+            # Set state to processing, which will cause a rerun and stop the streamer
+            if st.session_state.webrtc_state == "listening": # Only auto-stop if currently listening
+                st.session_state.webrtc_state = "processing_audio"
+                st.info("Silence detected. Processing audio for transcription...")
+                st.rerun() # Trigger a rerun to process the audio
+# --- Audio Processing and STT Call after Recording Stops ---
+# This block runs when we transition to 'processing_audio' state and the WebRTC session is truly stopped.
+if st.session_state.webrtc_state == "processing_audio" and (webrtc_ctx is None or not webrtc_ctx.state.playing):
+    logger.info("WebRTC session stopped (or never started in processing_audio state). Attempting to get audio.")
+    # Ensure we have an audio processor instance from the stopped session
+    if webrtc_ctx and webrtc_ctx.audio_processor:
+        processor: VADAudioProcessor = webrtc_ctx.audio_processor
+        if processor.audio_buffer.tell() > 0: # Check if any audio data was recorded
+            recorded_audio_bytes = processor.audio_buffer.getvalue()
+            logger.info(f"Recorded audio buffer size: {len(recorded_audio_bytes)} bytes.")
+            # Convert raw 16-bit PCM (from WebRTC) to WAV format using pydub
+            try:
+                audio = AudioSegment(
+                    recorded_audio_bytes,
+                    sample_width=processor.samples_width,
+                    frame_rate=processor.frame_rate,
+                    channels=1 # WebRTC typically provides mono audio
+                )
+                wav_io = io.BytesIO()
+                audio.export(wav_io, format="wav") # Export to WAV format
+                wav_io.seek(0) # Rewind the buffer to the beginning for reading
+                st.info("Sending recorded audio to STT backend for transcription...")
+                # Send the WAV audio bytes to the FastAPI STT endpoint
+                files = {'audio_file': ('audio.wav', wav_io.getvalue(), 'audio/wav')}
+                response = requests.post(FASTAPI_STT_URL, files=files)
+                response.raise_for_status() # Raise HTTPError for bad responses
+                transcription_result = response.json()
+                transcribed_text = transcription_result.get("transcribed_text", "").strip()
+                st.session_state.transcribed_text = transcribed_text # Store transcribed text
+                logger.info(f"Transcription received: {transcribed_text[:100]}...")
+                if transcribed_text:
+                    st.success("Transcription complete!")
+                else:
+                    st.warning("No clear speech detected or transcription resulted in empty text.")
+            except requests.exceptions.RequestException as e:
+                st.error(f"Error sending audio to STT backend: {e}")
+                logger.error(f"STT Backend error: {e}", exc_info=True)
+                st.session_state.transcribed_text = "" # Clear on error
+            except Exception as e:
+                st.error(f"An unexpected error occurred during audio processing or STT: {e}")
+                logger.exception("Unexpected error in STT processing.")
+                st.session_state.transcribed_text = "" # Clear on error
+        else:
+            st.warning("No audio was recorded during the session.")
+            st.session_state.transcribed_text = ""
+        # Reset WebRTC state to idle after processing is complete
+        st.session_state.webrtc_state = "idle"
+        st.rerun() # Rerun to update the text area with transcription and reset UI
+    elif st.session_state.webrtc_state == "processing_audio":
+        st.warning("WebRTC context or audio processor was not available for transcription. Retrying or check permissions.")
+        st.session_state.webrtc_state = "idle" # Reset for next attempt
+        st.rerun()
 # --- Footer: a markdown "---" separator followed by a credits caption ---
 st.markdown("---")
+st.caption("Powered by Ollama, FastAPI, Streamlit, and WebRTC.")