Spaces:

renesistech
/

Spatial-aware

Sleeping

App Files Files Community

noumanjavaid commited on May 7, 2025

Commit

76a264c

verified ·

1 Parent(s): 7e3304a

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +349 -363

src/streamlit_app.py CHANGED Viewed

@@ -5,42 +5,44 @@ import asyncio
 import base64
 import io
 import threading
-import queue # Standard library queue, not asyncio.Queue for thread-safe UI updates if needed
 import traceback
-import time # Keep time for potential future use (e.g., timestamps)
 from dotenv import load_dotenv
 # --- Import main libraries ---
 import cv2
 import pyaudio
 import PIL.Image
-import mss
 from google import genai
 from google.genai import types
 # --- Configuration ---
-load_dotenv()
 # Audio configuration
 FORMAT = pyaudio.paInt16
 CHANNELS = 1
 SEND_SAMPLE_RATE = 16000
-RECEIVE_SAMPLE_RATE = 24000  # According to Gemini documentation
 CHUNK_SIZE = 1024
-AUDIO_QUEUE_MAXSIZE = 20  # Max audio chunks to buffer for playback
 # Video configuration
 VIDEO_FPS_LIMIT = 1  # Send 1 frame per second to the API
-VIDEO_PREVIEW_RESIZE = (640, 480)  # Size for Streamlit preview
-VIDEO_API_RESIZE = (1024, 1024)  # Max size to send to API (adjust if needed)
 # Gemini model configuration
-MODEL = "models/gemini-2.0-flash-live-001" # Ensure this is the correct model for live capabilities
-DEFAULT_MODE = "camera" # Default video input mode
 # System Prompt for the Medical Assistant
-MEDICAL_ASSISTANT_SYSTEM_PROMPT = """You are an AI Medical Assistant. Your primary function is to analyze visual information from the user's camera or screen.
 Your responsibilities are:
 1.  **Visual Observation and Description:** Carefully examine the images or video feed. Describe relevant details you observe.
@@ -56,76 +58,95 @@ Your responsibilities are:
 Example of a disclaimer you might use: "As an AI assistant, I can describe what I see, but I can't provide medical advice or diagnoses. For any health concerns, it's always best to speak with a doctor or other healthcare professional."
 """
-# Initialize Streamlit state
 def init_session_state():
-    if 'initialized' not in st.session_state:
-        st.session_state['initialized'] = False
-    if 'audio_loop' not in st.session_state:
-        st.session_state['audio_loop'] = None
-    if 'chat_messages' not in st.session_state:
-        st.session_state['chat_messages'] = []
-    if 'current_frame' not in st.session_state:
-        st.session_state['current_frame'] = None
-    if 'run_loop' not in st.session_state: # Flag to control the loop from Streamlit
-        st.session_state['run_loop'] = False
-# Initialize all session state variables
 init_session_state()
-# Configure page
-st.set_page_config(page_title="Real-time Medical Assistant", layout="wide")
 # Initialize Gemini client
-# Ensure API key is set in environment variables or .env file
 GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
 if not GEMINI_API_KEY:
     st.error("GEMINI_API_KEY not found. Please set it in your environment variables or a .env file.")
     st.stop()
-client = genai.Client(
-    http_options={"api_version": "v1beta"},
-    api_key=GEMINI_API_KEY,
-)
-# Configure Gemini client and response settings
-CONFIG = types.LiveConnectConfig(
-    response_modalities=["audio", "text"], # Ensure text is also enabled if you want to display AI text directly
     speech_config=types.SpeechConfig(
         voice_config=types.VoiceConfig(
-            prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Puck") # Or other preferred voice
         )
     ),
-    # If the API supports an initial_prompt field in LiveConnectConfig, it would be ideal here.
-    # As of some versions, it might not be directly available, hence sending as first message.
 )
-pya = pyaudio.PyAudio()
-class AudioLoop:
-    def __init__(self, video_mode=DEFAULT_MODE):
-        self.video_mode = video_mode
-        self.audio_in_queue = None  # asyncio.Queue for audio playback
-        self.out_queue = None       # asyncio.Queue for data to Gemini
-        self.session = None
-        # Tasks are managed by TaskGroup now
-        self.running = True # General flag to control async loops
-        self.audio_stream = None # PyAudio input stream
-    async def send_text_to_gemini(self, text_input): # Renamed from send_text to avoid confusion
-        if not text_input or not self.session or not self.running:
-            st.warning("Session not active or no text to send.")
             return
         try:
-            # User messages should typically end the turn for the AI to respond.
-            await self.session.send(input=text_input, end_of_turn=True)
-            # UI update for user message is handled in main Streamlit part
         except Exception as e:
-            st.error(f"Error sending message to Gemini: {str(e)}")
-            traceback.print_exception(e)
-    def _get_frame(self, cap):
-        ret, frame = cap.read()
         if not ret:
             return None
         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
@@ -135,404 +156,369 @@ class AudioLoop:
         preview_img.thumbnail(VIDEO_PREVIEW_RESIZE)
         api_img = img.copy()
-        api_img.thumbnail(VIDEO_API_RESIZE)
         image_io = io.BytesIO()
         api_img.save(image_io, format="jpeg")
-        image_io.seek(0)
-        image_bytes = image_io.read()
         return {
             "preview": preview_img,
-            "api": {
-                "mime_type": "image/jpeg",
-                "data": base64.b64encode(image_bytes).decode()
-            }
         }
-    async def get_frames_from_camera(self): # Renamed for clarity
-        cap = None
         try:
-            cap = await asyncio.to_thread(cv2.VideoCapture, 0)
-            if not cap.isOpened():
-                st.error("Could not open camera.") # This error needs to reach Streamlit UI
-                self.running = False # Stop the loop if camera fails
                 return
-            while self.running:
-                frame_data = await asyncio.to_thread(self._get_frame, cap)
-                if frame_data is None:
-                    await asyncio.sleep(0.01) # Short sleep if frame read fails
-                    continue
-                st.session_state['current_frame'] = frame_data["preview"]
-                if self.out_queue.full():
-                    await self.out_queue.get() # Make space if full to avoid indefinite block
-                await self.out_queue.put(frame_data["api"])
                 await asyncio.sleep(1.0 / VIDEO_FPS_LIMIT)
         except Exception as e:
-            st.error(f"Camera streaming error: {e}")
-            self.running = False
         finally:
-            if cap:
-                await asyncio.to_thread(cap.release)
-    def _get_screen_frame(self): # Renamed for clarity
-        sct = mss.mss()
-        # Use the first monitor
-        monitor_number = 1
-        if len(sct.monitors) > 1: # sct.monitors[0] is all monitors, sct.monitors[1] is primary
-            monitor = sct.monitors[monitor_number]
-        else: # If only one monitor entry (all), just use it.
-             monitor = sct.monitors[0]
-        screenshot = sct.grab(monitor)
-        img = PIL.Image.frombytes("RGB", screenshot.size, screenshot.rgb)
-        preview_img = img.copy()
-        preview_img.thumbnail(VIDEO_PREVIEW_RESIZE)
-        api_img = img.copy()
-        api_img.thumbnail(VIDEO_API_RESIZE)
-        image_io = io.BytesIO()
-        api_img.save(image_io, format="jpeg")
-        image_io.seek(0)
-        image_bytes = image_io.read()
-        return {
-            "preview": preview_img,
-            "api": {
-                "mime_type": "image/jpeg",
-                "data": base64.b64encode(image_bytes).decode()
             }
-        }
-    async def get_frames_from_screen(self): # Renamed for clarity
         try:
-            while self.running:
-                frame_data = await asyncio.to_thread(self._get_screen_frame)
-                if frame_data is None:
-                    await asyncio.sleep(0.01)
-                    continue
-                st.session_state['current_frame'] = frame_data["preview"]
-                if self.out_queue.full():
-                    await self.out_queue.get()
-                await self.out_queue.put(frame_data["api"])
                 await asyncio.sleep(1.0 / VIDEO_FPS_LIMIT)
         except Exception as e:
-            st.error(f"Screen capture error: {e}")
-            self.running = False
-    async def send_realtime_media(self): # Renamed
         try:
-            while self.running:
-                if not self.session:
-                    await asyncio.sleep(0.1) # Wait for session to be established
                     continue
                 try:
-                    msg = await asyncio.wait_for(self.out_queue.get(), timeout=0.5) # Timeout to prevent blocking indefinitely
-                    if self.session and self.running: # Re-check session and running status
-                         await self.session.send(input=msg) # No end_of_turn for continuous media
-                    self.out_queue.task_done()
                 except asyncio.TimeoutError:
-                    continue # No new media to send
                 except Exception as e:
-                    if self.running: # Only log if we are supposed to be running
-                        print(f"Error in send_realtime_media: {e}") # Log to console
-                        # Consider if this error should stop the loop or be reported to UI
-                        await asyncio.sleep(0.1) # Prevent tight loop on error
         except asyncio.CancelledError:
-            print("send_realtime_media task cancelled.")
         finally:
-            print("send_realtime_media task finished.")
-    async def listen_for_audio(self): # Renamed
-        self.audio_stream = None
         try:
             mic_info = await asyncio.to_thread(pya.get_default_input_device_info)
-            self.audio_stream = await asyncio.to_thread(
                 pya.open,
-                format=FORMAT,
-                channels=CHANNELS,
-                rate=SEND_SAMPLE_RATE,
-                input=True,
-                input_device_index=mic_info["index"],
                 frames_per_buffer=CHUNK_SIZE,
             )
             print("Microphone stream opened.")
-            while self.running:
                 try:
-                    # exception_on_overflow=False helps avoid crashes on buffer overflows
-                    data = await asyncio.to_thread(self.audio_stream.read, CHUNK_SIZE, exception_on_overflow=False)
-                    if self.out_queue.full():
-                        await self.out_queue.get() # Make space
-                    await self.out_queue.put({"data": data, "mime_type": "audio/pcm"})
-                except IOError as e: # PyAudio specific IO errors
-                    if e.errno == pyaudio.paInputOverflowed:
-                        print("PyAudio Input overflowed. Skipping frame.") # Or log to a file/UI
                     else:
-                        print(f"PyAudio read error: {e}")
-                        self.running = False # Potentially stop on other IOErrors
-                        break
                 except Exception as e:
-                    print(f"Error in listen_for_audio: {e}")
-                    await asyncio.sleep(0.01) # Prevent tight loop on error
         except Exception as e:
-            st.error(f"Failed to open microphone: {e}") # This error needs to reach Streamlit UI
-            self.running = False
         finally:
-            if self.audio_stream:
-                await asyncio.to_thread(self.audio_stream.stop_stream)
-                await asyncio.to_thread(self.audio_stream.close)
-            print("Microphone stream closed.")
-    async def receive_gemini_responses(self): # Renamed
         try:
-            while self.running:
-                if not self.session:
-                    await asyncio.sleep(0.1) # Wait for session
-                    continue
                 try:
-                    # Blocking receive, but should yield if self.running becomes false or session closes
-                    turn = self.session.receive()
-                    async for response in turn:
-                        if not self.running: break # Exit if stop signal received during iteration
-                        if data := response.data: # Audio data
-                            if not self.audio_in_queue.full():
-                                self.audio_in_queue.put_nowait(data)
                             else:
-                                print("Playback audio queue full, discarding data.")
-                        if text := response.text: # Text part of the response
-                            # Queue this for the main thread to update Streamlit
-                            st.session_state['chat_messages'].append({"role": "assistant", "content": text})
-                            # Consider st.experimental_rerun() if immediate update is critical and safe
-                            # For now, rely on Streamlit's natural refresh from chat_input or other interactions
-                    # Handle turn completion logic if needed (e.g., clear audio queue for interruptions)
-                    # For simplicity, current model might not need complex interruption handling here.
-                    # If interruptions are implemented (e.g., user speaks while AI is speaking),
-                    # you might want to clear self.audio_in_queue here.
                 except types.generation_types.StopCandidateException:
-                    print("Gemini indicated end of response (StopCandidateException).") # Normal
                 except Exception as e:
-                    if self.running:
                         print(f"Error receiving from Gemini: {e}")
-                        await asyncio.sleep(0.1) # Prevent tight loop on error
         except asyncio.CancelledError:
-            print("receive_gemini_responses task cancelled.")
         finally:
-            print("receive_gemini_responses task finished.")
-    async def play_audio_responses(self): # Renamed
-        playback_stream = None
         try:
-            playback_stream = await asyncio.to_thread(
-                pya.open,
-                format=FORMAT, # Assuming Gemini audio matches this, or adjust
-                channels=CHANNELS,
-                rate=RECEIVE_SAMPLE_RATE,
-                output=True,
             )
             print("Audio playback stream opened.")
-            while self.running:
                 try:
-                    bytestream = await asyncio.wait_for(self.audio_in_queue.get(), timeout=0.5)
-                    await asyncio.to_thread(playback_stream.write, bytestream)
-                    self.audio_in_queue.task_done()
                 except asyncio.TimeoutError:
-                    continue # No audio to play
                 except Exception as e:
                     print(f"Error playing audio: {e}")
-                    await asyncio.sleep(0.01) # Prevent tight loop
         except Exception as e:
-            st.error(f"Failed to open audio playback: {e}")
-            self.running = False
         finally:
-            if playback_stream:
-                await asyncio.to_thread(playback_stream.stop_stream)
-                await asyncio.to_thread(playback_stream.close)
-            print("Audio playback stream closed.")
-    def stop_loop(self): # Renamed
-        print("Stop signal received for AudioLoop.")
-        self.running = False
-        # Queues can be an issue for graceful shutdown if tasks are blocked on put/get
-        # Put sentinel values or use timeouts in queue operations
-        if self.out_queue: # For send_realtime_media
-            self.out_queue.put_nowait(None) # Sentinel to unblock .get()
-        if self.audio_in_queue: # For play_audio_responses
-            self.audio_in_queue.put_nowait(None) # Sentinel
-    async def run(self):
-        st.session_state['run_loop'] = True # Indicate loop is running
-        self.running = True
-        print("AudioLoop starting...")
         try:
-            # `client.aio.live.connect` is an async context manager
-            async with client.aio.live.connect(model=MODEL, config=CONFIG) as session:
-                self.session = session
                 print("Gemini session established.")
-                # Send the system prompt first.
                 try:
                     print("Sending system prompt to Gemini...")
-                    # end_of_turn=False means this text is part of the initial context for the first actual user interaction.
-                    await self.session.send(input=MEDICAL_ASSISTANT_SYSTEM_PROMPT, end_of_turn=False)
-                    print("System prompt sent.")
                 except Exception as e:
-                    st.error(f"Failed to send system prompt to Gemini: {str(e)}")
-                    traceback.print_exception(e)
-                    self.running = False # Stop if system prompt fails critical setup
-                    return # Exit run method
-                # Initialize queues within the async context if they depend on loop specifics
-                self.audio_in_queue = asyncio.Queue(maxsize=AUDIO_QUEUE_MAXSIZE)
-                self.out_queue = asyncio.Queue(maxsize=10) # For outgoing media to Gemini API
                 async with asyncio.TaskGroup() as tg:
-                    # Start all background tasks
-                    print("Starting child tasks...")
-                    tg.create_task(self.send_realtime_media(), name="send_realtime_media")
-                    tg.create_task(self.listen_for_audio(), name="listen_for_audio")
                     if self.video_mode == "camera":
-                        tg.create_task(self.get_frames_from_camera(), name="get_frames_from_camera")
                     elif self.video_mode == "screen":
-                        tg.create_task(self.get_frames_from_screen(), name="get_frames_from_screen")
-                    # If mode is "none", no video task is started.
-                    tg.create_task(self.receive_gemini_responses(), name="receive_gemini_responses")
-                    tg.create_task(self.play_audio_responses(), name="play_audio_responses")
-                    print("All child tasks created.")
-                # TaskGroup will wait for all tasks to complete here.
-                # If self.running is set to False, tasks should ideally notice and exit.
-                print("TaskGroup finished.")
         except asyncio.CancelledError:
-            print("AudioLoop.run() was cancelled.") # Usually from TaskGroup cancellation
-        except ExceptionGroup as eg: # From TaskGroup if child tasks fail
-            st.error(f"Error in async tasks: {eg.exceptions[0]}") # Show first error in UI
-            print(f"ExceptionGroup caught in AudioLoop.run(): {eg}")
             for i, exc in enumerate(eg.exceptions):
-                print(f"  Exception {i+1}/{len(eg.exceptions)} in TaskGroup: {type(exc).__name__}: {exc}")
                 traceback.print_exception(type(exc), exc, exc.__traceback__)
-        except Exception as e:
-            st.error(f"Critical error in session: {str(e)}")
-            print(f"Exception caught in AudioLoop.run(): {type(e).__name__}: {e}")
-            traceback.print_exception(e)
         finally:
-            print("AudioLoop.run() finishing, cleaning up...")
-            self.running = False # Ensure all loops stop
-            st.session_state['run_loop'] = False # Signal that the loop has stopped
-            # `self.session` will be closed automatically by the `async with` block for `client.aio.live.connect`
-            self.session = None
-            # Other stream closures are handled in their respective task's finally blocks
-            print("AudioLoop finished.")
-def main():
-    st.title("Gemini Live Medical Assistant")
     with st.sidebar:
-        st.subheader("Settings")
         video_mode_options = ["camera", "screen", "none"]
-        # Ensure default video mode is in options, find its index
-        default_video_index = video_mode_options.index(DEFAULT_MODE) if DEFAULT_MODE in video_mode_options else 0
-        video_mode = st.selectbox("Video Source", video_mode_options, index=default_video_index)
-        if not st.session_state.get('run_loop', False): # If loop is not running
-            if st.button("Start Session", key="start_session_button"):
-                st.session_state.chat_messages = [{ # Clear chat and add system message
                     "role": "system",
                     "content": (
-                        "Medical Assistant activated. The AI has been instructed on its role to visually assist you. "
-                        "Please remember, this AI cannot provide medical diagnoses or replace consultation with a healthcare professional."
                     )
                 }]
-                st.session_state.current_frame = None # Clear previous frame
-                audio_loop = AudioLoop(video_mode=video_mode)
-                st.session_state.audio_loop = audio_loop
-                # Run the asyncio event loop in a new thread
-                # daemon=True allows Streamlit to exit even if this thread is stuck (though it shouldn't be)
-                threading.Thread(target=lambda: asyncio.run(audio_loop.run()), daemon=True).start()
-                st.success("Session started. Initializing assistant...")
-                st.rerun() # Rerun to update button state and messages
-        else: # If loop is running
-            if st.button("Stop Session", key="stop_session_button"):
-                if st.session_state.audio_loop:
-                    st.session_state.audio_loop.stop_loop() # Signal async tasks to stop
-                # Wait a moment for tasks to attempt cleanup (optional, can be tricky)
-                # time.sleep(1)
-                st.session_state.audio_loop = None
-                st.warning("Session stopping...")
-                st.rerun() # Rerun to update UI
-    # Main content area
-    col1, col2 = st.columns([2, 3]) # Adjust column ratio as needed
-    with col1:
-        st.subheader("Video Feed")
-        if st.session_state.get('run_loop', False) and st.session_state.get('current_frame') is not None:
-            st.image(st.session_state['current_frame'], caption="Live Feed" if video_mode != "none" else "Video Disabled", use_column_width=True)
-        elif video_mode != "none":
-            st.info("Video feed will appear here when the session starts.")
         else:
-            st.info("Video input is disabled.")
-    with col2:
-        st.subheader("Chat with Medical Assistant")
-        chat_container = st.container() # For scrolling chat
-        with chat_container:
-            for msg in st.session_state.chat_messages:
-                with st.chat_message(msg["role"]):
-                    st.write(msg["content"])
-        prompt = st.chat_input("Ask about what you're showing...", key="chat_input_box", disabled=not st.session_state.get('run_loop', False))
-        if prompt:
-            st.session_state.chat_messages.append({"role": "user", "content": prompt})
-            st.rerun() # Show user message immediately
-            if st.session_state.audio_loop:
-                # The text needs to be sent from within the asyncio loop or by scheduling it.
-                # A simple way is to call a method on audio_loop that uses asyncio.create_task or similar.
-                # For direct call from thread to asyncio loop, ensure it's thread-safe.
-                # A better way is to put the text into a queue that send_text_to_gemini reads from,
-                # or use asyncio.run_coroutine_threadsafe if the loop is known.
-                # Current send_text_to_gemini is an async method.
-                # We need to run it in the event loop of the audio_loop's thread.
-                loop = asyncio.get_event_loop_policy().get_event_loop() # Get current thread's loop (might not be the one)
-                if st.session_state.audio_loop.session: # Ensure session exists
-                    # This is a simplified approach; proper thread-safe coroutine scheduling is more robust.
-                    # Consider using asyncio.run_coroutine_threadsafe if audio_loop.run() exposes its loop.
-                    asyncio.run(st.session_state.audio_loop.send_text_to_gemini(prompt))
                 else:
-                    st.error("Session not fully active to send message.")
             else:
-                st.error("Session is not active. Please start a session.")
-            # Rerun after processing to show potential AI response (if text part comes quickly)
-            # st.rerun() # This might be too frequent, rely on receive_gemini_responses to update chat
 if __name__ == "__main__":
-    # Global PyAudio termination hook (optional, for very clean shutdowns)
-    # def cleanup_pyaudio():
-    #     print("Terminating PyAudio globally.")
-    #     pya.terminate()
-    # import atexit
-    # atexit.register(cleanup_pyaudio)
-    main()

 import base64
 import io
 import threading
 import traceback
+import time # For potential delays or timestamps if needed in future
+import atexit # For cleanup actions on exit
 from dotenv import load_dotenv
 # --- Import main libraries ---
 import cv2
 import pyaudio
 import PIL.Image
+import mss # For screen capture
 from google import genai
 from google.genai import types
 # --- Configuration ---
+load_dotenv() # Load environment variables from .env file
 # Audio configuration
 FORMAT = pyaudio.paInt16
 CHANNELS = 1
 SEND_SAMPLE_RATE = 16000
+RECEIVE_SAMPLE_RATE = 24000  # Gemini documentation recommendation
 CHUNK_SIZE = 1024
+AUDIO_PLAYBACK_QUEUE_MAXSIZE = 50  # Buffer for audio chunks from Gemini for playback
+MEDIA_OUT_QUEUE_MAXSIZE = 20 # Buffer for audio/video frames to send to Gemini
 # Video configuration
 VIDEO_FPS_LIMIT = 1  # Send 1 frame per second to the API
+VIDEO_PREVIEW_RESIZE = (640, 480)  # Size for Streamlit preview display
+VIDEO_API_RESIZE = (1024, 1024)  # Max size for images sent to API (aspect ratio preserved)
 # Gemini model configuration
+MODEL_NAME = "models/gemini-2.0-flash-live-001" # VERIFY THIS MODEL NAME IS CORRECT FOR LIVE API
+DEFAULT_VIDEO_MODE = "camera" # Default video input mode ("camera", "screen", "none")
 # System Prompt for the Medical Assistant
+MEDICAL_ASSISTANT_SYSTEM_PROMPT = """You are an AI Medical Assistant. Your primary function is to analyze visual information from the user's camera or screen and respond via voice.
 Your responsibilities are:
 1.  **Visual Observation and Description:** Carefully examine the images or video feed. Describe relevant details you observe.
 Example of a disclaimer you might use: "As an AI assistant, I can describe what I see, but I can't provide medical advice or diagnoses. For any health concerns, it's always best to speak with a doctor or other healthcare professional."
 """
+# Initialize PyAudio instance globally
+pya = pyaudio.PyAudio()
+# Ensure PyAudio is terminated on exit
+def cleanup_pyaudio():
+    print("Terminating PyAudio instance.")
+    pya.terminate()
+atexit.register(cleanup_pyaudio)
+# Initialize Streamlit session state variables
 def init_session_state():
+    defaults = {
+        'app_initialized': True, # To ensure this runs only once per session
+        'session_active': False,
+        'audio_loop_instance': None,
+        'chat_messages': [],
+        'current_frame_preview': None,
+        'video_mode_selection': DEFAULT_VIDEO_MODE,
+    }
+    for key, value in defaults.items():
+        if key not in st.session_state:
+            st.session_state[key] = value
 init_session_state()
+# Configure Streamlit page
+st.set_page_config(page_title="Live Medical Assistant", layout="wide")
 # Initialize Gemini client
 GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
 if not GEMINI_API_KEY:
     st.error("GEMINI_API_KEY not found. Please set it in your environment variables or a .env file.")
     st.stop()
+# Use 'client' consistently for the genai.Client instance
+client = None # Define client in the global scope
+try:
+    client = genai.Client(
+        http_options={"api_version": "v1beta"}, # Required for live
+        api_key=GEMINI_API_KEY,
+    )
+except Exception as e:
+    st.error(f"Failed to initialize Gemini client: {e}")
+    st.stop()
+# Gemini LiveConnectConfig
+LIVE_CONNECT_CONFIG = types.LiveConnectConfig(
+    response_modalities=["audio", "text"], # Expect both audio and text responses
     speech_config=types.SpeechConfig(
         voice_config=types.VoiceConfig(
+            prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Zephyr") # Changed voice to Zephyr
         )
     ),
 )
+class AudioVisualLoop:
+    def __init__(self, video_mode_setting=DEFAULT_VIDEO_MODE):
+        self.video_mode = video_mode_setting
+        self.gemini_session = None
+        self.async_event_loop = None # To store the loop for thread-safe calls
+        self.audio_playback_queue = None # asyncio.Queue for audio from Gemini
+        self.media_to_gemini_queue = None # asyncio.Queue for audio/video to Gemini
+        self.is_running = True # Flag to control all async tasks
+        self.mic_stream = None # PyAudio input stream
+        self.playback_stream = None # PyAudio output stream
+        self.camera_capture = None # OpenCV VideoCapture object
+    async def send_text_input_to_gemini(self, user_text):
+        if not user_text or not self.gemini_session or not self.is_running:
+            print("Warning: Cannot send text. Session not active or no text.")
             return
         try:
+            print(f"Sending text to Gemini: {user_text}")
+            await self.gemini_session.send(input=user_text, end_of_turn=True)
         except Exception as e:
+            st.error(f"Error sending text message to Gemini: {e}")
+            print(f"Traceback for send_text_input_to_gemini: {traceback.format_exc()}")
+    def _process_camera_frame(self):
+        if not self.camera_capture or not self.camera_capture.isOpened():
+            print("Camera not available or not open.")
+            return None
+        ret, frame = self.camera_capture.read()
         if not ret:
+            print("Failed to read frame from camera.")
             return None
         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
         preview_img.thumbnail(VIDEO_PREVIEW_RESIZE)
         api_img = img.copy()
+        api_img.thumbnail(VIDEO_API_RESIZE) # Preserves aspect ratio
         image_io = io.BytesIO()
         api_img.save(image_io, format="jpeg")
+        image_bytes = image_io.getvalue()
         return {
             "preview": preview_img,
+            "api_data": {"mime_type": "image/jpeg", "data": base64.b64encode(image_bytes).decode()}
         }
+    async def stream_camera_frames(self):
         try:
+            self.camera_capture = await asyncio.to_thread(cv2.VideoCapture, 0) # 0 for default camera
+            if not self.camera_capture.isOpened():
+                st.error("Could not open camera. Please check permissions and availability.")
+                self.is_running = False
                 return
+            while self.is_running:
+                frame_data = await asyncio.to_thread(self._process_camera_frame)
+                if frame_data:
+                    st.session_state['current_frame_preview'] = frame_data["preview"]
+                    if self.media_to_gemini_queue.full():
+                        await self.media_to_gemini_queue.get() # Make space
+                    await self.media_to_gemini_queue.put(frame_data["api_data"])
                 await asyncio.sleep(1.0 / VIDEO_FPS_LIMIT)
         except Exception as e:
+            # st.error(f"Camera streaming error: {e}") # Avoid st.error from non-main thread if problematic
+            print(f"Camera streaming error: {e}\nTraceback: {traceback.format_exc()}")
+            self.is_running = False
         finally:
+            if self.camera_capture:
+                await asyncio.to_thread(self.camera_capture.release)
+                self.camera_capture = None
+            print("Camera streaming task finished.")
+    def _process_screen_frame(self):
+        with mss.mss() as sct:
+            monitor_index = 1
+            if len(sct.monitors) <= monitor_index:
+                monitor_index = 0
+            monitor = sct.monitors[monitor_index]
+            sct_img = sct.grab(monitor)
+            img = PIL.Image.frombytes("RGB", (sct_img.width, sct_img.height), sct_img.rgb)
+            preview_img = img.copy()
+            preview_img.thumbnail(VIDEO_PREVIEW_RESIZE)
+            api_img = img.copy()
+            api_img.thumbnail(VIDEO_API_RESIZE)
+            image_io = io.BytesIO()
+            api_img.save(image_io, format="jpeg")
+            image_bytes = image_io.getvalue()
+            return {
+                "preview": preview_img,
+                "api_data": {"mime_type": "image/jpeg", "data": base64.b64encode(image_bytes).decode()}
             }
+    async def stream_screen_frames(self):
         try:
+            while self.is_running:
+                frame_data = await asyncio.to_thread(self._process_screen_frame)
+                if frame_data:
+                    st.session_state['current_frame_preview'] = frame_data["preview"]
+                    if self.media_to_gemini_queue.full():
+                        await self.media_to_gemini_queue.get()
+                    await self.media_to_gemini_queue.put(frame_data["api_data"])
                 await asyncio.sleep(1.0 / VIDEO_FPS_LIMIT)
         except Exception as e:
+            # st.error(f"Screen capture error: {e}")
+            print(f"Screen capture error: {e}\nTraceback: {traceback.format_exc()}")
+            self.is_running = False
+        finally:
+            print("Screen streaming task finished.")
+    async def stream_media_to_gemini(self):
         try:
+            while self.is_running:
+                if not self.gemini_session:
+                    await asyncio.sleep(0.1)
                     continue
                 try:
+                    media_chunk = await asyncio.wait_for(self.media_to_gemini_queue.get(), timeout=1.0)
+                    if media_chunk and self.gemini_session and self.is_running:
+                         await self.gemini_session.send(input=media_chunk)
+                    if media_chunk: # Avoid task_done on None if used as sentinel
+                        self.media_to_gemini_queue.task_done()
                 except asyncio.TimeoutError:
+                    continue
                 except Exception as e:
+                    if self.is_running:
+                        print(f"Error in stream_media_to_gemini: {e}")
+                        await asyncio.sleep(0.1)
         except asyncio.CancelledError:
+            print("stream_media_to_gemini task cancelled.")
         finally:
+            print("Media streaming to Gemini task finished.")
+    async def capture_microphone_audio(self):
         try:
             mic_info = await asyncio.to_thread(pya.get_default_input_device_info)
+            self.mic_stream = await asyncio.to_thread(
                 pya.open,
+                format=FORMAT, channels=CHANNELS, rate=SEND_SAMPLE_RATE,
+                input=True, input_device_index=mic_info["index"],
                 frames_per_buffer=CHUNK_SIZE,
             )
             print("Microphone stream opened.")
+            while self.is_running:
                 try:
+                    audio_data = await asyncio.to_thread(self.mic_stream.read, CHUNK_SIZE, exception_on_overflow=False)
+                    if self.media_to_gemini_queue.full():
+                        await self.media_to_gemini_queue.get()
+                    await self.media_to_gemini_queue.put({"data": audio_data, "mime_type": "audio/pcm"})
+                except IOError as e:
+                    if e.errno == pyaudio.paInputOverflowed: # type: ignore
+                        print("Microphone Input overflowed. Skipping.")
                     else:
+                        print(f"Microphone read IOError: {e}")
+                        self.is_running = False; break
                 except Exception as e:
+                    print(f"Error in capture_microphone_audio: {e}")
+                    await asyncio.sleep(0.01)
         except Exception as e:
+            # st.error(f"Failed to open microphone: {e}. Please check permissions.")
+            print(f"Failed to open microphone: {e}\nTraceback: {traceback.format_exc()}")
+            self.is_running = False
         finally:
+            if self.mic_stream:
+                await asyncio.to_thread(self.mic_stream.stop_stream)
+                await asyncio.to_thread(self.mic_stream.close)
+                self.mic_stream = None
+            print("Microphone capture task finished.")
+    async def process_gemini_responses(self):
         try:
+            while self.is_running:
+                if not self.gemini_session:
+                    await asyncio.sleep(0.1); continue
                 try:
+                    turn_response = self.gemini_session.receive()
+                    async for chunk in turn_response:
+                        if not self.is_running: break
+                        if audio_data := chunk.data:
+                            if not self.audio_playback_queue.full():
+                                self.audio_playback_queue.put_nowait(audio_data)
                             else:
+                                print("Audio playback queue full, discarding data.")
+                        if text_response := chunk.text:
+                            # Schedule Streamlit update from the main thread if possible,
+                            # or use st.session_state and rely on rerun.
+                            st.session_state['chat_messages'] = st.session_state['chat_messages'] + [{"role": "assistant", "content": text_response}]
+                            # If immediate UI update is needed and safe:
+                            # st.experimental_rerun() # Use with caution from background threads
                 except types.generation_types.StopCandidateException:
+                    print("Gemini response stream ended (StopCandidateException).")
                 except Exception as e:
+                    if self.is_running:
                         print(f"Error receiving from Gemini: {e}")
+                        await asyncio.sleep(0.1)
         except asyncio.CancelledError:
+            print("process_gemini_responses task cancelled.")
         finally:
+            print("Gemini response processing task finished.")
+    async def play_gemini_audio(self):
         try:
+            self.playback_stream = await asyncio.to_thread(
+                pya.open, format=FORMAT, channels=CHANNELS, rate=RECEIVE_SAMPLE_RATE, output=True
             )
             print("Audio playback stream opened.")
+            while self.is_running:
                 try:
+                    audio_chunk = await asyncio.wait_for(self.audio_playback_queue.get(), timeout=1.0)
+                    if audio_chunk: # Check if not None (sentinel)
+                        await asyncio.to_thread(self.playback_stream.write, audio_chunk)
+                    if audio_chunk: # Avoid task_done on None
+                        self.audio_playback_queue.task_done()
                 except asyncio.TimeoutError:
+                    continue
                 except Exception as e:
                     print(f"Error playing audio: {e}")
+                    await asyncio.sleep(0.01)
         except Exception as e:
+            # st.error(f"Failed to open audio playback stream: {e}")
+            print(f"Failed to open audio playback stream: {e}\nTraceback: {traceback.format_exc()}")
+            self.is_running = False
         finally:
+            if self.playback_stream:
+                await asyncio.to_thread(self.playback_stream.stop_stream)
+                await asyncio.to_thread(self.playback_stream.close)
+                self.playback_stream = None
+            print("Audio playback task finished.")
+    def signal_stop(self):
+        print("Signal to stop AudioVisualLoop received.")
+        self.is_running = False
+        if self.media_to_gemini_queue: self.media_to_gemini_queue.put_nowait(None)
+        if self.audio_playback_queue: self.audio_playback_queue.put_nowait(None)
+    async def run_main_loop(self):
+        self.async_event_loop = asyncio.get_running_loop()
+        self.is_running = True
+        # st.session_state['session_active'] = True # Set by caller in Streamlit thread
+        print("AudioVisualLoop starting...")
+        # Ensure client is not None before using (should be handled by global init)
+        if client is None:
+            print("Error: Gemini client is not initialized.")
+            st.error("Critical Error: Gemini client failed to initialize. Cannot start session.")
+            st.session_state['session_active'] = False
+            return
         try:
+            # Use the global 'client' instance
+            async with client.aio.live.connect(model=MODEL_NAME, config=LIVE_CONNECT_CONFIG) as session:
+                self.gemini_session = session
                 print("Gemini session established.")
                 try:
                     print("Sending system prompt to Gemini...")
+                    await self.gemini_session.send(input=MEDICAL_ASSISTANT_SYSTEM_PROMPT, end_of_turn=False)
+                    print("System prompt sent successfully.")
                 except Exception as e:
+                    # st.error(f"Failed to send system prompt to Gemini: {e}")
+                    print(f"Failed to send system prompt to Gemini: {e}\nTraceback: {traceback.format_exc()}")
+                    self.is_running = False
+                    return
+                self.audio_playback_queue = asyncio.Queue(maxsize=AUDIO_PLAYBACK_QUEUE_MAXSIZE)
+                self.media_to_gemini_queue = asyncio.Queue(maxsize=MEDIA_OUT_QUEUE_MAXSIZE)
                 async with asyncio.TaskGroup() as tg:
+                    print("Creating async tasks...")
+                    tg.create_task(self.stream_media_to_gemini(), name="stream_media_to_gemini")
+                    tg.create_task(self.capture_microphone_audio(), name="capture_microphone_audio")
                     if self.video_mode == "camera":
+                        tg.create_task(self.stream_camera_frames(), name="stream_camera_frames")
                     elif self.video_mode == "screen":
+                        tg.create_task(self.stream_screen_frames(), name="stream_screen_frames")
+                    tg.create_task(self.process_gemini_responses(), name="process_gemini_responses")
+                    tg.create_task(self.play_gemini_audio(), name="play_gemini_audio")
+                    print("All async tasks created in TaskGroup.")
+                print("TaskGroup finished execution.")
         except asyncio.CancelledError:
+            print("AudioVisualLoop.run_main_loop() was cancelled.")
+        except ExceptionGroup as eg:
+            # st.error(f"An error occurred in one of the concurrent tasks: {eg.exceptions[0]}")
+            print(f"ExceptionGroup caught in AudioVisualLoop: {eg}")
             for i, exc in enumerate(eg.exceptions):
+                print(f"  Task Exception {i+1}/{len(eg.exceptions)}: {type(exc).__name__}: {exc}")
                 traceback.print_exception(type(exc), exc, exc.__traceback__)
+        except Exception as e: # This catches the ConnectionClosedError
+            # st.error(f"A critical error occurred in the main session loop: {e}")
+            print(f"General Exception in AudioVisualLoop: {type(e).__name__}: {e}")
+            traceback.print_exception(e) # Print full traceback for debugging
         finally:
+            print("AudioVisualLoop.run_main_loop() finishing...")
+            self.is_running = False
+            # st.session_state['session_active'] = False # Set by caller in Streamlit thread
+            self.gemini_session = None
+            print("AudioVisualLoop finished.")
+# --- Streamlit UI ---
+def run_streamlit_app():
+    st.title("Live AI Medical Assistant")
     with st.sidebar:
+        st.header("Session Control")
         video_mode_options = ["camera", "screen", "none"]
+        current_video_mode = st.session_state.get('video_mode_selection', DEFAULT_VIDEO_MODE)
+        selected_video_mode = st.selectbox(
+            "Video Source:",
+            video_mode_options,
+            index=video_mode_options.index(current_video_mode),
+            disabled=st.session_state.get('session_active', False)
+        )
+        if selected_video_mode != current_video_mode: # Update if changed and not active
+             st.session_state['video_mode_selection'] = selected_video_mode
+        if not st.session_state.get('session_active', False):
+            if st.button("🚀 Start Session", type="primary", use_container_width=True):
+                st.session_state['session_active'] = True # Set active before starting thread
+                st.session_state['chat_messages'] = [{
                     "role": "system",
                     "content": (
+                        "Medical Assistant is activating. The AI has been instructed on its role to visually assist you. "
+                        "Remember: This AI cannot provide medical diagnoses or replace consultation with a healthcare professional."
                     )
                 }]
+                st.session_state['current_frame_preview'] = None
+                audio_loop_instance = AudioVisualLoop(video_mode_setting=st.session_state['video_mode_selection'])
+                st.session_state['audio_loop_instance'] = audio_loop_instance
+                threading.Thread(target=lambda: asyncio.run(audio_loop_instance.run_main_loop()), daemon=True).start()
+                st.success("Session starting... Please wait for initialization.")
+                time.sleep(1)
+                st.rerun()
+        else:
+            if st.button("🛑 Stop Session", type="secondary", use_container_width=True):
+                if st.session_state.get('audio_loop_instance'):
+                    st.session_state['audio_loop_instance'].signal_stop()
+                st.session_state['audio_loop_instance'] = None
+                st.session_state['session_active'] = False # Set inactive
+                st.warning("Session stopping... Please wait.")
+                time.sleep(1)
+                st.rerun()
+    col_video, col_chat = st.columns([2, 3])
+    with col_video:
+        st.subheader("Live Feed")
+        if st.session_state.get('session_active', False) and st.session_state.get('video_mode_selection', DEFAULT_VIDEO_MODE) != "none":
+            if st.session_state.get('current_frame_preview') is not None:
+                st.image(st.session_state['current_frame_preview'], caption="Live Video Feed", use_column_width=True)
+            else:
+                st.info("Waiting for video feed...")
+        elif st.session_state.get('video_mode_selection', DEFAULT_VIDEO_MODE) != "none":
+            st.info("Video feed will appear here once the session starts.")
         else:
+            st.info("Video input is disabled for this session.")
+    with col_chat:
+        st.subheader("Chat with Assistant")
+        # Display chat messages from session state
+        for msg in st.session_state.get('chat_messages', []):
+            with st.chat_message(msg["role"]):
+                st.write(msg["content"])
+        user_chat_input = st.chat_input(
+            "Type your message or ask about the video...",
+            key="user_chat_input_box",
+            disabled=not st.session_state.get('session_active', False)
+        )
+        if user_chat_input:
+            # Append user message and rerun to display it immediately
+            st.session_state['chat_messages'] = st.session_state.get('chat_messages', []) + [{"role": "user", "content": user_chat_input}]
+            loop_instance = st.session_state.get('audio_loop_instance')
+            if loop_instance and loop_instance.async_event_loop and loop_instance.gemini_session:
+                if loop_instance.async_event_loop.is_running():
+                    asyncio.run_coroutine_threadsafe(
+                        loop_instance.send_text_input_to_gemini(user_chat_input),
+                        loop_instance.async_event_loop
+                    )
                 else:
+                    st.error("Session event loop is not running. Cannot send message.")
+            elif not loop_instance or not st.session_state.get('session_active', False):
+                 st.error("Session is not active. Please start a session to send messages.")
             else:
+                 st.warning("Session components not fully ready. Please wait a moment.")
+            st.rerun() # Rerun to show user message and any quick AI text response
 if __name__ == "__main__":
+    run_streamlit_app()