Spaces:

jeremierostan
/

Real-Time-Experts

Sleeping

App Files Files Community

jeremierostan committed on Mar 5, 2025

Commit

05a24ef

verified ·

1 Parent(s): 87ab4e6

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -14

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import base64
 import json
 import os
 import pathlib
-from typing import AsyncGenerator, Literal, Dict, List
 import gradio as gr
 import numpy as np
@@ -52,7 +52,6 @@ class GeminiHandler(AsyncStreamHandler):
         expected_layout: Literal["mono"] = "mono",
         output_sample_rate: int = 24000,
         output_frame_size: int = 480,
-        system_prompt: str = SYSTEM_PROMPTS["default"]
     ) -> None:
         super().__init__(
             expected_layout,
@@ -63,14 +62,13 @@ class GeminiHandler(AsyncStreamHandler):
         self.input_queue: asyncio.Queue = asyncio.Queue()
         self.output_queue: asyncio.Queue = asyncio.Queue()
         self.quit: asyncio.Event = asyncio.Event()
-        self.system_prompt = system_prompt
     def copy(self) -> "GeminiHandler":
         return GeminiHandler(
             expected_layout="mono",
             output_sample_rate=self.output_sample_rate,
             output_frame_size=self.output_frame_size,
-            system_prompt=self.system_prompt
         )
     async def start_up(self):
@@ -85,16 +83,14 @@ class GeminiHandler(AsyncStreamHandler):
                 self.system_prompt = custom_prompt
         else:
             api_key, voice_name = None, "Puck"
         client = genai.Client(
             api_key=api_key or os.getenv("GEMINI_API_KEY"),
             http_options={"api_version": "v1alpha"},
         )
-        # Convert the system prompt to a list as required by the API
-        system_instruction_list = [self.system_prompt]
-        # Create config with system_instruction (not system_instructions)
         config = LiveConnectConfig(
             response_modalities=["AUDIO"],  # type: ignore
             speech_config=SpeechConfig(
@@ -104,12 +100,41 @@ class GeminiHandler(AsyncStreamHandler):
                     )
                 )
             ),
-            system_instruction=system_instruction_list,  # Note: singular "instruction" and passed as a list
         )
         async with client.aio.live.connect(
             model="gemini-2.0-flash-exp", config=config
         ) as session:
             async for audio in session.start_stream(
                 stream=self.stream(), mime_type="audio/pcm"
             ):
@@ -168,8 +193,8 @@ stream = Stream(
             value="default",
         ),
         gr.Textbox(
-            label="Custom Prompt (overrides preset if not empty)",
-            placeholder="Enter a custom system prompt",
             value="",
         ),
     ],
@@ -180,8 +205,8 @@ class InputData(BaseModel):
     webrtc_id: str
     voice_name: str
     api_key: str
-    prompt_key: str
-    custom_prompt: str
 app = FastAPI()
@@ -200,7 +225,6 @@ async def index():
     rtc_config = get_twilio_turn_credentials() if get_space() else None
     html_content = (current_dir / "index.html").read_text()
     html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
-    # Also inject the system prompts into the HTML
     html_content = html_content.replace("__SYSTEM_PROMPTS__", json.dumps(SYSTEM_PROMPTS))
     return HTMLResponse(content=html_content)

 import json
 import os
 import pathlib
+from typing import AsyncGenerator, Literal
 import gradio as gr
 import numpy as np
         expected_layout: Literal["mono"] = "mono",
         output_sample_rate: int = 24000,
         output_frame_size: int = 480,
     ) -> None:
         super().__init__(
             expected_layout,
         self.input_queue: asyncio.Queue = asyncio.Queue()
         self.output_queue: asyncio.Queue = asyncio.Queue()
         self.quit: asyncio.Event = asyncio.Event()
+        self.system_prompt = None
     def copy(self) -> "GeminiHandler":
         return GeminiHandler(
             expected_layout="mono",
             output_sample_rate=self.output_sample_rate,
             output_frame_size=self.output_frame_size,
         )
     async def start_up(self):
                 self.system_prompt = custom_prompt
         else:
             api_key, voice_name = None, "Puck"
+            self.system_prompt = None
         client = genai.Client(
             api_key=api_key or os.getenv("GEMINI_API_KEY"),
             http_options={"api_version": "v1alpha"},
         )
+        # Create basic config
         config = LiveConnectConfig(
             response_modalities=["AUDIO"],  # type: ignore
             speech_config=SpeechConfig(
                     )
                 )
             ),
         )
+        # Get model reference
+        model = client.get_model("gemini-2.0-flash-exp")
+        # Apply system prompt if available
+        if self.system_prompt:
+            try:
+                # First try with system_instruction method (newer API versions)
+                model = model.with_system_instructions(self.system_prompt)
+                print(f"Using system prompt via with_system_instructions: {self.system_prompt[:50]}...")
+            except Exception as e:
+                print(f"Could not apply system prompt via with_system_instructions: {e}")
+                # If that fails, we'll handle it in the session
+                pass
+        # Create session
         async with client.aio.live.connect(
             model="gemini-2.0-flash-exp", config=config
         ) as session:
+            # If we couldn't set the system prompt earlier and we have one,
+            # try to send it as the first message
+            if self.system_prompt:
+                try:
+                    # Try to send system prompt as first message
+                    await session.send_message(f"SYSTEM: {self.system_prompt}\n\nPlease acknowledge this system instruction.")
+                    # Wait for a response
+                    async for response in session.stream_response():
+                        # Just need one response to confirm it was received
+                        print("System instruction acknowledged")
+                        break
+                except Exception as e:
+                    print(f"Could not send system prompt as message: {e}")
+            # Now start the audio stream
             async for audio in session.start_stream(
                 stream=self.stream(), mime_type="audio/pcm"
             ):
             value="default",
         ),
         gr.Textbox(
+            label="Custom Prompt",
+            placeholder="Enter a custom system prompt (overrides preset if not empty)",
             value="",
         ),
     ],
     webrtc_id: str
     voice_name: str
     api_key: str
+    prompt_key: str = ""
+    custom_prompt: str = ""
 app = FastAPI()
     rtc_config = get_twilio_turn_credentials() if get_space() else None
     html_content = (current_dir / "index.html").read_text()
     html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
     html_content = html_content.replace("__SYSTEM_PROMPTS__", json.dumps(SYSTEM_PROMPTS))
     return HTMLResponse(content=html_content)