Spaces:

RSHVR
/

Command_RTC

Sleeping

App Files Community

RSHVR committed on Mar 30, 2025

Commit

359bac7

verified ·

1 Parent(s): 9cfc5dc

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -17

app.py CHANGED Viewed

@@ -1,20 +1,13 @@
 import os
 import gradio as gr
-from fastrtc import Stream, ReplyOnPause, AdditionalOutputs
 # Import your custom models
 from tts import tortoise_tts, TortoiseOptions
 from stt import whisper_stt
 import cohereAPI
-# Try to import HumAware-VAD, install if not available
-try:
-    from humaware_vad import HumAwareVADModel
-except ImportError:
-    print("Installing humaware-vad...")
-    subprocess.check_call([sys.executable, "-m", "pip", "install", "humaware-vad"])
-    from humaware_vad import HumAwareVADModel
 # Environment variables
 COHERE_API_KEY = os.getenv("COHERE_API_KEY")
 system_message = "You respond concisely, in about 15 words or less"
@@ -22,9 +15,6 @@ system_message = "You respond concisely, in about 15 words or less"
 # Initialize conversation history
 conversation_history = []
-# Initialize the HumAware-VAD model
-vad_model = HumAwareVADModel()
 # Create a handler function that uses both your custom models
 def response(audio):
     global conversation_history
@@ -32,7 +22,7 @@ def response(audio):
     # Convert speech to text using your Whisper model
     user_message = whisper_stt.stt(audio)
-    # Yield the transcription
     yield AdditionalOutputs(user_message)
     # Send text to Cohere API
@@ -56,18 +46,21 @@ def response(audio):
     for chunk in tortoise_tts.stream_tts_sync(response_text, tts_options):
         yield chunk
-# Create the FastRTC stream with HumAware-VAD for better pause detection
 stream = Stream(
-    handler=ReplyOnPause(response, model=vad_model),  # Use HumAware-VAD model
     modality="audio",
     mode="send-receive",
     additional_outputs=[gr.Textbox(label="Transcription")],
-    additional_outputs_handler=lambda old, new: new if old is None else f"{old}\nUser: {new}"
 )
 # Launch the Gradio UI
 if __name__ == "__main__":
-    # Update your requirements.txt to include humaware-vad
     stream.ui.launch(
         server_name="0.0.0.0",
         share=False,

 import os
 import gradio as gr
+from fastrtc import Stream, AdditionalOutputs
+from fastrtc_walkie_talkie import WalkieTalkie
 # Import your custom models
 from tts import tortoise_tts, TortoiseOptions
 from stt import whisper_stt
 import cohereAPI
 # Environment variables
 COHERE_API_KEY = os.getenv("COHERE_API_KEY")
 system_message = "You respond concisely, in about 15 words or less"
 # Initialize conversation history
 conversation_history = []
 # Create a handler function that uses both your custom models
 def response(audio):
     global conversation_history
     # Convert speech to text using your Whisper model
     user_message = whisper_stt.stt(audio)
+    # Yield the transcription as additional output
     yield AdditionalOutputs(user_message)
     # Send text to Cohere API
     for chunk in tortoise_tts.stream_tts_sync(response_text, tts_options):
         yield chunk
+# Create the FastRTC stream with WalkieTalkie for turn detection
 stream = Stream(
+    handler=WalkieTalkie(response),  # Use WalkieTalkie instead of ReplyOnPause
     modality="audio",
     mode="send-receive",
     additional_outputs=[gr.Textbox(label="Transcription")],
+    additional_outputs_handler=lambda old, new: new if old is None else f"{old}\nUser: {new}",
+    ui_args={
+        "title": "Voice Assistant (Walkie-Talkie Style)",
+        "subtitle": "Say 'over' to finish your turn. For example, 'What's the weather like today? over.'"
+    }
 )
 # Launch the Gradio UI
 if __name__ == "__main__":
     stream.ui.launch(
         server_name="0.0.0.0",
         share=False,