Spaces:

fastrtc
/

echo-audio

Running

App Files Community

freddyaboulton HF Staff committed on Feb 22, 2025

Commit

b105679

verified ·

1 Parent(s): 06820f1

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

app.py +7 -52

app.py CHANGED Viewed

@@ -1,66 +1,21 @@
-import os
-import time
 import numpy as np
-from dotenv import load_dotenv
 from fastapi import FastAPI
 from fastapi.responses import RedirectResponse
-from fastrtc import (
-    ReplyOnPause,
-    Stream,
-    get_stt_model,
-    get_tts_model,
-)
 from gradio.utils import get_space
-from numpy.typing import NDArray
-from openai import OpenAI
-load_dotenv()
-sambanova_client = OpenAI(
-    api_key=os.getenv("SAMBANOVA_API_KEY"), base_url="https://api.sambanova.ai/v1"
-)
-stt_model = get_stt_model()
-tts_model = get_tts_model()
-chat_history = [
-    {
-        "role": "system",
-        "content": (
-            "You are a helpful assistant having a spoken conversation."
-            "Please keep your answers short and concise."
-        ),
-    }
-]
-def echo(audio: tuple[int, NDArray[np.int16]]):
-    prompt = stt_model.stt(audio)
-    print("prompt", prompt)
-    chat_history.append({"role": "user", "content": prompt})
-    start_time = time.time()
-    response = sambanova_client.chat.completions.create(
-        model="Meta-Llama-3.2-3B-Instruct",
-        messages=chat_history,
-        max_tokens=200,
-    )
-    end_time = time.time()
-    print("time taken inference", end_time - start_time)
-    prompt = response.choices[0].message.content
-    chat_history.append({"role": "assistant", "content": prompt})
-    start_time = time.time()
-    for audio_chunk in tts_model.stream_tts_sync(prompt):
-        yield audio_chunk
-    end_time = time.time()
-    print("time taken tts", end_time - start_time)
 stream = Stream(
-    handler=ReplyOnPause(echo),
     modality="audio",
     mode="send-receive",
-    rtc_configuration=None,  # get_twilio_turn_credentials() if get_space() else None,
     concurrency_limit=20 if get_space() else None,
 )

 import numpy as np
 from fastapi import FastAPI
 from fastapi.responses import RedirectResponse
+from fastrtc import ReplyOnPause, Stream, get_twilio_turn_credentials
 from gradio.utils import get_space
+def detection(audio: tuple[int, np.ndarray]):
+    # Implement any iterator that yields audio
+    # See "LLM Voice Chat" for a more complete example
+    yield audio
 stream = Stream(
+    handler=ReplyOnPause(detection),
     modality="audio",
     mode="send-receive",
+    rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
     concurrency_limit=20 if get_space() else None,
 )