# LLM voice chat demo (DeepSeek + ElevenLabs + FastRTC), deployable as a
# Hugging Face Space.
# NOTE(review): the original header lines read "Spaces: / Runtime error /
# Runtime error" — residue scraped from the hosting page, not program text.
# Standard library
import os
import time

# Third-party
import gradio as gr
import numpy as np
import requests
from dotenv import load_dotenv
from elevenlabs import ElevenLabs
from fastapi import FastAPI
from fastrtc import (
    AdditionalOutputs,
    ReplyOnPause,
    Stream,
    get_stt_model,
    get_twilio_turn_credentials,
)
from gradio.utils import get_space
from numpy.typing import NDArray

# Load environment variables from a local .env file (API keys, MODE, Twilio).
load_dotenv()
| # Initialize DeepSeek client | |
class DeepSeekAPI:
    """Minimal client for the DeepSeek chat-completions REST API."""

    def __init__(self, api_key):
        # The key is sent as a Bearer token on every request.
        self.api_key = api_key

    def chat_completion(self, messages, temperature=0.7, max_tokens=512):
        """POST a chat completion request and return the parsed JSON body.

        Args:
            messages: list of {"role", "content"} dicts in OpenAI format.
            temperature: sampling temperature forwarded to the API.
            max_tokens: completion length cap forwarded to the API.

        Returns:
            The API's JSON response. On any HTTP or network failure a
            response-shaped fallback dict is returned instead, so callers
            can always index ["choices"][0]["message"]["content"].
        """
        url = "https://api.deepseek.com/v1/chat/completions"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
        }
        payload = {
            "model": "deepseek-chat",
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        fallback = {
            "choices": [
                {
                    "message": {
                        "content": "I'm sorry, I encountered an error processing your request."
                    }
                }
            ]
        }
        try:
            # timeout= prevents the voice handler from hanging forever on a
            # stalled connection (the original call had no timeout at all).
            response = requests.post(url, json=payload, headers=headers, timeout=30)
        except requests.RequestException as exc:
            # Network-level failures degrade the same way HTTP errors do,
            # instead of crashing the audio stream mid-conversation.
            print(f"DeepSeek API request failed: {exc}")
            return fallback
        if response.status_code != 200:
            print(f"DeepSeek API error: {response.status_code} - {response.text}")
            return fallback
        return response.json()
# Initialize clients
deepseek_client = DeepSeekAPI(api_key=os.getenv("DEEPSEEK_API_KEY"))
tts_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
stt_model = get_stt_model()

# Twilio TURN credentials for WebRTC. The helper takes no arguments — it
# reads its Twilio configuration from environment variables directly.
twilio_credentials = get_twilio_turn_credentials()

# Report Twilio status at startup.
print(
    "Twilio TURN credentials successfully configured"
    if twilio_credentials
    else "No Twilio credentials found or invalid credentials"
)
# Handler for one voice-conversation turn.
def response(
    audio: tuple[int, NDArray[np.int16 | np.float32]],
    chatbot: list[dict] | None = None,
):
    """Transcribe a user utterance, query DeepSeek, and stream back TTS audio.

    Yields interleaved AdditionalOutputs (updated chat history) and
    (sample_rate, pcm_chunk) audio tuples for the FastRTC stream.
    """
    history = chatbot or []
    # Mirror the visible chat history into the LLM message list.
    messages = [{"role": turn["role"], "content": turn["content"]} for turn in history]

    t0 = time.time()
    text = stt_model.stt(audio)
    print("transcription", time.time() - t0)
    print("prompt", text)

    history.append({"role": "user", "content": text})
    # Surface the user's words in the UI before the LLM round-trip.
    yield AdditionalOutputs(history)
    messages.append({"role": "user", "content": text})

    # DeepSeek replaces the earlier Groq LLM backend.
    reply = deepseek_client.chat_completion(messages=messages, max_tokens=512)
    response_text = reply["choices"][0]["message"]["content"]
    history.append({"role": "assistant", "content": response_text})

    # Stream ElevenLabs 24 kHz PCM back as (rate, ndarray) chunks.
    for chunk in tts_client.text_to_speech.convert_as_stream(
        text=response_text,
        voice_id="JBFqnCBsd6RMkjVDRZzb",
        model_id="eleven_multilingual_v2",
        output_format="pcm_24000",
    ):
        yield (24000, np.frombuffer(chunk, dtype=np.int16).reshape(1, -1))

    yield AdditionalOutputs(history)
# Chat history widget, wired in as both an extra input and an extra output.
chatbot = gr.Chatbot(type="messages")

on_space = get_space()  # apply tighter limits only when hosted on HF Spaces
stream = Stream(
    modality="audio",
    mode="send-receive",
    handler=ReplyOnPause(response, input_sample_rate=16000),
    additional_outputs_handler=lambda a, b: b,
    additional_inputs=[chatbot],
    additional_outputs=[chatbot],
    rtc_configuration=twilio_credentials,  # Always use Twilio credentials
    concurrency_limit=5 if on_space else None,
    time_limit=90 if on_space else None,
    ui_args={"title": "LLM Voice Chat (Powered by DeepSeek, ElevenLabs, and WebRTC ⚡️)"},
)

# Mount the stream's Gradio UI at the root of a FastAPI app.
app = gr.mount_gradio_app(FastAPI(), stream.ui, path="/")
if __name__ == "__main__":
    # `os` is already imported at module level; the redundant local
    # `import os` from the original has been removed.
    os.environ["GRADIO_SSR_MODE"] = "false"
    # MODE=PHONE serves via fastphone; any other value — including unset or
    # "UI" — launches the Gradio UI. The original's "UI" and else branches
    # were byte-identical, so they are collapsed into one.
    if os.getenv("MODE") == "PHONE":
        stream.fastphone(host="0.0.0.0", port=7860)
    else:
        stream.ui.launch(server_port=7860)