Spaces:
Sleeping
Sleeping
| import os | |
| import time | |
| import gradio as gr | |
| import numpy as np | |
| from dotenv import load_dotenv | |
| from distil_whisper_fastrtc import get_stt_model | |
| from fastapi import FastAPI | |
| from fastrtc import ( | |
| AdditionalOutputs, | |
| ReplyOnPause, | |
| Stream, | |
| get_tts_model, | |
| get_hf_turn_credentials, | |
| ) | |
| from gradio.utils import get_space | |
| from groq import Groq | |
| from numpy.typing import NDArray | |
# Load environment configuration first, so that the clients constructed
# below can see any values it provides.
load_dotenv()

# Chat-completion client, constructed with no explicit arguments.
groq_client = Groq()

# Speech models are created once at import time and reused by every call
# to the `response` handler.
stt_model = get_stt_model()
tts_model = get_tts_model()

# Credentials passed to the Stream below as its `rtc_configuration`.
credentials = get_hf_turn_credentials(token=None)
| # See "Talk to Claude" in Cookbook for an example of how to keep | |
| # track of the chat history. | |
def response(
    audio: tuple[int, NDArray[np.int16 | np.float32]],
    chatbot: list[dict] | None = None,
):
    """Transcribe one spoken turn, ask the LLM for a reply, and speak it.

    Args:
        audio: The captured utterance, passed unchanged to
            ``stt_model.stt``. Annotated as an ``(int, ndarray)`` pair.
        chatbot: Chat history as a list of dicts carrying ``"role"`` and
            ``"content"`` keys. ``None`` or an empty list starts a fresh
            history; a non-empty list is appended to in place.

    Yields:
        ``AdditionalOutputs(chatbot)`` right after transcription (history
        including the new user turn), then every chunk produced by
        ``tts_model.stream_tts_sync`` for the reply, and finally
        ``AdditionalOutputs(chatbot)`` again with the assistant turn added.
    """
    chatbot = chatbot or []
    # Only role/content are forwarded to the LLM; any other keys on the
    # history entries are dropped.
    messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]

    start = time.time()
    text = stt_model.stt(audio)
    print("transcription", time.time() - start)
    print("prompt", text)

    chatbot.append({"role": "user", "content": text})
    # Publish the user's turn before the (slower) LLM and TTS steps run.
    yield AdditionalOutputs(chatbot)

    messages.append({"role": "user", "content": text})
    completion = groq_client.chat.completions.create(
        model="llama-3.1-8b-instant",
        max_tokens=512,
        messages=messages,  # type: ignore
    )
    # The message content may be None. Normalise it once so the history
    # never stores None: these entries are fed back into `messages` on the
    # next turn, and the same string is what gets synthesised below.
    response_text = completion.choices[0].message.content or ""

    chatbot.append({"role": "assistant", "content": response_text})

    # Stream the synthesised reply chunk by chunk as it is produced.
    for audio_chunk in tts_model.stream_tts_sync(response_text):
        yield audio_chunk

    # Publish the history again, now including the assistant's reply.
    yield AdditionalOutputs(chatbot)
# Chat transcript component; it is wired in below as both an additional
# input to and an additional output of the stream.
chatbot = gr.Chatbot(type="messages")

stream = Stream(
    handler=ReplyOnPause(response, input_sample_rate=16000),
    modality="audio",
    mode="send-receive",
    rtc_configuration=credentials,
    additional_inputs=[chatbot],
    additional_outputs=[chatbot],
    # Of the two values the handler receives, return the second unchanged.
    additional_outputs_handler=lambda _previous, latest: latest,
    # The concurrency and time caps apply only when get_space() is truthy;
    # otherwise both are left as None.
    concurrency_limit=None if not get_space() else 5,
    time_limit=None if not get_space() else 90,
    ui_args={"title": "LLM Voice Chat (Powered by Groq, and WebRTC ⚡️)"},
)

# Mount the UI generated by the Stream on a FastAPI app at the root path,
# rather than building a UI by hand.
app = gr.mount_gradio_app(FastAPI(), stream.ui, path="/")
if __name__ == "__main__":
    # Set GRADIO_SSR_MODE to "false" before launching when run as a script.
    # `os` is already imported at the top of the file.
    os.environ["GRADIO_SSR_MODE"] = "false"

    # The previous MODE == "UI" check had two identical branches, so the UI
    # is launched on port 7860 regardless of that variable.
    stream.ui.launch(server_port=7860)