llm-voice-chat

Sleeping

App Files Files Community

llm-voice-chat / app.py

Codeblockz

Update app.py

4e1c01f verified 11 months ago

raw

history blame contribute delete

2.52 kB

	import os
	import time

	import gradio as gr
	import numpy as np
	from dotenv import load_dotenv
	from distil_whisper_fastrtc import get_stt_model
	from fastapi import FastAPI
	from fastrtc import (
	AdditionalOutputs,
	ReplyOnPause,
	Stream,
	get_tts_model,
	get_hf_turn_credentials,
	)
	from gradio.utils import get_space
	from groq import Groq
	from numpy.typing import NDArray

	# Module-level setup: everything below runs once, when this module is imported.
	# load_dotenv() is called before the clients are built — presumably so that
	# credentials kept in a local .env file are visible to them (TODO confirm
	# which environment variables Groq() and the TURN helper actually read).
	load_dotenv()
	groq_client = Groq()  # chat-completions client used by response()
	tts_model = get_tts_model()  # text-to-speech; response() streams audio from it
	stt_model = get_stt_model()  # speech-to-text; response() calls .stt(audio) on it
	# Passed to Stream(rtc_configuration=...) further down. token=None presumably
	# defers to a token found in the environment — verify against fastrtc docs.
	credentials = get_hf_turn_credentials(token=None)

	# See "Talk to Claude" in Cookbook for an example of how to keep
	# track of the chat history.
	def response(
	    audio: tuple[int, NDArray[np.int16 | np.float32]],
	    chatbot: list[dict] | None = None,
	):
	    """Transcribe one spoken turn, ask the LLM for a reply, and speak it back.

	    This generator is wrapped by ``ReplyOnPause`` and used as the stream
	    handler. It interleaves two kinds of yielded values: ``AdditionalOutputs``
	    carrying the updated chat history, and audio chunks produced by the TTS
	    model.

	    Args:
	        audio: The captured utterance, passed straight to ``stt_model.stt``.
	            Presumably a ``(sample_rate, samples)`` pair — confirm against
	            the fastrtc handler contract.
	        chatbot: Chat history as a list of dicts with ``"role"`` and
	            ``"content"`` keys. ``None`` (or an empty list) starts a new
	            history. A non-empty list is extended in place.

	    Yields:
	        ``AdditionalOutputs(chatbot)`` right after the user's transcript is
	        recorded, then each audio chunk of the spoken reply, then a final
	        ``AdditionalOutputs(chatbot)`` that includes the assistant message.
	    """
	    chatbot = chatbot or []
	    # Only the role/content pair is forwarded to the LLM; any extra keys the
	    # history entries may carry are dropped.
	    messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]

	    start = time.time()
	    text = stt_model.stt(audio)
	    print("transcription", time.time() - start)
	    print("prompt", text)

	    # Publish the user's turn before the (slower) LLM call so the history
	    # updates as soon as transcription finishes.
	    chatbot.append({"role": "user", "content": text})
	    yield AdditionalOutputs(chatbot)

	    messages.append({"role": "user", "content": text})
	    completion = groq_client.chat.completions.create(
	        model="llama-3.1-8b-instant",
	        max_tokens=512,
	        messages=messages,  # type: ignore
	    )
	    # The message content may be None; normalise it once so the chat history
	    # and the TTS input agree and the history never stores a None content.
	    response_text = completion.choices[0].message.content or ""

	    chatbot.append({"role": "assistant", "content": response_text})

	    # Stream the synthesized reply chunk by chunk as the TTS model produces it.
	    for audio_chunk in tts_model.stream_tts_sync(response_text):
	        yield audio_chunk
	    yield AdditionalOutputs(chatbot)


	def _keep_latest_history(_previous, latest):
	    """Return the newly emitted chat history, discarding the previous value."""
	    return latest


	# Chat display that is both fed into response() and updated from its
	# AdditionalOutputs.
	chatbot = gr.Chatbot(type="messages")

	# Wrap response() so it is driven by ReplyOnPause with 16 kHz input.
	_pause_handler = ReplyOnPause(response, input_sample_rate=16000)

	# Audio send/receive stream; the concurrency and time limits are applied
	# only when get_space() returns a truthy value.
	stream = Stream(
	    handler=_pause_handler,
	    modality="audio",
	    mode="send-receive",
	    rtc_configuration=credentials,
	    additional_inputs=[chatbot],
	    additional_outputs=[chatbot],
	    additional_outputs_handler=_keep_latest_history,
	    concurrency_limit=5 if get_space() else None,
	    time_limit=90 if get_space() else None,
	    ui_args={"title": "LLM Voice Chat (Powered by Groq, and WebRTC ⚡️)"},
	)

	# Serve the auto-generated Stream UI from a FastAPI application at the root
	# path, instead of hand-building a separate front end.
	app = gr.mount_gradio_app(FastAPI(), stream.ui, path="/")


	if __name__ == "__main__":
	    # Set GRADIO_SSR_MODE to "false" before the UI is launched.
	    os.environ["GRADIO_SSR_MODE"] = "false"

	    # The MODE environment variable used to pick between two branches that
	    # both launched the same UI on the same port, so the check was redundant
	    # and a single unconditional launch is equivalent.
	    stream.ui.launch(server_port=7860)