"""import gradio as gr
from huggingface_hub import InferenceClient
def respond(
message,
history: list[dict[str, str]],
system_message,
max_tokens,
temperature,
top_p,
hf_token: gr.OAuthToken,
):
#For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
messages = [{"role": "system", "content": system_message}]
messages.extend(history)
messages.append({"role": "user", "content": message})
response = ""
for message in client.chat_completion(
messages,
max_tokens=max_tokens,
stream=True,
temperature=temperature,
top_p=top_p,
):
choices = message.choices
token = ""
if len(choices) and choices[0].delta.content:
token = choices[0].delta.content
response += token
yield response
#For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
chatbot = gr.ChatInterface(
respond,
type="messages",
additional_inputs=[
gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.95,
step=0.05,
label="Top-p (nucleus sampling)",
),
],
)
with gr.Blocks() as demo:
with gr.Sidebar():
gr.LoginButton()
chatbot.render()
if __name__ == "__main__":
demo.launch()
"""
import os

import gradio as gr
import requests
from huggingface_hub import InferenceClient

# Read the Deepgram API key from the environment (e.g. a Space secret)
# rather than hardcoding a credential in the source.
DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY", "")
def deepgram_stt(audio_file_path):
    """Send the user's microphone recording to Deepgram STT and return the transcript."""
    url = "https://api.deepgram.com/v1/listen"
    headers = {
        "Authorization": f"Token {DEEPGRAM_API_KEY}",
        "Content-Type": "audio/wav",
    }
    with open(audio_file_path, "rb") as f:
        audio = f.read()
    response = requests.post(url, headers=headers, data=audio).json()
    # The transcript sits at results -> channels[0] -> alternatives[0] in
    # Deepgram's response JSON.
    return response["results"]["channels"][0]["alternatives"][0]["transcript"]
def deepgram_tts(text):
    """Convert the model's reply to speech with Deepgram TTS and return a wav file path."""
    url = "https://api.deepgram.com/v1/speak?model=aura-asteria-en"  # any Deepgram TTS model works here
    headers = {
        "Authorization": f"Token {DEEPGRAM_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {"text": text}
    audio_out = "response.wav"
    r = requests.post(url, json=payload, headers=headers)
    with open(audio_out, "wb") as f:
        f.write(r.content)
    return audio_out
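

# Hedged sketch, not called anywhere below: both helpers above assume the
# Deepgram request succeeds and hand its body straight to json()/file
# writing. A thin wrapper such as this would surface HTTP errors early via
# requests' standard raise_for_status() instead:
def _deepgram_post_checked(url, headers, **kwargs):
    r = requests.post(url, headers=headers, **kwargs)
    r.raise_for_status()
    return r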
def respond_audio(
    audio_input,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
    """STT -> chat model -> TTS, streaming the text reply as it is generated."""
    client = InferenceClient(
        token=hf_token.token,
        model="openai/gpt-oss-20b",
    )

    # ---- 1. Speech -> text ----
    user_message = deepgram_stt(audio_input)

    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": user_message})

    # The Chatbot component (type="messages") expects the full message list,
    # so keep an updated copy of the history to yield while streaming.
    history = history + [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": ""},
    ]

    # ---- 2. Model response ----
    response_text = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if len(chunk.choices) and chunk.choices[0].delta.content:
            response_text += chunk.choices[0].delta.content
            history[-1]["content"] = response_text
            yield history, None  # update the chat while streaming

    # ---- 3. Text -> audio ----
    audio_file = deepgram_tts(response_text)
    yield history, audio_file
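

# Hedged sketch of a text-only counterpart, assuming the optional typed
# input below should follow the same pipeline: it skips the STT step and
# returns the reply in one shot (no token streaming). It is not wired up
# in the UI; doing so would look roughly like
#   typed_message.submit(respond_text, [typed_message, chatbot, system_message,
#                                       max_tokens, temp, top_p], [chatbot, audio_out])
def respond_text(
    text_input,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": text_input})
    result = client.chat_completion(
        messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    reply = result.choices[0].message.content
    history = history + [
        {"role": "user", "content": text_input},
        {"role": "assistant", "content": reply},
    ]
    return history, deepgram_tts(reply)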
with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.LoginButton()

    gr.Markdown("## 🎤 Voice Chat Mode (Deepgram + GPT-OSS)")

    # Hidden but expandable textbox (not wired to a handler yet).
    with gr.Accordion("Optional: Type Instead of Speaking", open=False):
        typed_message = gr.Textbox(label="Manual Text Input")

    chatbot = gr.Chatbot(type="messages")
    audio_in = gr.Audio(label="Press to Speak", type="filepath")
    audio_out = gr.Audio(label="TTS Output")

    system_message = gr.Textbox(
        value="You are a friendly Chatbot.",
        label="System message",
    )
    max_tokens = gr.Slider(1, 2048, value=512, label="Max new tokens")
    temp = gr.Slider(0.1, 4.0, value=0.7, label="Temperature")
    top_p = gr.Slider(0.1, 1.0, value=0.95, label="Top-p")

    send_button = gr.Button("Send (Voice)")
    send_button.click(
        respond_audio,
        inputs=[audio_in, chatbot, system_message, max_tokens, temp, top_p],
        outputs=[chatbot, audio_out],
    )
if __name__ == "__main__":
    demo.launch()