WWMachine committed on
Commit
4181d96
·
verified ·
1 Parent(s): ca988f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -1
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
4
 
@@ -68,3 +68,124 @@ with gr.Blocks() as demo:
68
 
69
  if __name__ == "__main__":
70
  demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
4
 
 
68
 
69
  if __name__ == "__main__":
70
  demo.launch()
71
+ """
72
+ import gradio as gr
73
+ import requests
74
+ from huggingface_hub import InferenceClient
75
+
76
import os

# Deepgram credential used by the STT/TTS helpers. It is read from the
# DEEPGRAM_API_KEY environment variable so the real key never has to be
# committed to the repository; the placeholder is kept only as a fallback so
# the module still imports when the variable is unset.
DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY", "YOUR_DEEPGRAM_API_KEY")
77
+
78
def deepgram_stt(audio_file_path):
    """Transcribe a recorded audio file with the Deepgram speech-to-text API.

    Args:
        audio_file_path: Path of the audio file to upload. Its bytes are
            posted with an ``audio/wav`` content type.

    Returns:
        The ``transcript`` of the first alternative of the first channel in
        the Deepgram response.

    Raises:
        ValueError: If no audio file path was supplied.
        requests.HTTPError: If Deepgram answers with an error status code.
        RuntimeError: If the response body does not contain a transcript.
    """
    # Fail early with a clear message instead of letting open(None) raise an
    # unrelated TypeError when the user has not recorded anything.
    if not audio_file_path:
        raise ValueError("No audio was recorded; nothing to transcribe.")

    url = "https://api.deepgram.com/v1/listen"
    headers = {
        "Authorization": f"Token {DEEPGRAM_API_KEY}",
        "Content-Type": "audio/wav",
    }

    with open(audio_file_path, "rb") as f:
        audio = f.read()

    # A timeout keeps a stalled connection from hanging the request forever.
    response = requests.post(url, headers=headers, data=audio, timeout=60)
    # Surface auth/quota/format errors as HTTP errors rather than as a
    # KeyError while digging through an error payload below.
    response.raise_for_status()

    payload = response.json()
    try:
        return payload["results"]["channels"][0]["alternatives"][0]["transcript"]
    except (KeyError, IndexError, TypeError) as err:
        raise RuntimeError(
            "Deepgram response did not contain a transcript."
        ) from err
93
+
94
+
95
def deepgram_tts(text):
    """Convert model output to speech using the Deepgram text-to-speech API.

    Args:
        text: The text to synthesize.

    Returns:
        Path of a newly created file holding the audio bytes returned by
        Deepgram. The file is not deleted by this function.

    Raises:
        ValueError: If ``text`` is empty or only whitespace.
        requests.HTTPError: If Deepgram answers with an error status code.
    """
    import tempfile

    if not text or not text.strip():
        raise ValueError("No text to synthesize.")

    url = "https://api.deepgram.com/v1/speak?model=aura-asteria-en"  # any model
    headers = {
        "Authorization": f"Token {DEEPGRAM_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {"text": text}

    r = requests.post(url, json=payload, headers=headers, timeout=60)
    # Without this check an error response (JSON) would be written to disk
    # and handed to the audio player as if it were sound.
    r.raise_for_status()

    # Each call gets its own file: a single shared "response.wav" would be
    # overwritten when several requests are served at the same time.
    # NOTE(review): the ".wav" suffix is kept from the original code, but the
    # request does not ask for a specific encoding/container -- confirm the
    # format Deepgram actually returns for this URL.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        f.write(r.content)
        audio_out = f.name

    return audio_out
114
+
115
+
116
def respond_audio(
    audio_input,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
    """Run one voice turn: speech-to-text, chat completion, text-to-speech.

    This is a generator. Every yielded value is a ``(chat_history, audio)``
    pair matching the two outputs the click handler is wired to: the chat
    history as a list of ``{"role", "content"}`` dicts, and either ``None``
    (while the reply is still streaming) or the path of the synthesized
    reply audio.

    Args:
        audio_input: Path of the recorded audio file, or a falsy value when
            nothing was recorded.
        history: Previous conversation as a list of ``{"role", "content"}``
            dicts (may be ``None`` or empty).
        system_message: System prompt placed first in the model messages.
        max_tokens: Forwarded to ``chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``chat_completion`` as ``temperature``.
        top_p: Forwarded to ``chat_completion`` as ``top_p``.
        hf_token: OAuth token whose ``.token`` authenticates the inference
            client.

    Raises:
        gr.Error: If no audio was recorded.
    """
    if not audio_input:
        raise gr.Error("Please record a message before pressing send.")

    client = InferenceClient(
        token=hf_token.token,
        model="openai/gpt-oss-20b",
    )

    history = list(history or [])

    # ---- 1. Speech -> text ----
    user_message = deepgram_stt(audio_input)

    # Only role/content are forwarded to the model; any extra per-message
    # keys the UI may attach are not part of the chat-completion payload.
    messages = [{"role": "system", "content": system_message}]
    messages.extend(
        {"role": turn["role"], "content": turn["content"]} for turn in history
    )
    messages.append({"role": "user", "content": user_message})

    # The chatbot output expects the whole conversation, not a bare string,
    # so build the displayed history and show the transcript right away.
    chat = history + [{"role": "user", "content": user_message}]
    yield chat, None

    # ---- 2. Model response ----
    response_text = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if chunk.choices and chunk.choices[0].delta.content:
            response_text += chunk.choices[0].delta.content
            # update text while streaming
            yield chat + [{"role": "assistant", "content": response_text}], None

    final_chat = chat + [{"role": "assistant", "content": response_text}]

    # ---- 3. Text -> audio ----
    # Skip synthesis when the model produced nothing to speak.
    audio_file = deepgram_tts(response_text) if response_text.strip() else None

    yield final_chat, audio_file
157
+
158
+
159
# Voice-chat UI: record audio, send it through respond_audio, and show the
# conversation plus the synthesized reply.
with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.LoginButton()

    gr.Markdown("## 🎤 Voice Chat Mode (Deepgram + GPT-OSS)")

    # Hidden but expandable textbox
    # NOTE(review): this textbox is not wired to any event handler yet.
    with gr.Accordion("Optional: Type Instead of Speaking", open=False):
        typed_message = gr.Textbox(label="Manual Text Input")

    # respond_audio reads and writes history as role/content dicts, so the
    # chatbot must use the "messages" format rather than (user, bot) pairs.
    chatbot = gr.Chatbot(type="messages")

    # `sources=[...]` replaces the removed Gradio 3 `source=` keyword.
    audio_in = gr.Audio(
        sources=["microphone"],
        type="filepath",
        label="Press to Speak",
    )
    audio_out = gr.Audio(label="TTS Output")

    system_message = gr.Textbox(
        value="You are a friendly Chatbot.",
        label="System message",
    )
    max_tokens = gr.Slider(1, 2048, value=512, label="Max new tokens")
    temp = gr.Slider(0.1, 4.0, value=0.7, label="Temperature")
    top_p = gr.Slider(0.1, 1.0, value=0.95, label="Top-p")

    send_button = gr.Button("Send (Voice)")

    # The OAuth token parameter of respond_audio is not listed in `inputs`;
    # only the six UI components are passed explicitly.
    send_button.click(
        respond_audio,
        inputs=[audio_in, chatbot, system_message, max_tokens, temp, top_p],
        outputs=[chatbot, audio_out],
    )

if __name__ == "__main__":
    demo.launch()