datbkpro committed on
Commit
2fec4ad
·
verified ·
1 Parent(s): bd182b1

Update ui/tabs.py

Browse files
Files changed (1) hide show
  1. ui/tabs.py +157 -144
ui/tabs.py CHANGED
@@ -52,29 +52,35 @@ def create_all_tabs(audio_service: AudioService, chat_service: ChatService,
52
  create_language_info_tab(rag_system.multilingual_manager)
53
  with gr.Tab("Stream Object Detection"):
54
  create_streaming_object_detection()
55
- def create_sambanova_voice_tab(sambanova_service):
56
- """Tạo tab Sambanova AI với Voice Input/Output hoàn chỉnh"""
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  with gr.Blocks() as sambanova_tab:
59
- gr.Markdown("## 🤖 Sambanova AI - Voice & Text Complete")
60
- gr.Markdown("Trò chuyện với AI - Hỗ trợ voice input/output hoàn chỉnh")
61
 
62
  # State variables
63
  chatbot = gr.Chatbot(
64
  type="messages",
65
  value=[],
66
  label="💬 Hội thoại",
67
- height=500,
68
- render=False
69
  )
70
  conversation_state = gr.State(value=[])
71
- audio_output_state = gr.State(value=None)
72
 
73
  with gr.Row():
74
  with gr.Column(scale=1):
75
- # Header
76
- gr.Markdown("### 💬 Chat với AI")
77
-
78
  # Model selection
79
  model_dropdown = gr.Dropdown(
80
  choices=sambanova_service.get_available_models(),
@@ -82,23 +88,24 @@ def create_sambanova_voice_tab(sambanova_service):
82
  label="Chọn Model"
83
  )
84
 
85
- # Language selection for TTS
86
  language_dropdown = gr.Dropdown(
87
  choices=['vi', 'en', 'ja', 'ko', 'zh', 'fr', 'es', 'de'],
88
  value='vi',
89
- label="Ngôn ngữ TTS"
 
90
  )
91
 
92
  # Text input
93
  text_input = gr.Textbox(
94
  label="Tin nhắn của bạn",
95
- placeholder="Xin chào! Hãy hỏi tôi bất cứ điều gì...",
96
  lines=3
97
  )
98
 
99
  # Voice input
100
  with gr.Group():
101
- gr.Markdown("### 🎤 Voice Input")
102
  audio_input = gr.Audio(
103
  sources=["microphone"],
104
  type="numpy",
@@ -111,10 +118,11 @@ def create_sambanova_voice_tab(sambanova_service):
111
  temperature = gr.Slider(0, 1, value=0.1, label="Temperature")
112
  top_p = gr.Slider(0, 1, value=0.1, label="Top-P")
113
 
114
- # Voice output toggle
115
  voice_output_toggle = gr.Checkbox(
116
- label="🔊 Bật Voice Output (TTS)",
117
- value=True
 
118
  )
119
 
120
  # Buttons
@@ -130,175 +138,180 @@ def create_sambanova_voice_tab(sambanova_service):
130
  interactive=False
131
  )
132
 
133
- with gr.Column(scale=2):
134
- # Audio output
135
- gr.Markdown("### 🔊 Voice Output")
136
- audio_output = gr.Audio(
137
- label="Giọng nói AI",
138
- autoplay=True,
139
- visible=False
140
- )
141
-
142
- # Streaming output
143
- streaming_output = gr.Textbox(
144
- label="Streaming Response",
145
- lines=5,
146
- max_lines=10
 
 
 
 
 
 
 
 
 
147
  )
148
-
149
- # Model information
150
- gr.Markdown("### 📊 Thông tin")
151
- gr.Markdown("""
152
- **Tính năng:**
153
- ✅ Text chat
154
- ✅ Voice input (STT)
155
- ✅ Voice output (TTS)
156
- ✅ Streaming response
157
- ✅ Đa ngôn ngữ
158
-
159
- **Công nghệ:**
160
- - Sambanova API
161
- - FastRTC STT
162
- - Google/Microsoft TTS
163
- """)
164
 
165
- # Event handlers cho text với voice output
166
def send_text_message(text, history, state, audio_state, model, language, temp, top_p_val, voice_enabled):
    """Handle a typed chat message and stream UI updates, optionally with a voice reply.

    This function is a generator, so every UI update has to be *yielded*.
    Each yielded 6-tuple lines up with the outputs wired to the send-text
    button: (chat history, conversation state, audio state, status text,
    audio component update, streaming text).

    Args:
        text: Message typed by the user.
        history: Message list currently shown in the chatbot.
        state: Message list sent to the model.
        audio_state: Last stored audio value; passed through unchanged unless
            a new voice reply is produced.
        model: Model selected in the dropdown.
        language: Language code forwarded to the voice-response call.
        temp: Temperature, used only on the text-only path.
        top_p_val: Top-P, used only on the text-only path.
        voice_enabled: When truthy, request a reply with synthesized audio.
    """
    if not text or not text.strip():
        # A generator's `return <value>` is discarded by the consumer, so the
        # validation message must be yielded to actually reach the UI.
        yield history, state, audio_state, "❌ Vui lòng nhập tin nhắn", gr.update(visible=False), ""
        return

    try:
        # Append the user message to both the displayed history and the model state.
        user_message = {"role": "user", "content": text}
        new_history = history + [user_message]
        new_state = state + [user_message]

        # Intermediate update so the user sees the message while the model works.
        yield new_history, new_state, audio_state, "⏳ Đang xử lý...", gr.update(visible=False), ""

        if voice_enabled:
            # Voice path: the service returns a mapping with "text" and "audio".
            # NOTE(review): temp/top_p_val are not forwarded on this path.
            response_data = sambanova_service.generate_response_with_voice(
                new_state, model, language
            )
            ai_text = response_data["text"]
            audio_filepath = response_data["audio"]

            ai_message = {"role": "assistant", "content": ai_text}
            final_history = new_history + [ai_message]
            final_state = new_state + [ai_message]

            # Reveal the audio component only when audio was actually produced.
            if audio_filepath:
                audio_update = gr.update(value=audio_filepath, visible=True)
            else:
                audio_update = gr.update(visible=False)
            new_audio_state = audio_filepath

            yield final_history, final_state, new_audio_state, "✅ Hoàn thành với voice", audio_update, ai_text
        else:
            # Text-only path.
            ai_text = sambanova_service.generate_response(new_state, model, temp, top_p_val)
            ai_message = {"role": "assistant", "content": ai_text}
            final_history = new_history + [ai_message]
            final_state = new_state + [ai_message]

            yield final_history, final_state, audio_state, "✅ Hoàn thành", gr.update(visible=False), ai_text

    except Exception as e:
        # UI boundary: log, then surface the error in the status box and roll
        # back to the history/state received on entry.
        print(f"❌ Error: {e}")
        error_msg = f"❌ Lỗi: {str(e)}"
        yield history, state, audio_state, error_msg, gr.update(visible=False), ""
211
 
212
- # Event handlers cho voice với voice output
213
def send_voice_message(audio, history, state, audio_state, model, language, temp, top_p_val, voice_enabled):
    """Handle a recorded voice message and stream UI updates, optionally with a voice reply.

    This function is a generator, so every UI update has to be *yielded*.
    Each yielded 6-tuple lines up with the outputs wired to the send-voice
    button: (chat history, conversation state, audio state, status text,
    audio component update, streaming text).

    Args:
        audio: Recording from the microphone component, or None.
        history: Message list currently shown in the chatbot.
        state: Message list sent to the model.
        audio_state: Last stored audio value; passed through unchanged unless
            a new voice reply is produced.
        model: Model selected in the dropdown.
        language: Language code forwarded to the voice-response call.
        temp: Temperature, used only on the text-only path.
        top_p_val: Top-P, used only on the text-only path.
        voice_enabled: When truthy, request a reply with synthesized audio.
    """
    if audio is None:
        # A generator's `return <value>` is discarded by the consumer, so the
        # validation message must be yielded to actually reach the UI.
        yield history, state, audio_state, "❌ Vui lòng ghi âm tin nhắn", gr.update(visible=False), ""
        return

    try:
        # Report progress before the speech-to-text call.
        yield history, state, audio_state, "🎤 Đang chuyển speech thành text...", gr.update(visible=False), ""

        text = sambanova_service.speech_to_text(audio)
        if not text:
            # Same generator rule as above: yield the failure, then stop.
            yield history, state, audio_state, "❌ Không thể nhận dạng giọng nói", gr.update(visible=False), ""
            return

        # The chatbot shows the recording itself, while the model state
        # receives the transcript.
        user_audio_message = {"role": "user", "content": gr.Audio(audio)}
        user_text_message = {"role": "user", "content": text}

        new_history = history + [user_audio_message]
        new_state = state + [user_text_message]

        yield new_history, new_state, audio_state, "⏳ Đang xử lý voice message...", gr.update(visible=False), ""

        if voice_enabled:
            # Voice path: the service returns a mapping with "text" and "audio".
            # NOTE(review): temp/top_p_val are not forwarded on this path.
            response_data = sambanova_service.generate_response_with_voice(
                new_state, model, language
            )
            ai_text = response_data["text"]
            audio_filepath = response_data["audio"]

            ai_message = {"role": "assistant", "content": ai_text}
            final_history = new_history + [ai_message]
            final_state = new_state + [ai_message]

            # Reveal the audio component only when audio was actually produced.
            if audio_filepath:
                audio_update = gr.update(value=audio_filepath, visible=True)
            else:
                audio_update = gr.update(visible=False)
            new_audio_state = audio_filepath

            yield final_history, final_state, new_audio_state, "✅ Voice message hoàn thành", audio_update, ai_text
        else:
            # Text-only path.
            ai_text = sambanova_service.generate_response(new_state, model, temp, top_p_val)
            ai_message = {"role": "assistant", "content": ai_text}
            final_history = new_history + [ai_message]
            final_state = new_state + [ai_message]

            yield final_history, final_state, audio_state, "✅ Hoàn thành", gr.update(visible=False), ai_text

    except Exception as e:
        # UI boundary: log, then surface the error in the status box and roll
        # back to the history/state received on entry.
        print(f"❌ Voice Error: {e}")
        error_msg = f"❌ Lỗi voice: {str(e)}"
        yield history, state, audio_state, error_msg, gr.update(visible=False), ""
265
 
266
def clear_chat():
    """Reset the whole conversation.

    Returns one value per output wired to the clear button: empty chat
    history, empty conversation state, no stored audio, a status message,
    a hidden audio component, and empty streaming text.
    """
    empty_history = []
    empty_state = []
    hidden_audio = gr.update(visible=False)
    return empty_history, empty_state, None, "🔄 Đã xóa hội thoại", hidden_audio, ""
269
 
270
- # Kết nối events cho text
271
  send_text_btn.click(
272
- fn=send_text_message,
273
  inputs=[
274
- text_input, chatbot, conversation_state, audio_output_state,
275
  model_dropdown, language_dropdown, temperature, top_p, voice_output_toggle
276
  ],
277
- outputs=[chatbot, conversation_state, audio_output_state, status, audio_output, streaming_output]
278
  ).then(
279
- fn=lambda: ("", None),
280
- outputs=[text_input, audio_input]
281
  )
282
 
283
- # Kết nối events cho voice
284
  send_voice_btn.click(
285
- fn=send_voice_message,
286
  inputs=[
287
- audio_input, chatbot, conversation_state, audio_output_state,
288
  model_dropdown, language_dropdown, temperature, top_p, voice_output_toggle
289
  ],
290
- outputs=[chatbot, conversation_state, audio_output_state, status, audio_output, streaming_output]
291
  ).then(
292
- fn=lambda: ("", None),
293
- outputs=[text_input, audio_input]
294
  )
295
 
296
  clear_btn.click(
297
- fn=clear_chat,
298
- outputs=[chatbot, conversation_state, audio_output_state, status, audio_output, streaming_output]
299
  )
300
 
301
  return sambanova_tab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
  def create_voice_coding_tab(voice_coding_service):
303
  """Tạo tab Voice Coding đơn giản - Text-based trước"""
304
 
 
52
  create_language_info_tab(rag_system.multilingual_manager)
53
  with gr.Tab("Stream Object Detection"):
54
  create_streaming_object_detection()
55
+ def create_sambanova_voice_tab():
56
+ """Tạo tab Sambanova AI với Voice Input/Output"""
57
+
58
+ # Khởi tạo services
59
+ try:
60
+ tts_service = EnhancedTTSService()
61
+ sambanova_service = SambanovaVoiceService(tts_service=tts_service)
62
+ print("✅ Tất cả services đã được khởi tạo")
63
+ except Exception as e:
64
+ print(f"❌ Lỗi khởi tạo services: {e}")
65
+ # Fallback: chỉ khởi tạo Sambanova service không có TTS
66
+ sambanova_service = SambanovaVoiceService()
67
+ tts_service = None
68
 
69
  with gr.Blocks() as sambanova_tab:
70
+ gr.Markdown("## 🤖 Sambanova AI - Voice & Text")
71
+ gr.Markdown("Trò chuyện với AI - Hỗ trợ voice input/output")
72
 
73
  # State variables
74
  chatbot = gr.Chatbot(
75
  type="messages",
76
  value=[],
77
  label="💬 Hội thoại",
78
+ height=400
 
79
  )
80
  conversation_state = gr.State(value=[])
 
81
 
82
  with gr.Row():
83
  with gr.Column(scale=1):
 
 
 
84
  # Model selection
85
  model_dropdown = gr.Dropdown(
86
  choices=sambanova_service.get_available_models(),
 
88
  label="Chọn Model"
89
  )
90
 
91
+ # Language selection for TTS (chỉ hiển thị nếu có TTS)
92
  language_dropdown = gr.Dropdown(
93
  choices=['vi', 'en', 'ja', 'ko', 'zh', 'fr', 'es', 'de'],
94
  value='vi',
95
+ label="Ngôn ngữ TTS",
96
+ visible=tts_service is not None
97
  )
98
 
99
  # Text input
100
  text_input = gr.Textbox(
101
  label="Tin nhắn của bạn",
102
+ placeholder="Nhập tin nhắn hoặc sử dụng voice...",
103
  lines=3
104
  )
105
 
106
  # Voice input
107
  with gr.Group():
108
+ gr.Markdown("**🎤 Voice Input**")
109
  audio_input = gr.Audio(
110
  sources=["microphone"],
111
  type="numpy",
 
118
  temperature = gr.Slider(0, 1, value=0.1, label="Temperature")
119
  top_p = gr.Slider(0, 1, value=0.1, label="Top-P")
120
 
121
+ # Voice output toggle (chỉ hiển thị nếu có TTS)
122
  voice_output_toggle = gr.Checkbox(
123
+ label="🔊 Bật Voice Output",
124
+ value=True,
125
+ visible=tts_service is not None
126
  )
127
 
128
  # Buttons
 
138
  interactive=False
139
  )
140
 
141
+ with gr.Column(scale=1):
142
+ # Audio output (chỉ hiển thị nếu có TTS)
143
+ if tts_service is not None:
144
+ gr.Markdown("### 🔊 Voice Output")
145
+ audio_output = gr.Audio(
146
+ label="Giọng nói AI",
147
+ autoplay=False,
148
+ visible=True
149
+ )
150
+ else:
151
+ audio_output = gr.Audio(visible=False)
152
+ gr.Markdown("### ℹ️ Thông tin")
153
+ gr.Markdown("""
154
+ **Voice output tạm thời không khả dụng**
155
+ - Vẫn có thể sử dụng voice input
156
+ - Vẫn có thể chat bằng text
157
+ """)
158
+
159
+ # Response display
160
+ response_display = gr.Textbox(
161
+ label="Phản hồi từ AI",
162
+ lines=6,
163
+ interactive=False
164
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
+ # Event handlers
167
def process_text_message(text, history, state, model, language, temp, top_p_val, voice_enabled):
    """Handle a typed chat message and stream UI updates.

    This function is a generator, so every UI update has to be *yielded*.
    Each yielded 5-tuple lines up with the outputs wired to the send-text
    button: (chat history, conversation state, status text, response text,
    audio component update).

    Args:
        text: Message typed by the user.
        history: Message list currently shown in the chatbot.
        state: Message list sent to the model.
        model: Model selected in the dropdown.
        language: Language code passed to text-to-speech.
        temp: Sampling temperature.
        top_p_val: Top-P sampling value.
        voice_enabled: When truthy (and a TTS service exists), synthesize
            the reply as audio.
    """
    if not text or not text.strip():
        # A generator's `return <value>` is discarded by the consumer, so the
        # validation message must be yielded to actually reach the UI.
        yield history, state, "❌ Vui lòng nhập tin nhắn", "", gr.update(visible=False)
        return

    try:
        # Append the user message to both the displayed history and the model state.
        user_msg = {"role": "user", "content": text}
        new_history = history + [user_msg]
        new_state = state + [user_msg]

        # Intermediate update so the user sees the message while the model works.
        yield new_history, new_state, "⏳ Đang xử lý...", "", gr.update(visible=False)

        ai_text = sambanova_service.generate_response(new_state, model, temp, top_p_val)
        ai_msg = {"role": "assistant", "content": ai_text}

        final_history = new_history + [ai_msg]
        final_state = new_state + [ai_msg]

        # Synthesize audio only when requested and a TTS service was created;
        # the audio component stays hidden if no file is produced.
        audio_update = gr.update(visible=False)
        if voice_enabled and tts_service is not None:
            audio_file = sambanova_service.text_to_speech(ai_text, language)
            if audio_file:
                audio_update = gr.update(value=audio_file, visible=True)

        yield final_history, final_state, "✅ Hoàn thành", ai_text, audio_update

    except Exception as e:
        # UI boundary: surface the error in the status box and roll back to
        # the history/state received on entry.
        error_msg = f"❌ Lỗi: {str(e)}"
        yield history, state, error_msg, "", gr.update(visible=False)
200
 
201
def process_voice_message(audio, history, state, model, language, temp, top_p_val, voice_enabled):
    """Handle a recorded voice message and stream UI updates.

    This function is a generator, so every UI update has to be *yielded*.
    Each yielded 5-tuple lines up with the outputs wired to the send-voice
    button: (chat history, conversation state, status text, response text,
    audio component update).

    Args:
        audio: Recording from the microphone component, or None.
        history: Message list currently shown in the chatbot.
        state: Message list sent to the model.
        model: Model selected in the dropdown.
        language: Language code passed to text-to-speech.
        temp: Sampling temperature.
        top_p_val: Top-P sampling value.
        voice_enabled: When truthy (and a TTS service exists), synthesize
            the reply as audio.
    """
    if audio is None:
        # A generator's `return <value>` is discarded by the consumer, so the
        # validation message must be yielded to actually reach the UI (the
        # empty-transcript guard below already follows this pattern).
        yield history, state, "❌ Vui lòng ghi âm tin nhắn", "", gr.update(visible=False)
        return

    try:
        # Report progress before the speech-to-text call.
        yield history, state, "🎤 Đang chuyển speech thành text...", "", gr.update(visible=False)

        text = sambanova_service.speech_to_text(audio)
        if not text:
            yield history, state, "❌ Không nhận dạng được giọng nói", "", gr.update(visible=False)
            return

        # The chatbot shows the recording itself, while the model state
        # receives the transcript.
        user_audio_msg = {"role": "user", "content": gr.Audio(audio)}
        user_text_msg = {"role": "user", "content": text}

        new_history = history + [user_audio_msg]
        new_state = state + [user_text_msg]

        yield new_history, new_state, "⏳ Đang xử lý voice message...", "", gr.update(visible=False)

        ai_text = sambanova_service.generate_response(new_state, model, temp, top_p_val)
        ai_msg = {"role": "assistant", "content": ai_text}

        final_history = new_history + [ai_msg]
        final_state = new_state + [ai_msg]

        # Synthesize audio only when requested and a TTS service was created;
        # the audio component stays hidden if no file is produced.
        audio_update = gr.update(visible=False)
        if voice_enabled and tts_service is not None:
            audio_file = sambanova_service.text_to_speech(ai_text, language)
            if audio_file:
                audio_update = gr.update(value=audio_file, visible=True)

        yield final_history, final_state, "✅ Voice message hoàn thành", ai_text, audio_update

    except Exception as e:
        # UI boundary: surface the error in the status box and roll back to
        # the history/state received on entry.
        error_msg = f"❌ Lỗi voice: {str(e)}"
        yield history, state, error_msg, "", gr.update(visible=False)
244
 
245
def clear_conversation():
    """Reset the conversation.

    Returns one value per output wired to the clear button: empty chat
    history, empty conversation state, a status message, empty response
    text, and a hidden audio component.
    """
    empty_history = []
    empty_state = []
    hidden_audio = gr.update(visible=False)
    return empty_history, empty_state, "🔄 Đã xóa hội thoại", "", hidden_audio
248
 
249
+ # Kết nối events
250
  send_text_btn.click(
251
+ fn=process_text_message,
252
  inputs=[
253
+ text_input, chatbot, conversation_state,
254
  model_dropdown, language_dropdown, temperature, top_p, voice_output_toggle
255
  ],
256
+ outputs=[chatbot, conversation_state, status, response_display, audio_output]
257
  ).then(
258
+ lambda: "", # Clear text input
259
+ outputs=[text_input]
260
  )
261
 
 
262
  send_voice_btn.click(
263
+ fn=process_voice_message,
264
  inputs=[
265
+ audio_input, chatbot, conversation_state,
266
  model_dropdown, language_dropdown, temperature, top_p, voice_output_toggle
267
  ],
268
+ outputs=[chatbot, conversation_state, status, response_display, audio_output]
269
  ).then(
270
+ lambda: None, # Clear audio input
271
+ outputs=[audio_input]
272
  )
273
 
274
  clear_btn.click(
275
+ fn=clear_conversation,
276
+ outputs=[chatbot, conversation_state, status, response_display, audio_output]
277
  )
278
 
279
  return sambanova_tab
280
+
281
def check_environment():
    """Check the runtime prerequisites and report each result on stdout.

    Required: the ``SAMBANOVA_API_KEY`` environment variable and the
    ``fastrtc`` package. Optional (reported, but not fatal): ``gtts`` and
    ``edge_tts``.

    Every check runs and is reported even after a required one has failed,
    so a single call lists all problems instead of only the first.

    Returns:
        bool: True when the API key is set and ``fastrtc`` imports,
        False otherwise.
    """
    # Local imports keep the function self-contained regardless of what the
    # module's top-level import block provides.
    import importlib
    import os

    print("🔍 Kiểm tra môi trường...")
    environment_ok = True

    # API key (an empty value counts as missing).
    if os.environ.get("SAMBANOVA_API_KEY"):
        print("✅ SAMBANOVA_API_KEY: OK")
    else:
        print("❌ SAMBANOVA_API_KEY không được tìm thấy")
        print("💡 Hãy set environment variable: export SAMBANOVA_API_KEY=your_key")
        environment_ok = False

    # (module name, label used in messages, whether a missing module is fatal)
    dependencies = (
        ("fastrtc", "FastRTC", True),
        ("gtts", "gTTS", False),
        ("edge_tts", "edge-tts", False),
    )
    for module_name, label, required in dependencies:
        try:
            importlib.import_module(module_name)
        except ImportError:
            print(f"❌ {label} chưa được cài đặt")
            if required:
                environment_ok = False
        else:
            print(f"✅ {label}: OK")

    return environment_ok
315
  def create_voice_coding_tab(voice_coding_service):
316
  """Tạo tab Voice Coding đơn giản - Text-based trước"""
317