Spaces:

datbkpro
/

voicebot

Sleeping

App Files Files Community

datbkpro committed on Nov 19, 2025

Commit

2fec4ad

verified ·

1 Parent(s): bd182b1

Update ui/tabs.py

Browse files

Files changed (1) hide show

ui/tabs.py +157 -144

ui/tabs.py CHANGED Viewed

@@ -52,29 +52,35 @@ def create_all_tabs(audio_service: AudioService, chat_service: ChatService,
         create_language_info_tab(rag_system.multilingual_manager)
     with gr.Tab("Stream Object Detection"):
         create_streaming_object_detection()
-def create_sambanova_voice_tab(sambanova_service):
-    """Tạo tab Sambanova AI với Voice Input/Output hoàn chỉnh"""
     with gr.Blocks() as sambanova_tab:
-        gr.Markdown("## 🤖 Sambanova AI - Voice & Text Complete")
-        gr.Markdown("Trò chuyện với AI - Hỗ trợ voice input/output hoàn chỉnh")
         # State variables
         chatbot = gr.Chatbot(
             type="messages",
             value=[],
             label="💬 Hội thoại",
-            height=500,
-            render=False
         )
         conversation_state = gr.State(value=[])
-        audio_output_state = gr.State(value=None)
         with gr.Row():
             with gr.Column(scale=1):
-                # Header
-                gr.Markdown("### 💬 Chat với AI")
                 # Model selection
                 model_dropdown = gr.Dropdown(
                     choices=sambanova_service.get_available_models(),
@@ -82,23 +88,24 @@ def create_sambanova_voice_tab(sambanova_service):
                     label="Chọn Model"
                 )
-                # Language selection for TTS
                 language_dropdown = gr.Dropdown(
                     choices=['vi', 'en', 'ja', 'ko', 'zh', 'fr', 'es', 'de'],
                     value='vi',
-                    label="Ngôn ngữ TTS"
                 )
                 # Text input
                 text_input = gr.Textbox(
                     label="Tin nhắn của bạn",
-                    placeholder="Xin chào! Hãy hỏi tôi bất cứ điều gì...",
                     lines=3
                 )
                 # Voice input
                 with gr.Group():
-                    gr.Markdown("### 🎤 Voice Input")
                     audio_input = gr.Audio(
                         sources=["microphone"],
                         type="numpy",
@@ -111,10 +118,11 @@ def create_sambanova_voice_tab(sambanova_service):
                     temperature = gr.Slider(0, 1, value=0.1, label="Temperature")
                     top_p = gr.Slider(0, 1, value=0.1, label="Top-P")
-                # Voice output toggle
                 voice_output_toggle = gr.Checkbox(
-                    label="🔊 Bật Voice Output (TTS)",
-                    value=True
                 )
                 # Buttons
@@ -130,175 +138,180 @@ def create_sambanova_voice_tab(sambanova_service):
                     interactive=False
                 )
-            with gr.Column(scale=2):
-                # Audio output
-                gr.Markdown("### 🔊 Voice Output")
-                audio_output = gr.Audio(
-                    label="Giọng nói AI",
-                    autoplay=True,
-                    visible=False
-                )
-                # Streaming output
-                streaming_output = gr.Textbox(
-                    label="Streaming Response",
-                    lines=5,
-                    max_lines=10
                 )
-                # Model information
-                gr.Markdown("### 📊 Thông tin")
-                gr.Markdown("""
-                **Tính năng:**
-                ✅ Text chat
-                ✅ Voice input (STT)
-                ✅ Voice output (TTS)
-                ✅ Streaming response
-                ✅ Đa ngôn ngữ
-                **Công nghệ:**
-                - Sambanova API
-                - FastRTC STT
-                - Google/Microsoft TTS
-                """)
-        # Event handlers cho text với voice output
-        def send_text_message(text, history, state, audio_state, model, language, temp, top_p_val, voice_enabled):
-            """Gửi tin nhắn text và nhận voice response"""
             if not text or not text.strip():
-                return history, state, audio_state, "❌ Vui lòng nhập tin nhắn", gr.update(visible=False), ""
             try:
-                # Thêm user message vào history
-                user_message = {"role": "user", "content": text}
-                new_history = history + [user_message]
-                new_state = state + [user_message]
-                yield new_history, new_state, audio_state, "⏳ Đang xử lý...", gr.update(visible=False), ""
-                # Lấy response từ AI
-                if voice_enabled:
-                    # Sử dụng voice response
-                    response_data = sambanova_service.generate_response_with_voice(
-                        new_state, model, language
-                    )
-                    ai_text = response_data["text"]
-                    audio_filepath = response_data["audio"]
-                    # Thêm AI response
-                    ai_message = {"role": "assistant", "content": ai_text}
-                    final_history = new_history + [ai_message]
-                    final_state = new_state + [ai_message]
-                    # Cập nhật audio output
-                    audio_update = gr.update(value=audio_filepath, visible=True) if audio_filepath else gr.update(visible=False)
-                    new_audio_state = audio_filepath
-                    yield final_history, final_state, new_audio_state, "✅ Hoàn thành với voice", audio_update, ai_text
-                else:
-                    # Chỉ text response
-                    ai_text = sambanova_service.generate_response(new_state, model, temp, top_p_val)
-                    ai_message = {"role": "assistant", "content": ai_text}
-                    final_history = new_history + [ai_message]
-                    final_state = new_state + [ai_message]
-                    yield final_history, final_state, audio_state, "✅ Hoàn thành", gr.update(visible=False), ai_text
             except Exception as e:
-                print(f"❌ Error: {e}")
                 error_msg = f"❌ Lỗi: {str(e)}"
-                yield history, state, audio_state, error_msg, gr.update(visible=False), ""
-        # Event handlers cho voice với voice output
-        def send_voice_message(audio, history, state, audio_state, model, language, temp, top_p_val, voice_enabled):
-            """Gửi tin nhắn voice và nhận voice response"""
             if audio is None:
-                return history, state, audio_state, "❌ Vui lòng ghi âm tin nhắn", gr.update(visible=False), ""
             try:
-                # Chuyển speech thành text
-                yield history, state, audio_state, "🎤 Đang chuyển speech thành text...", gr.update(visible=False), ""
                 text = sambanova_service.speech_to_text(audio)
                 if not text:
-                    return history, state, audio_state, "❌ Không thể nhận dạng giọng nói", gr.update(visible=False), ""
-                # Thêm user message vào history (cả audio và text)
-                user_audio_message = {"role": "user", "content": gr.Audio(audio)}
-                user_text_message = {"role": "user", "content": text}
-                new_history = history + [user_audio_message]
-                new_state = state + [user_text_message]
-                yield new_history, new_state, audio_state, "⏳ Đang xử lý voice message...", gr.update(visible=False), ""
-                # Lấy response từ AI
-                if voice_enabled:
-                    response_data = sambanova_service.generate_response_with_voice(
-                        new_state, model, language
-                    )
-                    ai_text = response_data["text"]
-                    audio_filepath = response_data["audio"]
-                    # Thêm AI response
-                    ai_message = {"role": "assistant", "content": ai_text}
-                    final_history = new_history + [ai_message]
-                    final_state = new_state + [ai_message]
-                    # Cập nhật audio output
-                    audio_update = gr.update(value=audio_filepath, visible=True) if audio_filepath else gr.update(visible=False)
-                    new_audio_state = audio_filepath
-                    yield final_history, final_state, new_audio_state, "✅ Voice message hoàn thành", audio_update, ai_text
-                else:
-                    ai_text = sambanova_service.generate_response(new_state, model, temp, top_p_val)
-                    ai_message = {"role": "assistant", "content": ai_text}
-                    final_history = new_history + [ai_message]
-                    final_state = new_state + [ai_message]
-                    yield final_history, final_state, audio_state, "✅ Hoàn thành", gr.update(visible=False), ai_text
             except Exception as e:
-                print(f"❌ Voice Error: {e}")
                 error_msg = f"❌ Lỗi voice: {str(e)}"
-                yield history, state, audio_state, error_msg, gr.update(visible=False), ""
-        def clear_chat():
-            """Xóa toàn bộ hội thoại"""
-            return [], [], None, "🔄 Đã xóa hội thoại", gr.update(visible=False), ""
-        # Kết nối events cho text
         send_text_btn.click(
-            fn=send_text_message,
             inputs=[
-                text_input, chatbot, conversation_state, audio_output_state,
                 model_dropdown, language_dropdown, temperature, top_p, voice_output_toggle
             ],
-            outputs=[chatbot, conversation_state, audio_output_state, status, audio_output, streaming_output]
         ).then(
-            fn=lambda: ("", None),
-            outputs=[text_input, audio_input]
         )
-        # Kết nối events cho voice
         send_voice_btn.click(
-            fn=send_voice_message,
             inputs=[
-                audio_input, chatbot, conversation_state, audio_output_state,
                 model_dropdown, language_dropdown, temperature, top_p, voice_output_toggle
             ],
-            outputs=[chatbot, conversation_state, audio_output_state, status, audio_output, streaming_output]
         ).then(
-            fn=lambda: ("", None),
-            outputs=[text_input, audio_input]
         )
         clear_btn.click(
-            fn=clear_chat,
-            outputs=[chatbot, conversation_state, audio_output_state, status, audio_output, streaming_output]
         )
     return sambanova_tab
 def create_voice_coding_tab(voice_coding_service):
     """Tạo tab Voice Coding đơn giản - Text-based trước"""

         create_language_info_tab(rag_system.multilingual_manager)
     with gr.Tab("Stream Object Detection"):
         create_streaming_object_detection()
+def create_sambanova_voice_tab():
+    """Tạo tab Sambanova AI với Voice Input/Output"""
+    # Khởi tạo services
+    try:
+        tts_service = EnhancedTTSService()
+        sambanova_service = SambanovaVoiceService(tts_service=tts_service)
+        print("✅ Tất cả services đã được khởi tạo")
+    except Exception as e:
+        print(f"❌ Lỗi khởi tạo services: {e}")
+        # Fallback: chỉ khởi tạo Sambanova service không có TTS
+        sambanova_service = SambanovaVoiceService()
+        tts_service = None
     with gr.Blocks() as sambanova_tab:
+        gr.Markdown("## 🤖 Sambanova AI - Voice & Text")
+        gr.Markdown("Trò chuyện với AI - Hỗ trợ voice input/output")
         # State variables
         chatbot = gr.Chatbot(
             type="messages",
             value=[],
             label="💬 Hội thoại",
+            height=400
         )
         conversation_state = gr.State(value=[])
         with gr.Row():
             with gr.Column(scale=1):
                 # Model selection
                 model_dropdown = gr.Dropdown(
                     choices=sambanova_service.get_available_models(),
                     label="Chọn Model"
                 )
+                # Language selection for TTS (chỉ hiển thị nếu có TTS)
                 language_dropdown = gr.Dropdown(
                     choices=['vi', 'en', 'ja', 'ko', 'zh', 'fr', 'es', 'de'],
                     value='vi',
+                    label="Ngôn ngữ TTS",
+                    visible=tts_service is not None
                 )
                 # Text input
                 text_input = gr.Textbox(
                     label="Tin nhắn của bạn",
+                    placeholder="Nhập tin nhắn hoặc sử dụng voice...",
                     lines=3
                 )
                 # Voice input
                 with gr.Group():
+                    gr.Markdown("**🎤 Voice Input**")
                     audio_input = gr.Audio(
                         sources=["microphone"],
                         type="numpy",
                     temperature = gr.Slider(0, 1, value=0.1, label="Temperature")
                     top_p = gr.Slider(0, 1, value=0.1, label="Top-P")
+                # Voice output toggle (chỉ hiển thị nếu có TTS)
                 voice_output_toggle = gr.Checkbox(
+                    label="🔊 Bật Voice Output",
+                    value=True,
+                    visible=tts_service is not None
                 )
                 # Buttons
                     interactive=False
                 )
+            with gr.Column(scale=1):
+                # Audio output (chỉ hiển thị nếu có TTS)
+                if tts_service is not None:
+                    gr.Markdown("### 🔊 Voice Output")
+                    audio_output = gr.Audio(
+                        label="Giọng nói AI",
+                        autoplay=False,
+                        visible=True
+                    )
+                else:
+                    audio_output = gr.Audio(visible=False)
+                    gr.Markdown("### ℹ️ Thông tin")
+                    gr.Markdown("""
+                    **Voice output tạm thời không khả dụng**
+                    - Vẫn có thể sử dụng voice input
+                    - Vẫn có thể chat bằng text
+                    """)
+                # Response display
+                response_display = gr.Textbox(
+                    label="Phản hồi từ AI",
+                    lines=6,
+                    interactive=False
                 )
+        # Event handlers
+        def process_text_message(text, history, state, model, language, temp, top_p_val, voice_enabled):
+            """Handle a typed chat message and stream UI updates.
+
+            This is a generator. Every update is yielded as a 5-tuple of
+            (chat history, conversation state, status text, response text,
+            audio component update).
+
+            Args:
+                text: Message typed by the user.
+                history: Messages currently shown in the chatbot.
+                state: Messages passed to the model as conversation context.
+                model: Selected model, forwarded to ``generate_response``.
+                language: Language code, forwarded to ``text_to_speech``.
+                temp: Temperature value, forwarded to ``generate_response``.
+                top_p_val: Top-P value, forwarded to ``generate_response``.
+                voice_enabled: Whether a spoken reply was requested.
+            """
             if not text or not text.strip():
+                # A value given to `return` inside a generator is never delivered
+                # as an output, so the validation message has to be yielded.
+                yield history, state, "❌ Vui lòng nhập tin nhắn", "", gr.update(visible=False)
+                return
             try:
+                # Append the user message to the displayed history and the model context
+                user_msg = {"role": "user", "content": text}
+                new_history = history + [user_msg]
+                new_state = state + [user_msg]
+                # Show an in-progress status while waiting for the model
+                yield new_history, new_state, "⏳ Đang xử lý...", "", gr.update(visible=False)
+                # Get the model's reply
+                ai_text = sambanova_service.generate_response(new_state, model, temp, top_p_val)
+                ai_msg = {"role": "assistant", "content": ai_text}
+                final_history = new_history + [ai_msg]
+                final_state = new_state + [ai_msg]
+                # Synthesize speech only when the toggle is on and a TTS service exists
+                audio_update = gr.update(visible=False)
+                if voice_enabled and tts_service is not None:
+                    audio_file = sambanova_service.text_to_speech(ai_text, language)
+                    if audio_file:
+                        audio_update = gr.update(value=audio_file, visible=True)
+                yield final_history, final_state, "✅ Hoàn thành", ai_text, audio_update
             except Exception as e:
                 error_msg = f"❌ Lỗi: {str(e)}"
+                yield history, state, error_msg, "", gr.update(visible=False)
+        def process_voice_message(audio, history, state, model, language, temp, top_p_val, voice_enabled):
+            """Handle a recorded voice message and stream UI updates.
+
+            This is a generator. Every update is yielded as a 5-tuple of
+            (chat history, conversation state, status text, response text,
+            audio component update).
+
+            Args:
+                audio: Recording from the microphone input, or None if nothing
+                    was recorded.
+                history: Messages currently shown in the chatbot.
+                state: Messages passed to the model as conversation context.
+                model: Selected model, forwarded to ``generate_response``.
+                language: Language code, forwarded to ``text_to_speech``.
+                temp: Temperature value, forwarded to ``generate_response``.
+                top_p_val: Top-P value, forwarded to ``generate_response``.
+                voice_enabled: Whether a spoken reply was requested.
+            """
             if audio is None:
+                # A value given to `return` inside a generator is never delivered
+                # as an output, so the validation message has to be yielded.
+                yield history, state, "❌ Vui lòng ghi âm tin nhắn", "", gr.update(visible=False)
+                return
             try:
+                # Show an in-progress status while transcribing
+                yield history, state, "🎤 Đang chuyển speech thành text...", "", gr.update(visible=False)
+                # Convert speech to text
                 text = sambanova_service.speech_to_text(audio)
                 if not text:
+                    yield history, state, "❌ Không nhận dạng được giọng nói", "", gr.update(visible=False)
+                    return
+                # The chat display gets the recording; the model context gets the transcript
+                user_audio_msg = {"role": "user", "content": gr.Audio(audio)}
+                user_text_msg = {"role": "user", "content": text}
+                new_history = history + [user_audio_msg]
+                new_state = state + [user_text_msg]
+                yield new_history, new_state, "⏳ Đang xử lý voice message...", "", gr.update(visible=False)
+                # Get the model's reply
+                ai_text = sambanova_service.generate_response(new_state, model, temp, top_p_val)
+                ai_msg = {"role": "assistant", "content": ai_text}
+                final_history = new_history + [ai_msg]
+                final_state = new_state + [ai_msg]
+                # Synthesize speech only when the toggle is on and a TTS service exists
+                audio_update = gr.update(visible=False)
+                if voice_enabled and tts_service is not None:
+                    audio_file = sambanova_service.text_to_speech(ai_text, language)
+                    if audio_file:
+                        audio_update = gr.update(value=audio_file, visible=True)
+                yield final_history, final_state, "✅ Voice message hoàn thành", ai_text, audio_update
             except Exception as e:
                 error_msg = f"❌ Lỗi voice: {str(e)}"
+                yield history, state, error_msg, "", gr.update(visible=False)
+        def clear_conversation():
+            """Reset the conversation.
+
+            Returns an empty chat history, an empty conversation state, a status
+            message, an empty response text, and an update that hides the audio
+            output.
+            """
+            return [], [], "🔄 Đã xóa hội thoại", "", gr.update(visible=False)
+        # Kết nối events
         send_text_btn.click(
+            fn=process_text_message,
             inputs=[
+                text_input, chatbot, conversation_state,
                 model_dropdown, language_dropdown, temperature, top_p, voice_output_toggle
             ],
+            outputs=[chatbot, conversation_state, status, response_display, audio_output]
         ).then(
+            lambda: "",  # Clear text input
+            outputs=[text_input]
         )
         send_voice_btn.click(
+            fn=process_voice_message,
             inputs=[
+                audio_input, chatbot, conversation_state,
                 model_dropdown, language_dropdown, temperature, top_p, voice_output_toggle
             ],
+            outputs=[chatbot, conversation_state, status, response_display, audio_output]
         ).then(
+            lambda: None,  # Clear audio input
+            outputs=[audio_input]
         )
         clear_btn.click(
+            fn=clear_conversation,
+            outputs=[chatbot, conversation_state, status, response_display, audio_output]
         )
     return sambanova_tab
+def check_environment():
+    """Check the runtime environment before starting.
+
+    Prints a status line for the SAMBANOVA_API_KEY environment variable and
+    for the fastrtc, gtts and edge_tts packages.
+
+    Returns:
+        False if SAMBANOVA_API_KEY is unset or empty (the package checks are
+        then skipped), or if fastrtc cannot be imported; True otherwise.
+        A missing gtts or edge_tts is only reported and does not change the
+        result.
+    """
+    print("🔍 Kiểm tra môi trường...")
+    # Check the API key
+    api_key = os.environ.get("SAMBANOVA_API_KEY")
+    if not api_key:
+        print("❌ SAMBANOVA_API_KEY không được tìm thấy")
+        print("💡 Hãy set environment variable: export SAMBANOVA_API_KEY=your_key")
+        return False
+    else:
+        print("✅ SAMBANOVA_API_KEY: OK")
+    # Check dependencies; fastrtc is the only one whose absence fails the check
+    try:
+        import fastrtc
+        print("✅ FastRTC: OK")
+    except ImportError:
+        print("❌ FastRTC chưa được cài đặt")
+        return False
+    # gtts is reported but not required
+    try:
+        import gtts
+        print("✅ gTTS: OK")
+    except ImportError:
+        print("❌ gTTS chưa được cài đặt")
+    # edge_tts is reported but not required
+    try:
+        import edge_tts
+        print("✅ edge-tts: OK")
+    except ImportError:
+        print("❌ edge-tts chưa được cài đặt")
+    return True
 def create_voice_coding_tab(voice_coding_service):
     """Tạo tab Voice Coding đơn giản - Text-based trước"""