Spaces:

datbkpro
/

voicebot

Sleeping

App Files Files Community

datbkpro committed on Nov 19, 2025

Commit

eb504c3

verified ·

1 Parent(s): 77cab36

Update ui/tabs.py

Browse files

Files changed (1) hide show

ui/tabs.py +184 -48

ui/tabs.py CHANGED Viewed

@@ -53,36 +53,74 @@ def create_all_tabs(audio_service: AudioService, chat_service: ChatService,
     with gr.Tab("Stream Object Detection"):
         create_streaming_object_detection()
 def create_sambanova_voice_tab(sambanova_service):
-    """Tạo tab Sambanova AI - Text Only"""
     with gr.Blocks() as sambanova_tab:
-        gr.Markdown("## 🤖 Sambanova AI - Llama 3.2 3B")
-        gr.Markdown("Trò chuyện với model Llama 3.2 3B thông qua Sambanova API")
         # State variables
         chatbot = gr.Chatbot(
             type="messages",
             value=[],
             label="💬 Hội thoại",
-            height=500
         )
         conversation_state = gr.State(value=[])
         with gr.Row():
             with gr.Column(scale=1):
                 # Header
-                gr.Markdown("### 💬 Chat với Llama 3.2 3B")
-                # Text input - ĐƠN GIẢN, không dùng Accordion
                 text_input = gr.Textbox(
                     label="Tin nhắn của bạn",
                     placeholder="Xin chào! Hãy hỏi tôi bất cứ điều gì...",
                     lines=3
                 )
                 # Buttons
                 with gr.Row():
-                    send_btn = gr.Button("🚀 Gửi", variant="primary")
                     clear_btn = gr.Button("🗑️ Xóa", variant="secondary")
                 # Status
@@ -91,32 +129,44 @@ def create_sambanova_voice_tab(sambanova_service):
                     value="✅ Sẵn sàng",
                     interactive=False
                 )
-                # Voice notice
-                gr.Markdown("---")
-                gr.Markdown("**🎤 Voice features:** Đang được phát triển")
             with gr.Column(scale=2):
-                # Model information - ĐƠN GIẢN, không dùng Accordion
-                gr.Markdown("### 📊 Thông tin Model")
-                gr.Markdown("""
-                **Llama 3.2 3B Instruct**
-                - **Provider**: Sambanova AI
-                - **Context**: 128K tokens
-                - **Temperature**: 0.1
-                - **Top-P**: 0.1
                 **Tính năng:**
-                ✅ Instruction following
-                ✅ Multi-language
-                ✅ Code generation
                 """)
-        # Event handlers
-        def send_message(text, history, state):
-            """Gửi tin nhắn đến Sambanova API"""
             if not text or not text.strip():
-                return history, state, "❌ Vui lòng nhập tin nhắn"
             try:
                 # Thêm user message vào history
@@ -124,42 +174,128 @@ def create_sambanova_voice_tab(sambanova_service):
                 new_history = history + [user_message]
                 new_state = state + [user_message]
-                # Hiển thị trạng thái đang xử lý
-                yield new_history, new_state, "⏳ Đang xử lý..."
-                # Gọi Sambanova API
-                print("🤖 Calling Sambanova API...")
-                ai_response_text = sambanova_service.generate_response(new_state)
-                # Thêm AI response
-                ai_message = {"role": "assistant", "content": ai_response_text}
-                new_history = new_history + [ai_message]
-                new_state = new_state + [ai_message]
-                yield new_history, new_state, "✅ Hoàn thành"
             except Exception as e:
                 print(f"❌ Error: {e}")
                 error_msg = f"❌ Lỗi: {str(e)}"
-                yield history, state, error_msg
         def clear_chat():
             """Xóa toàn bộ hội thoại"""
-            return [], [], "🔄 Đã xóa hội thoại"
-        # Kết nối events
-        send_btn.click(
-            fn=send_message,
-            inputs=[text_input, chatbot, conversation_state],
-            outputs=[chatbot, conversation_state, status]
         ).then(
-            fn=lambda: "",
-            outputs=[text_input]
         )
         clear_btn.click(
             fn=clear_chat,
-            outputs=[chatbot, conversation_state, status]
         )
     return sambanova_tab

     with gr.Tab("Stream Object Detection"):
         create_streaming_object_detection()
 def create_sambanova_voice_tab(sambanova_service):
+    """Tạo tab Sambanova AI với Voice Input/Output hoàn chỉnh"""
     with gr.Blocks() as sambanova_tab:
+        gr.Markdown("## 🤖 Sambanova AI - Voice & Text Complete")
+        gr.Markdown("Trò chuyện với AI - Hỗ trợ voice input/output hoàn chỉnh")
         # State variables
         chatbot = gr.Chatbot(
             type="messages",
             value=[],
             label="💬 Hội thoại",
+            height=500,
+            render=False
         )
         conversation_state = gr.State(value=[])
+        audio_output_state = gr.State(value=None)
         with gr.Row():
             with gr.Column(scale=1):
                 # Header
+                gr.Markdown("### 💬 Chat với AI")
+                # Model selection
+                model_dropdown = gr.Dropdown(
+                    choices=sambanova_service.get_available_models(),
+                    value="Meta-Llama-3.1-8B-Instruct",
+                    label="Chọn Model"
+                )
+                # Language selection for TTS
+                language_dropdown = gr.Dropdown(
+                    choices=['vi', 'en', 'ja', 'ko', 'zh', 'fr', 'es', 'de'],
+                    value='vi',
+                    label="Ngôn ngữ TTS"
+                )
+                # Text input
                 text_input = gr.Textbox(
                     label="Tin nhắn của bạn",
                     placeholder="Xin chào! Hãy hỏi tôi bất cứ điều gì...",
                     lines=3
                 )
+                # Voice input
+                with gr.Group():
+                    gr.Markdown("### 🎤 Voice Input")
+                    audio_input = gr.Audio(
+                        sources=["microphone"],
+                        type="numpy",
+                        label="Nói tin nhắn của bạn",
+                        show_download_button=False
+                    )
+                # Parameters
+                with gr.Row():
+                    temperature = gr.Slider(0, 1, value=0.1, label="Temperature")
+                    top_p = gr.Slider(0, 1, value=0.1, label="Top-P")
+                # Voice output toggle
+                voice_output_toggle = gr.Checkbox(
+                    label="🔊 Bật Voice Output (TTS)",
+                    value=True
+                )
                 # Buttons
                 with gr.Row():
+                    send_text_btn = gr.Button("🚀 Gửi Text", variant="primary")
+                    send_voice_btn = gr.Button("🎤 Gửi Voice", variant="primary")
                     clear_btn = gr.Button("🗑️ Xóa", variant="secondary")
                 # Status
                     value="✅ Sẵn sàng",
                     interactive=False
                 )
             with gr.Column(scale=2):
+                # Audio output
+                gr.Markdown("### 🔊 Voice Output")
+                audio_output = gr.Audio(
+                    label="Giọng nói AI",
+                    autoplay=True,
+                    visible=False
+                )
+                # Streaming output
+                streaming_output = gr.Textbox(
+                    label="Streaming Response",
+                    lines=5,
+                    max_lines=10
+                )
+                # Model information
+                gr.Markdown("### 📊 Thông tin")
+                gr.Markdown("""
                 **Tính năng:**
+                ✅ Text chat
+                ✅ Voice input (STT)
+                ✅ Voice output (TTS)
+                ✅ Streaming response
+                ✅ Đa ngôn ngữ
+                **Công nghệ:**
+                - Sambanova API
+                - FastRTC STT
+                - Google/Microsoft TTS
                 """)
+        # Event handlers cho text với voice output
+        def send_text_message(text, history, state, audio_state, model, language, temp, top_p_val, voice_enabled):
+            """Handle a typed message and stream UI updates back to Gradio.
+
+            This is a generator. Every ``yield`` emits one 6-tuple, in this
+            order: chat history, conversation state, audio state, status text,
+            an update for the audio player, and the streaming-response text.
+
+            Args:
+                text: Message typed by the user.
+                history: Messages currently shown in the chatbot.
+                state: Messages sent to ``sambanova_service`` as context.
+                audio_state: Stored audio value; replaced by the new audio
+                    file path when a voice reply is produced.
+                model: Model identifier forwarded to ``sambanova_service``.
+                language: Language code forwarded to the voice-response call.
+                temp: Temperature forwarded to the text-only call.
+                top_p_val: Top-P forwarded to the text-only call.
+                voice_enabled: When truthy, request a reply with audio.
+            """
             if not text or not text.strip():
+                # The value of ``return`` in a generator is never delivered as
+                # an output, so the validation message must be yielded first.
+                yield history, state, audio_state, "❌ Vui lòng nhập tin nhắn", gr.update(visible=False), ""
+                return
             try:
+                # Append the user message to the history
                 new_history = history + [user_message]
                 new_state = state + [user_message]
+                yield new_history, new_state, audio_state, "⏳ Đang xử lý...", gr.update(visible=False), ""
+                # Fetch the AI response
+                if voice_enabled:
+                    # Ask the service for text plus synthesized audio.
+                    # NOTE(review): temp and top_p_val are not passed on this
+                    # path - confirm whether the voice call should honor them.
+                    response_data = sambanova_service.generate_response_with_voice(
+                        new_state, model, language
+                    )
+                    ai_text = response_data["text"]
+                    audio_filepath = response_data["audio"]
+                    # Append the AI response
+                    ai_message = {"role": "assistant", "content": ai_text}
+                    final_history = new_history + [ai_message]
+                    final_state = new_state + [ai_message]
+                    # Show the player only when an audio file was produced
+                    audio_update = gr.update(value=audio_filepath, visible=True) if audio_filepath else gr.update(visible=False)
+                    new_audio_state = audio_filepath
+                    yield final_history, final_state, new_audio_state, "✅ Hoàn thành với voice", audio_update, ai_text
+                else:
+                    # Text-only response
+                    ai_text = sambanova_service.generate_response(new_state, model, temp, top_p_val)
+                    ai_message = {"role": "assistant", "content": ai_text}
+                    final_history = new_history + [ai_message]
+                    final_state = new_state + [ai_message]
+                    yield final_history, final_state, audio_state, "✅ Hoàn thành", gr.update(visible=False), ai_text
             except Exception as e:
                 print(f"❌ Error: {e}")
                 error_msg = f"❌ Lỗi: {str(e)}"
+                # Roll the UI back to the pre-request history and state
+                yield history, state, audio_state, error_msg, gr.update(visible=False), ""
+        # Event handlers cho voice với voice output
+        def send_voice_message(audio, history, state, audio_state, model, language, temp, top_p_val, voice_enabled):
+            """Handle a recorded message and stream UI updates back to Gradio.
+
+            This is a generator. Every ``yield`` emits one 6-tuple, in this
+            order: chat history, conversation state, audio state, status text,
+            an update for the audio player, and the streaming-response text.
+
+            Args:
+                audio: Recording from the microphone input, or ``None`` when
+                    nothing was recorded.
+                history: Messages currently shown in the chatbot.
+                state: Messages sent to ``sambanova_service`` as context.
+                audio_state: Stored audio value; replaced by the new audio
+                    file path when a voice reply is produced.
+                model: Model identifier forwarded to ``sambanova_service``.
+                language: Language code forwarded to the voice-response call.
+                temp: Temperature forwarded to the text-only call.
+                top_p_val: Top-P forwarded to the text-only call.
+                voice_enabled: When truthy, request a reply with audio.
+            """
+            if audio is None:
+                # The value of ``return`` in a generator is never delivered as
+                # an output, so the validation message must be yielded first.
+                yield history, state, audio_state, "❌ Vui lòng ghi âm tin nhắn", gr.update(visible=False), ""
+                return
+            try:
+                # Convert speech to text
+                yield history, state, audio_state, "🎤 Đang chuyển speech thành text...", gr.update(visible=False), ""
+                text = sambanova_service.speech_to_text(audio)
+                if not text:
+                    # Same generator rule as above: yield the message, then stop.
+                    yield history, state, audio_state, "❌ Không thể nhận dạng giọng nói", gr.update(visible=False), ""
+                    return
+                # The chatbot shows the recording itself, while the model
+                # context receives the transcribed text.
+                user_audio_message = {"role": "user", "content": gr.Audio(audio)}
+                user_text_message = {"role": "user", "content": text}
+                new_history = history + [user_audio_message]
+                new_state = state + [user_text_message]
+                yield new_history, new_state, audio_state, "⏳ Đang xử lý voice message...", gr.update(visible=False), ""
+                # Fetch the AI response
+                if voice_enabled:
+                    # NOTE(review): temp and top_p_val are not passed on this
+                    # path - confirm whether the voice call should honor them.
+                    response_data = sambanova_service.generate_response_with_voice(
+                        new_state, model, language
+                    )
+                    ai_text = response_data["text"]
+                    audio_filepath = response_data["audio"]
+                    # Append the AI response
+                    ai_message = {"role": "assistant", "content": ai_text}
+                    final_history = new_history + [ai_message]
+                    final_state = new_state + [ai_message]
+                    # Show the player only when an audio file was produced
+                    audio_update = gr.update(value=audio_filepath, visible=True) if audio_filepath else gr.update(visible=False)
+                    new_audio_state = audio_filepath
+                    yield final_history, final_state, new_audio_state, "✅ Voice message hoàn thành", audio_update, ai_text
+                else:
+                    # Text-only response
+                    ai_text = sambanova_service.generate_response(new_state, model, temp, top_p_val)
+                    ai_message = {"role": "assistant", "content": ai_text}
+                    final_history = new_history + [ai_message]
+                    final_state = new_state + [ai_message]
+                    yield final_history, final_state, audio_state, "✅ Hoàn thành", gr.update(visible=False), ai_text
+            except Exception as e:
+                print(f"❌ Voice Error: {e}")
+                error_msg = f"❌ Lỗi voice: {str(e)}"
+                # Roll the UI back to the pre-request history and state
+                yield history, state, audio_state, error_msg, gr.update(visible=False), ""
         def clear_chat():
+            """Reset the tab to an empty conversation.
+
+            Returns a 6-tuple: empty chat history, empty conversation state,
+            no stored audio, the "cleared" status text, an update that hides
+            the audio player, and an empty streaming-response text.
+            """
+            hide_player = gr.update(visible=False)
+            return [], [], None, "🔄 Đã xóa hội thoại", hide_player, ""
+        # Kết nối events cho text
+        send_text_btn.click(
+            fn=send_text_message,
+            inputs=[
+                text_input, chatbot, conversation_state, audio_output_state,
+                model_dropdown, language_dropdown, temperature, top_p, voice_output_toggle
+            ],
+            outputs=[chatbot, conversation_state, audio_output_state, status, audio_output, streaming_output]
         ).then(
+            fn=lambda: ("", None),
+            outputs=[text_input, audio_input]
+        )
+        # Kết nối events cho voice
+        send_voice_btn.click(
+            fn=send_voice_message,
+            inputs=[
+                audio_input, chatbot, conversation_state, audio_output_state,
+                model_dropdown, language_dropdown, temperature, top_p, voice_output_toggle
+            ],
+            outputs=[chatbot, conversation_state, audio_output_state, status, audio_output, streaming_output]
+        ).then(
+            fn=lambda: ("", None),
+            outputs=[text_input, audio_input]
         )
         clear_btn.click(
             fn=clear_chat,
+            outputs=[chatbot, conversation_state, audio_output_state, status, audio_output, streaming_output]
         )
     return sambanova_tab