Spaces:

datbkpro
/

voicebot

Sleeping

App Files Community

datbkpro committed on Oct 29, 2025

Commit

5e244c1

verified ·

1 Parent(s): d936d48

Update ui/tabs.py

Browse files

Files changed (1) hide show

ui/tabs.py +77 -118

ui/tabs.py CHANGED Viewed

@@ -200,176 +200,135 @@ def create_audio_tab(audio_service: AudioService):
         outputs=[transcription_output, response_output, tts_audio_output, language_display]  # UPDATED
     )
 def create_streaming_voice_tab(streaming_service: StreamingVoiceService):
-    """Tạo tab streaming voice với VAD"""
     with gr.Blocks() as streaming_tab:
-        gr.Markdown("## 🎤 Trò chuyện giọng nói thời gian thực với VAD")
-        gr.Markdown("""
-        ### 🎯 Chế độ VAD (Voice Activity Detection)
-        - **Tự động phát hiện** khi bạn bắt đầu nói
-        - **Không cần giữ nút** - hệ thống tự nhận diện
-        - **Loại bỏ tiếng ồn** - chỉ xử lý giọng nói thật
-        """)
         with gr.Row():
             with gr.Column(scale=1):
-                # VAD Controls
                 with gr.Row():
-                    start_vad_btn = gr.Button("🎙️ Bắt đầu VAD", variant="primary")
-                    stop_vad_btn = gr.Button("🛑 Dừng VAD", variant="secondary")
-                # Microphone component (vẫn giữ cho manual mode)
                 microphone = gr.Microphone(
-                    label="🎤 Hoặc nhấn để nói thủ công",
                     type="numpy",
-                    streaming=True,
-                    show_download_button=False
                 )
-                # Clear conversation button
-                clear_btn = gr.Button("🗑️ Xóa hội thoại", variant="secondary")
-                # Status display
-                status_display = gr.Textbox(
-                    label="Trạng thái",
-                    value="Chưa bắt đầu - nhấn 'Bắt đầu VAD'",
-                    interactive=False
-                )
-                # Conversation state
-                state_display = gr.JSON(
                     label="Thông tin hệ thống",
-                    value={}
                 )
             with gr.Column(scale=2):
-                # Real-time transcription
-                realtime_transcription = gr.Textbox(
                     label="📝 Bạn vừa nói",
                     lines=2,
-                    interactive=False,
-                    placeholder="Văn bản được chuyển đổi sẽ xuất hiện ở đây..."
                 )
-                # AI Response
-                ai_response = gr.Textbox(
                     label="🤖 Phản hồi AI",
                     lines=3,
-                    interactive=False,
-                    placeholder="Phản hồi của AI sẽ xuất hiện ở đây..."
                 )
-                # TTS Audio output
-                tts_output = gr.Audio(
-                    label="🔊 Phản hồi bằng giọng nói",
                     interactive=False,
                     autoplay=True
                 )
-        # Biến state để lưu kết quả từ VAD
-        vad_transcription = gr.State(value="")
-        vad_response = gr.State(value="")
-        vad_audio = gr.State(value=None)
-        vad_status = gr.State(value="waiting")
         def start_vad():
-            """Bắt đầu VAD listening"""
-            def vad_callback(result):
-                """Callback khi VAD phát hiện speech"""
-                # Cập nhật state với kết quả mới
-                vad_transcription.value = result.get('transcription', '')
-                vad_response.value = result.get('response', '')
-                vad_audio.value = result.get('tts_audio', None)
-                vad_status.value = result.get('status', 'completed')
-                print(f"VAD Callback: {result.get('transcription', '')}")
-            success = streaming_service.start_listening(vad_callback)
-            status = "✅ Đang lắng nghe với VAD..." if success else "❌ Lỗi khởi động VAD"
-            state = streaming_service.get_conversation_state()
             return status, state
         def stop_vad():
-            """Dừng VAD listening"""
             streaming_service.stop_listening()
-            state = streaming_service.get_conversation_state()
-            return "🛑 Đã dừng VAD", state
-        def process_manual_audio(audio_data):
-            """Xử lý audio manual (không dùng VAD)"""
             if audio_data is None:
-                return "❌ Không có âm thanh", "Vui lòng nói lại", None, "Đang chờ...", {}
             try:
                 result = streaming_service.process_streaming_audio(audio_data)
-                state = streaming_service.get_conversation_state()
-                status = "✅ Đã xử lý manual audio"
-                return result['transcription'], result['response'], result['tts_audio'], status, state
             except Exception as e:
-                error_msg = f"❌ Lỗi: {str(e)}"
-                return error_msg, "Xin lỗi, có lỗi xảy ra", None, "❌ Lỗi", {}
-        def clear_conversation():
             """Xóa hội thoại"""
             streaming_service.clear_conversation()
-            state = streaming_service.get_conversation_state()
-            # Reset các state
-            vad_transcription.value = ""
-            vad_response.value = ""
-            vad_audio.value = None
-            return "", "", None, "🗑️ Đã xóa hội thoại", state
-        def get_vad_results():
-            """Lấy kết quả từ VAD và cập nhật UI"""
-            if vad_status.value == "completed":
-                # Reset status sau khi lấy kết quả
-                vad_status.value = "waiting"
-                state = streaming_service.get_conversation_state()
-                return (vad_transcription.value, vad_response.value, vad_audio.value,
-                       "✅ VAD đã xử lý", state)
-            else:
-                # Không có kết quả mới, trả về None để không cập nhật
-                return gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip()
-        def refresh_state():
-            """Chỉ cập nhật state display"""
-            state = streaming_service.get_conversation_state()
-            return state
-        # Event handlers chính
-        start_vad_btn.click(
-            start_vad,
-            outputs=[status_display, state_display]
-        )
-        stop_vad_btn.click(
-            stop_vad,
-            outputs=[status_display, state_display]
-        )
         microphone.stream(
-            process_manual_audio,
             inputs=[microphone],
-            outputs=[realtime_transcription, ai_response, tts_output, status_display, state_display]
         )
         clear_btn.click(
-            clear_conversation,
-            outputs=[realtime_transcription, ai_response, tts_output, status_display, state_display]
-        )
-        # Nút refresh để cập nhật kết quả VAD thủ công
-        refresh_btn = gr.Button("🔄 Kiểm tra kết quả VAD", variant="secondary", visible=False)
-        refresh_btn.click(
-            get_vad_results,
-            outputs=[realtime_transcription, ai_response, tts_output, status_display, state_display]
-        )
-        # Tự động cập nhật state display mỗi 2 giây
-        streaming_tab.load(
-            refresh_state,
-            outputs=[state_display],
-            every=2
         )
     return streaming_tab

         outputs=[transcription_output, response_output, tts_audio_output, language_display]  # UPDATED
     )
 def create_streaming_voice_tab(streaming_service: StreamingVoiceService):
+    """Tạo tab streaming voice với VAD - Phiên bản cực kỳ đơn giản"""
     with gr.Blocks() as streaming_tab:
+        gr.Markdown("## 🎤 Trò chuyện giọng nói thời gian thực")
         with gr.Row():
             with gr.Column(scale=1):
+                # Controls
                 with gr.Row():
+                    start_btn = gr.Button("🎙️ Bắt đầu VAD", variant="primary")
+                    stop_btn = gr.Button("🛑 Dừng VAD", variant="secondary")
+                gr.Markdown("### Chế độ tự động (VAD)")
+                gr.Markdown("Hệ thống tự động nhận diện khi bạn bắt đầu nói")
+                with gr.Row():
+                    vad_status = gr.Textbox(
+                        label="Trạng thái VAD",
+                        value="Chưa bắt đầu",
+                        interactive=False
+                    )
+                gr.Markdown("### Chế độ thủ công")
                 microphone = gr.Microphone(
+                    label="🎤 Nhấn để nói thủ công",
                     type="numpy",
+                    streaming=True
                 )
+                clear_btn = gr.Button("🗑️ Xóa hội thoại")
+                # State info
+                state_info = gr.Textbox(
                     label="Thông tin hệ thống",
+                    value="Khởi tạo...",
+                    lines=3,
+                    interactive=False
                 )
             with gr.Column(scale=2):
+                transcription_box = gr.Textbox(
                     label="📝 Bạn vừa nói",
                     lines=2,
+                    interactive=False
                 )
+                response_box = gr.Textbox(
                     label="🤖 Phản hồi AI",
                     lines=3,
+                    interactive=False
                 )
+                audio_output = gr.Audio(
+                    label="🔊 Giọng nói AI",
                     interactive=False,
                     autoplay=True
                 )
+        # Biến đơn giản để track VAD
+        is_vad_active = gr.State(value=False)
+        last_vad_result = gr.State(value=None)
         def start_vad():
+            """Bắt đầu VAD"""
+            def callback(result):
+                last_vad_result.value = result
+                print(f"VAD: {result.get('transcription', '')}")
+            success = streaming_service.start_listening(callback)
+            if success:
+                is_vad_active.value = True
+                status = "✅ VAD đang chạy - Hãy nói gì đó!"
+                state = f"VAD: Đang hoạt động\n"
+                state += f"Processing: {streaming_service.is_processing}\n"
+                state += f"History: {len(streaming_service.conversation_history)} messages"
+            else:
+                status = "❌ Không thể khởi động VAD"
+                state = "Lỗi khởi động"
             return status, state
         def stop_vad():
+            """Dừng VAD"""
             streaming_service.stop_listening()
+            is_vad_active.value = False
+            state = f"VAD: Đã dừng\n"
+            state += f"History: {len(streaming_service.conversation_history)} messages"
+            return "🛑 VAD đã dừng", state
+        def process_microphone(audio_data):
+            """Xử lý microphone input"""
             if audio_data is None:
+                return "Chưa có âm thanh", "Hãy nói gì đó...", None, "VAD: Đang chạy" if is_vad_active.value else "VAD: Dừng"
             try:
                 result = streaming_service.process_streaming_audio(audio_data)
+                state = f"Manual mode\nHistory: {len(streaming_service.conversation_history)} messages"
+                return result['transcription'], result['response'], result['tts_audio'], state
             except Exception as e:
+                return f"Lỗi: {e}", "Xin lỗi, có lỗi xảy ra", None, "Lỗi xử lý"
+        def clear_chat():
             """Xóa hội thoại"""
             streaming_service.clear_conversation()
+            state = f"Đã xóa hội thoại\nHistory: 0 messages"
+            return "", "", None, state
+        def check_vad_auto():
+            """Tự động kiểm tra VAD results (nếu có)"""
+            if is_vad_active.value and last_vad_result.value:
+                result = last_vad_result.value
+                last_vad_result.value = None  # Reset sau khi dùng
+                state = f"VAD: Đã xử lý\nHistory: {len(streaming_service.conversation_history)} messages"
+                return result['transcription'], result['response'], result['tts_audio'], state
+            return gr.skip(), gr.skip(), gr.skip(), gr.skip()
+        # Event handlers đơn giản
+        start_btn.click(start_vad, outputs=[vad_status, state_info])
+        stop_btn.click(stop_vad, outputs=[vad_status, state_info])
         microphone.stream(
+            process_microphone,
             inputs=[microphone],
+            outputs=[transcription_box, response_box, audio_output, state_info]
         )
         clear_btn.click(
+            clear_chat,
+            outputs=[transcription_box, response_box, audio_output, state_info]
         )
     return streaming_tab