Spaces:

datbkpro
/

voicebot

Sleeping

App Files Files Community

datbkpro committed 28 days ago

Commit

1ef025d

verified ·

1 Parent(s): f8c624c

Update ui/tabs.py

Browse files

Files changed (1) hide show

ui/tabs.py +193 -76

ui/tabs.py CHANGED Viewed

@@ -1018,7 +1018,7 @@ def create_streaming_voice_tab(streaming_service: StreamingVoiceService):
     with gr.Blocks() as streaming_tab:
         gr.Markdown("## 🎤 Trò chuyện giọng nói thời gian thực - Tối ưu hóa")
-        # FIXED: Thêm state để lưu kết quả VAD
         vad_result_state = gr.State(value=None)
         with gr.Row():
@@ -1027,6 +1027,7 @@ def create_streaming_voice_tab(streaming_service: StreamingVoiceService):
                 with gr.Row():
                     start_btn = gr.Button("🎙️ Bắt đầu VAD", variant="primary")
                     stop_btn = gr.Button("🛑 Dừng VAD", variant="secondary")
                 gr.Markdown("### Chế độ tự động (VAD)")
                 gr.Markdown("Hệ thống tự động nhận diện khi bạn bắt đầu nói")
@@ -1038,6 +1039,14 @@ def create_streaming_voice_tab(streaming_service: StreamingVoiceService):
                         interactive=False
                     )
                 gr.Markdown("### Chế độ thủ công")
                 microphone = gr.Microphone(
                     label="🎤 Nhấn để nói thủ công",
@@ -1052,27 +1061,21 @@ def create_streaming_voice_tab(streaming_service: StreamingVoiceService):
                     )
                     refresh_latency_btn = gr.Button("🔄 Refresh Metrics", size="sm")
-                clear_btn = gr.Button("🗑️ Xóa hội thoại")
-                # State info
-                state_info = gr.Textbox(
-                    label="Thông tin hệ thống",
-                    value="Khởi tạo...",
-                    lines=3,
-                    interactive=False
-                )
             with gr.Column(scale=2):
                 transcription_box = gr.Textbox(
-                    label="📝 Bạn vừa nói",
-                    lines=2,
-                    interactive=False
                 )
                 response_box = gr.Textbox(
                     label="🤖 Phản hồi AI",
-                    lines=3,
-                    interactive=False
                 )
                 audio_output = gr.Audio(
@@ -1083,98 +1086,207 @@ def create_streaming_voice_tab(streaming_service: StreamingVoiceService):
         # State variables
         is_vad_active = gr.State(value=False)
         def vad_callback(result):
-            """Callback khi VAD phát hiện speech - FIXED VERSION"""
-            print(f"🎯 Nhận kết quả từ VAD: {result['transcription']}")
-            vad_result_state.value = result
         def start_vad():
-            """Bắt đầu VAD - FIXED VERSION"""
-            success = streaming_service.start_listening(vad_callback)
-            if success:
-                is_vad_active.value = True
-                status = "✅ VAD đang chạy - Hãy nói gì đó!"
-                state = streaming_service.get_conversation_state()
-                state_text = f"VAD: Đang hoạt động\nQueue: {state['queue_size']}\nThreads: {state['worker_threads']}"
-            else:
-                status = "❌ Không thể khởi động VAD"
-                state_text = "Lỗi khởi động"
-            return status, state_text
         def stop_vad():
             """Dừng VAD"""
-            streaming_service.stop_listening()
-            is_vad_active.value = False
-            state = streaming_service.get_conversation_state()
-            state_text = f"VAD: Đã dừng\nHistory: {state['history_length']} messages"
-            return "🛑 VAD đã dừng", state_text
         def process_microphone(audio_data):
-            """Xử lý microphone input"""
             if audio_data is None:
-                return "Chưa có âm thanh", "Hãy nói gì đó...", None, "VAD: Đang chạy" if is_vad_active.value else "VAD: Dừng"
             try:
                 result = streaming_service.process_streaming_audio(audio_data)
                 state = streaming_service.get_conversation_state()
                 state_text = f"Manual mode\nHistory: {state['history_length']} messages"
-                return result['transcription'], result['response'], result['tts_audio'], state_text
             except Exception as e:
-                return f"Lỗi: {e}", "Xin lỗi, có lỗi xảy ra", None, "Lỗi xử lý"
         def check_vad_results():
-            """Kiểm tra kết quả VAD - FIXED: Cập nhật UI khi có kết quả mới"""
-            if vad_result_state.value is not None:
-                result = vad_result_state.value
-                vad_result_state.value = None  # Reset sau khi dùng
-                state = streaming_service.get_conversation_state()
-                state_text = f"VAD mode\nHistory: {state['history_length']} messages\nQueue: {state['queue_size']}"
-                return result['transcription'], result['response'], result['tts_audio'], state_text
-            return gr.skip(), gr.skip(), gr.skip(), gr.skip()
         def clear_chat():
             """Xóa hội thoại"""
             streaming_service.clear_conversation()
             state = streaming_service.get_conversation_state()
-            state_text = f"Đã xóa hội thoại\nHistory: {state['history_length']} messages"
-            return "", "", None, state_text
         def refresh_latency():
             """Làm mới latency metrics"""
-            stats = streaming_service.get_latency_stats()
-            return stats
-        def update_state_info():
             """Cập nhật thông tin trạng thái"""
-            state = streaming_service.get_conversation_state()
-            formatted_state = f"VAD: {'Đang chạy' if state['is_listening'] else 'Dừng'}\n"
-            formatted_state += f"Queue: {state['queue_size']}\n"
-            formatted_state += f"History: {state['history_length']} messages\n"
-            formatted_state += f"Threads: {state['worker_threads']}\n"
-            formatted_state += f"Last: {state['last_update']}"
-            latency_info = streaming_service.get_latency_stats()
-            return formatted_state, latency_info
         # Event handlers
-        start_btn.click(start_vad, outputs=[vad_status, state_info])
-        stop_btn.click(stop_vad, outputs=[vad_status, state_info])
         microphone.stream(
             process_microphone,
             inputs=[microphone],
-            outputs=[transcription_box, response_box, audio_output, state_info]
         )
         clear_btn.click(
             clear_chat,
-            outputs=[transcription_box, response_box, audio_output, state_info]
         )
         refresh_latency_btn.click(
@@ -1182,16 +1294,21 @@ def create_streaming_voice_tab(streaming_service: StreamingVoiceService):
             outputs=[latency_display]
         )
-        # FIXED: Timer để kiểm tra kết quả VAD
-        gr.Timer(1.0).tick(
             fn=check_vad_results,
-            outputs=[transcription_box, response_box, audio_output, state_info]
         )
-        # Timer để cập nhật system info
-        gr.Timer(3.0).tick(
-            fn=update_state_info,
-            outputs=[state_info, latency_display]
         )
     return streaming_tab

     with gr.Blocks() as streaming_tab:
         gr.Markdown("## 🎤 Trò chuyện giọng nói thời gian thực - Tối ưu hóa")
+        # Store VAD results
         vad_result_state = gr.State(value=None)
         with gr.Row():
                 with gr.Row():
                     start_btn = gr.Button("🎙️ Bắt đầu VAD", variant="primary")
                     stop_btn = gr.Button("🛑 Dừng VAD", variant="secondary")
+                    clear_btn = gr.Button("🗑️ Xóa hội thoại")
                 gr.Markdown("### Chế độ tự động (VAD)")
                 gr.Markdown("Hệ thống tự động nhận diện khi bạn bắt đầu nói")
                         interactive=False
                     )
+                # Real-time status display
+                status_display = gr.Textbox(
+                    label="🎯 Trạng thái hiện tại",
+                    value="Đang chờ...",
+                    interactive=False,
+                    lines=2
+                )
                 gr.Markdown("### Chế độ thủ công")
                 microphone = gr.Microphone(
                     label="🎤 Nhấn để nói thủ công",
                     )
                     refresh_latency_btn = gr.Button("🔄 Refresh Metrics", size="sm")
             with gr.Column(scale=2):
+                # Real-time transcription
                 transcription_box = gr.Textbox(
+                    label="📝 Bạn đang nói (real-time)",
+                    lines=3,
+                    interactive=False,
+                    value="Nói gì đó để bắt đầu..."
                 )
+                # AI Response
                 response_box = gr.Textbox(
                     label="🤖 Phản hồi AI",
+                    lines=5,
+                    interactive=False,
+                    value="Tôi sẽ trả lời bạn ở đây..."
                 )
                 audio_output = gr.Audio(
         # State variables
         is_vad_active = gr.State(value=False)
+        last_vad_update = gr.State(value=0)
+        # Queue shared via closure (not a true global): vad_callback puts results, check_vad_results polls them
+        vad_results_queue = queue.Queue()
         def vad_callback(result):
+            """Callback khi VAD phát hiện speech"""
+            print(f"🎯 VAD Callback: {result.get('transcription', 'No text')}")
+            vad_results_queue.put(result)
         def start_vad():
+            """Bắt đầu VAD"""
+            try:
+                # Set callback
+                success = streaming_service.start_listening(vad_callback)
+                if success:
+                    is_vad_active.value = True
+                    status = "✅ VAD đang chạy - Hãy nói gì đó!"
+                    # Hiển thị thông báo
+                    if streaming_service.speech_callback:
+                        streaming_service.speech_callback({
+                            'transcription': "VAD đã sẵn sàng! Hãy nói...",
+                            'response': "",
+                            'tts_audio': None,
+                            'status': 'listening'
+                        })
+                    state = streaming_service.get_conversation_state()
+                    state_text = f"✅ VAD: Đang hoạt động\nQueue: {state['queue_size']}\nThreads: {state['worker_threads']}"
+                    status_msg = "🎤 Đang lắng nghe... nói đi!"
+                else:
+                    status = "❌ Không thể khởi động VAD"
+                    state_text = "Lỗi khởi động"
+                    status_msg = "Lỗi!"
+                return status, state_text, status_msg
+            except Exception as e:
+                print(f"❌ Lỗi start_vad: {e}")
+                return "❌ Lỗi khởi động", f"Lỗi: {e}", "Lỗi!"
         def stop_vad():
             """Dừng VAD"""
+            try:
+                streaming_service.stop_listening()
+                is_vad_active.value = False
+                # Clear queue
+                while not vad_results_queue.empty():
+                    try:
+                        vad_results_queue.get_nowait()
+                    except:
+                        pass
+                state = streaming_service.get_conversation_state()
+                state_text = f"🛑 VAD: Đã dừng\nHistory: {state['history_length']} messages"
+                status_msg = "Đã dừng lắng nghe"
+                return "🛑 VAD đã dừng", state_text, status_msg
+            except Exception as e:
+                print(f"❌ Lỗi stop_vad: {e}")
+                return "Lỗi!", f"Lỗi: {e}", "Lỗi!"
         def process_microphone(audio_data):
+            """Xử lý microphone input manual mode"""
             if audio_data is None:
+                return "Chưa có âm thanh", "Hãy nói gì đó...", None, "VAD: Tắt", "Manual mode"
             try:
+                print(f"🎤 Manual audio: {len(audio_data[1])} samples")
+                # Process with streaming service
                 result = streaming_service.process_streaming_audio(audio_data)
                 state = streaming_service.get_conversation_state()
                 state_text = f"Manual mode\nHistory: {state['history_length']} messages"
+                # Determine status
+                if result['status'] == 'processing':
+                    status_msg = "⏳ Đang xử lý..."
+                elif result['status'] == 'listening':
+                    status_msg = "🎤 Đang nghe..."
+                else:
+                    status_msg = result['status']
+                return result['transcription'], result['response'], result['tts_audio'], state_text, status_msg
             except Exception as e:
+                print(f"❌ Lỗi process_microphone: {e}")
+                return f"Lỗi: {e}", "Xin lỗi, có lỗi xảy ra", None, "Lỗi xử lý", "Lỗi!"
         def check_vad_results():
+            """Kiểm tra và hiển thị kết quả VAD"""
+            try:
+                # Check if VAD is active
+                if not is_vad_active.value:
+                    return gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip()
+                # Try to get result from queue
+                try:
+                    result = vad_results_queue.get_nowait()
+                    print(f"📥 Got VAD result: {result.get('transcription', 'No text')}")
+                    state = streaming_service.get_conversation_state()
+                    state_text = f"VAD mode\nQueue: {state['queue_size']}\nThreads: {state['worker_threads']}"
+                    # Determine status message
+                    if result.get('status') == 'processing':
+                        status_msg = "⏳ Đang xử lý VAD..."
+                    elif result.get('status') == 'partial':
+                        status_msg = "🎤 Đang nhận diện..."
+                    elif result.get('status') == 'completed':
+                        status_msg = "✅ Đã xử lý xong"
+                    else:
+                        status_msg = result.get('status', 'Đang lắng nghe')
+                    return (
+                        result.get('transcription', ''),
+                        result.get('response', ''),
+                        result.get('tts_audio', None),
+                        state_text,
+                        status_msg
+                    )
+                except queue.Empty:
+                    # No new results
+                    return gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip()
+            except Exception as e:
+                print(f"❌ Lỗi check_vad_results: {e}")
+                return gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip()
         def clear_chat():
             """Xóa hội thoại"""
             streaming_service.clear_conversation()
             state = streaming_service.get_conversation_state()
+            state_text = f"✅ Đã xóa hội thoại\nHistory: {state['history_length']} messages"
+            status_msg = "Sẵn sàng"
+            return "", "", None, state_text, status_msg
         def refresh_latency():
             """Làm mới latency metrics"""
+            try:
+                stats = streaming_service.get_latency_stats()
+                return stats
+            except Exception as e:
+                print(f"❌ Lỗi refresh_latency: {e}")
+                return {}
+        def update_status_info():
             """Cập nhật thông tin trạng thái"""
+            try:
+                state = streaming_service.get_conversation_state()
+                formatted_state = f"🎯 VAD: {'✅ Đang chạy' if state['is_listening'] else '❌ Dừng'}\n"
+                formatted_state += f"📊 Queue: {state['queue_size']}\n"
+                formatted_state += f"📝 History: {state['history_length']} messages\n"
+                formatted_state += f"🧵 Threads: {state['worker_threads']}\n"
+                formatted_state += f"⏰ Last: {state['last_update']}"
+                # Get latency stats
+                latency_info = streaming_service.get_latency_stats()
+                # Current status
+                if state['is_listening']:
+                    current_status = "🎤 Đang lắng nghe... nói đi!"
+                else:
+                    current_status = "🛑 Đã dừng"
+                return formatted_state, latency_info, current_status
+            except Exception as e:
+                print(f"❌ Lỗi update_status_info: {e}")
+                return f"Lỗi: {e}", {}, "Lỗi!"
         # Event handlers
+        start_btn.click(
+            start_vad,
+            outputs=[vad_status, status_display, transcription_box]
+        )
+        stop_btn.click(
+            stop_vad,
+            outputs=[vad_status, status_display, transcription_box]
+        )
+        # Microphone streaming
         microphone.stream(
             process_microphone,
             inputs=[microphone],
+            outputs=[transcription_box, response_box, audio_output, status_display, vad_status]
         )
         clear_btn.click(
             clear_chat,
+            outputs=[transcription_box, response_box, audio_output, status_display, vad_status]
         )
         refresh_latency_btn.click(
             outputs=[latency_display]
         )
+        # IMPORTANT: Timer that polls for VAD results - uses a shorter interval than before (0.5 s instead of 1.0 s)
+        timer_component = gr.Timer(0.5)  # 500ms interval
+        # Connect timer to check function
+        timer_component.tick(
             fn=check_vad_results,
+            outputs=[transcription_box, response_box, audio_output, status_display, vad_status]
         )
+        # Timer that refreshes the system info (every 2 seconds)
+        info_timer = gr.Timer(2.0)
+        info_timer.tick(
+            fn=update_status_info,
+            outputs=[status_display, latency_display, vad_status]
         )
     return streaming_tab