Spaces:

datbkpro
/

voicebot

Sleeping

App Files Files Community

datbkpro committed on Nov 13, 2025

Commit

3bcab2c

verified ·

1 Parent(s): 19ac002

Update services/streaming_voice_service.py

Browse files

Files changed (1) hide show

services/streaming_voice_service.py +80 -72

services/streaming_voice_service.py CHANGED Viewed

@@ -190,7 +190,7 @@ class StreamingVoiceService:
         print("🔄 Đang khởi tạo VOSK ASR...")
         self.vosk_asr = VoskStreamingASR()
-        # Khởi tạo VAD - SỬ DỤNG SILERO VAD CỦA MÀY
         self.vad_processor = SileroVAD()
         self.is_listening = False
         self.speech_callback = None
@@ -204,11 +204,27 @@ class StreamingVoiceService:
         self.response_queue = queue.Queue()
         self.processing_active = False
         # Latency tracking
         self.latency_metrics = {
             'asr': [], 'llm': [], 'tts': [], 'total': []
         }
     def start_listening(self, speech_callback: Callable) -> bool:
         """Bắt đầu lắng nghe với Silero VAD"""
         if self.is_listening:
@@ -241,6 +257,7 @@ class StreamingVoiceService:
                 name="AI-Response-Worker"
             )
             worker_thread.start()
             print("🎙️ Đã bắt đầu lắng nghe với Silero VAD")
@@ -257,8 +274,69 @@ class StreamingVoiceService:
         return False
     def _on_speech_detected(self, speech_audio: np.ndarray, sample_rate: int):
-        """Callback khi Silero VAD phát hiện speech - FIXED VERSION"""
         if not self.is_listening:
             return
@@ -312,56 +390,6 @@ class StreamingVoiceService:
         except Exception as e:
             print(f"❌ Lỗi trong VAD speech detection: {e}")
-    def _process_response_worker(self):
-        """Worker xử lý phản hồi AI từ queue"""
-        while self.processing_active:
-            try:
-                # Lấy item từ queue với timeout
-                item = self.response_queue.get(timeout=1.0)
-                if item is None:  # Tín hiệu dừng
-                    break
-                transcription = item['transcription']
-                start_time = item['timestamp']
-                print(f"🤖 Processing AI response for: '{transcription}'")
-                # Tạo phản hồi AI với latency tracking
-                llm_start_time = time.time()
-                response = self._generate_ai_response(transcription)
-                llm_time = time.time() - llm_start_time
-                self.latency_metrics['llm'].append(llm_time)
-                tts_start_time = time.time()
-                tts_audio_path = self._text_to_speech(response)
-                tts_time = time.time() - tts_start_time
-                if tts_time > 0:
-                    self.latency_metrics['tts'].append(tts_time)
-                # Gửi kết quả về callback
-                if self.speech_callback:
-                    self.speech_callback({
-                        'transcription': transcription,
-                        'response': response,
-                        'tts_audio': tts_audio_path,
-                        'status': 'completed'
-                    })
-                # Đánh dấu task hoàn thành
-                self.response_queue.task_done()
-            except queue.Empty:
-                continue
-            except Exception as e:
-                print(f"❌ Lỗi trong response worker: {e}")
-                if self.speech_callback:
-                    self.speech_callback({
-                        'transcription': "Lỗi xử lý",
-                        'response': f"Xin lỗi, có lỗi xảy ra: {str(e)}",
-                        'tts_audio': None,
-                        'status': 'error'
-                    })
     def process_streaming_audio(self, audio_data: tuple) -> Dict[str, Any]:
         """Xử lý audio streaming manual mode"""
         if not audio_data:
@@ -500,15 +528,6 @@ class StreamingVoiceService:
             'status': 'error'
         }
-    def stop_listening(self):
-        """Dừng lắng nghe"""
-        self.is_listening = False
-        self.processing_active = False
-        self.vad_processor.stop_stream()
-        if self.vosk_asr:
-            self.vosk_asr.stop_stream()
-        print("🛑 Đã dừng lắng nghe")
     def clear_conversation(self):
         """Xóa lịch sử hội thoại"""
         self.conversation_history = []
@@ -516,17 +535,6 @@ class StreamingVoiceService:
         self.partial_transcription = ""
         print("🗑️ Đã xóa lịch sử hội thoại")
-    def get_conversation_state(self) -> dict:
-        return {
-            'is_listening': self.is_listening,
-            'history_length': len(self.conversation_history),
-            'current_transcription': self.current_transcription,
-            'partial_transcription': self.partial_transcription,
-            'queue_size': self.response_queue.qsize(),
-            'vosk_active': self.vosk_asr.is_streaming if self.vosk_asr else False,
-            'last_update': time.strftime("%H:%M:%S")
-        }
     def get_latency_stats(self) -> dict:
         """Lấy thống kê latency"""
         stats = {}

         print("🔄 Đang khởi tạo VOSK ASR...")
         self.vosk_asr = VoskStreamingASR()
+        # Khởi tạo VAD
         self.vad_processor = SileroVAD()
         self.is_listening = False
         self.speech_callback = None
         self.response_queue = queue.Queue()
         self.processing_active = False
+        # Worker threads tracking
+        self.worker_threads = 0
         # Latency tracking
         self.latency_metrics = {
             'asr': [], 'llm': [], 'tts': [], 'total': []
         }
+    def get_conversation_state(self) -> dict:
+        """Lấy trạng thái hội thoại - FIXED VERSION"""
+        return {
+            'is_listening': self.is_listening,
+            'history_length': len(self.conversation_history),
+            'current_transcription': self.current_transcription,
+            'partial_transcription': self.partial_transcription,
+            'queue_size': self.response_queue.qsize(),
+            'worker_threads': self.worker_threads,  # THÊM KEY NÀY
+            'vosk_active': self.vosk_asr.is_streaming if self.vosk_asr else False,
+            'last_update': time.strftime("%H:%M:%S")
+        }
     def start_listening(self, speech_callback: Callable) -> bool:
         """Bắt đầu lắng nghe với Silero VAD"""
         if self.is_listening:
                 name="AI-Response-Worker"
             )
             worker_thread.start()
+            self.worker_threads = 1  # CẬP NHẬT SỐ LƯỢNG THREAD
             print("🎙️ Đã bắt đầu lắng nghe với Silero VAD")
         return False
+    def stop_listening(self):
+        """Dừng lắng nghe"""
+        self.is_listening = False
+        self.processing_active = False
+        self.worker_threads = 0  # RESET SỐ THREAD
+        self.vad_processor.stop_stream()
+        if self.vosk_asr:
+            self.vosk_asr.stop_stream()
+        print("🛑 Đã dừng lắng nghe")
+    def _process_response_worker(self):
+        """Worker xử lý phản hồi AI từ queue"""
+        while self.processing_active:
+            try:
+                # Lấy item từ queue với timeout
+                item = self.response_queue.get(timeout=1.0)
+                if item is None:  # Tín hiệu dừng
+                    break
+                transcription = item['transcription']
+                start_time = item['timestamp']
+                print(f"🤖 Processing AI response for: '{transcription}'")
+                # Tạo phản hồi AI với latency tracking
+                llm_start_time = time.time()
+                response = self._generate_ai_response(transcription)
+                llm_time = time.time() - llm_start_time
+                self.latency_metrics['llm'].append(llm_time)
+                tts_start_time = time.time()
+                tts_audio_path = self._text_to_speech(response)
+                tts_time = time.time() - tts_start_time
+                if tts_time > 0:
+                    self.latency_metrics['tts'].append(tts_time)
+                # Gửi kết quả về callback
+                if self.speech_callback:
+                    self.speech_callback({
+                        'transcription': transcription,
+                        'response': response,
+                        'tts_audio': tts_audio_path,
+                        'status': 'completed'
+                    })
+                # Đánh dấu task hoàn thành
+                self.response_queue.task_done()
+            except queue.Empty:
+                continue
+            except Exception as e:
+                print(f"❌ Lỗi trong response worker: {e}")
+                if self.speech_callback:
+                    self.speech_callback({
+                        'transcription': "Lỗi xử lý",
+                        'response': f"Xin lỗi, có lỗi xảy ra: {str(e)}",
+                        'tts_audio': None,
+                        'status': 'error'
+                    })
+    # CÁC METHOD KHÁC GIỮ NGUYÊN...
     def _on_speech_detected(self, speech_audio: np.ndarray, sample_rate: int):
+        """Callback khi Silero VAD phát hiện speech"""
         if not self.is_listening:
             return
         except Exception as e:
             print(f"❌ Lỗi trong VAD speech detection: {e}")
     def process_streaming_audio(self, audio_data: tuple) -> Dict[str, Any]:
         """Xử lý audio streaming manual mode"""
         if not audio_data:
             'status': 'error'
         }
     def clear_conversation(self):
         """Xóa lịch sử hội thoại"""
         self.conversation_history = []
         self.partial_transcription = ""
         print("🗑️ Đã xóa lịch sử hội thoại")
     def get_latency_stats(self) -> dict:
         """Lấy thống kê latency"""
         stats = {}