commit
Browse files
app.py
CHANGED
|
@@ -183,100 +183,82 @@ def format_sources(src):
|
|
| 183 |
# CORE CHAT-FUNKTION với tất cả tính năng mới
|
| 184 |
# =====================================================
|
| 185 |
def chat_fn(text_input, audio_path, history, lang_sel, use_vad):
|
| 186 |
-
"""
|
| 187 |
-
Main chat function với xử lý VAD và transcription
|
| 188 |
-
"""
|
| 189 |
print(f"DEBUG: chat_fn called - text_input: '{text_input}', audio_path: {audio_path}, history length: {len(history) if history else 0}")
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
text_to_process = ""
|
| 197 |
-
|
|
|
|
|
|
|
| 198 |
# Xử lý audio nếu có
|
| 199 |
if audio_path and os.path.exists(audio_path):
|
| 200 |
print(f"DEBUG: Processing audio file: {audio_path}")
|
| 201 |
-
|
| 202 |
-
# Lưu đường dẫn audio vào state
|
| 203 |
state.current_audio_path = audio_path
|
| 204 |
-
|
| 205 |
-
# Kiểm tra VAD nếu được bật
|
| 206 |
if use_vad and ENABLE_VAD:
|
| 207 |
try:
|
| 208 |
import soundfile as sf
|
| 209 |
audio_data, sample_rate = sf.read(audio_path)
|
| 210 |
-
print(f"DEBUG: Audio loaded - shape: {audio_data.shape}, sample_rate: {sample_rate}")
|
| 211 |
-
|
| 212 |
vad_result = handle_voice_activity(audio_data, sample_rate)
|
| 213 |
print(f"DEBUG: VAD result: {vad_result}")
|
| 214 |
-
|
| 215 |
-
# Nếu VAD phát hiện có giọng nói, hoặc nếu VAD không bật, tiến hành transcribe
|
| 216 |
if vad_result.get("is_speech", True):
|
| 217 |
-
# Transcribe audio
|
| 218 |
transcribed_text = transcribe_audio_optimized(audio_path, language=lang_sel)
|
| 219 |
if transcribed_text and transcribed_text.strip():
|
| 220 |
text_to_process = transcribed_text.strip()
|
| 221 |
print(f"DEBUG: Transcribed text: {text_to_process}")
|
| 222 |
-
else:
|
| 223 |
-
print("DEBUG: VAD detected no speech, skipping transcription")
|
| 224 |
except Exception as e:
|
| 225 |
print(f"DEBUG: Error in VAD/transcription: {e}")
|
| 226 |
-
# Fallback: transcribe ngay cả khi có lỗi
|
| 227 |
transcribed_text = transcribe_audio_optimized(audio_path, language=lang_sel)
|
| 228 |
if transcribed_text and transcribed_text.strip():
|
| 229 |
text_to_process = transcribed_text.strip()
|
| 230 |
else:
|
| 231 |
-
# Nếu VAD không bật, transcribe trực tiếp
|
| 232 |
transcribed_text = transcribe_audio_optimized(audio_path, language=lang_sel)
|
| 233 |
if transcribed_text and transcribed_text.strip():
|
| 234 |
text_to_process = transcribed_text.strip()
|
| 235 |
print(f"DEBUG: Transcribed text (no VAD): {text_to_process}")
|
| 236 |
-
|
| 237 |
# Nếu có text input từ textbox, ưu tiên sử dụng nó
|
| 238 |
if text_input and text_input.strip():
|
| 239 |
text_to_process = text_input.strip()
|
| 240 |
print(f"DEBUG: Using text input: {text_to_process}")
|
| 241 |
-
|
| 242 |
-
# Nếu không có gì để xử lý
|
| 243 |
if not text_to_process:
|
| 244 |
-
print("DEBUG: No text to process")
|
| 245 |
-
# Trả về history hiện tại và status
|
| 246 |
status_text = f"Bereit | VAD: {'On' if use_vad and ENABLE_VAD else 'Off'} | Model: OpenAI whisper-1"
|
| 247 |
-
|
| 248 |
-
history = []
|
| 249 |
-
return history, "", None, status_text
|
| 250 |
-
|
| 251 |
print(f"DEBUG: Processing text: {text_to_process}")
|
| 252 |
-
|
| 253 |
-
# Tăng cường context cho câu hỏi
|
| 254 |
-
enhanced_question = enhance_conversation_context(text_to_process, history)
|
| 255 |
-
|
| 256 |
try:
|
| 257 |
-
# RAG-Antwort berechnen
|
| 258 |
ans, sources = answer(enhanced_question, retriever, llm)
|
| 259 |
bot_msg = ans + format_sources(sources)
|
| 260 |
-
|
| 261 |
-
# Thêm vào state
|
| 262 |
state.add_message("user", text_to_process)
|
| 263 |
state.add_message("assistant", ans)
|
| 264 |
-
|
| 265 |
-
# History aktualisieren (ChatGPT-Style)
|
| 266 |
-
history.append({"role": "user", "content": text_to_process})
|
| 267 |
-
history.append({"role": "assistant", "content": bot_msg})
|
| 268 |
-
|
| 269 |
-
print(f"DEBUG: Answer generated, history length: {len(history)}")
|
| 270 |
-
|
| 271 |
except Exception as e:
|
| 272 |
print(f"DEBUG: Error in RAG pipeline: {e}")
|
| 273 |
-
# Fallback response
|
| 274 |
error_msg = "Entschuldigung, es gab einen Fehler bei der Verarbeitung Ihrer Anfrage. Bitte versuchen Sie es erneut."
|
| 275 |
-
|
| 276 |
-
history.append({"role": "assistant", "content": error_msg})
|
| 277 |
-
|
| 278 |
status_text = f"Bereit | VAD: {'On' if use_vad and ENABLE_VAD else 'Off'} | Model: OpenAI whisper-1"
|
| 279 |
-
return
|
| 280 |
|
| 281 |
# =====================================================
|
| 282 |
# FUNCTIONS FOR UI CONTROLS
|
|
@@ -365,25 +347,19 @@ def handle_audio_stream(audio_path, use_vad):
|
|
| 365 |
# TTS FUNCTION
|
| 366 |
# =====================================================
|
| 367 |
def read_last_answer(history):
|
| 368 |
-
"""Đọc câu trả lời cuối cùng"""
|
| 369 |
if not history:
|
| 370 |
print("DEBUG: No history for TTS")
|
| 371 |
return None
|
| 372 |
-
|
| 373 |
-
# Tìm câu trả lời cuối cùng của assistant
|
| 374 |
for msg in reversed(history):
|
| 375 |
-
if isinstance(msg,
|
| 376 |
-
content = msg
|
| 377 |
-
# Loại bỏ phần sources từ câu trả lời
|
| 378 |
if "## 📚 Quellen" in content:
|
| 379 |
content = content.split("## 📚 Quellen")[0].strip()
|
| 380 |
-
|
| 381 |
print(f"DEBUG: Synthesizing speech for: {content[:100]}...")
|
| 382 |
audio_result = synthesize_speech(content)
|
| 383 |
if audio_result:
|
| 384 |
print("DEBUG: TTS successful")
|
| 385 |
return audio_result
|
| 386 |
-
|
| 387 |
print("DEBUG: No assistant message found for TTS")
|
| 388 |
return None
|
| 389 |
|
|
|
|
# CORE CHAT FUNCTION with all new features
# =====================================================
def chat_fn(text_input, audio_path, history, lang_sel, use_vad):
    """Main chat function handling VAD, transcription and RAG answering.

    Args:
        text_input: text typed into the textbox (takes precedence over audio).
        audio_path: path to a recorded audio file, or None.
        history: chat history, either as [user, assistant] pairs or as
            OpenAI-style {"role": ..., "content": ...} dicts.
        lang_sel: language code passed to the transcriber.
        use_vad: whether to run voice-activity detection before transcribing.

    Returns:
        (pairs_history, cleared_textbox, cleared_audio, status_text)
    """
    print(f"DEBUG: chat_fn called - text_input: '{text_input}', audio_path: {audio_path}, history length: {len(history) if history else 0}")

    # Normalize history to a list of [user, assistant] pairs
    def to_pairs(h):
        if not h:
            return []
        if isinstance(h[0], dict):
            pairs = []
            current = [None, None]
            for m in h:
                if m.get("role") == "user":
                    if current != [None, None]:
                        pairs.append(current)
                    current = [m.get("content", ""), None]
                elif m.get("role") == "assistant":
                    if current[0] is None:
                        # Orphan assistant message with no preceding user turn
                        pairs.append([None, m.get("content", "")])
                    else:
                        current[1] = m.get("content", "")
                        pairs.append(current)
                        current = [None, None]
            if current != [None, None]:
                pairs.append(current)
            return pairs
        return h

    pairs = to_pairs(history)
    text_to_process = ""

    # If no new audio was supplied, fall back to the last recorded file
    if (not audio_path) and state.current_audio_path and os.path.exists(state.current_audio_path):
        audio_path = state.current_audio_path

    # Single transcription helper — was duplicated three times before.
    # Returns the stripped transcript, or "" when nothing usable came back.
    def _transcribe():
        t = transcribe_audio_optimized(audio_path, language=lang_sel)
        return t.strip() if t and t.strip() else ""

    # Process audio if present
    if audio_path and os.path.exists(audio_path):
        print(f"DEBUG: Processing audio file: {audio_path}")
        state.current_audio_path = audio_path
        if use_vad and ENABLE_VAD:
            try:
                import soundfile as sf
                audio_data, sample_rate = sf.read(audio_path)
                vad_result = handle_voice_activity(audio_data, sample_rate)
                print(f"DEBUG: VAD result: {vad_result}")
                # Transcribe only when VAD reports speech (default to True)
                if vad_result.get("is_speech", True):
                    text_to_process = _transcribe()
                    if text_to_process:
                        print(f"DEBUG: Transcribed text: {text_to_process}")
            except Exception as e:
                print(f"DEBUG: Error in VAD/transcription: {e}")
                # Fallback: transcribe even when VAD itself failed
                text_to_process = _transcribe()
        else:
            # VAD disabled — transcribe directly
            text_to_process = _transcribe()
            if text_to_process:
                print(f"DEBUG: Transcribed text (no VAD): {text_to_process}")

    # Text typed into the textbox takes precedence over transcription
    if text_input and text_input.strip():
        text_to_process = text_input.strip()
        print(f"DEBUG: Using text input: {text_to_process}")

    # Status line is identical on every exit path — compute it once
    status_text = f"Bereit | VAD: {'On' if use_vad and ENABLE_VAD else 'Off'} | Model: OpenAI whisper-1"

    # Nothing to process — return the (normalized) history unchanged
    if not text_to_process:
        print("DEBUG: No text to process")
        return pairs, "", None, status_text

    print(f"DEBUG: Processing text: {text_to_process}")
    enhanced_question = enhance_conversation_context(text_to_process, pairs)

    try:
        # Compute the RAG answer
        ans, sources = answer(enhanced_question, retriever, llm)
        bot_msg = ans + format_sources(sources)
        # Record the turn in application state and in the UI history
        state.add_message("user", text_to_process)
        state.add_message("assistant", ans)
        pairs.append([text_to_process, bot_msg])
        print(f"DEBUG: Answer generated, history length: {len(pairs)}")
    except Exception as e:
        print(f"DEBUG: Error in RAG pipeline: {e}")
        # Fallback response shown to the user
        error_msg = "Entschuldigung, es gab einen Fehler bei der Verarbeitung Ihrer Anfrage. Bitte versuchen Sie es erneut."
        pairs.append([text_to_process, error_msg])

    return pairs, "", None, status_text
| 262 |
|
| 263 |
# =====================================================
|
| 264 |
# FUNCTIONS FOR UI CONTROLS
|
|
|
|
# TTS FUNCTION
# =====================================================
def read_last_answer(history):
    """Synthesize speech (TTS) for the most recent assistant answer.

    Accepts chat history either as [user, assistant] pairs or as
    OpenAI-style {"role": ..., "content": ...} dicts, so it works with
    both history formats the app produces.

    Returns:
        The TTS audio result, or None when there is no assistant answer
        or synthesis fails for every candidate.
    """
    if not history:
        print("DEBUG: No history for TTS")
        return None

    # Walk backwards to find the most recent assistant message
    for msg in reversed(history):
        # Extract the assistant text regardless of history format
        content = None
        if isinstance(msg, dict):
            if msg.get("role") == "assistant":
                content = msg.get("content")
        elif isinstance(msg, (list, tuple)) and len(msg) == 2:
            content = msg[1]
        if not content:
            continue

        # Strip the appended sources section before reading aloud
        if "## 📚 Quellen" in content:
            content = content.split("## 📚 Quellen")[0].strip()

        print(f"DEBUG: Synthesizing speech for: {content[:100]}...")
        audio_result = synthesize_speech(content)
        if audio_result:
            print("DEBUG: TTS successful")
            return audio_result

    print("DEBUG: No assistant message found for TTS")
    return None