Nguyen5 committed on
Commit
77c7f90
·
1 Parent(s): 4e44ffc
Files changed (1) hide show
  1. app.py +35 -59
app.py CHANGED
@@ -183,100 +183,82 @@ def format_sources(src):
183
  # CORE CHAT-FUNKTION với tất cả tính năng mới
184
  # =====================================================
185
  def chat_fn(text_input, audio_path, history, lang_sel, use_vad):
186
- """
187
- Main chat function với xử lý VAD và transcription
188
- """
189
  print(f"DEBUG: chat_fn called - text_input: '{text_input}', audio_path: {audio_path}, history length: {len(history) if history else 0}")
190
-
191
- # Khởi tạo history nếu None
192
- if history is None:
193
- history = []
194
-
195
- # Biến để lưu text cần xử lý
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  text_to_process = ""
197
-
 
 
198
  # Xử lý audio nếu có
199
  if audio_path and os.path.exists(audio_path):
200
  print(f"DEBUG: Processing audio file: {audio_path}")
201
-
202
- # Lưu đường dẫn audio vào state
203
  state.current_audio_path = audio_path
204
-
205
- # Kiểm tra VAD nếu được bật
206
  if use_vad and ENABLE_VAD:
207
  try:
208
  import soundfile as sf
209
  audio_data, sample_rate = sf.read(audio_path)
210
- print(f"DEBUG: Audio loaded - shape: {audio_data.shape}, sample_rate: {sample_rate}")
211
-
212
  vad_result = handle_voice_activity(audio_data, sample_rate)
213
  print(f"DEBUG: VAD result: {vad_result}")
214
-
215
- # Nếu VAD phát hiện có giọng nói, hoặc nếu VAD không bật, tiến hành transcribe
216
  if vad_result.get("is_speech", True):
217
- # Transcribe audio
218
  transcribed_text = transcribe_audio_optimized(audio_path, language=lang_sel)
219
  if transcribed_text and transcribed_text.strip():
220
  text_to_process = transcribed_text.strip()
221
  print(f"DEBUG: Transcribed text: {text_to_process}")
222
- else:
223
- print("DEBUG: VAD detected no speech, skipping transcription")
224
  except Exception as e:
225
  print(f"DEBUG: Error in VAD/transcription: {e}")
226
- # Fallback: transcribe ngay cả khi có lỗi
227
  transcribed_text = transcribe_audio_optimized(audio_path, language=lang_sel)
228
  if transcribed_text and transcribed_text.strip():
229
  text_to_process = transcribed_text.strip()
230
  else:
231
- # Nếu VAD không bật, transcribe trực tiếp
232
  transcribed_text = transcribe_audio_optimized(audio_path, language=lang_sel)
233
  if transcribed_text and transcribed_text.strip():
234
  text_to_process = transcribed_text.strip()
235
  print(f"DEBUG: Transcribed text (no VAD): {text_to_process}")
236
-
237
  # Nếu có text input từ textbox, ưu tiên sử dụng nó
238
  if text_input and text_input.strip():
239
  text_to_process = text_input.strip()
240
  print(f"DEBUG: Using text input: {text_to_process}")
241
-
242
- # Nếu không có gì để xử lý
243
  if not text_to_process:
244
- print("DEBUG: No text to process")
245
- # Trả về history hiện tại và status
246
  status_text = f"Bereit | VAD: {'On' if use_vad and ENABLE_VAD else 'Off'} | Model: OpenAI whisper-1"
247
- if history is None:
248
- history = []
249
- return history, "", None, status_text
250
-
251
  print(f"DEBUG: Processing text: {text_to_process}")
252
-
253
- # Tăng cường context cho câu hỏi
254
- enhanced_question = enhance_conversation_context(text_to_process, history)
255
-
256
  try:
257
- # RAG-Antwort berechnen
258
  ans, sources = answer(enhanced_question, retriever, llm)
259
  bot_msg = ans + format_sources(sources)
260
-
261
- # Thêm vào state
262
  state.add_message("user", text_to_process)
263
  state.add_message("assistant", ans)
264
-
265
- # History aktualisieren (ChatGPT-Style)
266
- history.append({"role": "user", "content": text_to_process})
267
- history.append({"role": "assistant", "content": bot_msg})
268
-
269
- print(f"DEBUG: Answer generated, history length: {len(history)}")
270
-
271
  except Exception as e:
272
  print(f"DEBUG: Error in RAG pipeline: {e}")
273
- # Fallback response
274
  error_msg = "Entschuldigung, es gab einen Fehler bei der Verarbeitung Ihrer Anfrage. Bitte versuchen Sie es erneut."
275
- history.append({"role": "user", "content": text_to_process})
276
- history.append({"role": "assistant", "content": error_msg})
277
-
278
  status_text = f"Bereit | VAD: {'On' if use_vad and ENABLE_VAD else 'Off'} | Model: OpenAI whisper-1"
279
- return history, "", None, status_text
280
 
281
  # =====================================================
282
  # FUNCTIONS FOR UI CONTROLS
@@ -365,25 +347,19 @@ def handle_audio_stream(audio_path, use_vad):
365
  # TTS FUNCTION
366
  # =====================================================
367
  def read_last_answer(history):
368
- """Đọc câu trả lời cuối cùng"""
369
  if not history:
370
  print("DEBUG: No history for TTS")
371
  return None
372
-
373
- # Tìm câu trả lời cuối cùng của assistant
374
  for msg in reversed(history):
375
- if isinstance(msg, dict) and msg.get("role") == "assistant":
376
- content = msg.get("content", "")
377
- # Loại bỏ phần sources từ câu trả lời
378
  if "## 📚 Quellen" in content:
379
  content = content.split("## 📚 Quellen")[0].strip()
380
-
381
  print(f"DEBUG: Synthesizing speech for: {content[:100]}...")
382
  audio_result = synthesize_speech(content)
383
  if audio_result:
384
  print("DEBUG: TTS successful")
385
  return audio_result
386
-
387
  print("DEBUG: No assistant message found for TTS")
388
  return None
389
 
 
183
  # CORE CHAT-FUNKTION với tất cả tính năng mới
184
  # =====================================================
185
  def chat_fn(text_input, audio_path, history, lang_sel, use_vad):
 
 
 
186
  print(f"DEBUG: chat_fn called - text_input: '{text_input}', audio_path: {audio_path}, history length: {len(history) if history else 0}")
187
+ # Chuẩn hóa history về dạng list các cặp [user, assistant]
188
+ def to_pairs(h):
189
+ if not h:
190
+ return []
191
+ if isinstance(h[0], dict):
192
+ pairs = []
193
+ current = [None, None]
194
+ for m in h:
195
+ if m.get("role") == "user":
196
+ if current != [None, None]:
197
+ pairs.append(current)
198
+ current = [m.get("content", ""), None]
199
+ elif m.get("role") == "assistant":
200
+ if current[0] is None:
201
+ pairs.append([None, m.get("content", "")])
202
+ else:
203
+ current[1] = m.get("content", "")
204
+ pairs.append(current)
205
+ current = [None, None]
206
+ if current != [None, None]:
207
+ pairs.append(current)
208
+ return pairs
209
+ return h
210
+ pairs = to_pairs(history)
211
  text_to_process = ""
212
+ # Lấy audio_path nếu chưa có, dùng bản ghi cuối cùng
213
+ if (not audio_path) and state.current_audio_path and os.path.exists(state.current_audio_path):
214
+ audio_path = state.current_audio_path
215
  # Xử lý audio nếu có
216
  if audio_path and os.path.exists(audio_path):
217
  print(f"DEBUG: Processing audio file: {audio_path}")
 
 
218
  state.current_audio_path = audio_path
 
 
219
  if use_vad and ENABLE_VAD:
220
  try:
221
  import soundfile as sf
222
  audio_data, sample_rate = sf.read(audio_path)
 
 
223
  vad_result = handle_voice_activity(audio_data, sample_rate)
224
  print(f"DEBUG: VAD result: {vad_result}")
 
 
225
  if vad_result.get("is_speech", True):
 
226
  transcribed_text = transcribe_audio_optimized(audio_path, language=lang_sel)
227
  if transcribed_text and transcribed_text.strip():
228
  text_to_process = transcribed_text.strip()
229
  print(f"DEBUG: Transcribed text: {text_to_process}")
 
 
230
  except Exception as e:
231
  print(f"DEBUG: Error in VAD/transcription: {e}")
 
232
  transcribed_text = transcribe_audio_optimized(audio_path, language=lang_sel)
233
  if transcribed_text and transcribed_text.strip():
234
  text_to_process = transcribed_text.strip()
235
  else:
 
236
  transcribed_text = transcribe_audio_optimized(audio_path, language=lang_sel)
237
  if transcribed_text and transcribed_text.strip():
238
  text_to_process = transcribed_text.strip()
239
  print(f"DEBUG: Transcribed text (no VAD): {text_to_process}")
 
240
  # Nếu có text input từ textbox, ưu tiên sử dụng nó
241
  if text_input and text_input.strip():
242
  text_to_process = text_input.strip()
243
  print(f"DEBUG: Using text input: {text_to_process}")
244
+ # Không có text để xử lý
 
245
  if not text_to_process:
 
 
246
  status_text = f"Bereit | VAD: {'On' if use_vad and ENABLE_VAD else 'Off'} | Model: OpenAI whisper-1"
247
+ return pairs, "", None, status_text
 
 
 
248
  print(f"DEBUG: Processing text: {text_to_process}")
249
+ enhanced_question = enhance_conversation_context(text_to_process, pairs)
 
 
 
250
  try:
 
251
  ans, sources = answer(enhanced_question, retriever, llm)
252
  bot_msg = ans + format_sources(sources)
 
 
253
  state.add_message("user", text_to_process)
254
  state.add_message("assistant", ans)
255
+ pairs.append([text_to_process, bot_msg])
 
 
 
 
 
 
256
  except Exception as e:
257
  print(f"DEBUG: Error in RAG pipeline: {e}")
 
258
  error_msg = "Entschuldigung, es gab einen Fehler bei der Verarbeitung Ihrer Anfrage. Bitte versuchen Sie es erneut."
259
+ pairs.append([text_to_process, error_msg])
 
 
260
  status_text = f"Bereit | VAD: {'On' if use_vad and ENABLE_VAD else 'Off'} | Model: OpenAI whisper-1"
261
+ return pairs, "", None, status_text
262
 
263
  # =====================================================
264
  # FUNCTIONS FOR UI CONTROLS
 
347
  # TTS FUNCTION
348
  # =====================================================
349
  def read_last_answer(history):
 
350
  if not history:
351
  print("DEBUG: No history for TTS")
352
  return None
 
 
353
  for msg in reversed(history):
354
+ if isinstance(msg, (list, tuple)) and len(msg) == 2 and msg[1]:
355
+ content = msg[1]
 
356
  if "## 📚 Quellen" in content:
357
  content = content.split("## 📚 Quellen")[0].strip()
 
358
  print(f"DEBUG: Synthesizing speech for: {content[:100]}...")
359
  audio_result = synthesize_speech(content)
360
  if audio_result:
361
  print("DEBUG: TTS successful")
362
  return audio_result
 
363
  print("DEBUG: No assistant message found for TTS")
364
  return None
365