Spaces:

Nguyen5
/

chatbot1

Sleeping

App Files Files Community

Nguyen5 committed on Dec 8, 2025

Commit

c411e11

1 Parent(s): e51bcdb

commit

Browse files

Files changed (1) hide show

app.py +589 -380

app.py CHANGED Viewed

@@ -1,81 +1,43 @@
-# app.py – Prüfungsrechts-Chatbot (Đơn giản như ChatGPT)
 import os
 import time
-import tempfile
 from typing import Optional, Dict, Any
 import gradio as gr
 from gradio_pdf import PDF
 import numpy as np
-import soundfile as sf
-from openai import OpenAI
-from speech_io import transcribe_with_openai, synthesize_speech
-# =====================================================
-# CONFIGURATION
-# =====================================================
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-# Initialize OpenAI client only when key is available
-openai_client = OpenAI(api_key=OPENAI_API_KEY) if OPENAI_API_KEY else None
-# Language configuration
 ASR_LANGUAGE_HINT = os.getenv("ASR_LANGUAGE", "de")
 # =====================================================
-# INITIALIZATION - RAG Components / Demo Mode
-# =====================================================
-DEMO_MODE = os.getenv("DEMO_MODE", "false").lower() == "true"
-retriever = None
-llm = None
-pdf_meta = {"pdf_url": ""}
-hg_url = None
-if not DEMO_MODE:
-    from load_documents import load_all_documents
-    from split_documents import split_documents
-    from vectorstore import build_vectorstore
-    from retriever import get_retriever
-    from llm import load_llm
-    from rag_pipeline import answer
-    print("📚 Lade Dokumente…")
-    docs = load_all_documents()
-    print("🔪 Splitte Dokumente…")
-    chunks = split_documents(docs)
-    print("🔍 Erstelle VectorStore…")
-    vs = build_vectorstore(chunks)
-    print("🔎 Erzeuge Retriever…")
-    retriever = get_retriever(vs)
-    print("🤖 Lade LLM…")
-    llm = load_llm()
-    pdf_meta = next(d.metadata for d in docs if d.metadata.get("type") == "pdf")
-    hg_meta = next(d.metadata for d in docs if d.metadata.get("type") == "hg")
-    hg_url = hg_meta.get("viewer_url")
-def generate_demo_answer(message: str) -> str:
-    return (
-        "Chế độ demo: trả lời mẫu cho câu hỏi của bạn. "
-        "Phiên bản đầy đủ sẽ tham chiếu đến nguồn và luật liên quan."
-    )
-# =====================================================
-# STATE MANAGEMENT
 # =====================================================
 class ConversationState:
-    """Quản lý trạng thái hội thoại đơn giản"""
-    def __init__(self):
-        self.messages = []
-        self.current_mode = "text"  # "text" hoặc "audio"
-        self.is_audio_recording = False
     def add_message(self, role: str, content: str):
         """Thêm message vào hội thoại"""
         self.messages.append({
@@ -86,124 +48,132 @@ class ConversationState:
         # Giới hạn lịch sử
         if len(self.messages) > 20:
             self.messages = self.messages[-20:]
-    def get_chat_history(self):
-        """Chuyển đổi sang format cho Gradio Chatbot"""
-        history = []
-        for msg in self.messages:
-            if msg["role"] == "user":
-                history.append([msg["content"], None])
-            elif msg["role"] == "assistant":
-                if history and history[-1][1] is None:
-                    history[-1][1] = msg["content"]
-                else:
-                    history.append([None, msg["content"]])
-        return history
     def reset(self):
         """Reset trạng thái hội thoại"""
         self.messages = []
-        self.is_audio_recording = False
 # Khởi tạo state
 state = ConversationState()
 # =====================================================
-# AUDIO PROCESSING FUNCTIONS
 # =====================================================
-def process_audio_input(audio_data: Optional[tuple], history) -> tuple:
-    """
-    Xử lý audio input từ microphone
-    """
-    if audio_data is None:
-        return history, "", "Warten auf Audioaufnahme..."
     try:
-        # Lấy sample rate và audio data
-        sample_rate, audio_array = audio_data
-        # Tạo file tạm để lưu audio
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
-            temp_path = tmp.name
-            # Lưu audio data
-            sf.write(temp_path, audio_array, int(sample_rate))
-        print("DEBUG: Audio saved to temp file, transcribing...")
-        # Transcribe audio bằng OpenAI Whisper
-        transcribed_text = transcribe_with_openai(temp_path, language=ASR_LANGUAGE_HINT)
-        # Xóa file tạm
-        os.unlink(temp_path)
-        if not transcribed_text or not transcribed_text.strip():
-            return history, "", "Keine Sprache erkannt. Bitte versuchen Sie es erneut."
-        print(f"DEBUG: Transcribed text: {transcribed_text}")
-        # Thêm vào history
-        new_history = history + [[transcribed_text, None]]
-        # Process với RAG
-        if retriever and llm:
-            ans, sources = answer(transcribed_text, retriever, llm)
-            full_response = ans + format_sources(sources)
-        else:
-            ans = generate_demo_answer(transcribed_text)
-            full_response = ans
-        # Cập nhật history với response
-        new_history[-1][1] = full_response
-        # Thêm vào state
-        state.add_message("user", transcribed_text)
-        state.add_message("assistant", ans)
-        return new_history, transcribed_text, "Antwort generiert ✓"
     except Exception as e:
-        print(f"DEBUG: Error processing audio: {e}")
-        return history, "", f"Fehler: {str(e)[:50]}"
-def toggle_audio_mode(mode_choice: str, history):
-    """Chuyển đổi giữa text và audio mode"""
-    if mode_choice == "Audio (Sprachmodus)":
-        state.current_mode = "audio"
-        state.is_audio_recording = True
-        mode_text = "🎤 Sprachmodus aktiv - Klicken und Sprechen"
     else:
-        state.current_mode = "text"
-        state.is_audio_recording = False
-        mode_text = "⌨️ Textmodus aktiv"
-    return (
-        gr.update(visible=(mode_choice == "Audio (Sprachmodus)")),
-        gr.update(visible=(mode_choice == "Text (Schreibmodus)")),
-        gr.update(visible=(mode_choice == "Text (Schreibmodus)")),
-        mode_text
-    )
-def process_text_input(message: str, history):
-    if not message or not message.strip():
-        return history, ""
-    new_history = history + [[message, None]]
-    try:
-        if retriever and llm:
-            ans, sources = answer(message, retriever, llm)
-            full_response = ans + format_sources(sources)
-        else:
-            ans = generate_demo_answer(message)
-            full_response = ans
-        new_history[-1][1] = full_response
-        state.add_message("user", message)
-        state.add_message("assistant", ans)
-    except Exception as e:
-        error_msg = f"Entschuldigung, es gab einen Fehler: {str(e)[:100]}"
-        new_history[-1][1] = error_msg
-    return new_history, ""
 def format_sources(src):
-    """Format sources cho display"""
     if not src:
         return ""
@@ -217,47 +187,226 @@ def format_sources(src):
     return "\n".join(out)
 def clear_conversation():
     """Xóa hội thoại"""
     state.reset()
-    return [], "Konversation gelöscht"
-def speak_last_response(history):
     """Đọc câu trả lời cuối cùng"""
     if not history:
-        return None, "Keine Antwort zum Vorlesen"
-    # Tìm câu trả lời cuối cùng
-    for i in range(len(history)-1, -1, -1):
-        if history[i][1]:  # assistant response exists
-            response_text = history[i][1]
-            # Loại bỏ phần sources
-            if "## 📚 Quellen" in response_text:
-                response_text = response_text.split("## 📚 Quellen")[0].strip()
-            # Tạo speech
-            audio_result = synthesize_speech(response_text[:500])  # Giới hạn độ dài
             if audio_result:
-                sr, audio_data = audio_result
-                return (sr, audio_data), "Audio wird abgespielt..."
-    return None, "Keine passende Antwort gefunden"
 # =====================================================
-# UI – GRADIO INTERFACE (Đơn giản như ChatGPT)
 # =====================================================
-with gr.Blocks(
-    title="🧑‍⚖️ Prüfungsrechts-Chatbot",
-) as demo:
-    # CSS Styling đơn giản
     gr.HTML("""
     <style>
     .gradio-container {
-        max-width: 900px;
         margin: 0 auto;
-        font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
         padding: 20px;
     }
     .header {
@@ -265,252 +414,310 @@ with gr.Blocks(
         margin-bottom: 30px;
         padding: 20px;
         background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-        border-radius: 12px;
         color: white;
     }
-    .mode-selector {
         background: #f8f9fa;
-        padding: 15px;
-        border-radius: 10px;
         margin-bottom: 20px;
-        display: flex;
-        align-items: center;
-        gap: 15px;
         border: 1px solid #e2e8f0;
     }
-    .mode-indicator {
-        padding: 8px 16px;
-        border-radius: 20px;
-        font-weight: 600;
-        background: #e0e7ff;
-        color: #4f46e5;
-    }
-    .input-area {
         background: white;
-        border-radius: 12px;
-        padding: 15px;
-        border: 2px solid #e2e8f0;
-        margin-top: 20px;
     }
     .input-row {
         display: flex;
-        gap: 10px;
         align-items: center;
     }
-    .audio-visualizer {
-        padding: 10px;
-        text-align: center;
-        color: #666;
-        font-style: italic;
     }
-    .tts-btn {
-        margin-top: 10px;
-        padding: 8px 16px;
-        background: #10b981;
-        color: white;
-        border: none;
-        border-radius: 8px;
-        cursor: pointer;
     }
-    .tts-btn:hover {
-        background: #059669;
     }
-    .clear-btn {
-        background: #ef4444;
-        color: white;
-        border: none;
-        border-radius: 8px;
-        padding: 8px 16px;
-        cursor: pointer;
-        margin-left: 10px;
     }
-    .clear-btn:hover {
-        background: #dc2626;
     }
     </style>
     """)
-    # Header đơn giản
     with gr.Column(elem_classes=["header"]):
         gr.Markdown("# 🧑‍⚖️ Prüfungsrechts-Chatbot")
-        gr.Markdown("### Stellen Sie Fragen zu Prüfungsordnung und Hochschulgesetz NRW")
-    # Mode Selector
-    with gr.Column(elem_classes=["mode-selector"]):
         with gr.Row():
-            mode_selector = gr.Radio(
-                choices=["Text (Schreibmodus)", "Audio (Sprachmodus)"],
-                value="Text (Schreibmodus)",
-                label="",
-                scale=3,
-                elem_id="mode-selector"
-            )
-            mode_indicator = gr.Textbox(
-                value="⌨️ Textmodus aktiv",
-                label="Status",
-                interactive=False,
-                scale=2
-            )
-            clear_btn = gr.Button("🗑️ Löschen", elem_classes=["clear-btn"], scale=1)
-    # Main Chat Interface
-    chatbot = gr.Chatbot(
-        label="Konversation",
-        height=500,
-        avatar_images=(
-            "https://em-content.zobj.net/source/microsoft-teams/363/bust-in-silhouette_1f464.png",
-            "https://em-content.zobj.net/source/microsoft-teams/363/robot_1f916.png"
-        )
-    )
-    # Input Area (thay đổi theo mode)
-    with gr.Column(elem_classes=["input-area"], visible=True) as input_area:
-        # Text Input (visible khi text mode)
-        with gr.Column(visible=True) as text_input_container:
-            with gr.Row(elem_classes=["input-row"]):
-                text_input = gr.Textbox(
-                    label="",
-                    placeholder="Stellen Sie eine juristische Frage... (Enter zum Senden)",
-                    lines=2,
-                    max_lines=4,
-                    scale=8,
-                    show_label=False,
-                    container=False,
-                    autofocus=True
                 )
-                text_send_btn = gr.Button(
-                    "Senden",
-                    variant="primary",
-                    scale=1,
-                    min_width=80
                 )
-            with gr.Row():
-                sug1 = gr.Button("Tóm tắt quy định thi", variant="secondary")
-                sug2 = gr.Button("Quy trình khiếu nại kết quả thi", variant="secondary")
-                sug3 = gr.Button("Điều kiện được thi lại", variant="secondary")
-        # Audio Input (visible khi audio mode)
-        with gr.Column(visible=False) as audio_input_container:
-            gr.Markdown("### 🎤 Klicken und Sprechen")
-            with gr.Row():
-                audio_input = gr.Audio(
-                    sources=["microphone"],
-                    type="numpy",
-                    streaming=False,
-                    show_label=False,
-                    interactive=True,
-                    scale=8
                 )
-                audio_status = gr.Textbox(
-                    label="Status",
-                    value="Warten auf Aufnahme...",
-                    interactive=False,
-                    scale=2
                 )
-            gr.Markdown("*Drücken Sie aufnehmen, sprechen Sie Ihre Frage, dann stoppen*", elem_classes=["audio-visualizer"])
-    # TTS Controls
-    with gr.Row():
-        tts_btn = gr.Button("🔊 Letzte Antwort vorlesen", variant="secondary", size="sm")
-        tts_audio = gr.Audio(label="", interactive=False, visible=False)
-        tts_status = gr.Textbox(label="", interactive=False, visible=False)
-    if not DEMO_MODE:
-        with gr.Accordion("📚 Dokumente & Quellen anzeigen", open=False):
-            with gr.Tabs():
-                with gr.TabItem("📄 Prüfungsordnung"):
-                    PDF(pdf_meta["pdf_url"], height=350)
-                with gr.TabItem("📘 Hochschulgesetz NRW"):
-                    if hg_url:
-                        gr.HTML(f'''
-                        <div style="padding: 10px;">
-                            <h4>Hochschulgesetz NRW Viewer</h4>
-                            <a href="{hg_url}" target="_blank" style="display: inline-block; padding: 8px 16px; background: #3b82f6; color: white; text-decoration: none; border-radius: 5px; margin-bottom: 10px;">
-                                Im Viewer öffnen ↗
-                            </a>
-                            <iframe src="{hg_url}" width="100%" height="400px" style="border: 1px solid #ddd; border-radius: 6px;"></iframe>
-                        </div>
-                        ''')
-                    else:
-                        gr.Markdown("Viewer-Link nicht verfügbar.")
     # =====================================================
     # EVENT HANDLERS
     # =====================================================
-    # Mode toggle
-    mode_selector.change(
-        toggle_audio_mode,
-        inputs=[mode_selector, chatbot],
-        outputs=[
-            audio_input_container,
-            text_input_container,
-            text_send_btn,
-            mode_indicator
-        ]
     )
-    # Text input handling
-    text_send_btn.click(
-        process_text_input,
-        inputs=[text_input, chatbot],
-        outputs=[chatbot, text_input]
     )
-    text_input.submit(
-        process_text_input,
-        inputs=[text_input, chatbot],
-        outputs=[chatbot, text_input]
     )
-    # Audio input handling
-    def handle_audio_complete(audio_data, history):
-        """Xử lý khi audio recording hoàn tất"""
-        return process_audio_input(audio_data, history)
-    audio_input.stop_recording(
-        handle_audio_complete,
-        inputs=[audio_input, chatbot],
-        outputs=[chatbot, audio_status, audio_status]
-    ).then(
-        lambda: ("", "Warten auf neue Aufnahme..."),
-        outputs=[audio_input, audio_status]
-    ).then(
-        speak_last_response,
-        inputs=[chatbot],
-        outputs=[tts_audio, tts_status]
     ).then(
-        lambda: gr.Audio(visible=True),
-        outputs=[tts_audio]
     ).then(
-        lambda: gr.Textbox(visible=True),
-        outputs=[tts_status]
     )
-    sug1.click(lambda history: process_text_input("Bitte fassen Sie die relevanten Prüfungsregeln zusammen.", history), inputs=[chatbot], outputs=[chatbot, text_input])
-    sug2.click(lambda history: process_text_input("Wie ist der Ablauf einer Prüfungsanfechtung?", history), inputs=[chatbot], outputs=[chatbot, text_input])
-    sug3.click(lambda history: process_text_input("Unter welchen Bedingungen kann man eine Prüfung wiederholen?", history), inputs=[chatbot], outputs=[chatbot, text_input])
-    # Clear conversation
-    clear_btn.click(
-        clear_conversation,
-        outputs=[chatbot, mode_indicator]
     )
-    # TTS button
     tts_btn.click(
-        speak_last_response,
         inputs=[chatbot],
         outputs=[tts_audio, tts_status]
     ).then(
@@ -522,4 +729,6 @@ with gr.Blocks(
     )
 if __name__ == "__main__":
-    demo.queue().launch(show_error=True)

+# app.py – Prüfungsrechts-Chatbot (RAG + Sprache, UI kiểu ChatGPT) với các tính năng nâng cao
+#
 import os
 import time
+from dataclasses import dataclass, field
 from typing import Optional, Dict, Any
 import gradio as gr
 from gradio_pdf import PDF
 import numpy as np
+from load_documents import load_all_documents
+from split_documents import split_documents
+from vectorstore import build_vectorstore
+from retriever import get_retriever
+from llm import load_llm
+from rag_pipeline import answer
+from speech_io import transcribe_audio, synthesize_speech, transcribe_with_groq, detect_voice_activity
# Environment configuration: every value below can be overridden via an
# environment variable of the name passed to os.getenv.
ASR_LANGUAGE_HINT = os.getenv("ASR_LANGUAGE", "de")
# Boolean flags are enabled only by the literal value "true" (case-insensitive,
# surrounding whitespace ignored).
USE_GROQ = os.getenv("USE_GROQ", "false").strip().lower() == "true"
GROQ_MODEL = os.getenv("GROQ_MODEL", "whisper-large-v3-turbo")
ENABLE_VAD = os.getenv("ENABLE_VAD", "true").strip().lower() == "true"
try:
    VAD_THRESHOLD = float(os.getenv("VAD_THRESHOLD", "0.3"))
except ValueError:
    # A malformed VAD_THRESHOLD must not prevent the app from starting.
    print("VAD_THRESHOLD is not a valid number; falling back to 0.3")
    VAD_THRESHOLD = 0.3
 # =====================================================
+# STATE MANAGEMENT - Quản lý trạng thái hội thoại liền mạch
 # =====================================================
+@dataclass
 class ConversationState:
+    """Quản lý trạng thái hội thoại"""
+    messages: list = field(default_factory=list)
+    last_audio_time: float = field(default_factory=time.time)
+    is_listening: bool = False
+    vad_confidence: float = 0.0
+    conversation_context: str = ""
+    whisper_model: str = field(default_factory=lambda: os.getenv("WHISPER_MODEL", "base"))
+    language: str = field(default_factory=lambda: ASR_LANGUAGE_HINT)
+    current_audio_path: Optional[str] = None
     def add_message(self, role: str, content: str):
         """Thêm message vào hội thoại"""
         self.messages.append({
         # Giới hạn lịch sử
         if len(self.messages) > 20:
             self.messages = self.messages[-20:]
+        # Cập nhật context
+        self._update_context()
+    def _update_context(self):
+        """Cập nhật context từ hội thoại"""
+        if not self.messages:
+            self.conversation_context = ""
+            return
+        context_parts = []
+        for msg in self.messages[-5:]:  # Giữ 5 message gần nhất
+            prefix = "User" if msg["role"] == "user" else "Assistant"
+            context_parts.append(f"{prefix}: {msg['content'][:200]}")  # Giới hạn độ dài
+        self.conversation_context = "\n".join(context_parts)
+    def get_recent_context(self, num_messages: int = 3) -> str:
+        """Lấy context gần đây"""
+        if not self.messages or num_messages <= 0:
+            return ""
+        recent = self.messages[-num_messages:] if len(self.messages) >= num_messages else self.messages
+        return "\n".join([f"{m['role']}: {m['content']}" for m in recent])
     def reset(self):
         """Reset trạng thái hội thoại"""
         self.messages = []
+        self.conversation_context = ""
+        self.is_listening = False
+        self.vad_confidence = 0.0
+        self.current_audio_path = None
 # Khởi tạo state
 state = ConversationState()
 # =====================================================
+# INITIALISIERUNG (global)
 # =====================================================
def _metadata_for(documents, doc_type):
    """Return the metadata of the first document whose ``type`` equals ``doc_type``.

    Raises:
        LookupError: if no loaded document carries that type. This replaces the
            uninformative bare ``StopIteration`` a plain ``next()`` would raise.
    """
    for doc in documents:
        if doc.metadata.get("type") == doc_type:
            return doc.metadata
    raise LookupError(f"No loaded document has metadata type {doc_type!r}")


# Build the RAG pipeline once at import time: load -> split -> index -> retriever -> LLM.
print("📚 Lade Dokumente…")
docs = load_all_documents()
print("🔪 Splitte Dokumente…")
chunks = split_documents(docs)
print("🔍 Erstelle VectorStore…")
vs = build_vectorstore(chunks)
print("🔎 Erzeuge Retriever…")
retriever = get_retriever(vs)
print("🤖 Lade LLM…")
llm = load_llm()
# Document metadata for the UI
pdf_meta = _metadata_for(docs, "pdf")
hg_meta = _metadata_for(docs, "hg")
# May be None when the metadata has no "viewer_url" entry.
hg_url = hg_meta.get("viewer_url")
+# =====================================================
+# VOICE ACTIVITY DETECTION
+# =====================================================
def handle_voice_activity(audio_data: Optional[np.ndarray], sample_rate: int) -> Dict[str, Any]:
    """Run voice-activity detection on a buffer and mirror the result into ``state``.

    Args:
        audio_data: Audio samples; ``None`` or an empty buffer is reported as
            "No audio data" without calling the detector.
        sample_rate: Sample rate forwarded to ``detect_voice_activity``.

    Returns:
        A dict with ``is_speech``, ``confidence`` and a human-readable ``status``.
        Any exception from the detector is caught and reported as no speech.
    """
    if audio_data is None or len(audio_data) == 0:
        return {"is_speech": False, "confidence": 0.0, "status": "No audio data"}

    try:
        vad_result = detect_voice_activity(audio_data, sample_rate, threshold=VAD_THRESHOLD)
        speech_found = vad_result["is_speech"]

        # Mirror the detection into the shared conversation state.
        state.is_listening = speech_found
        if speech_found:
            # Timestamp and confidence are refreshed only when speech is present.
            state.last_audio_time = time.time()
            state.vad_confidence = vad_result["confidence"]

        score = vad_result["confidence"]
        return {
            "is_speech": speech_found,
            "confidence": score,
            "status": f"Speech detected: {speech_found} (conf: {score:.2f})",
        }
    except Exception as e:
        print(f"VAD error: {e}")
        return {"is_speech": False, "confidence": 0.0, "status": f"VAD error: {e}"}
+# =====================================================
+# TRANSCRIBE WITH OPTIMIZED PIPELINE
+# =====================================================
def transcribe_audio_optimized(audio_path: str, language: Optional[str] = None) -> str:
    """Transcribe an audio file with the configured backend.

    Args:
        audio_path: Path of the recording. An empty value or a path that does
            not exist yields "".
        language: Optional language hint forwarded to the backend.

    Returns:
        Whatever the selected backend returns: ``transcribe_with_groq`` when
        ``USE_GROQ`` and ``GROQ_MODEL`` are both truthy, otherwise
        ``transcribe_audio``. ``GROQ_MODEL`` only gates the choice here; it is
        not passed to the backend.
    """
    path_usable = bool(audio_path) and os.path.exists(audio_path)
    if not path_usable:
        return ""

    if not (USE_GROQ and GROQ_MODEL):
        return transcribe_audio(audio_path, language=language)

    print("Using Groq for transcription...")
    return transcribe_with_groq(audio_path, language=language)
+# =====================================================
+# CONVERSATIONAL INTELLIGENCE
+# =====================================================
def enhance_conversation_context(user_input: str, history: list) -> str:
    """Prefix the question with a short transcript of the latest chat turns.

    Args:
        user_input: The current question. Returned unchanged when empty.
        history: Chat history as a list of ``{"role": ..., "content": ...}``
            dicts. ``None`` or an empty list means there is nothing to add.

    Returns:
        ``user_input`` unchanged when no usable history exists; otherwise a
        block starting with ``Previous conversation:``, one line per message
        (last three messages, content cut to 100 characters), followed by
        ``Current question: <user_input>``.
    """
    if not user_input or not history:
        return user_input

    transcript = ["Previous conversation:"]
    for entry in history[-3:]:
        # Entries that are not role/content dicts cannot be rendered; skip them
        # instead of crashing on ``.get``.
        if not isinstance(entry, dict):
            continue
        speaker = "User" if entry.get("role") == "user" else "Assistant"
        raw = entry.get("content", "")
        # Content may be None or a non-text payload; treat those as empty text.
        snippet = raw[:100] if isinstance(raw, str) else ""
        transcript.append(f"{speaker}: {snippet}")

    if len(transcript) == 1:
        # Only the heading is left, so there is no context worth sending.
        return user_input

    context = "\n".join(transcript)
    return f"{context}\n\nCurrent question: {user_input}"
+# =====================================================
+# Quellen formatieren – Markdown für Chat
+# =====================================================
 def format_sources(src):
     if not src:
         return ""
     return "\n".join(out)
+# =====================================================
+# CORE CHAT-FUNKTION với tất cả tính năng mới
+# =====================================================
def _audio_has_speech(audio_path):
    """Return False only when VAD ran on ``audio_path`` and reported no speech."""
    try:
        import soundfile as sf

        audio_data, sample_rate = sf.read(audio_path)
        print(f"DEBUG: Audio loaded - shape: {audio_data.shape}, sample_rate: {sample_rate}")
        vad_result = handle_voice_activity(audio_data, sample_rate)
        print(f"DEBUG: VAD result: {vad_result}")
        return bool(vad_result.get("is_speech", True))
    except Exception as e:
        # If the recording cannot be inspected, do not block transcription.
        print(f"DEBUG: Error in VAD/transcription: {e}")
        return True


def _transcribe_stripped(audio_path, language):
    """Transcribe ``audio_path`` and return the stripped text, or "" on any failure."""
    try:
        text = transcribe_audio_optimized(audio_path, language=language)
        return text.strip() if text else ""
    except Exception as e:
        # A failing backend must not crash the chat handler.
        print(f"DEBUG: Error in transcription: {e}")
        return ""


def chat_fn(text_input, audio_path, history, lang_sel, use_vad):
    """Handle one chat turn from typed text and/or a recorded audio file.

    Args:
        text_input: Text from the input box. When non-blank it takes precedence
            over any transcribed audio.
        audio_path: Path of a recorded audio file, or a falsy value.
        history: Chat history as a list of role/content dicts. ``None`` is
            treated as empty. The list is extended in place.
        lang_sel: Language hint passed to the transcription backend.
        use_vad: Per-request VAD switch. VAD runs only when this and the
            module-level ``ENABLE_VAD`` are both truthy.

    Returns:
        ``(history, "", None, status_text)``. The empty string and ``None``
        presumably clear the text box and audio widget (the wiring is outside
        this block).
    """
    print(f"DEBUG: chat_fn called - text_input: '{text_input}', audio_path: {audio_path}, history length: {len(history) if history else 0}")

    if history is None:
        history = []

    vad_active = bool(use_vad and ENABLE_VAD)
    status_text = f"Bereit | VAD: {'On' if vad_active else 'Off'} | Model: {state.whisper_model}"

    text_to_process = ""

    # Audio input: optionally gate on VAD, then transcribe exactly once.
    if audio_path and os.path.exists(audio_path):
        print(f"DEBUG: Processing audio file: {audio_path}")
        state.current_audio_path = audio_path

        if not vad_active or _audio_has_speech(audio_path):
            text_to_process = _transcribe_stripped(audio_path, lang_sel)
            if text_to_process:
                label = "Transcribed text" if vad_active else "Transcribed text (no VAD)"
                print(f"DEBUG: {label}: {text_to_process}")
        else:
            print("DEBUG: VAD detected no speech, skipping transcription")

    # Typed text wins over transcribed audio.
    if text_input and text_input.strip():
        text_to_process = text_input.strip()
        print(f"DEBUG: Using text input: {text_to_process}")

    if not text_to_process:
        print("DEBUG: No text to process")
        return history, "", None, status_text

    print(f"DEBUG: Processing text: {text_to_process}")

    # The retriever/LLM sees the question with recent turns prepended; the
    # history and state store only the plain question.
    enhanced_question = enhance_conversation_context(text_to_process, history)

    try:
        ans, sources = answer(enhanced_question, retriever, llm)
        bot_msg = ans + format_sources(sources)

        state.add_message("user", text_to_process)
        state.add_message("assistant", ans)

        history.append({"role": "user", "content": text_to_process})
        history.append({"role": "assistant", "content": bot_msg})
        print(f"DEBUG: Answer generated, history length: {len(history)}")
    except Exception as e:
        print(f"DEBUG: Error in RAG pipeline: {e}")
        # Show a generic apology in the chat; the details go to the log only.
        error_msg = "Entschuldigung, es gab einen Fehler bei der Verarbeitung Ihrer Anfrage. Bitte versuchen Sie es erneut."
        history.append({"role": "user", "content": text_to_process})
        history.append({"role": "assistant", "content": error_msg})

    return history, "", None, status_text
+# =====================================================
+# FUNCTIONS FOR UI CONTROLS
+# =====================================================
def toggle_vad(use_vad):
    """Set the module-wide ``ENABLE_VAD`` switch and return a status line for the UI."""
    global ENABLE_VAD
    ENABLE_VAD = use_vad
    label = ("AUS", "EIN")[bool(use_vad)]
    return f"Voice Activity Detection: {label} | Model: {state.whisper_model}"
def change_whisper_model(model_size):
    """Select a Whisper model size and return a status line for the UI.

    The choice is stored on ``state`` and exported as the ``WHISPER_MODEL``
    environment variable.
    """
    state.whisper_model = model_size
    os.environ["WHISPER_MODEL"] = model_size
    vad_label = "On" if ENABLE_VAD else "Off"
    return f"Whisper Model: {model_size} | VAD: {vad_label}"
 def clear_conversation():
     """Xóa hội thoại"""
     state.reset()
+    return [], "Konversation gelöscht | Bereit"
def update_vad_indicator():
    """Return the HTML snippet for the voice-activity indicator.

    A grey "Bereit" dot is returned while ``state.is_listening`` is false, and
    a pulsing green "Sprache erkannt" dot (with its keyframes) otherwise.
    """
    if not state.is_listening:
        return """
        <div style="display: flex; align-items: center; gap: 8px;">
            <div style="width: 12px; height: 12px; border-radius: 50%; background-color: #6b7280;"></div>
            <span>Bereit</span>
        </div>
        """

    return """
        <div style="display: flex; align-items: center; gap: 8px;">
            <div style="width: 12px; height: 12px; border-radius: 50%; background-color: #10b981; box-shadow: 0 0 10px #10b981; animation: pulse 1.5s infinite;"></div>
            <span style="color: #10b981; font-weight: bold;">Sprache erkannt</span>
        </div>
        <style>
        @keyframes pulse {
            0% { opacity: 0.7; }
            50% { opacity: 1; }
            100% { opacity: 0.7; }
        }
        </style>
        """
+# =====================================================
+# AUDIO STREAMING HANDLER
+# =====================================================
def handle_audio_stream(audio_path, use_vad):
    """Transcribe a recorded clip and report the voice-activity status.

    Args:
        audio_path: Path of the recorded audio file.
        use_vad: Per-request VAD switch. VAD runs only when this and the
            module-level ``ENABLE_VAD`` are both truthy.

    Returns:
        ``(text, indicator_html, status)``. ``text`` is "" when the file is
        missing, VAD finds no speech, or an error occurs.
    """
    if not audio_path or not os.path.exists(audio_path):
        return "", update_vad_indicator(), "Keine Audiodatei"

    try:
        import soundfile as sf

        audio_data, sample_rate = sf.read(audio_path)

        if use_vad and ENABLE_VAD:
            vad_result = handle_voice_activity(audio_data, sample_rate)
            # Render the indicator after VAD has updated the state, so that it
            # reflects this clip and not the previous one.
            vad_html = update_vad_indicator()
            if not vad_result.get("is_speech", False):
                return "", vad_html, "Keine Sprache erkannt"

            text = transcribe_audio_optimized(audio_path, language=state.language)
            status = f"Sprache erkannt ({vad_result.get('confidence', 0):.2f})"
            return text, vad_html, status

        # VAD disabled: transcribe unconditionally and say so in the status.
        text = transcribe_audio_optimized(audio_path, language=state.language)
        return text, update_vad_indicator(), "Transkription (VAD aus)"

    except Exception as e:
        print(f"Error in audio stream handler: {e}")
        return "", update_vad_indicator(), f"Fehler: {str(e)[:50]}"
+# =====================================================
+# TTS FUNCTION
+# =====================================================
def read_last_answer(history):
    """Synthesize speech for the most recent assistant message.

    Args:
        history: Chat history as a list of role/content dicts.

    Returns:
        The result of ``synthesize_speech`` for the latest assistant reply with
        its ``## 📚 Quellen`` section removed. ``None`` is returned when there
        is no assistant reply, the reply has no speakable text, or synthesis
        yields nothing. Older replies are never used as a fallback, so a stale
        answer is not read out.
    """
    if not history:
        print("DEBUG: No history for TTS")
        return None

    last_reply = next(
        (
            msg
            for msg in reversed(history)
            if isinstance(msg, dict) and msg.get("role") == "assistant"
        ),
        None,
    )
    if last_reply is None:
        print("DEBUG: No assistant message found for TTS")
        return None

    content = last_reply.get("content")
    if not isinstance(content, str):
        # None or a non-text payload cannot be spoken.
        content = ""

    # Drop the sources section; only the answer itself is read aloud.
    if "## 📚 Quellen" in content:
        content = content.split("## 📚 Quellen")[0]
    content = content.strip()

    if not content:
        print("DEBUG: Last assistant message has no text for TTS")
        return None

    print(f"DEBUG: Synthesizing speech for: {content[:100]}...")
    audio_result = synthesize_speech(content)
    if audio_result:
        print("DEBUG: TTS successful")
        return audio_result

    print("DEBUG: TTS returned no audio")
    return None
 # =====================================================
+# UI – GRADIO with all new features
 # =====================================================
+with gr.Blocks(title="Prüfungsrechts-Chatbot (RAG + Sprache) - Enhanced") as demo:
+    # Advanced CSS styling, injected as a <style> tag through an HTML component
     gr.HTML("""
     <style>
     .gradio-container {
+        max-width: 1200px;
         margin: 0 auto;
         padding: 20px;
+        font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
     }
     .header {
         margin-bottom: 30px;
         padding: 20px;
         background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+        border-radius: 15px;
         color: white;
     }
+    .control-panel {
         background: #f8f9fa;
+        padding: 20px;
+        border-radius: 15px;
         margin-bottom: 20px;
         border: 1px solid #e2e8f0;
     }
+    .chat-container {
         background: white;
+        border-radius: 15px;
+        padding: 20px;
+        box-shadow: 0 4px 20px rgba(0,0,0,0.1);
+        margin-bottom: 20px;
     }
     .input-row {
+        background: #f8fafc;
+        border-radius: 25px;
+        padding: 10px 20px;
+        border: 2px solid #e2e8f0;
+        transition: all 0.3s ease;
         display: flex;
         align-items: center;
+        gap: 10px;
     }
+    .input-row:focus-within {
+        border-color: #667eea;
+        box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
     }
+    .send-btn {
+        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
+        color: white !important;
+        border: none !important;
+        border-radius: 50% !important;
+        width: 44px !important;
+        height: 44px !important;
+        display: flex !important;
+        align-items: center !important;
+        justify-content: center !important;
+        cursor: pointer !important;
     }
+    .send-btn:hover {
+        transform: scale(1.05);
+        box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important;
     }
+    .vad-indicator-container {
+        padding: 10px;
+        background: #f1f5f9;
+        border-radius: 10px;
+        margin: 10px 0;
+        display: flex;
+        align-items: center;
+        gap: 10px;
     }
+    .feature-badge {
+        display: inline-block;
+        padding: 4px 12px;
+        background: #e0e7ff;
+        color: #4f46e5;
+        border-radius: 20px;
+        font-size: 12px;
+        font-weight: 500;
+        margin: 2px;
+    }
+    .chatbot {
+        min-height: 400px;
+        max-height: 500px;
+        overflow-y: auto;
+    }
+    /* Responsive design */
+    @media (max-width: 768px) {
+        .gradio-container {
+            padding: 10px;
+        }
+        .input-row {
+            flex-direction: column;
+            gap: 10px;
+        }
+        .send-btn {
+            width: 100% !important;
+            height: 44px !important;
+            border-radius: 10px !important;
+        }
     }
     </style>
     """)
+    # Header banner: title, subtitle and feature badges, styled by the "header" CSS class
     with gr.Column(elem_classes=["header"]):
         gr.Markdown("# 🧑‍⚖️ Prüfungsrechts-Chatbot")
+        gr.Markdown("### Intelligent Voice Interface with Advanced Features")
+        # Feature badges: static HTML spans using the "feature-badge" CSS class
+        gr.HTML("""
+        <div style="text-align: center; margin: 10px 0;">
+            <span class="feature-badge">🎤 Voice Activity Detection</span>
+            <span class="feature-badge">⚡ Fast Transcription</span>
+            <span class="feature-badge">🧠 Conversational AI</span>
+            <span class="feature-badge">📚 Document RAG</span>
+        </div>
+        """)
+    # Control panel: recognition settings on the left (scale=2), status widgets on the right (scale=1)
+    with gr.Column(elem_classes=["control-panel"]):
         with gr.Row():
+            with gr.Column(scale=2):
+                # Whisper model picker; the initial value comes from state.whisper_model
+                model_selector = gr.Dropdown(
+                    choices=["tiny", "base", "small", "medium"],
+                    value=state.whisper_model,
+                    label="Whisper Model",
+                    info="Wählen Sie das Modell für Spracherkennung"
                 )
+                # VAD on/off checkbox, initialised from the ENABLE_VAD flag
+                vad_toggle = gr.Checkbox(
+                    value=ENABLE_VAD,
+                    label="Voice Activity Detection aktivieren",
+                    info="Automatische Spracherkennung"
                 )
+                # ASR language picker, defaulting to ASR_LANGUAGE_HINT (NOTE(review): that value is not checked against the choices here)
+                lang_selector = gr.Dropdown(
+                    choices=["de", "en", "auto"],
+                    value=ASR_LANGUAGE_HINT,
+                    label="Spracherkennung Sprache"
                 )
+            with gr.Column(scale=1):
+                # Read-only status line; starts out as "Bereit"
+                status_display = gr.Textbox(
+                    label="System Status",
+                    value="Bereit",
+                    interactive=False
                 )
+                # Button for clearing the conversation
+                clear_btn = gr.Button("🗑️ Konversation löschen", variant="secondary", size="sm")
+                # VAD indicator HTML; its initial markup is produced by update_vad_indicator() while the UI is built
+                vad_indicator = gr.HTML(value=update_vad_indicator(), label="VAD Status")
+    # Main chat interface: conversation view, input row and TTS controls
+    with gr.Column(elem_classes=["chat-container"]):
+        # Conversation display
+        chatbot = gr.Chatbot(
+            label="Konversation",
+            height=400,
+            avatar_images=(None, "🤖")  # NOTE(review): avatar_images is documented as image paths/URLs - confirm an emoji string renders
+        )
+        # Input row: text box, microphone and send button (the VAD indicator itself lives in the control panel)
+        with gr.Row(elem_classes=["input-row"]):
+            # Single-line question box that may grow to four lines
+            chat_text = gr.Textbox(
+                label=None,
+                placeholder="Stellen Sie eine Frage oder sprechen Sie ins Mikrofon...",
+                lines=1,
+                max_lines=4,
+                scale=8,
+                container=False,
+                show_label=False
+            )
+            # Microphone-only audio input, handed to handlers as a WAV file path, with streaming enabled
+            chat_audio = gr.Audio(
+                sources=["microphone"],
+                type="filepath",
+                format="wav",
+                streaming=True,
+                interactive=True,
+                show_label=False,
+                scale=1,
+                elem_id="audio-input"
+            )
+            # Send button, styled by the "send-btn" CSS class
+            send_btn = gr.Button("➤", variant="primary", elem_classes=["send-btn"], scale=1)
+        # TTS controls; the audio and status outputs are created hidden (visible=False)
+        with gr.Row():
+            tts_btn = gr.Button("🔊 Antwort vorlesen", variant="secondary", size="sm")
+            tts_audio = gr.Audio(label="Audio Ausgabe", interactive=False, visible=False)
+            tts_status = gr.Textbox(label="TTS Status", interactive=False, visible=False)
+    # Documents section: collapsed accordion with one tab per source document
+    with gr.Accordion("📚 Quellen & Dokumente", open=False):
+        with gr.Tabs():
+            with gr.TabItem("📄 Prüfungsordnung (PDF)"):
+                PDF(pdf_meta["pdf_url"], height=300)  # PDF component fed with the URL stored in pdf_meta
+            with gr.TabItem("📘 Hochschulgesetz NRW"):
+                if isinstance(hg_url, str) and hg_url.startswith("http"):  # only embed when hg_url is a string starting with "http"
+                    gr.Markdown(f"### [Im Viewer öffnen]({hg_url})")
+                    gr.HTML(f'<iframe src="{hg_url}" width="100%" height="500px" style="border: 1px solid #ddd; border-radius: 8px;"></iframe>')  # NOTE(review): hg_url is interpolated into the markup unescaped
+                else:
+                    gr.Markdown("Viewer-Link nicht verfügbar.")
     # =====================================================
     # EVENT HANDLERS
     # =====================================================
+    # Model Selection
+    model_selector.change(
+        change_whisper_model,
+        inputs=[model_selector],
+        outputs=[status_display]
     )
+    # VAD Toggle
+    vad_toggle.change(
+        toggle_vad,
+        inputs=[vad_toggle],
+        outputs=[status_display]
     )
+    # Clear Conversation
+    clear_btn.click(
+        clear_conversation,
+        outputs=[chatbot, status_display]
+    ).then(
+        lambda: update_vad_indicator(),
+        outputs=[vad_indicator]
     )
+    # Main Chat Function
+    def process_chat(text_input, audio_path, history, lang_sel, use_vad):
+        """Wrapper function để xử lý chat"""
+        try:
+            return chat_fn(text_input, audio_path, history, lang_sel, use_vad)
+        except Exception as e:
+            print(f"Error in process_chat: {e}")
+            error_msg = f"Fehler: {str(e)}"
+            if history is None:
+                history = []
+            return history, "", None, error_msg
+    # Send Button Click
+    send_btn.click(
+        process_chat,
+        inputs=[chat_text, chat_audio, chatbot, lang_selector, vad_toggle],
+        outputs=[chatbot, chat_text, chat_audio, status_display]
     ).then(
+        lambda: update_vad_indicator(),
+        outputs=[vad_indicator]
+    )
+    # Text Submit (Enter key)
+    chat_text.submit(
+        process_chat,
+        inputs=[chat_text, chat_audio, chatbot, lang_selector, vad_toggle],
+        outputs=[chatbot, chat_text, chat_audio, status_display]
     ).then(
+        lambda: update_vad_indicator(),
+        outputs=[vad_indicator]
     )
+    # Audio Change Handler
+    def on_audio_change(audio_path, use_vad):
+        """Xử lý khi audio thay đổi"""
+        if audio_path:
+            print(f"DEBUG: Audio changed: {audio_path}")
+            # Xử lý streaming
+            text, vad_html, status = handle_audio_stream(audio_path, use_vad)
+            return text, vad_html, status
+        return "", update_vad_indicator(), "Bereit"
+    chat_audio.change(
+        on_audio_change,
+        inputs=[chat_audio, vad_toggle],
+        outputs=[chat_text, vad_indicator, status_display]
     )
+    # Audio Streaming
+    chat_audio.stream(
+        on_audio_change,
+        inputs=[chat_audio, vad_toggle],
+        outputs=[chat_text, vad_indicator, status_display]
+    )
+    # TTS Button
+    def handle_tts(history):
+        """Xử lý TTS"""
+        audio_result = read_last_answer(history)
+        if audio_result:
+            return audio_result, "Audio wird abgespielt..."
+        return None, "Keine Antwort zum Vorlesen gefunden"
     tts_btn.click(
+        handle_tts,
         inputs=[chatbot],
         outputs=[tts_audio, tts_status]
     ).then(
     )
 if __name__ == "__main__":
+    demo.queue().launch(ssr_mode=False, show_error=True)