Spaces:

Nguyen5
/

chatbot1

Sleeping

App Files Files Community

Nguyen5 committed on Dec 7, 2025

Commit

7c86ca3

1 Parent(s): b97c143

commit

Browse files

Files changed (1) hide show

app.py +145 -222

app.py CHANGED Viewed

@@ -1,10 +1,8 @@
-# app.py – Prüfungsrechts-Chatbot (RAG + Sprache, UI kiểu ChatGPT)
-#
-import os
 import gradio as gr
-from gradio_pdf import PDF
-from load_documents import load_all_documents
 from split_documents import split_documents
 from vectorstore import build_vectorstore
 from retriever import get_retriever
@@ -12,267 +10,192 @@ from llm import load_llm
 from rag_pipeline import answer
 from speech_io import transcribe_audio, synthesize_speech
-ASR_LANGUAGE_HINT = os.getenv("ASR_LANGUAGE", "auto")
 # =====================================================
-# INITIALISIERUNG (global)
 # =====================================================
-print("📚 Lade Dokumente…")
-docs = load_all_documents()
-print("🔪 Splitte Dokumente…")
-chunks = split_documents(docs)
-print("🔍 Erstelle VectorStore…")
-vs = build_vectorstore(chunks)
-print("🔎 Erzeuge Retriever…")
-retriever = get_retriever(vs)
-print("🤖 Lade LLM…")
-llm = load_llm()
-# Dokument-Metadaten für UI
-pdf_meta = next(d.metadata for d in docs if d.metadata.get("type") == "pdf")
-hg_meta = next(d.metadata for d in docs if d.metadata.get("type") == "hg")
-hg_url = hg_meta.get("viewer_url")
 # =====================================================
-# Quellen formatieren – Markdown für Chat
 # =====================================================
-def format_sources(src):
-    if not src:
         return ""
-    out = ["", "## 📚 Quellen"]
-    for s in src:
-        line = f"- [{s['source']}]({s['url']})"
-        if s.get("page") is not None:
-            line += f" (Seite {s['page']})"
-        out.append(line)
-    return "\n".join(out)
 # =====================================================
-# CORE CHAT-FUNKTION (Text + separates Mikro-Audio)
 # =====================================================
-def chat_fn(text_input, audio_path, history, lang_sel, auto_tts):
-    """
-    text_input: Textbox-Inhalt (str)
-    audio_path: Pfad zu WAV/FLAC vom Mikro (gr.Audio, type="filepath")
-    history: Liste von OpenAI-ähnlichen Messages (role, content)
-    """
-    text = (text_input or "").strip()
-    if audio_path and not text:
-        spoken = transcribe_audio(audio_path, language=lang_sel)
-        text = spoken
-    if not text:
-        # Nichts zu tun
-        return history, None, "", None
-    # 2) RAG-Antwort berechnen
-    ans, sources = answer(text, retriever, llm)
-    bot_msg = ans + format_sources(sources)
-    # 3) History aktualisieren (ChatGPT-Style)
     history = history + [
-        {"role": "user", "content": text},
         {"role": "assistant", "content": bot_msg},
     ]
-    tts_audio = synthesize_speech(bot_msg) if auto_tts else None
-    # 5) Input-Felder leeren
-    return history, tts_audio, "", None, text
 # =====================================================
-# LAST ANSWER → TTS (für Button "Antwort erneut vorlesen")
 # =====================================================
 def read_last_answer(history):
     if not history:
         return None
     for msg in reversed(history):
-        if msg.get("role") == "assistant":
-            return synthesize_speech(msg.get("content", ""))
     return None
 # =====================================================
-# UI – GRADIO
 # =====================================================
-with gr.Blocks(title="Prüfungsrechts-Chatbot (RAG + Sprache)") as demo:
-    gr.HTML(
-        """
-        <style>
-        html, body {height: auto !important; overflow-y: auto !important;}
-        .gradio-container {max-width: 960px; margin: 0 auto; padding: 12px;}
-        * { box-sizing: border-box; }
-        #chat-input-row, .gradio-container, .gradio-container * { max-width: 100%; }
-        #chat-input-row > * { flex: 0 0 auto; }
-        #chat-textbox { flex: 1 1 auto; width: 100%; }
-        .status { font-size: 12px; color: #555; }
-        @media (max-width: 768px) {
-          .gradio-container {max-width: 100%; padding: 8px;}
-          #sidebar {display: none;}
-          #chat-input-row {flex-direction: column; gap: 6px; border-radius: 16px;}
-          #chat-textbox textarea {min-height: 48px;}
-        }
-        /* ChatGPT-like Bottom Bar */
-        #chat-input-row {
-            align-items: center;
-            gap: 8px;
-            padding: 8px 12px;
-            border: 1px solid rgba(0,0,0,0.08);
-            border-radius: 9999px;
-            background: var(--background-primary);
-            box-shadow: 0 2px 6px rgba(0,0,0,0.06);
-        }
-        /* Textbox inside pill */
-        #chat-textbox textarea {
-            min-height: 42px;
-            max-height: 120px;
-            border: none !important;
-            background: transparent !important;
-            box-shadow: none !important;
-            resize: none;
-            padding-left: 0;
-        }
-        /* Icon buttons (plus, mic, send) */
-        .icon-btn, .compact-btn {
-            width: 40px;
-            height: 40px;
-            border-radius: 9999px !important;
-            display: inline-flex;
-            align-items: center;
-            justify-content: center;
-            border: 1px solid rgba(0,0,0,0.08) !important;
-            background: #f7f7f8 !important;
-            box-shadow: none !important;
-        }
-        .send-btn {
-            background: #111 !important;
-            color: #fff !important;
-            border-color: #111 !important;
-        }
-        /* Make audio mic compact */
-        #chat-audio {min-width: 40px;}
-        #chat-audio .wrap, #chat-audio .audio-wrap, #chat-audio .audio-controls {
-            max-width: 40px;
-        }
-        </style>
-        """
-    )
-    gr.Markdown("# 🧑‍⚖️ Prüfungsrechts-Chatbot")
     gr.Markdown(
-        "Dieser Chatbot beantwortet Fragen **ausschließlich** aus der "
-        "Prüfungsordnung (PDF) und dem Hochschulgesetz NRW. "
-        "Sie können Text eingeben oder direkt ins Mikrofon sprechen."
     )
-    with gr.Column():
-        chatbot = gr.Chatbot(label="Chat", height=420)
-        spoken_out = gr.Textbox(label="Gesprochener Text", interactive=False)
-        status_md = gr.Markdown("Bereit", elem_id="status")
-        # Audio-Ausgabe (TTS)
-        voice_out = gr.Audio(label="Vorgelesene Antwort", type="numpy", interactive=False)
-        # Eingabezeile à la ChatGPT: Plus + Text + Mikro + Senden
-        with gr.Row(elem_id="chat-input-row"):
-            attach_btn = gr.UploadButton("＋", file_types=["file"], file_count="multiple", elem_classes=["icon-btn"], scale=1)
-            chat_text = gr.Textbox(
-                elem_id="chat-textbox",
-                label=None,
-                placeholder="Stelle irgendeine Frage — Enter sendet, Shift+Enter neue Zeile",
-                lines=1,
-                max_lines=6,
                 autofocus=True,
-                scale=8,
             )
-            chat_audio = gr.Audio(
-                elem_id="chat-audio",
-                label=None,
-                sources=["microphone"],
-                type="filepath",
-                format="wav",
-                interactive=True,
-                scale=1,
-                show_label=False,
             )
-            send_btn = gr.Button("➤", elem_classes=["compact-btn", "send-btn"], scale=1)
-        lang_dd = gr.Dropdown(choices=["auto","de","en","vi"], value="auto", label="Sprache")
-        mode_radio = gr.Radio(choices=["Audio","Text"], value="Audio", label="Eingabemodus")
-        record_player = gr.Audio(label="Letzte Aufnahme", type="filepath", interactive=False)
-        stop_rec_btn = gr.Button("⏹ Aufnahme löschen")
-        auto_tts_chk = gr.Checkbox(label="Antwort vorlesen", value=False)
-        chat_text.submit(
-            chat_fn,
-            [chat_text, chat_audio, chatbot, lang_dd, auto_tts_chk],
-            [chatbot, voice_out, chat_text, chat_audio, spoken_out],
-        )
-        def transcribe_to_textbox(audio_path, lang):
-            status = "🎙️ Aufnahme beendet – verarbeite Audio…"
-            s = transcribe_audio(audio_path, language=lang)
-            status = "✅ Verarbeitung abgeschlossen"
-            return s, s, audio_path, status
-        chat_audio.change(
-            transcribe_to_textbox,
-            [chat_audio, lang_dd],
-            [chat_text, spoken_out, record_player, status_md],
-        )
-        send_btn.click(
-            chat_fn,
-            [chat_text, chat_audio, chatbot, lang_dd, auto_tts_chk],
-            [chatbot, voice_out, chat_text, chat_audio, spoken_out],
-        )
-        def toggle_mode(m):
-            status = "Audio-Modus aktiv" if m=="Audio" else "Text-Modus aktiv"
-            return gr.update(visible=(m=="Text")), gr.update(visible=(m=="Audio")), status
-        mode_radio.change(toggle_mode, [mode_radio], [chat_text, chat_audio, status_md])
-        def clear_record(p):
-            try:
-                import os
-                if isinstance(p, str) and os.path.exists(p):
-                    os.remove(p)
-            except:
-                pass
-            return None
-        stop_rec_btn.click(clear_record, [record_player], [record_player])
-        # Button: Antwort erneut vorlesen
-        read_btn = gr.Button("🔁 Antwort erneut vorlesen")
-        read_btn.click(
-            read_last_answer,
-            [chatbot],
-            [voice_out],
-        )
-        # Chat löschen
-        clear_btn = gr.Button("Chat zurücksetzen")
-        clear_btn.click(
-            lambda: ([], None, "", None, ""),
-            None,
-            [chatbot, voice_out, chat_text, chat_audio, spoken_out],
-        )
-        # Quellen & Dokumente kompakt unterhalb
-        with gr.Accordion("Quellen & Dokumente", open=False):
             gr.Markdown("### 📄 Prüfungsordnung (PDF)")
-            PDF(pdf_meta["pdf_url"], height=250)
-            gr.Markdown("### 📘 Hochschulgesetz NRW")
-            if isinstance(hg_url, str) and hg_url.startswith("http"):
-                gr.Markdown(f"[Im Viewer öffnen]({hg_url})")
-            else:
-                gr.Markdown("Viewer-Link nicht verfügbar.")
 if __name__ == "__main__":
     demo.queue().launch(ssr_mode=False, show_error=True)

+# app.py – Prüfungsrechts-Chatbot mit OpenAI (Supabase RAG)
 import gradio as gr
+from load_documents import load_documents, PDF_URL, HG_HTML_URL
 from split_documents import split_documents
 from vectorstore import build_vectorstore
 from retriever import get_retriever
 from rag_pipeline import answer
 from speech_io import transcribe_audio, synthesize_speech
 # =====================================================
+# INITIALISIERUNG (beim Start der Space einmalig)
 # =====================================================
# One-time startup work: everything below runs at import time, so the
# resulting module-level objects are built once and reused by the handlers.

# 1) Load the source documents.
print("🔹 Lade Dokumente aus Supabase …")
_docs = load_documents()

# 2) Split them into chunks.
print("🔹 Splitte Dokumente …")
_chunks = split_documents(_docs)

# 3) Build the vector store from the chunks.
print("🔹 Baue VectorStore …")
_vs = build_vectorstore(_chunks)

# 4) Wrap the vector store in a retriever.
print("🔹 Erzeuge Retriever …")
_retriever = get_retriever(_vs)

# 5) Load the chat model used to generate answers.
print("🔹 Lade OpenAI LLM …")
_llm = load_llm()
 # =====================================================
+# Quellen formatieren – Markdown im Chat
 # =====================================================
def format_sources_markdown(sources):
    """Render the cited sources as a Markdown block for the chat message.

    Args:
        sources: Iterable of dicts describing the cited passages. The keys
            read here are ``id``, ``source``, ``page``, ``url`` and
            ``snippet``; every one of them is optional.

    Returns:
        An empty string when ``sources`` is empty or ``None``. Otherwise a
        Markdown string that starts with an empty line, followed by a heading
        and one bullet per source (a link when a URL is present), each
        optionally followed by the snippet as an indented block quote.
    """
    if not sources:
        return ""

    lines = ["", "### 📚 Quellen (verwendete Dokumentstellen):"]
    for position, s in enumerate(sources, start=1):
        # .get() keeps a single incomplete source entry from raising KeyError
        # and aborting the whole chat turn.
        sid = s.get("id", position)
        src = s.get("source") or "Unbekannte Quelle"
        page = s.get("page")
        url = s.get("url")
        snippet = s.get("snippet")

        title = f"Quelle {sid} – {src}"
        if page:
            title += f", Seite {page}"

        lines.append(f"- [{title}]({url})" if url else f"- {title}")

        if snippet:
            # A raw line break would end the block quote after its first
            # line, so collapse every whitespace run into a single space.
            flat = " ".join(str(snippet).split())
            if flat:
                lines.append(f"  > {flat}")

    return "\n".join(lines)
 # =====================================================
+# TEXT CHATBOT
 # =====================================================
def chatbot_text(user_message, history):
    """Answer a typed question and append the exchange to the chat history.

    Args:
        user_message: Text taken from the input box.
        history: Chat history as a list of ``{"role": ..., "content": ...}``
            dicts. ``None`` is treated as an empty history.

    Returns:
        A ``(history, "")`` tuple: the (possibly extended) history and an
        empty string for the second output, which the UI wires to the input
        textbox.
    """
    # Work on a copy so the caller's list is never mutated; also covers None.
    history = list(history or [])

    question = (user_message or "").strip()
    if not question:
        # Empty or whitespace-only input: do not spend a model call on it.
        return history, ""

    answer_text, sources = answer(
        question=question,
        retriever=_retriever,
        chat_model=_llm,
    )

    quellen_block = format_sources_markdown(sources)
    # Only add the separator when there is a sources block to separate.
    if quellen_block:
        bot_msg = answer_text + "\n\n" + quellen_block
    else:
        bot_msg = answer_text

    history += [
        {"role": "user", "content": question},
        {"role": "assistant", "content": bot_msg},
    ]
    return history, ""
+# =====================================================
+# VOICE CHATBOT
+# =====================================================
def chatbot_voice(audio_path, history):
    """Transcribe a recording, answer it and synthesize the reply as speech.

    Args:
        audio_path: Value of the microphone component. It is falsy when the
            button is pressed without a recording.
        history: Chat history as a list of ``{"role": ..., "content": ...}``
            dicts. ``None`` is treated as an empty history.

    Returns:
        A ``(history, audio, "")`` tuple. ``audio`` is whatever
        ``synthesize_speech`` returns for the full bot message, or ``None``
        when there was nothing to answer. The trailing empty string is the
        value for the third output, which the UI wires to the text input.
    """
    # Work on a copy so the caller's list is never mutated; also covers None.
    history = list(history or [])

    if not audio_path:
        # No recording available: skip transcription instead of passing None on.
        return history, None, ""

    text = (transcribe_audio(audio_path) or "").strip()
    if not text:
        return history, None, ""

    answer_text, sources = answer(
        question=text,
        retriever=_retriever,
        chat_model=_llm,
    )

    quellen_block = format_sources_markdown(sources)
    # Only add the separator when there is a sources block to separate.
    if quellen_block:
        bot_msg = answer_text + "\n\n" + quellen_block
    else:
        bot_msg = answer_text

    history += [
        {"role": "user", "content": text},
        {"role": "assistant", "content": bot_msg},
    ]

    # NOTE(review): the spoken text includes the Markdown sources block;
    # confirm whether only answer_text should be read aloud.
    audio = synthesize_speech(bot_msg)
    return history, audio, ""
 # =====================================================
+# Wieder-Vorlesen der letzten Antwort
 # =====================================================
 def read_last_answer(history):
     if not history:
         return None
     for msg in reversed(history):
+        if msg["role"] == "assistant":
+            return synthesize_speech(msg["content"])
     return None
 # =====================================================
+# UI (Gradio)
 # =====================================================
# Page layout: chat controls in the wide left column, document viewers in the
# narrow right column. Components are created in display order.
with gr.Blocks(title="Prüfungsrechts-Chatbot (Supabase + OpenAI)") as demo:
    gr.Markdown("# 🧑‍⚖️ Prüfungsrechts-Chatbot (Supabase RAG + OpenAI)")
    gr.Markdown(
        "Fragen zum Prüfungsrecht (Prüfungsordnung + Hochschulgesetz NRW). "
        "Antworten mit Quellenangabe und Direktlinks."
    )
    with gr.Row():
        # ---------- LEFT AREA: CHAT ----------
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                type="messages",
                label="Chat",
                height=550,
            )
            msg = gr.Textbox(
                label="Frage eingeben",
                placeholder="Stelle deine Frage zum Prüfungsrecht …",
                autofocus=True,
            )
            # Pressing Enter and clicking the button run the same handler.
            msg.submit(chatbot_text, [msg, chatbot], [chatbot, msg])
            send_btn = gr.Button("Senden (Text)")
            send_btn.click(chatbot_text, [msg, chatbot], [chatbot, msg])

            gr.Markdown("### 🎙️ Spracheingabe")
            voice_in = gr.Audio(sources=["microphone"], type="filepath")
            voice_out = gr.Audio(label="Vorgelesene Antwort", type="numpy")
            send_voice_btn = gr.Button("Sprechen & Senden")
            send_voice_btn.click(
                chatbot_voice,
                [voice_in, chatbot],
                [chatbot, voice_out, msg],
            )

            read_btn = gr.Button("Antwort erneut vorlesen")
            read_btn.click(read_last_answer, [chatbot], [voice_out])

            clear_btn = gr.Button("Chat löschen")
            # Reset the audio widgets and the textbox too, so no stale
            # recording or spoken answer from the cleared chat remains.
            clear_btn.click(
                lambda: ([], None, "", None),
                None,
                [chatbot, voice_out, msg, voice_in],
            )
        # ---------- RIGHT AREA: VIEWER ----------
        with gr.Column(scale=1):
            gr.Markdown("### 📄 Prüfungsordnung (PDF)")
            gr.HTML(
                f"""
                <iframe src="{PDF_URL}"
                        style="width:100%; height:330px; border:none;">
                </iframe>
                """
            )
            gr.Markdown("### 📘 Hochschulgesetz NRW (Paragraph-Viewer)")
            gr.HTML(
                f"""
                <iframe src="{HG_HTML_URL}"
                        style="width:100%; height:330px; border:none;">
                </iframe>
                """
            )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.queue().launch(ssr_mode=False, show_error=True)