Spaces:

Nguyen5
/

chatbot

Sleeping

App Files Files Community

Nguyen5 committed on Dec 4, 2025

Commit

c151194

1 Parent(s): 0fff3f2

commit

Browse files

Files changed (1) hide show

app.py +71 -59

app.py CHANGED Viewed

@@ -1,44 +1,52 @@
-# app.py – Prüfungsrechts-Chatbot mit OpenAI (Supabase RAG)
 import gradio as gr
-from load_documents import load_documents, PDF_URL, HG_HTML_URL
 from split_documents import split_documents
 from vectorstore import build_vectorstore
 from retriever import get_retriever
 from llm import load_llm
-from rag_pipeline import answer
 from speech_io import transcribe_audio, synthesize_speech
 # =====================================================
-# INITIALISIERUNG (beim Start der Space einmalig)
 # =====================================================
-print("🔹 Lade Dokumente aus Supabase …")
 _docs = load_documents()
-print("🔹 Splitte Dokumente …")
 _chunks = split_documents(_docs)
-print("🔹 Baue VectorStore …")
 _vs = build_vectorstore(_chunks)
-print("🔹 Erzeuge Retriever …")
 _retriever = get_retriever(_vs)
-print("🔹 Lade OpenAI LLM …")
 _llm = load_llm()
 # =====================================================
-# Quellen formatieren – Markdown im Chat
 # =====================================================
 def format_sources_markdown(sources):
     if not sources:
         return ""
-    lines = ["", "### 📚 Quellen (verwendete Dokumentstellen):"]
     for s in sources:
         sid = s["id"]
         src = s["source"]
@@ -46,17 +54,18 @@ def format_sources_markdown(sources):
         url = s["url"]
         snippet = s["snippet"]
-        if page:
-            title = f"Quelle {sid} – {src}, Seite {page}"
-        else:
-            title = f"Quelle {sid} – {src}"
         if url:
             base = f"- [{title}]({url})"
         else:
             base = f"- {title}"
         lines.append(base)
         if snippet:
             lines.append(f"  > {snippet}")
@@ -77,11 +86,10 @@ def chatbot_text(user_message, history):
     )
     quellen_block = format_sources_markdown(sources)
-    bot_msg = answer_text + "\n\n" + quellen_block
     history = history + [
         {"role": "user", "content": user_message},
-        {"role": "assistant", "content": bot_msg},
     ]
     return history, ""
@@ -91,29 +99,32 @@ def chatbot_text(user_message, history):
 # =====================================================
 def chatbot_voice(audio_path, history):
     text = transcribe_audio(audio_path)
     if not text:
         return history, None, ""
     history = history + [{"role": "user", "content": text}]
     answer_text, sources = answer(
         question=text,
         retriever=_retriever,
         chat_model=_llm,
     )
     quellen_block = format_sources_markdown(sources)
-    bot_msg = answer_text + "\n\n" + quellen_block
     history = history + [{"role": "assistant", "content": bot_msg}]
     audio = synthesize_speech(bot_msg)
     return history, audio, ""
 # =====================================================
-# Wieder-Vorlesen der letzten Antwort
 # =====================================================
 def read_last_answer(history):
@@ -123,78 +134,79 @@ def read_last_answer(history):
     for msg in reversed(history):
         if msg["role"] == "assistant":
             return synthesize_speech(msg["content"])
     return None
 # =====================================================
-# UI (Gradio)
 # =====================================================
-with gr.Blocks(title="Prüfungsrechts-Chatbot (Supabase + OpenAI)") as demo:
-    gr.Markdown("# 🧑‍⚖️ Prüfungsrechts-Chatbot (Supabase RAG + OpenAI)")
     gr.Markdown(
-        "Fragen zum Prüfungsrecht (Prüfungsordnung + Hochschulgesetz NRW). "
-        "Antworten mit Quellenangabe und Direktlinks."
     )
     with gr.Row():
-        # ---------- LINKER BEREICH: CHAT ----------
         with gr.Column(scale=2):
-            chatbot = gr.Chatbot(
-                label="Chat",
-                height=550,
-            )
             msg = gr.Textbox(
                 label="Frage eingeben",
                 placeholder="Stelle deine Frage zum Prüfungsrecht …",
-                autofocus=True,
             )
-            msg.submit(chatbot_text, [msg, chatbot], [chatbot, msg])
             send_btn = gr.Button("Senden (Text)")
-            send_btn.click(chatbot_text, [msg, chatbot], [chatbot, msg])
             gr.Markdown("### 🎙️ Spracheingabe")
             voice_in = gr.Audio(sources=["microphone"], type="filepath")
             voice_out = gr.Audio(label="Vorgelesene Antwort", type="numpy")
-            send_voice_btn = gr.Button("Sprechen & Senden")
-            send_voice_btn.click(
                 chatbot_voice,
                 [voice_in, chatbot],
-                [chatbot, voice_out, msg],
             )
-            read_btn = gr.Button("Antwort erneut vorlesen")
-            read_btn.click(read_last_answer, [chatbot], [voice_out])
-            clear_btn = gr.Button("Chat löschen")
             clear_btn.click(lambda: [], None, chatbot)
-        # ---------- RECHTER BEREICH: VIEWER ----------
-        with gr.Column(scale=1):
             gr.Markdown("### 📄 Prüfungsordnung (PDF)")
-            gr.HTML(
-                f"""
-                <iframe src="{PDF_URL}"
-                        style="width:100%; height:330px; border:none;">
-                </iframe>
-                """
-            )
-            gr.Markdown("### 📘 Hochschulgesetz NRW (Paragraph-Viewer)")
             gr.HTML(
-                f"""
-                <iframe src="{HG_HTML_URL}"
-                        style="width:100%; height:330px; border:none;">
-                </iframe>
-                """
             )
 if __name__ == "__main__":
     demo.queue().launch(ssr_mode=False, show_error=True)

+# app.py:
+# app.py – Prüfungsrechts-Chatbot (RAG + Sprachmodus)
+# Version 26.11 – ohne Modi, stabil für Text + Voice
 import gradio as gr
+from gradio_pdf import PDF
+from huggingface_hub import hf_hub_download
+from load_documents import load_documents, DATASET, PDF_FILE, HTML_FILE
 from split_documents import split_documents
 from vectorstore import build_vectorstore
 from retriever import get_retriever
 from llm import load_llm
+from rag_pipeline import answer, PDF_BASE_URL, LAW_URL
 from speech_io import transcribe_audio, synthesize_speech
 # =====================================================
+# INITIALISIERUNG (global)
 # =====================================================
+print("🔹 Lade Dokumente ...")
 _docs = load_documents()
+print("🔹 Splitte Dokumente ...")
 _chunks = split_documents(_docs)
+print("🔹 Baue VectorStore (FAISS) ...")
 _vs = build_vectorstore(_chunks)
+print("🔹 Erzeuge Retriever ...")
 _retriever = get_retriever(_vs)
+print("🔹 Lade LLM ...")
 _llm = load_llm()
+print("🔹 Lade Dateien für Viewer …")
+_pdf_path = hf_hub_download(DATASET, PDF_FILE, repo_type="dataset")
+_html_path = hf_hub_download(DATASET, HTML_FILE, repo_type="dataset")
 # =====================================================
+# Quellen formatieren – Markdown für Chat
 # =====================================================
 def format_sources_markdown(sources):
     if not sources:
         return ""
+    lines = ["", "**📚 Quellen (genutzte Dokumentstellen):**"]
     for s in sources:
         sid = s["id"]
         src = s["source"]
         url = s["url"]
         snippet = s["snippet"]
+        title = f"Quelle {sid} – {src}"
         if url:
             base = f"- [{title}]({url})"
         else:
             base = f"- {title}"
+        if page and "Prüfungsordnung" in src:
+            base += f", Seite {page}"
         lines.append(base)
         if snippet:
             lines.append(f"  > {snippet}")
     )
     quellen_block = format_sources_markdown(sources)
     history = history + [
         {"role": "user", "content": user_message},
+        {"role": "assistant", "content": answer_text + quellen_block},
     ]
     return history, ""
 # =====================================================
 def chatbot_voice(audio_path, history):
+    # 1. Speech → text: transcribe the recorded audio file
     text = transcribe_audio(audio_path)
     if not text:
         return history, None, ""
+    # Append the transcribed question to the chat history
     history = history + [{"role": "user", "content": text}]
+    # 2. Ask the RAG pipeline for an answer and the sources it used
     answer_text, sources = answer(
         question=text,
         retriever=_retriever,
         chat_model=_llm,
     )
     quellen_block = format_sources_markdown(sources)
+    bot_msg = answer_text + quellen_block
     history = history + [{"role": "assistant", "content": bot_msg}]
+    # 3. Text → speech: synthesize the full reply (answer plus sources block)
     audio = synthesize_speech(bot_msg)
     return history, audio, ""
 # =====================================================
+# LAST ANSWER → TTS
 # =====================================================
 def read_last_answer(history):
     for msg in reversed(history):
         if msg["role"] == "assistant":
             return synthesize_speech(msg["content"])
     return None
 # =====================================================
+# UI – GRADIO
 # =====================================================
+with gr.Blocks(title="Prüfungsrechts-Chatbot (RAG + Sprache)") as demo:
+    gr.Markdown("# 🧑‍⚖️ Prüfungsrechts-Chatbot")
     gr.Markdown(
+        "Dieser Chatbot beantwortet Fragen **ausschließlich** aus der "
+        "Prüfungsordnung (PDF) und dem Hochschulgesetz NRW (Website). "
+        "Du kannst Text eingeben oder direkt ins Mikrofon sprechen."
     )
     with gr.Row():
         with gr.Column(scale=2):
+            chatbot = gr.Chatbot(type="messages", label="Chat", height=500)
             msg = gr.Textbox(
                 label="Frage eingeben",
                 placeholder="Stelle deine Frage zum Prüfungsrecht …",
             )
+            # TEXT SENDEN
+            msg.submit(
+                chatbot_text,
+                [msg, chatbot],
+                [chatbot, msg]
+            )
             send_btn = gr.Button("Senden (Text)")
+            send_btn.click(
+                chatbot_text,
+                [msg, chatbot],
+                [chatbot, msg]
+            )
+            # SPRACHEINGABE
             gr.Markdown("### 🎙️ Spracheingabe")
             voice_in = gr.Audio(sources=["microphone"], type="filepath")
             voice_out = gr.Audio(label="Vorgelesene Antwort", type="numpy")
+            voice_btn = gr.Button("Sprechen & senden")
+            voice_btn.click(
                 chatbot_voice,
                 [voice_in, chatbot],
+                [chatbot, voice_out, msg]
             )
+            read_btn = gr.Button("🔁 Antwort erneut vorlesen")
+            read_btn.click(
+                read_last_answer,
+                [chatbot],
+                [voice_out]
+            )
+            clear_btn = gr.Button("Chat zurücksetzen")
             clear_btn.click(lambda: [], None, chatbot)
+        # =====================
+        # RECHTE SPALTE: Viewer
+        # =====================
+        with gr.Column(scale=1):
             gr.Markdown("### 📄 Prüfungsordnung (PDF)")
+            PDF(_pdf_path, height=350)
+            gr.Markdown("### 📘 Hochschulgesetz NRW (Website)")
             gr.HTML(
+                f'<iframe src="{LAW_URL}" style="width:100%;height:350px;border:none;"></iframe>'
             )
 if __name__ == "__main__":
     demo.queue().launch(ssr_mode=False, show_error=True)