|
|
|
|
|
|
|
|
import gradio as gr |
|
|
|
|
|
from load_documents import load_documents, PDF_URL, HG_HTML_URL |
|
|
from split_documents import split_documents |
|
|
from vectorstore import build_vectorstore |
|
|
from retriever import get_retriever |
|
|
from llm import load_llm |
|
|
from rag_pipeline import answer |
|
|
from speech_io import transcribe_audio, synthesize_speech |
|
|
|
|
|
|
|
|
|
|
|
# Build the retrieval pipeline once, at import time. Each stage feeds the
# next one; the chat handlers defined further down read `_retriever` and
# `_llm` on every request, so these names must stay at module level.

# Stage 1: fetch the raw documents (the log message says they come from Supabase).
print("🔹 Lade Dokumente aus Supabase …")
_docs = load_documents()

# Stage 2: split the loaded documents into chunks.
print("🔹 Splitte Dokumente …")
_chunks = split_documents(_docs)

# Stage 3: build the vector store from the chunks.
print("🔹 Baue VectorStore …")
_vs = build_vectorstore(_chunks)

# Stage 4: derive the retriever from the vector store.
print("🔹 Erzeuge Retriever …")
_retriever = get_retriever(_vs)

# Stage 5: load the chat model (the log message says it is served by Ollama).
print("🔹 Lade LLM (Ollama) …")
_llm = load_llm()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def format_sources_markdown(sources):
    """Render the sources used for an answer as a Markdown section.

    Args:
        sources: Iterable of dicts describing the cited document passages.
            ``id`` and ``source`` are required. ``page``, ``url`` and
            ``snippet`` are optional and are skipped when missing or falsy.

    Returns:
        An empty string when ``sources`` is empty or ``None``. Otherwise a
        Markdown block made of a leading blank line, a heading and one
        bullet per source (rendered as a link when a URL is present), each
        optionally followed by the quoted snippet.
    """
    if not sources:
        return ""

    lines = ["", "### 📚 Quellen (verwendete Dokumentstellen):"]

    for s in sources:
        title = f"Quelle {s['id']} – {s['source']}"

        # Optional fields are read with .get() so that a source without a
        # page, URL or snippet is rendered instead of raising KeyError.
        page = s.get("page")
        if page:
            title += f", Seite {page}"

        url = s.get("url")
        lines.append(f"- [{title}]({url})" if url else f"- {title}")

        snippet = s.get("snippet")
        if snippet:
            # Prefix every line of the snippet, including blank ones. If only
            # the first line is prefixed, a blank line inside the snippet ends
            # the blockquote and the rest is rendered as ordinary text.
            lines.extend(
                f" > {part}" if part else " >"
                for part in str(snippet).splitlines()
            )

    return "\n".join(lines)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def chatbot_text(user_message, history):
    """Answer a typed question and append the exchange to the chat history.

    Args:
        user_message: Text taken from the question textbox.
        history: Chat history as a list of ``{"role": ..., "content": ...}``
            dicts. ``None`` is treated as an empty history.

    Returns:
        A ``(history, "")`` tuple: the history extended by the user message
        and the assistant reply (answer text followed by the formatted
        sources), and an empty string for the second output, which the UI
        wires to the question textbox.
    """
    # A caller may hand over None instead of a list; normalise it so that
    # the concatenation below cannot fail.
    history = history or []

    # Blank or whitespace-only input is not a question. Return early instead
    # of spending a retrieval and an LLM call on it.
    question = (user_message or "").strip()
    if not question:
        return history, ""

    answer_text, sources = answer(
        question=question,
        retriever=_retriever,
        chat_model=_llm,
    )

    quellen_block = format_sources_markdown(sources)
    bot_msg = answer_text + "\n\n" + quellen_block

    # Build a new list rather than mutating the one passed in.
    history = history + [
        {"role": "user", "content": question},
        {"role": "assistant", "content": bot_msg},
    ]

    return history, ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def chatbot_voice(audio_path, history):
    """Answer a spoken question and return the reply as text and audio.

    Args:
        audio_path: File path of the recorded question, or ``None``/empty
            when nothing was recorded.
        history: Chat history as a list of ``{"role": ..., "content": ...}``
            dicts. ``None`` is treated as an empty history.

    Returns:
        A ``(history, audio, "")`` tuple. ``history`` is extended by the
        transcribed question and the assistant reply. ``audio`` is whatever
        ``synthesize_speech`` returns for the reply, or ``None`` when there
        was nothing to answer. The empty string is the third output, which
        the UI wires to the question textbox.
    """
    history = history or []

    # The button can be clicked before anything was recorded; skip the
    # transcription instead of passing None into the speech backend.
    if not audio_path:
        return history, None, ""

    text = transcribe_audio(audio_path)
    # Treat a missing or whitespace-only transcription as "nothing said".
    if not text or not str(text).strip():
        return history, None, ""

    history = history + [{"role": "user", "content": text}]

    answer_text, sources = answer(
        question=text,
        retriever=_retriever,
        chat_model=_llm,
    )

    quellen_block = format_sources_markdown(sources)
    bot_msg = answer_text + "\n\n" + quellen_block

    history = history + [{"role": "assistant", "content": bot_msg}]

    # NOTE(review): the spoken text includes the Markdown source list, the
    # same content read_last_answer re-reads later. Confirm this is intended.
    audio = synthesize_speech(bot_msg)
    return history, audio, ""
|
|
|
|
|
|
|
|
def read_last_answer(history):
    """Synthesize speech for the most recent assistant message.

    Args:
        history: Chat history as a list of dicts with ``role`` and
            ``content`` keys; may be empty or ``None``.

    Returns:
        The result of ``synthesize_speech`` for the newest entry whose role
        is ``"assistant"``, or ``None`` when the history is empty or holds
        no assistant message.
    """
    if not history:
        return None

    # Walk the history backwards and stop at the first assistant entry.
    latest = next(
        (entry for entry in reversed(history) if entry["role"] == "assistant"),
        None,
    )
    if latest is None:
        return None
    return synthesize_speech(latest["content"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Gradio UI: chat with text and voice input on the left, two document
# viewers on the right. Components render in creation order.
with gr.Blocks(title="Prüfungsrechts-Chatbot (Supabase + Ollama)") as demo:
    gr.Markdown("# 🧑⚖️ Prüfungsrechts-Chatbot (Supabase RAG, Ollama)")
    gr.Markdown("Fragen zum Prüfungsrecht? Text oder Mikrofon möglich.")

    with gr.Row():
        # Left column: conversation, text input and voice controls.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(type="messages", label="Chat", height=550)

            msg = gr.Textbox(
                label="Frage eingeben",
                placeholder="Stelle deine Frage zum Prüfungsrecht …",
                autofocus=True,
            )
            # Pressing Enter and clicking the button run the same handler.
            msg.submit(
                fn=chatbot_text,
                inputs=[msg, chatbot],
                outputs=[chatbot, msg],
            )

            send_btn = gr.Button("Senden (Text)")
            send_btn.click(
                fn=chatbot_text,
                inputs=[msg, chatbot],
                outputs=[chatbot, msg],
            )

            gr.Markdown("### 🎙️ Spracheingabe")
            voice_in = gr.Audio(sources=["microphone"], type="filepath")
            voice_out = gr.Audio(label="Vorgelesene Antwort", type="numpy")

            send_voice_btn = gr.Button("Sprechen & Senden")
            send_voice_btn.click(
                fn=chatbot_voice,
                inputs=[voice_in, chatbot],
                outputs=[chatbot, voice_out, msg],
            )

            read_btn = gr.Button("Antwort erneut vorlesen")
            read_btn.click(
                fn=read_last_answer,
                inputs=[chatbot],
                outputs=[voice_out],
            )

            # Resetting the chat replaces the history with an empty list.
            clear_btn = gr.Button("Chat löschen")
            clear_btn.click(fn=lambda: [], inputs=None, outputs=chatbot)

        # Right column: embedded viewers for the two documents.
        with gr.Column(scale=1):
            gr.Markdown("### 📄 Prüfungsordnung (PDF)")
            gr.HTML(
                f"""
                <iframe src="{PDF_URL}"
                style="width:100%; height:330px; border:none;">
                </iframe>
                """
            )

            gr.Markdown("### 📘 Hochschulgesetz NRW (Paragraph-Viewer)")
            gr.HTML(
                f"""
                <iframe src="{HG_HTML_URL}"
                style="width:100%; height:330px; border:none;">
                </iframe>
                """
            )


# Start the app only when the file is run as a script.
if __name__ == "__main__":
    demo.queue().launch(ssr_mode=False, show_error=True)
|
|
|