# chatbot1 / app.py
# Hugging Face page header: uploaded by Nguyen5, commit 533ef4b
# app.py – Prüfungsrechts-Chatbot (RAG + Sprachmodus)
# Version 26.11 – ohne Modi, stabil für Text + Voice
import gradio as gr
from gradio_pdf import PDF
from huggingface_hub import hf_hub_download
from load_documents import load_documents, DATASET, PDF_FILE, HTML_FILE
from split_documents import split_documents
from vectorstore import build_vectorstore
from retriever import get_retriever
from llm import load_llm
from rag_pipeline import answer, PDF_BASE_URL, LAW_URL
from speech_io import transcribe_audio, synthesize_speech
# =====================================================
# INITIALISIERUNG (global)
# =====================================================
# Everything below runs once at import time. The resulting module-level
# objects (_retriever, _llm, _pdf_path) are shared by the request handlers
# and the UI defined further down. Each print() is a progress message.

# Load the raw source documents.
print("🔹 Lade Dokumente ...")
_docs = load_documents()
# Split the loaded documents into chunks.
print("🔹 Splitte Dokumente ...")
_chunks = split_documents(_docs)
# Build the vector store from the chunks (the log message says FAISS;
# the actual backend is decided inside build_vectorstore).
print("🔹 Baue VectorStore (FAISS) ...")
_vs = build_vectorstore(_chunks)
# Wrap the vector store in a retriever used by answer().
print("🔹 Erzeuge Retriever ...")
_retriever = get_retriever(_vs)
# Load the chat model used by answer().
print("🔹 Lade LLM ...")
_llm = load_llm()
# Fetch the viewer files from the Hugging Face dataset repo; hf_hub_download
# returns the path of the locally cached copy.
# NOTE(review): _html_path is not referenced anywhere in the visible code.
print("🔹 Lade Dateien für Viewer …")
_pdf_path = hf_hub_download(DATASET, PDF_FILE, repo_type="dataset")
_html_path = hf_hub_download(DATASET, HTML_FILE, repo_type="dataset")
# =====================================================
# Quellen formatieren – Markdown für Chat
# =====================================================
def format_sources_markdown(sources):
    """Render the sources used for an answer as a Markdown block.

    Args:
        sources: Iterable of dicts with the keys ``id``, ``source``,
            ``page``, ``url`` and ``snippet``. A falsy value (``None`` or an
            empty list) yields an empty string.

    Returns:
        A string that starts with a newline (so it can be appended directly
        to the answer text), followed by a bold heading and one bullet per
        source. A source with a URL becomes a Markdown link; the page number
        is appended only for sources whose name contains "Prüfungsordnung";
        a non-empty snippet is added as a quoted line below its bullet.
    """
    if not sources:
        return ""

    rendered = ["", "**📚 Quellen (genutzte Dokumentstellen):**"]
    for entry in sources:
        # Read all fields up front, in a fixed order, so an incomplete entry
        # fails before anything is appended for it.
        sid, src, page, url, snippet = (
            entry[key] for key in ("id", "source", "page", "url", "snippet")
        )

        title = f"Quelle {sid}{src}"
        bullet = f"- [{title}]({url})" if url else f"- {title}"

        # Page numbers are only shown for the examination regulations.
        if page and "Prüfungsordnung" in src:
            bullet = f"{bullet}, Seite {page}"
        rendered.append(bullet)

        if snippet:
            rendered.append(f" > {snippet}")

    return "\n".join(rendered)
# =====================================================
# TEXT CHATBOT
# =====================================================
def chatbot_text(user_message, history):
    """Answer a typed question and append the exchange to the chat history.

    Args:
        user_message: Text from the input box. ``None``, empty and
            whitespace-only input are ignored.
        history: Chat history as a list of ``{"role", "content"}`` dicts.
            ``None`` is treated as an empty history when a new exchange is
            appended.

    Returns:
        A ``(history, textbox_value)`` tuple. ``textbox_value`` is always
        ``""`` so the input box is cleared after submitting.
    """
    # Strip first: a message made only of spaces/newlines must not trigger
    # a retrieval + LLM round trip.
    question = (user_message or "").strip()
    if not question:
        return history, ""

    answer_text, sources = answer(
        question=question,
        retriever=_retriever,
        chat_model=_llm,
    )
    quellen_block = format_sources_markdown(sources)

    # Build a new list instead of mutating the one passed in.
    history = (history or []) + [
        {"role": "user", "content": question},
        {"role": "assistant", "content": answer_text + quellen_block},
    ]
    return history, ""
# =====================================================
# VOICE CHATBOT
# =====================================================
def chatbot_voice(audio_path, history):
    """Answer a spoken question and read the reply aloud.

    Steps: speech-to-text, RAG answer, text-to-speech.

    Args:
        audio_path: Path of the recorded audio file. ``None`` or an empty
            value (nothing recorded) is ignored.
        history: Chat history as a list of ``{"role", "content"}`` dicts.
            ``None`` is treated as an empty history when a new exchange is
            appended.

    Returns:
        A ``(history, audio, textbox_value)`` tuple. ``audio`` is whatever
        ``synthesize_speech`` returns for the reply, or ``None`` when there
        was nothing to answer. ``textbox_value`` is always ``""``.
    """
    # Nothing recorded: bail out instead of handing None to transcribe_audio.
    if not audio_path:
        return history, None, ""

    # 1. Speech -> text
    text = transcribe_audio(audio_path)
    if not text:
        return history, None, ""

    # 2. RAG answer
    answer_text, sources = answer(
        question=text,
        retriever=_retriever,
        chat_model=_llm,
    )
    quellen_block = format_sources_markdown(sources)
    bot_msg = answer_text + quellen_block

    # Store the transcribed question and the reply in the chat history.
    history = (history or []) + [
        {"role": "user", "content": text},
        {"role": "assistant", "content": bot_msg},
    ]

    # 3. Text -> speech
    audio = synthesize_speech(bot_msg)
    return history, audio, ""
# =====================================================
# LAST ANSWER → TTS
# =====================================================
def read_last_answer(history):
    """Synthesize speech for the most recent assistant message.

    Args:
        history: Chat history as a list of ``{"role", "content"}`` dicts,
            or a falsy value when the chat is empty.

    Returns:
        The result of ``synthesize_speech`` for the latest assistant
        message, or ``None`` when the history holds no assistant message.
    """
    # Walk the history backwards and stop at the first assistant entry.
    latest = next(
        (entry for entry in reversed(history or ()) if entry["role"] == "assistant"),
        None,
    )
    if latest is None:
        return None
    return synthesize_speech(latest["content"])
# =====================================================
# UI – GRADIO
# =====================================================
# Layout first, event wiring afterwards. Components are created in the same
# order as before, and listeners are registered in the same relative order.
with gr.Blocks(title="Prüfungsrechts-Chatbot (RAG + Sprache)") as demo:
    # Page header and a short description of what the bot covers.
    gr.Markdown("# 🧑‍⚖️ Prüfungsrechts-Chatbot")
    gr.Markdown(
        "Dieser Chatbot beantwortet Fragen **ausschließlich** aus der "
        "Prüfungsordnung (PDF) und dem Hochschulgesetz NRW (Website). "
        "Du kannst Text eingeben oder direkt ins Mikrofon sprechen."
    )

    with gr.Row():
        # Left column: chat window, text input, voice input/output, reset.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Chat", height=500)
            msg = gr.Textbox(
                label="Frage eingeben",
                placeholder="Stelle deine Frage zum Prüfungsrecht …",
            )
            send_btn = gr.Button("Senden (Text)")

            gr.Markdown("### 🎙️ Spracheingabe")
            voice_in = gr.Audio(sources=["microphone"], type="filepath")
            voice_out = gr.Audio(label="Vorgelesene Antwort", type="numpy")
            voice_btn = gr.Button("Sprechen & senden")
            read_btn = gr.Button("🔁 Antwort erneut vorlesen")
            clear_btn = gr.Button("Chat zurücksetzen")

        # Right column: viewers for the two source documents.
        with gr.Column(scale=1):
            gr.Markdown("### 📄 Prüfungsordnung (PDF)")
            PDF(_pdf_path, height=350)
            gr.Markdown("### 📘 Hochschulgesetz NRW (Website)")
            law_frame = f'<iframe src="{LAW_URL}" style="width:100%;height:350px;border:none;"></iframe>'
            gr.HTML(law_frame)

    # Text input: pressing Enter and clicking the button do the same thing.
    msg.submit(fn=chatbot_text, inputs=[msg, chatbot], outputs=[chatbot, msg])
    send_btn.click(fn=chatbot_text, inputs=[msg, chatbot], outputs=[chatbot, msg])

    # Voice input: transcribe, answer, and play the spoken reply.
    voice_btn.click(
        fn=chatbot_voice,
        inputs=[voice_in, chatbot],
        outputs=[chatbot, voice_out, msg],
    )

    # Replay the latest assistant message as audio.
    read_btn.click(fn=read_last_answer, inputs=[chatbot], outputs=[voice_out])

    # Reset the chat to an empty history.
    clear_btn.click(fn=lambda: [], inputs=None, outputs=chatbot)

if __name__ == "__main__":
    demo.launch()