File size: 6,043 Bytes
3a9ed51 6548bf5 80c3670 3a9ed51 f2e421a 3a9ed51 c411e11 3a9ed51 3dd5086 3a9ed51 fcc2090 c411e11 fcc2090 c411e11 3a9ed51 c411e11 3a9ed51 c411e11 3a9ed51 c411e11 3a9ed51 c411e11 3a9ed51 c411e11 3a9ed51 c411e11 3a9ed51 c411e11 ed0df67 3a9ed51 c411e11 f2e421a 3a9ed51 f2e421a 3a9ed51 b8e573b 3a9ed51 7c86ca3 3a9ed51 7c86ca3 3a9ed51 6548bf5 3a9ed51 c411e11 3a9ed51 c411e11 3a9ed51 c411e11 3a9ed51 c411e11 3a9ed51 80c3670 3a9ed51 80c3670 3a9ed51 ed0df67 c411e11 3a9ed51 c411e11 3a9ed51 80c3670 f2e421a c411e11 3a9ed51 c411e11 3a9ed51 80c3670 3a9ed51 c411e11 3dd5086 6bb0f73 3a9ed51 6bb0f73 3a9ed51 533ef4b 3a9ed51 c411e11 3a9ed51 80c3670 3a9ed51 80c3670 3a9ed51 80c3670 3a9ed51 c411e11 11e64e1 6548bf5 80c3670 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 |
# app.py – Prüfungsrechts-Chatbot (RAG + Sprachmodus)
# Version 26.11 – ohne Modi, stabil für Text + Voice
import gradio as gr
from gradio_pdf import PDF
from huggingface_hub import hf_hub_download
from load_documents import load_documents, DATASET, PDF_FILE, HTML_FILE
from split_documents import split_documents
from vectorstore import build_vectorstore
from retriever import get_retriever
from llm import load_llm
from rag_pipeline import answer, PDF_BASE_URL, LAW_URL
from speech_io import transcribe_audio, synthesize_speech
# =====================================================
# INITIALISIERUNG (global)
# =====================================================
# Build the whole retrieval pipeline once, at import time. The chat handlers
# defined further down read `_retriever` and `_llm` on every request, and the
# UI reads `_pdf_path` for the PDF viewer.
print("🔹 Lade Dokumente ...")
_docs = load_documents()
print("🔹 Splitte Dokumente ...")
# Split the loaded documents into chunks for indexing.
_chunks = split_documents(_docs)
print("🔹 Baue VectorStore (FAISS) ...")
# The log message names FAISS; the actual backend is decided inside
# build_vectorstore (not visible here).
_vs = build_vectorstore(_chunks)
print("🔹 Erzeuge Retriever ...")
_retriever = get_retriever(_vs)
print("🔹 Lade LLM ...")
_llm = load_llm()
print("🔹 Lade Dateien für Viewer …")
# Fetch the viewer files from the Hugging Face *dataset* repository named by
# DATASET.
_pdf_path = hf_hub_download(DATASET, PDF_FILE, repo_type="dataset")
# NOTE(review): _html_path is not referenced anywhere else in the visible part
# of this file (the law text is shown via an iframe on LAW_URL) — confirm
# whether this download is still needed.
_html_path = hf_hub_download(DATASET, HTML_FILE, repo_type="dataset")
# =====================================================
# Quellen formatieren – Markdown für Chat
# =====================================================
def format_sources_markdown(sources):
    """Render the sources used for an answer as a Markdown block.

    Args:
        sources: Sequence of dicts, one per document passage. The keys
            ``id`` and ``source`` are required. ``page``, ``url`` and
            ``snippet`` are optional: a missing key is treated the same
            as an empty value and the corresponding part is left out.

    Returns:
        ``""`` when ``sources`` is empty or ``None``. Otherwise a Markdown
        string that begins with a newline (so it can be appended directly
        to the answer text), followed by a bold heading and one bullet per
        source, each optionally followed by a quoted snippet line.

    Raises:
        KeyError: If a source dict lacks ``id`` or ``source``.
    """
    if not sources:
        return ""

    # The leading empty entry yields the newline that separates this block
    # from the answer text it is appended to.
    lines = ["", "**📚 Quellen (genutzte Dokumentstellen):**"]

    for s in sources:
        sid = s["id"]
        src = s["source"]
        # Optional fields: .get() so that a source without page/url/snippet
        # is rendered instead of raising KeyError.
        page = s.get("page")
        url = s.get("url")
        snippet = s.get("snippet")

        title = f"Quelle {sid} – {src}"
        base = f"- [{title}]({url})" if url else f"- {title}"

        # A page number is only shown for the examination regulations.
        if page and "Prüfungsordnung" in src:
            base += f", Seite {page}"
        lines.append(base)

        if snippet:
            lines.append(f" > {snippet}")

    return "\n".join(lines)
# =====================================================
# TEXT CHATBOT
# =====================================================
def chatbot_text(user_message, history):
    """Answer a typed question and append the exchange to the chat history.

    Args:
        user_message: Text entered by the user. ``None``, empty and
            whitespace-only input is ignored.
        history: Current chat history as a list of
            ``{"role": ..., "content": ...}`` dicts. ``None`` is treated
            as an empty history.

    Returns:
        A ``(history, "")`` tuple: the (possibly extended) history and an
        empty string, which the UI wiring in this file writes back into
        the input textbox.
    """
    # Normalise so the list concatenation below cannot fail on None.
    history = history or []

    # Ignore blank input (including whitespace-only) rather than running
    # the retrieval/LLM pipeline on it.
    if not user_message or not user_message.strip():
        return history, ""

    answer_text, sources = answer(
        question=user_message,
        retriever=_retriever,
        chat_model=_llm,
    )
    quellen_block = format_sources_markdown(sources)

    # Build a new list instead of mutating the caller's history in place.
    history = history + [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": answer_text + quellen_block},
    ]
    return history, ""
# =====================================================
# VOICE CHATBOT
# =====================================================
def chatbot_voice(audio_path, history):
    """Answer a spoken question and return the reply as text and audio.

    Args:
        audio_path: Recording to transcribe (the microphone component in
            this file is configured with ``type="filepath"``). ``None`` or
            an empty value means nothing was recorded.
        history: Current chat history as a list of
            ``{"role": ..., "content": ...}`` dicts. ``None`` is treated
            as an empty history.

    Returns:
        A ``(history, audio, "")`` tuple. ``audio`` is whatever
        ``synthesize_speech`` returns for the reply, or ``None`` when
        there was no recording or the transcription came back empty. The
        trailing empty string is written into the text input by the UI
        wiring in this file.
    """
    # Normalise so the list concatenation below cannot fail on None.
    history = history or []

    # Nothing recorded: do not hand None to the transcriber.
    if not audio_path:
        return history, None, ""

    # 1. Speech -> text
    text = transcribe_audio(audio_path)
    if not text:
        return history, None, ""

    # 2. RAG answer
    answer_text, sources = answer(
        question=text,
        retriever=_retriever,
        chat_model=_llm,
    )
    quellen_block = format_sources_markdown(sources)
    bot_msg = answer_text + quellen_block

    # Record both turns of the exchange in the chat history.
    history = history + [
        {"role": "user", "content": text},
        {"role": "assistant", "content": bot_msg},
    ]

    # 3. Text -> speech (the spoken text includes the sources block).
    audio = synthesize_speech(bot_msg)
    return history, audio, ""
# =====================================================
# LAST ANSWER → TTS
# =====================================================
def read_last_answer(history):
    """Synthesize speech for the most recent assistant message.

    Args:
        history: Chat history as a list of
            ``{"role": ..., "content": ...}`` dicts; may be empty or None.

    Returns:
        The result of ``synthesize_speech`` for the newest message whose
        role is ``"assistant"``, or ``None`` when the history is empty or
        contains no assistant message.
    """
    if not history:
        return None

    # Walk backwards so the first hit is the newest assistant reply.
    latest = next(
        (entry for entry in reversed(history) if entry["role"] == "assistant"),
        None,
    )
    if latest is None:
        return None
    return synthesize_speech(latest["content"])
# =====================================================
# UI – GRADIO
# =====================================================
with gr.Blocks(title="Prüfungsrechts-Chatbot (RAG + Sprache)") as demo:
    # Page header and short usage note.
    gr.Markdown(value="# 🧑⚖️ Prüfungsrechts-Chatbot")
    gr.Markdown(
        value=(
            "Dieser Chatbot beantwortet Fragen **ausschließlich** aus der "
            "Prüfungsordnung (PDF) und dem Hochschulgesetz NRW (Website). "
            "Du kannst Text eingeben oder direkt ins Mikrofon sprechen."
        )
    )

    with gr.Row():
        # ---------------------------------------------
        # Left column: chat, text input, voice controls
        # ---------------------------------------------
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Chat", height=500)
            msg = gr.Textbox(
                label="Frage eingeben",
                placeholder="Stelle deine Frage zum Prüfungsrecht …",
            )

            # Send text: pressing Enter in the textbox ...
            msg.submit(
                fn=chatbot_text,
                inputs=[msg, chatbot],
                outputs=[chatbot, msg],
            )

            # ... and the explicit send button run the same handler.
            send_btn = gr.Button(value="Senden (Text)")
            send_btn.click(
                fn=chatbot_text,
                inputs=[msg, chatbot],
                outputs=[chatbot, msg],
            )

            # Voice input: record from the microphone, play back the answer.
            gr.Markdown(value="### 🎙️ Spracheingabe")
            voice_in = gr.Audio(sources=["microphone"], type="filepath")
            voice_out = gr.Audio(label="Vorgelesene Antwort", type="numpy")

            voice_btn = gr.Button(value="Sprechen & senden")
            voice_btn.click(
                fn=chatbot_voice,
                inputs=[voice_in, chatbot],
                outputs=[chatbot, voice_out, msg],
            )

            # Re-read the latest assistant message aloud.
            read_btn = gr.Button(value="🔁 Antwort erneut vorlesen")
            read_btn.click(
                fn=read_last_answer,
                inputs=[chatbot],
                outputs=[voice_out],
            )

            # Reset the chat by replacing the history with an empty list.
            clear_btn = gr.Button(value="Chat zurücksetzen")
            clear_btn.click(fn=lambda: [], inputs=None, outputs=chatbot)

        # ---------------------------------------------
        # Right column: document viewers
        # ---------------------------------------------
        with gr.Column(scale=1):
            gr.Markdown(value="### 📄 Prüfungsordnung (PDF)")
            PDF(_pdf_path, height=350)

            gr.Markdown(value="### 📘 Hochschulgesetz NRW (Website)")
            gr.HTML(
                value=f'<iframe src="{LAW_URL}" style="width:100%;height:350px;border:none;"></iframe>'
            )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|