# app.py – Examination-law chatbot (RAG + voice mode)
# Version 26.11 – no modes, stable for text + voice

import gradio as gr
from gradio_pdf import PDF
from huggingface_hub import hf_hub_download

from load_documents import load_documents, DATASET, PDF_FILE, HTML_FILE
from split_documents import split_documents
from vectorstore import build_vectorstore
from retriever import get_retriever
from llm import load_llm
from rag_pipeline import answer, PDF_BASE_URL, LAW_URL
from speech_io import transcribe_audio, synthesize_speech

# =====================================================
# INITIALISATION (global, runs once at import time)
# =====================================================

print("🔹 Lade Dokumente ...")
_docs = load_documents()

print("🔹 Splitte Dokumente ...")
_chunks = split_documents(_docs)

print("🔹 Baue VectorStore (FAISS) ...")
_vs = build_vectorstore(_chunks)

print("🔹 Erzeuge Retriever ...")
_retriever = get_retriever(_vs)

print("🔹 Lade LLM ...")
_llm = load_llm()

print("🔹 Lade Dateien für Viewer …")
_pdf_path = hf_hub_download(DATASET, PDF_FILE, repo_type="dataset")
# NOTE(review): _html_path is downloaded but not referenced anywhere in this file.
_html_path = hf_hub_download(DATASET, HTML_FILE, repo_type="dataset")


# =====================================================
# Format sources – Markdown for the chat
# =====================================================

def format_sources_markdown(sources) -> str:
    """Render the list of used sources as a Markdown block for the chat.

    Args:
        sources: Iterable of mappings; each one is indexed with the keys
            ``"id"``, ``"source"``, ``"page"``, ``"url"`` and ``"snippet"``.

    Returns:
        An empty string when ``sources`` is falsy; otherwise a Markdown
        string that starts with a newline, followed by a bold heading and
        one bullet per source (linked when a URL is present), each
        optionally followed by a quoted snippet line.
    """
    if not sources:
        return ""

    # The leading empty entry produces a newline between the answer text
    # and the heading once everything is joined.
    lines = ["", "**📚 Quellen (genutzte Dokumentstellen):**"]

    for s in sources:
        sid = s["id"]
        src = s["source"]
        page = s["page"]
        url = s["url"]
        snippet = s["snippet"]

        title = f"Quelle {sid} – {src}"

        if url:
            base = f"- [{title}]({url})"
        else:
            base = f"- {title}"

        # Page numbers are only appended for the examination regulations.
        if page and "Prüfungsordnung" in src:
            base += f", Seite {page}"

        lines.append(base)

        if snippet:
            lines.append(f" > {snippet}")

    return "\n".join(lines)


# =====================================================
# TEXT CHATBOT
# =====================================================

def chatbot_text(user_message, history):
    """Answer a typed question and append the exchange to the chat history.

    Args:
        user_message: Text entered by the user.
        history: Current chat history as a list of ``{"role", "content"}``
            dicts; ``None`` is treated as an empty history.

    Returns:
        A ``(history, "")`` tuple: the (possibly extended) history and an
        empty string that clears the input textbox.
    """
    # Guard against a missing history so the list concatenation below
    # cannot fail with a TypeError.
    history = history or []

    if not user_message:
        return history, ""

    answer_text, sources = answer(
        question=user_message,
        retriever=_retriever,
        chat_model=_llm,
    )

    quellen_block = format_sources_markdown(sources)

    history = history + [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": answer_text + quellen_block},
    ]

    return history, ""


# =====================================================
# VOICE CHATBOT
# =====================================================

def chatbot_voice(audio_path, history):
    """Transcribe a recording, answer it via RAG and synthesize the reply.

    Args:
        audio_path: File path of the recorded audio, or ``None`` when
            nothing was recorded.
        history: Current chat history as a list of ``{"role", "content"}``
            dicts; ``None`` is treated as an empty history.

    Returns:
        A ``(history, audio, "")`` tuple: the updated history, the result of
        ``synthesize_speech`` (``None`` if there was nothing to answer) and
        an empty string for the textbox.
    """
    history = history or []

    # The button can be clicked without a recording; do not hand an empty
    # value to the transcription helper.
    if not audio_path:
        return history, None, ""

    # 1. Speech -> text
    text = transcribe_audio(audio_path)
    if not text:
        return history, None, ""

    # Store the transcribed question in the chat history.
    history = history + [{"role": "user", "content": text}]

    # 2. RAG answer
    answer_text, sources = answer(
        question=text,
        retriever=_retriever,
        chat_model=_llm,
    )

    quellen_block = format_sources_markdown(sources)
    bot_msg = answer_text + quellen_block

    history = history + [{"role": "assistant", "content": bot_msg}]

    # 3. Text -> speech. bot_msg includes the Markdown sources block, so
    # that block is passed to the synthesizer as well.
    audio = synthesize_speech(bot_msg)

    return history, audio, ""


# =====================================================
# LAST ANSWER -> TTS
# =====================================================

def read_last_answer(history):
    """Synthesize speech for the most recent assistant message.

    Args:
        history: Chat history as a list of ``{"role", "content"}`` dicts.

    Returns:
        The result of ``synthesize_speech`` for the last assistant message,
        or ``None`` when the history is empty or has no assistant message.
    """
    if not history:
        return None

    # Walk backwards so the newest assistant message wins.
    for msg in reversed(history):
        if msg["role"] == "assistant":
            return synthesize_speech(msg["content"])

    return None


# =====================================================
# UI – GRADIO
# =====================================================

with gr.Blocks(title="Prüfungsrechts-Chatbot (RAG + Sprache)") as demo:
    gr.Markdown("# 🧑‍⚖️ Prüfungsrechts-Chatbot")
    gr.Markdown(
        "Dieser Chatbot beantwortet Fragen **ausschließlich** aus der "
        "Prüfungsordnung (PDF) und dem Hochschulgesetz NRW (Website). "
        "Du kannst Text eingeben oder direkt ins Mikrofon sprechen."
    )

    with gr.Row():
        # =====================
        # LEFT COLUMN: chat, text and voice input
        # =====================
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Chat", height=500)

            msg = gr.Textbox(
                label="Frage eingeben",
                placeholder="Stelle deine Frage zum Prüfungsrecht …",
            )

            # SEND TEXT (Enter key and button share the same handler)
            msg.submit(
                chatbot_text,
                [msg, chatbot],
                [chatbot, msg]
            )

            send_btn = gr.Button("Senden (Text)")
            send_btn.click(
                chatbot_text,
                [msg, chatbot],
                [chatbot, msg]
            )

            # VOICE INPUT
            gr.Markdown("### 🎙️ Spracheingabe")

            voice_in = gr.Audio(sources=["microphone"], type="filepath")
            voice_out = gr.Audio(label="Vorgelesene Antwort", type="numpy")

            voice_btn = gr.Button("Sprechen & senden")
            voice_btn.click(
                chatbot_voice,
                [voice_in, chatbot],
                [chatbot, voice_out, msg]
            )

            read_btn = gr.Button("🔁 Antwort erneut vorlesen")
            read_btn.click(
                read_last_answer,
                [chatbot],
                [voice_out]
            )

            clear_btn = gr.Button("Chat zurücksetzen")
            clear_btn.click(lambda: [], None, chatbot)

        # =====================
        # RIGHT COLUMN: viewer
        # =====================
        with gr.Column(scale=1):
            gr.Markdown("### 📄 Prüfungsordnung (PDF)")
            PDF(_pdf_path, height=350)

            gr.Markdown("### 📘 Hochschulgesetz NRW (Website)")
            # NOTE(review): the HTML payload is empty, so nothing is shown
            # under this heading; presumably markup embedding LAW_URL was
            # lost – confirm and restore.
            gr.HTML(
                f''
            )

if __name__ == "__main__":
    demo.launch()