# app.py – SUPABASE RAG CHATBOT (Docker + Ollama)
import gradio as gr

from load_documents import load_documents, PDF_URL, HG_HTML_URL
from split_documents import split_documents
from vectorstore import build_vectorstore
from retriever import get_retriever
from llm import load_llm
from rag_pipeline import answer
from speech_io import transcribe_audio, synthesize_speech

# ================= INITIALIZATION ======================
# The whole pipeline is built once at import time so that every request
# handler below can reuse the same retriever and LLM instances.

print("🔹 Lade Dokumente aus Supabase …")
_docs = load_documents()

print("🔹 Splitte Dokumente …")
_chunks = split_documents(_docs)

print("🔹 Baue VectorStore …")
_vs = build_vectorstore(_chunks)

print("🔹 Erzeuge Retriever …")
_retriever = get_retriever(_vs)

print("🔹 Lade LLM (Ollama) …")
_llm = load_llm()


# ================= Sources Markdown ====================

def format_sources_markdown(sources):
    """Render the retrieved sources as a Markdown bullet list.

    Args:
        sources: Sequence of mappings; each one is read via the keys
            ``"id"``, ``"source"``, ``"page"``, ``"url"`` and ``"snippet"``.

    Returns:
        A Markdown string starting with a blank line and a heading, followed
        by one bullet per source, or ``""`` when ``sources`` is empty/None.
    """
    if not sources:
        return ""

    lines = ["", "### 📚 Quellen (verwendete Dokumentstellen):"]
    for s in sources:
        sid = s["id"]
        src = s["source"]
        page = s["page"]
        url = s["url"]
        snippet = s["snippet"]

        # The page number is only mentioned when one is present.
        if page:
            title = f"Quelle {sid} – {src}, Seite {page}"
        else:
            title = f"Quelle {sid} – {src}"

        # Turn the title into a link only when a URL is available.
        if url:
            base = f"- [{title}]({url})"
        else:
            base = f"- {title}"
        lines.append(base)

        if snippet:
            lines.append(f" > {snippet}")

    return "\n".join(lines)


def _answer_with_sources(question):
    """Run the RAG pipeline for ``question`` and append the sources block."""
    answer_text, sources = answer(
        question=question,
        retriever=_retriever,
        chat_model=_llm,
    )
    quellen_block = format_sources_markdown(sources)
    return answer_text + "\n\n" + quellen_block


# ================= TEXT CHATBOT ========================

def chatbot_text(user_message, history):
    """Answer a typed question and append the exchange to the chat history.

    Args:
        user_message: Text from the input box.
        history: Current chat history as a list of ``{"role", "content"}``
            dicts; ``None`` is treated as an empty history.

    Returns:
        ``(history, "")`` – the updated history and an empty string that
        clears the input textbox.
    """
    history = history or []

    # Ignore empty or whitespace-only input instead of querying the LLM.
    if not user_message or not user_message.strip():
        return history, ""

    bot_msg = _answer_with_sources(user_message)

    history = history + [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": bot_msg},
    ]
    return history, ""


# ================= VOICE CHATBOT =======================

def chatbot_voice(audio_path, history):
    """Transcribe a recording, answer it, and synthesize the reply as audio.

    Args:
        audio_path: File path of the recorded audio; ``None`` or empty when
            nothing was recorded.
        history: Current chat history as a list of ``{"role", "content"}``
            dicts; ``None`` is treated as an empty history.

    Returns:
        ``(history, audio, "")`` – the updated history, the synthesized
        answer (``None`` when there was nothing to answer), and an empty
        string that clears the input textbox.
    """
    history = history or []

    # Without a recording there is nothing to transcribe; bail out before
    # handing ``None`` to the speech backend.
    if not audio_path:
        return history, None, ""

    text = transcribe_audio(audio_path)
    if not text:
        return history, None, ""

    history = history + [{"role": "user", "content": text}]

    bot_msg = _answer_with_sources(text)
    history = history + [{"role": "assistant", "content": bot_msg}]

    audio = synthesize_speech(bot_msg)
    return history, audio, ""


def read_last_answer(history):
    """Synthesize the most recent assistant message in ``history``.

    Returns:
        The result of ``synthesize_speech`` for the latest assistant
        message, or ``None`` when the history is empty or contains no
        assistant message.
    """
    if not history:
        return None

    # Walk backwards so the newest assistant reply is found first.
    for entry in reversed(history):
        if entry["role"] == "assistant":
            return synthesize_speech(entry["content"])
    return None


# ================= UI (Gradio) =========================

with gr.Blocks(title="Prüfungsrechts-Chatbot (Supabase + Ollama)") as demo:
    gr.Markdown("# 🧑‍⚖️ Prüfungsrechts-Chatbot (Supabase RAG, Ollama)")
    gr.Markdown("Fragen zum Prüfungsrecht? Text oder Mikrofon möglich.")

    with gr.Row():
        # ---------- CHAT ----------
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                type="messages",
                label="Chat",
                height=550,
            )

            msg = gr.Textbox(
                label="Frage eingeben",
                placeholder="Stelle deine Frage zum Prüfungsrecht …",
                autofocus=True,
            )
            # Enter key and the button trigger the same handler.
            msg.submit(chatbot_text, [msg, chatbot], [chatbot, msg])

            send_btn = gr.Button("Senden (Text)")
            send_btn.click(chatbot_text, [msg, chatbot], [chatbot, msg])

            gr.Markdown("### 🎙️ Spracheingabe")
            voice_in = gr.Audio(sources=["microphone"], type="filepath")
            voice_out = gr.Audio(label="Vorgelesene Antwort", type="numpy")

            send_voice_btn = gr.Button("Sprechen & Senden")
            send_voice_btn.click(
                chatbot_voice,
                [voice_in, chatbot],
                [chatbot, voice_out, msg],
            )

            read_btn = gr.Button("Antwort erneut vorlesen")
            read_btn.click(read_last_answer, [chatbot], [voice_out])

            clear_btn = gr.Button("Chat löschen")
            clear_btn.click(lambda: [], None, chatbot)

        # ---------- VIEWER ----------
        with gr.Column(scale=1):
            gr.Markdown("### 📄 Prüfungsordnung (PDF)")
            # NOTE(review): the HTML body is empty in this copy of the file
            # and PDF_URL is imported but never used — presumably an embed
            # referencing PDF_URL belongs here; confirm against the original.
            gr.HTML(
                f""" """
            )

            gr.Markdown("### 📘 Hochschulgesetz NRW (Paragraph-Viewer)")
            # NOTE(review): same as above for HG_HTML_URL — the markup
            # appears to be missing; confirm against the original.
            gr.HTML(
                f""" """
            )

if __name__ == "__main__":
    demo.queue().launch(ssr_mode=False, show_error=True)