Spaces:

Nguyen5
/

chatbot2

Runtime error

File size: 5,157 Bytes

ed084d7

# app.py – SUPABASE RAG CHATBOT (Docker + Ollama)

import gradio as gr

from load_documents import load_documents, PDF_URL, HG_HTML_URL
from split_documents import split_documents
from vectorstore import build_vectorstore
from retriever import get_retriever
from llm import load_llm
from rag_pipeline import answer
from speech_io import transcribe_audio, synthesize_speech

# ================= INITIALISIERUNG =====================

# Build the RAG pipeline once, at import time. Each step consumes the result
# of the previous one, so the order of these statements matters. The request
# handlers below read the module-level `_retriever` and `_llm`.
print("🔹 Lade Dokumente aus Supabase …")
_docs = load_documents()  # raw documents (source per log message: Supabase)

print("🔹 Splitte Dokumente …")
_chunks = split_documents(_docs)  # documents split into chunks

print("🔹 Baue VectorStore …")
_vs = build_vectorstore(_chunks)  # vector store built from the chunks

print("🔹 Erzeuge Retriever …")
_retriever = get_retriever(_vs)  # retriever passed to answer() on every request

print("🔹 Lade LLM (Ollama) …")
_llm = load_llm()  # chat model passed to answer() on every request


# ================= Quellen Markdown ====================

def format_sources_markdown(sources):
    """Render the retrieved sources as a Markdown bullet list.

    Args:
        sources: Iterable of dicts, each providing the keys ``"id"``,
            ``"source"``, ``"page"``, ``"url"`` and ``"snippet"``. A falsy
            value (``None``, empty list) yields an empty string.

    Returns:
        A Markdown string consisting of a leading blank line, a heading and
        one bullet per source. The bullet is a link when ``url`` is truthy,
        mentions the page when ``page`` is truthy, and is followed by a
        quoted snippet line when ``snippet`` is truthy.

    Raises:
        KeyError: If a source dict lacks one of the five keys.
    """
    if not sources:
        return ""

    md_lines = ["", "### 📚 Quellen (verwendete Dokumentstellen):"]

    for entry in sources:
        # All five keys are read up front (in this order) so that a
        # malformed entry fails immediately rather than half-rendered.
        ident, origin, page_no, link, excerpt = (
            entry[key] for key in ("id", "source", "page", "url", "snippet")
        )

        heading = f"Quelle {ident} – {origin}"
        if page_no:
            heading += f", Seite {page_no}"

        bullet = f"- [{heading}]({link})" if link else f"- {heading}"
        md_lines.append(bullet)

        if excerpt:
            md_lines.append(f"  > {excerpt}")

    return "\n".join(md_lines)


# ================= TEXT CHATBOT ========================

def chatbot_text(user_message, history):
    """Answer a typed question and append the exchange to the chat history.

    Args:
        user_message: Text taken from the input box.
        history: Chat history as a list of ``{"role": ..., "content": ...}``
            dicts. ``None`` is treated as an empty history when appending.

    Returns:
        A tuple ``(history, "")``. The second element is written back to the
        input textbox by the UI wiring and therefore clears it.
    """
    # Empty or whitespace-only input: there is nothing to ask, so skip the
    # retrieval/LLM round trip and leave the chat untouched.
    if not user_message or not user_message.strip():
        return history, ""

    answer_text, sources = answer(
        question=user_message,
        retriever=_retriever,
        chat_model=_llm,
    )

    quellen_block = format_sources_markdown(sources)
    bot_msg = answer_text + "\n\n" + quellen_block

    # Build a new list instead of mutating the caller's history; tolerate a
    # history of None so the concatenation cannot raise TypeError.
    history = (history or []) + [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": bot_msg},
    ]

    return history, ""


# ================= VOICE CHATBOT =======================

def chatbot_voice(audio_path, history):
    """Transcribe a recording, answer it, and synthesize the reply as audio.

    Args:
        audio_path: Path of the recorded audio file, or ``None``/empty when
            the button was pressed without a recording.
        history: Chat history as a list of ``{"role": ..., "content": ...}``
            dicts. ``None`` is treated as an empty history when appending.

    Returns:
        A tuple ``(history, audio, "")``. ``audio`` is whatever
        ``synthesize_speech`` returns for the full bot message (answer plus
        sources block), or ``None`` when nothing was recorded or transcribed.
        The trailing empty string is written to the input textbox by the UI
        wiring.
    """
    # No recording available: do not hand None to the transcriber.
    if not audio_path:
        return history, None, ""

    text = transcribe_audio(audio_path)
    if not text:
        return history, None, ""

    # Tolerate a history of None so the concatenation cannot raise TypeError.
    history = (history or []) + [{"role": "user", "content": text}]

    answer_text, sources = answer(
        question=text,
        retriever=_retriever,
        chat_model=_llm,
    )

    quellen_block = format_sources_markdown(sources)
    bot_msg = answer_text + "\n\n" + quellen_block

    history = history + [{"role": "assistant", "content": bot_msg}]

    audio = synthesize_speech(bot_msg)
    return history, audio, ""


def read_last_answer(history):
    """Synthesize speech for the most recent assistant message.

    Args:
        history: Chat history as a list of ``{"role": ..., "content": ...}``
            dicts; may be empty or ``None``.

    Returns:
        The result of ``synthesize_speech`` for the newest message whose role
        is ``"assistant"``, or ``None`` if the history is empty or contains no
        assistant message.
    """
    # Walk the history from newest to oldest and stop at the first reply.
    latest_reply = next(
        (
            entry
            for entry in reversed(history or [])
            if entry["role"] == "assistant"
        ),
        None,
    )
    if latest_reply is None:
        return None
    return synthesize_speech(latest_reply["content"])


# ================= UI (Gradio) =========================

# Build the Gradio UI: a wide chat column (text and voice controls) next to a
# narrower column that embeds the two reference documents in iframes.
with gr.Blocks(title="Prüfungsrechts-Chatbot (Supabase + Ollama)") as demo:

    gr.Markdown("# 🧑‍⚖️ Prüfungsrechts-Chatbot (Supabase RAG, Ollama)")
    gr.Markdown("Fragen zum Prüfungsrecht? Text oder Mikrofon möglich.")

    with gr.Row():

        # ---------- CHAT ----------
        with gr.Column(scale=2):
            # History uses the "messages" format, i.e. role/content dicts,
            # which is what the handlers above append.
            chatbot = gr.Chatbot(
                type="messages",
                label="Chat",
                height=550,
            )

            msg = gr.Textbox(
                label="Frage eingeben",
                placeholder="Stelle deine Frage zum Prüfungsrecht …",
                autofocus=True,
            )
            # Pressing Enter and clicking the button trigger the same handler;
            # its second return value ("") is written back to the textbox.
            msg.submit(chatbot_text, [msg, chatbot], [chatbot, msg])

            send_btn = gr.Button("Senden (Text)")
            send_btn.click(chatbot_text, [msg, chatbot], [chatbot, msg])

            gr.Markdown("### 🎙️ Spracheingabe")
            # The recording is passed to the handler as a file path.
            voice_in = gr.Audio(sources=["microphone"], type="filepath")
            # Receives the return value of synthesize_speech.
            voice_out = gr.Audio(label="Vorgelesene Antwort", type="numpy")

            send_voice_btn = gr.Button("Sprechen & Senden")
            send_voice_btn.click(
                chatbot_voice,
                [voice_in, chatbot],
                [chatbot, voice_out, msg],
            )

            # Re-synthesize the latest assistant message from the history.
            read_btn = gr.Button("Antwort erneut vorlesen")
            read_btn.click(read_last_answer, [chatbot], [voice_out])

            # Reset the chat by replacing the history with an empty list.
            clear_btn = gr.Button("Chat löschen")
            clear_btn.click(lambda: [], None, chatbot)

        # ---------- VIEWER ----------
        with gr.Column(scale=1):
            # PDF_URL and HG_HTML_URL are imported from load_documents and
            # interpolated into the iframe markup without escaping.
            gr.Markdown("### 📄 Prüfungsordnung (PDF)")
            gr.HTML(
                f"""
                <iframe src="{PDF_URL}"
                        style="width:100%; height:330px; border:none;">
                </iframe>
                """
            )

            gr.Markdown("### 📘 Hochschulgesetz NRW (Paragraph-Viewer)")
            gr.HTML(
                f"""
                <iframe src="{HG_HTML_URL}"
                        style="width:100%; height:330px; border:none;">
                </iframe>
                """
            )

# Start the web app only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    # Call queue() on the app, then launch it with ssr_mode=False and
    # show_error=True.
    demo.queue().launch(ssr_mode=False, show_error=True)