File size: 5,157 Bytes
ed084d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# app.py – SUPABASE RAG CHATBOT (Docker + Ollama)

import gradio as gr

from load_documents import load_documents, PDF_URL, HG_HTML_URL
from split_documents import split_documents
from vectorstore import build_vectorstore
from retriever import get_retriever
from llm import load_llm
from rag_pipeline import answer
from speech_io import transcribe_audio, synthesize_speech

# ================= INITIALISIERUNG =====================

# Build the RAG pipeline once, at import time. Each step consumes the result
# of the previous one, so the order of these statements must not change.
# The chat handlers below read the module-level _retriever and _llm.
# NOTE(review): the log message says the documents come from Supabase; that
# is up to load_documents() — confirm in load_documents.py.
print("🔹 Lade Dokumente aus Supabase …")
_docs = load_documents()

# Split the loaded documents into chunks for indexing.
print("🔹 Splitte Dokumente …")
_chunks = split_documents(_docs)

# Build the vector store from the chunks.
print("🔹 Baue VectorStore …")
_vs = build_vectorstore(_chunks)

# Derive the retriever that is passed to answer() for every question.
print("🔹 Erzeuge Retriever …")
_retriever = get_retriever(_vs)

# Load the chat model that is passed to answer() as chat_model.
print("🔹 Lade LLM (Ollama) …")
_llm = load_llm()


# ================= Quellen Markdown ====================

def format_sources_markdown(sources):
    """Render the source entries of an answer as a Markdown bullet list.

    Args:
        sources: Sequence of mappings. Each entry must provide ``"id"`` and
            ``"source"``; ``"page"``, ``"url"`` and ``"snippet"`` are
            optional and are skipped when missing or falsy.

    Returns:
        A Markdown string consisting of an empty first line, a heading and
        one bullet per source (rendered as a link when a URL is present),
        each optionally followed by an indented block quote with the
        snippet. Returns ``""`` when *sources* is empty or ``None``.
    """
    if not sources:
        return ""

    lines = ["", "### 📚 Quellen (verwendete Dokumentstellen):"]

    for entry in sources:
        # NOTE(review): id and source are rendered back to back without a
        # separator (e.g. "Quelle 1<source>") — confirm this is intended.
        title = f"Quelle {entry['id']}{entry['source']}"

        page = entry.get("page")
        if page:
            title += f", Seite {page}"

        url = entry.get("url")
        lines.append(f"- [{title}]({url})" if url else f"- {title}")

        snippet = entry.get("snippet")
        if snippet:
            # Collapse all whitespace runs: a newline or blank line inside
            # the snippet would otherwise end the indented block quote and
            # spill the remaining text into the surrounding Markdown.
            flat_snippet = " ".join(str(snippet).split())
            if flat_snippet:
                lines.append(f"  > {flat_snippet}")

    return "\n".join(lines)


# ================= TEXT CHATBOT ========================

def chatbot_text(user_message, history):
    """Answer a typed question and append the exchange to the chat history.

    Args:
        user_message: Text entered by the user; may be empty or ``None``.
        history: Current chat history as a list of
            ``{"role": ..., "content": ...}`` dicts.

    Returns:
        A ``(history, "")`` tuple: the (possibly extended) history and an
        empty string, which the UI writes back into the text box.
    """
    # Nothing to ask. The strip() check also rejects whitespace-only input,
    # which would otherwise trigger a full retrieval + LLM call for a blank
    # question.
    if not user_message or not user_message.strip():
        return history, ""

    answer_text, sources = answer(
        question=user_message,
        retriever=_retriever,
        chat_model=_llm,
    )

    # The assistant message is the answer followed by the Markdown source list.
    quellen_block = format_sources_markdown(sources)
    bot_msg = answer_text + "\n\n" + quellen_block

    # Build a new list instead of mutating the caller's history in place.
    history = history + [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": bot_msg},
    ]

    return history, ""


# ================= VOICE CHATBOT =======================

def chatbot_voice(audio_path, history):
    """Answer a spoken question and return the reply as text and audio.

    Args:
        audio_path: Path of the recorded audio file, or ``None``/empty when
            nothing was recorded.
        history: Current chat history as a list of
            ``{"role": ..., "content": ...}`` dicts.

    Returns:
        A ``(history, audio, "")`` tuple: the (possibly extended) history,
        the result of ``synthesize_speech`` for the assistant message (or
        ``None`` when there was nothing to answer), and an empty string for
        the text box.
    """
    # The button can be clicked without a recording; skip the transcriber
    # entirely instead of handing it a missing path.
    if not audio_path:
        return history, None, ""

    text = transcribe_audio(audio_path)
    if not text:
        return history, None, ""

    history = history + [{"role": "user", "content": text}]

    answer_text, sources = answer(
        question=text,
        retriever=_retriever,
        chat_model=_llm,
    )

    # The assistant message is the answer followed by the Markdown source list.
    quellen_block = format_sources_markdown(sources)
    bot_msg = answer_text + "\n\n" + quellen_block

    history = history + [{"role": "assistant", "content": bot_msg}]

    audio = synthesize_speech(bot_msg)
    return history, audio, ""


def read_last_answer(history):
    """Synthesize speech for the most recent assistant message.

    Args:
        history: Chat history as a list of ``{"role": ..., "content": ...}``
            dicts; may be empty or ``None``.

    Returns:
        The result of ``synthesize_speech`` for the newest assistant
        message, or ``None`` when the history is empty or contains no
        assistant message.
    """
    if not history:
        return None

    # Walk backwards and stop at the first assistant turn.
    newest_reply = next(
        (turn for turn in reversed(history) if turn["role"] == "assistant"),
        None,
    )
    if newest_reply is None:
        return None
    return synthesize_speech(newest_reply["content"])


# ================= UI (Gradio) =========================

with gr.Blocks(title="Prüfungsrechts-Chatbot (Supabase + Ollama)") as demo:
    # Page header.
    gr.Markdown("# 🧑‍⚖️ Prüfungsrechts-Chatbot (Supabase RAG, Ollama)")
    gr.Markdown("Fragen zum Prüfungsrecht? Text oder Mikrofon möglich.")

    # ---------- LAYOUT ----------
    # Components are created first (creation order determines the layout);
    # the event handlers are attached afterwards, further down.
    with gr.Row():

        # Left column: chat window, text input, voice input/output, buttons.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(type="messages", label="Chat", height=550)

            msg = gr.Textbox(
                label="Frage eingeben",
                placeholder="Stelle deine Frage zum Prüfungsrecht …",
                autofocus=True,
            )
            send_btn = gr.Button("Senden (Text)")

            gr.Markdown("### 🎙️ Spracheingabe")
            voice_in = gr.Audio(sources=["microphone"], type="filepath")
            voice_out = gr.Audio(label="Vorgelesene Antwort", type="numpy")

            send_voice_btn = gr.Button("Sprechen & Senden")
            read_btn = gr.Button("Antwort erneut vorlesen")
            clear_btn = gr.Button("Chat löschen")

        # Right column: the two reference documents embedded as iframes.
        with gr.Column(scale=1):
            gr.Markdown("### 📄 Prüfungsordnung (PDF)")
            gr.HTML(
                f"""
                <iframe src="{PDF_URL}"
                        style="width:100%; height:330px; border:none;">
                </iframe>
                """
            )

            gr.Markdown("### 📘 Hochschulgesetz NRW (Paragraph-Viewer)")
            gr.HTML(
                f"""
                <iframe src="{HG_HTML_URL}"
                        style="width:100%; height:330px; border:none;">
                </iframe>
                """
            )

    # ---------- EVENT WIRING ----------
    # Pressing Enter in the text box and clicking the send button run the
    # same handler; its second output resets the text box.
    msg.submit(fn=chatbot_text, inputs=[msg, chatbot], outputs=[chatbot, msg])
    send_btn.click(fn=chatbot_text, inputs=[msg, chatbot], outputs=[chatbot, msg])

    # Voice question: updates the chat, the audio player and the text box.
    send_voice_btn.click(
        fn=chatbot_voice,
        inputs=[voice_in, chatbot],
        outputs=[chatbot, voice_out, msg],
    )

    # Re-read the latest assistant message into the audio player.
    read_btn.click(fn=read_last_answer, inputs=[chatbot], outputs=[voice_out])

    # Reset the chat to an empty history.
    clear_btn.click(fn=lambda: [], inputs=None, outputs=chatbot)

if __name__ == "__main__":
    # Only start the server when this file is run as a script: enable the
    # queue on the Blocks app, then launch it with the options below.
    launch_options = {"ssr_mode": False, "show_error": True}
    demo.queue().launch(**launch_options)