Nguyen5 committed on
Commit
7c86ca3
·
1 Parent(s): b97c143
Files changed (1) hide show
  1. app.py +145 -222
app.py CHANGED
@@ -1,10 +1,8 @@
1
- # app.py – Prüfungsrechts-Chatbot (RAG + Sprache, UI kiểu ChatGPT)
2
- #
3
- import os
4
  import gradio as gr
5
- from gradio_pdf import PDF
6
 
7
- from load_documents import load_all_documents
8
  from split_documents import split_documents
9
  from vectorstore import build_vectorstore
10
  from retriever import get_retriever
@@ -12,267 +10,192 @@ from llm import load_llm
12
  from rag_pipeline import answer
13
  from speech_io import transcribe_audio, synthesize_speech
14
 
15
- ASR_LANGUAGE_HINT = os.getenv("ASR_LANGUAGE", "auto")
16
-
17
  # =====================================================
18
- # INITIALISIERUNG (global)
19
  # =====================================================
20
 
21
- print("📚 Lade Dokumente…")
22
- docs = load_all_documents()
23
-
24
- print("🔪 Splitte Dokumente…")
25
- chunks = split_documents(docs)
26
 
27
- print("🔍 Erstelle VectorStore…")
28
- vs = build_vectorstore(chunks)
29
 
30
- print("🔎 Erzeuge Retriever…")
31
- retriever = get_retriever(vs)
32
 
33
- print("🤖 Lade LLM…")
34
- llm = load_llm()
35
 
36
- # Dokument-Metadaten für UI
37
- pdf_meta = next(d.metadata for d in docs if d.metadata.get("type") == "pdf")
38
- hg_meta = next(d.metadata for d in docs if d.metadata.get("type") == "hg")
39
- hg_url = hg_meta.get("viewer_url")
40
 
41
  # =====================================================
42
- # Quellen formatieren – Markdown für Chat
43
  # =====================================================
44
- def format_sources(src):
45
- if not src:
 
46
  return ""
47
 
48
- out = ["", "## 📚 Quellen"]
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
- for s in src:
51
- line = f"- [{s['source']}]({s['url']})"
52
- if s.get("page") is not None:
53
- line += f" (Seite {s['page']})"
54
- out.append(line)
55
 
56
- return "\n".join(out)
 
 
 
 
57
 
58
  # =====================================================
59
- # CORE CHAT-FUNKTION (Text + separates Mikro-Audio)
60
  # =====================================================
61
- def chat_fn(text_input, audio_path, history, lang_sel, auto_tts):
62
- """
63
- text_input: Textbox-Inhalt (str)
64
- audio_path: Pfad zu WAV/FLAC vom Mikro (gr.Audio, type="filepath")
65
- history: Liste von OpenAI-ähnlichen Messages (role, content)
66
- """
67
- text = (text_input or "").strip()
68
-
69
- if audio_path and not text:
70
- spoken = transcribe_audio(audio_path, language=lang_sel)
71
- text = spoken
72
 
73
- if not text:
74
- # Nichts zu tun
75
- return history, None, "", None
76
 
77
- # 2) RAG-Antwort berechnen
78
- ans, sources = answer(text, retriever, llm)
79
- bot_msg = ans + format_sources(sources)
 
 
 
 
 
80
 
81
- # 3) History aktualisieren (ChatGPT-Style)
82
  history = history + [
83
- {"role": "user", "content": text},
84
  {"role": "assistant", "content": bot_msg},
85
  ]
86
 
87
- tts_audio = synthesize_speech(bot_msg) if auto_tts else None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
- # 5) Input-Felder leeren
90
- return history, tts_audio, "", None, text
91
 
92
  # =====================================================
93
- # LAST ANSWER TTS (für Button "Antwort erneut vorlesen")
94
  # =====================================================
 
95
  def read_last_answer(history):
96
  if not history:
97
  return None
98
 
99
  for msg in reversed(history):
100
- if msg.get("role") == "assistant":
101
- return synthesize_speech(msg.get("content", ""))
102
-
103
  return None
104
 
105
  # =====================================================
106
- # UI – GRADIO
107
  # =====================================================
108
- with gr.Blocks(title="Prüfungsrechts-Chatbot (RAG + Sprache)") as demo:
109
- gr.HTML(
110
- """
111
- <style>
112
- html, body {height: auto !important; overflow-y: auto !important;}
113
- .gradio-container {max-width: 960px; margin: 0 auto; padding: 12px;}
114
- * { box-sizing: border-box; }
115
- #chat-input-row, .gradio-container, .gradio-container * { max-width: 100%; }
116
- #chat-input-row > * { flex: 0 0 auto; }
117
- #chat-textbox { flex: 1 1 auto; width: 100%; }
118
- .status { font-size: 12px; color: #555; }
119
- @media (max-width: 768px) {
120
- .gradio-container {max-width: 100%; padding: 8px;}
121
- #sidebar {display: none;}
122
- #chat-input-row {flex-direction: column; gap: 6px; border-radius: 16px;}
123
- #chat-textbox textarea {min-height: 48px;}
124
- }
125
-
126
- /* ChatGPT-like Bottom Bar */
127
- #chat-input-row {
128
- align-items: center;
129
- gap: 8px;
130
- padding: 8px 12px;
131
- border: 1px solid rgba(0,0,0,0.08);
132
- border-radius: 9999px;
133
- background: var(--background-primary);
134
- box-shadow: 0 2px 6px rgba(0,0,0,0.06);
135
- }
136
-
137
- /* Textbox inside pill */
138
- #chat-textbox textarea {
139
- min-height: 42px;
140
- max-height: 120px;
141
- border: none !important;
142
- background: transparent !important;
143
- box-shadow: none !important;
144
- resize: none;
145
- padding-left: 0;
146
- }
147
-
148
- /* Icon buttons (plus, mic, send) */
149
- .icon-btn, .compact-btn {
150
- width: 40px;
151
- height: 40px;
152
- border-radius: 9999px !important;
153
- display: inline-flex;
154
- align-items: center;
155
- justify-content: center;
156
- border: 1px solid rgba(0,0,0,0.08) !important;
157
- background: #f7f7f8 !important;
158
- box-shadow: none !important;
159
- }
160
- .send-btn {
161
- background: #111 !important;
162
- color: #fff !important;
163
- border-color: #111 !important;
164
- }
165
- /* Make audio mic compact */
166
- #chat-audio {min-width: 40px;}
167
- #chat-audio .wrap, #chat-audio .audio-wrap, #chat-audio .audio-controls {
168
- max-width: 40px;
169
- }
170
- </style>
171
- """
172
- )
173
- gr.Markdown("# 🧑‍⚖️ Prüfungsrechts-Chatbot")
174
  gr.Markdown(
175
- "Dieser Chatbot beantwortet Fragen **ausschließlich** aus der "
176
- "Prüfungsordnung (PDF) und dem Hochschulgesetz NRW. "
177
- "Sie können Text eingeben oder direkt ins Mikrofon sprechen."
178
  )
179
 
180
- with gr.Column():
181
- chatbot = gr.Chatbot(label="Chat", height=420)
182
- spoken_out = gr.Textbox(label="Gesprochener Text", interactive=False)
183
- status_md = gr.Markdown("Bereit", elem_id="status")
184
-
185
- # Audio-Ausgabe (TTS)
186
- voice_out = gr.Audio(label="Vorgelesene Antwort", type="numpy", interactive=False)
187
-
188
- # Eingabezeile à la ChatGPT: Plus + Text + Mikro + Senden
189
- with gr.Row(elem_id="chat-input-row"):
190
- attach_btn = gr.UploadButton("+", file_types=["file"], file_count="multiple", elem_classes=["icon-btn"], scale=1)
191
- chat_text = gr.Textbox(
192
- elem_id="chat-textbox",
193
- label=None,
194
- placeholder="Stelle irgendeine Frage — Enter sendet, Shift+Enter neue Zeile",
195
- lines=1,
196
- max_lines=6,
197
  autofocus=True,
198
- scale=8,
199
  )
200
- chat_audio = gr.Audio(
201
- elem_id="chat-audio",
202
- label=None,
203
- sources=["microphone"],
204
- type="filepath",
205
- format="wav",
206
- interactive=True,
207
- scale=1,
208
- show_label=False,
 
 
 
 
 
 
209
  )
210
- send_btn = gr.Button("➤", elem_classes=["compact-btn", "send-btn"], scale=1)
211
- lang_dd = gr.Dropdown(choices=["auto","de","en","vi"], value="auto", label="Sprache")
212
- mode_radio = gr.Radio(choices=["Audio","Text"], value="Audio", label="Eingabemodus")
213
- record_player = gr.Audio(label="Letzte Aufnahme", type="filepath", interactive=False)
214
- stop_rec_btn = gr.Button(" Aufnahme löschen")
215
- auto_tts_chk = gr.Checkbox(label="Antwort vorlesen", value=False)
216
-
217
- chat_text.submit(
218
- chat_fn,
219
- [chat_text, chat_audio, chatbot, lang_dd, auto_tts_chk],
220
- [chatbot, voice_out, chat_text, chat_audio, spoken_out],
221
- )
222
- def transcribe_to_textbox(audio_path, lang):
223
- status = "🎙️ Aufnahme beendet – verarbeite Audio…"
224
- s = transcribe_audio(audio_path, language=lang)
225
- status = "✅ Verarbeitung abgeschlossen"
226
- return s, s, audio_path, status
227
- chat_audio.change(
228
- transcribe_to_textbox,
229
- [chat_audio, lang_dd],
230
- [chat_text, spoken_out, record_player, status_md],
231
- )
232
- send_btn.click(
233
- chat_fn,
234
- [chat_text, chat_audio, chatbot, lang_dd, auto_tts_chk],
235
- [chatbot, voice_out, chat_text, chat_audio, spoken_out],
236
- )
237
- def toggle_mode(m):
238
- status = "Audio-Modus aktiv" if m=="Audio" else "Text-Modus aktiv"
239
- return gr.update(visible=(m=="Text")), gr.update(visible=(m=="Audio")), status
240
- mode_radio.change(toggle_mode, [mode_radio], [chat_text, chat_audio, status_md])
241
- def clear_record(p):
242
- try:
243
- import os
244
- if isinstance(p, str) and os.path.exists(p):
245
- os.remove(p)
246
- except:
247
- pass
248
- return None
249
- stop_rec_btn.click(clear_record, [record_player], [record_player])
250
-
251
- # Button: Antwort erneut vorlesen
252
- read_btn = gr.Button("🔁 Antwort erneut vorlesen")
253
- read_btn.click(
254
- read_last_answer,
255
- [chatbot],
256
- [voice_out],
257
- )
258
-
259
- # Chat löschen
260
- clear_btn = gr.Button("Chat zurücksetzen")
261
- clear_btn.click(
262
- lambda: ([], None, "", None, ""),
263
- None,
264
- [chatbot, voice_out, chat_text, chat_audio, spoken_out],
265
- )
266
-
267
- # Quellen & Dokumente kompakt unterhalb
268
- with gr.Accordion("Quellen & Dokumente", open=False):
269
  gr.Markdown("### 📄 Prüfungsordnung (PDF)")
270
- PDF(pdf_meta["pdf_url"], height=250)
271
- gr.Markdown("### 📘 Hochschulgesetz NRW")
272
- if isinstance(hg_url, str) and hg_url.startswith("http"):
273
- gr.Markdown(f"[Im Viewer öffnen]({hg_url})")
274
- else:
275
- gr.Markdown("Viewer-Link nicht verfügbar.")
 
 
 
 
 
 
 
 
 
 
276
 
277
  if __name__ == "__main__":
278
  demo.queue().launch(ssr_mode=False, show_error=True)
 
1
+ # app.py – Prüfungsrechts-Chatbot mit OpenAI (Supabase RAG)
2
+
 
3
  import gradio as gr
 
4
 
5
+ from load_documents import load_documents, PDF_URL, HG_HTML_URL
6
  from split_documents import split_documents
7
  from vectorstore import build_vectorstore
8
  from retriever import get_retriever
 
10
  from rag_pipeline import answer
11
  from speech_io import transcribe_audio, synthesize_speech
12
 
 
 
13
  # =====================================================
14
+ # INITIALISIERUNG (beim Start der Space einmalig)
15
  # =====================================================
16
 
17
+ print("🔹 Lade Dokumente aus Supabase …")
18
+ _docs = load_documents()
 
 
 
19
 
20
+ print("🔹 Splitte Dokumente …")
21
+ _chunks = split_documents(_docs)
22
 
23
+ print("🔹 Baue VectorStore …")
24
+ _vs = build_vectorstore(_chunks)
25
 
26
+ print("🔹 Erzeuge Retriever …")
27
+ _retriever = get_retriever(_vs)
28
 
29
+ print("🔹 Lade OpenAI LLM …")
30
+ _llm = load_llm()
 
 
31
 
32
  # =====================================================
33
+ # Quellen formatieren – Markdown im Chat
34
  # =====================================================
35
+
36
def format_sources_markdown(sources):
    """Render the cited sources as a Markdown block for the chat message.

    Args:
        sources: Iterable of dicts describing the cited passages. The keys
            read here are ``id``, ``source``, ``page``, ``url`` and
            ``snippet``; each of them may be missing or empty.

    Returns:
        A Markdown string that starts with an empty line and a heading,
        followed by one bullet per source (rendered as a link when a URL is
        present) and, if available, the snippet as a quoted line.
        Returns "" when ``sources`` is empty or None.
    """
    if not sources:
        return ""

    lines = ["", "### 📚 Quellen (verwendete Dokumentstellen):"]

    for position, s in enumerate(sources, start=1):
        # Tolerate incomplete source dicts instead of raising KeyError;
        # fall back to the running position when no explicit id is given.
        sid = s.get("id", position)
        src = s.get("source")
        page = s.get("page")
        url = s.get("url")
        snippet = s.get("snippet")

        title = f"Quelle {sid}"
        if src:
            title += f" – {src}"
        # Compare against None/"" rather than truthiness so that a page
        # value of 0 is still shown.
        if page is not None and page != "":
            title += f", Seite {page}"

        lines.append(f"- [{title}]({url})" if url else f"- {title}")

        if snippet:
            # Collapse whitespace/newlines so the whole snippet stays on the
            # single quoted line.
            flat = " ".join(str(snippet).split())
            if flat:
                lines.append(f" > {flat}")

    return "\n".join(lines)
64
 
65
  # =====================================================
66
+ # TEXT CHATBOT
67
  # =====================================================
 
 
 
 
 
 
 
 
 
 
 
68
 
69
def chatbot_text(user_message, history):
    """Answer a typed question and append the exchange to the chat history.

    Args:
        user_message: Text from the input textbox; may be None or blank.
        history: List of ``{"role", "content"}`` message dicts shown in the
            chatbot component; may be None.

    Returns:
        A ``(history, "")`` tuple: the updated message list and an empty
        string that clears the input textbox.
    """
    # Work on a copy and tolerate a missing history.
    history = list(history or [])

    # Ignore empty and whitespace-only input instead of querying the LLM.
    question = (user_message or "").strip()
    if not question:
        return history, ""

    answer_text, sources = answer(
        question=question,
        retriever=_retriever,
        chat_model=_llm,
    )

    quellen_block = format_sources_markdown(sources)
    # Only append the sources block when there is one, so the message does
    # not end in dangling blank lines.
    bot_msg = answer_text + "\n\n" + quellen_block if quellen_block else answer_text

    history = history + [
        {"role": "user", "content": question},
        {"role": "assistant", "content": bot_msg},
    ]

    return history, ""
88
+
89
+ # =====================================================
90
+ # VOICE CHATBOT
91
+ # =====================================================
92
+
93
def chatbot_voice(audio_path, history):
    """Transcribe a recording, answer it, and synthesize the reply as audio.

    Args:
        audio_path: File path of the microphone recording, or None/"" when
            nothing was recorded.
        history: List of ``{"role", "content"}`` message dicts shown in the
            chatbot component; may be None.

    Returns:
        A ``(history, audio, "")`` tuple: the updated message list, the
        value returned by ``synthesize_speech`` (None when there was nothing
        to answer), and an empty string for the text input.
    """
    # Work on a copy and tolerate a missing history.
    history = list(history or [])

    # The button can be clicked without a recording; skip transcription then.
    if not audio_path:
        return history, None, ""

    text = transcribe_audio(audio_path)
    if not text:
        return history, None, ""

    history = history + [{"role": "user", "content": text}]

    answer_text, sources = answer(
        question=text,
        retriever=_retriever,
        chat_model=_llm,
    )

    quellen_block = format_sources_markdown(sources)
    # Only append the sources block when there is one, so the message does
    # not end in dangling blank lines.
    bot_msg = answer_text + "\n\n" + quellen_block if quellen_block else answer_text

    history = history + [{"role": "assistant", "content": bot_msg}]

    # NOTE(review): the spoken text includes the Markdown sources block;
    # confirm whether only answer_text should be read aloud.
    audio = synthesize_speech(bot_msg)

    return history, audio, ""
 
114
 
115
  # =====================================================
116
+ # Wieder-Vorlesen der letzten Antwort
117
  # =====================================================
118
def read_last_answer(history):
    """Synthesize speech for the most recent assistant message with text.

    Args:
        history: List of ``{"role", "content"}`` message dicts from the
            chatbot component; may be None or empty.

    Returns:
        The value returned by ``synthesize_speech`` for the newest assistant
        message that has non-blank string content, or None when there is no
        such message.
    """
    if not history:
        return None

    for msg in reversed(history):
        # Skip entries that are not message dicts instead of raising.
        if not isinstance(msg, dict):
            continue
        if msg.get("role") != "assistant":
            continue
        content = msg.get("content")
        # Only hand real, non-blank text to the speech synthesizer.
        if isinstance(content, str) and content.strip():
            return synthesize_speech(content)

    return None
127
 
128
  # =====================================================
129
+ # UI (Gradio)
130
  # =====================================================
131
+
132
+ with gr.Blocks(title="Prüfungsrechts-Chatbot (Supabase + OpenAI)") as demo:
133
+
134
+ gr.Markdown("# 🧑‍⚖️ Prüfungsrechts-Chatbot (Supabase RAG + OpenAI)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  gr.Markdown(
136
+ "Fragen zum Prüfungsrecht (Prüfungsordnung + Hochschulgesetz NRW). "
137
+ "Antworten mit Quellenangabe und Direktlinks."
 
138
  )
139
 
140
+ with gr.Row():
141
+
142
+ # ---------- LINKER BEREICH: CHAT ----------
143
+ with gr.Column(scale=2):
144
+
145
+ chatbot = gr.Chatbot(
146
+ type="messages",
147
+ label="Chat",
148
+ height=550,
149
+ )
150
+
151
+ msg = gr.Textbox(
152
+ label="Frage eingeben",
153
+ placeholder="Stelle deine Frage zum Prüfungsrecht …",
 
 
 
154
  autofocus=True,
 
155
  )
156
+ msg.submit(chatbot_text, [msg, chatbot], [chatbot, msg])
157
+
158
+ send_btn = gr.Button("Senden (Text)")
159
+ send_btn.click(chatbot_text, [msg, chatbot], [chatbot, msg])
160
+
161
+ gr.Markdown("### 🎙️ Spracheingabe")
162
+
163
+ voice_in = gr.Audio(sources=["microphone"], type="filepath")
164
+ voice_out = gr.Audio(label="Vorgelesene Antwort", type="numpy")
165
+
166
+ send_voice_btn = gr.Button("Sprechen & Senden")
167
+ send_voice_btn.click(
168
+ chatbot_voice,
169
+ [voice_in, chatbot],
170
+ [chatbot, voice_out, msg],
171
  )
172
+
173
+ read_btn = gr.Button("Antwort erneut vorlesen")
174
+ read_btn.click(read_last_answer, [chatbot], [voice_out])
175
+
176
+ clear_btn = gr.Button("Chat löschen")
177
+ clear_btn.click(lambda: [], None, chatbot)
178
+
179
+ # ---------- RECHTER BEREICH: VIEWER ----------
180
+ with gr.Column(scale=1):
181
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  gr.Markdown("### 📄 Prüfungsordnung (PDF)")
183
+ gr.HTML(
184
+ f"""
185
+ <iframe src="{PDF_URL}"
186
+ style="width:100%; height:330px; border:none;">
187
+ </iframe>
188
+ """
189
+ )
190
+
191
+ gr.Markdown("### 📘 Hochschulgesetz NRW (Paragraph-Viewer)")
192
+ gr.HTML(
193
+ f"""
194
+ <iframe src="{HG_HTML_URL}"
195
+ style="width:100%; height:330px; border:none;">
196
+ </iframe>
197
+ """
198
+ )
199
 
200
  if __name__ == "__main__":
201
  demo.queue().launch(ssr_mode=False, show_error=True)