Nguyen5 committed on
Commit
c151194
·
1 Parent(s): 0fff3f2
Files changed (1) hide show
  1. app.py +71 -59
app.py CHANGED
@@ -1,44 +1,52 @@
1
- # app.py – Prüfungsrechts-Chatbot mit OpenAI (Supabase RAG)
 
 
2
 
3
  import gradio as gr
 
 
4
 
5
- from load_documents import load_documents, PDF_URL, HG_HTML_URL
6
  from split_documents import split_documents
7
  from vectorstore import build_vectorstore
8
  from retriever import get_retriever
9
  from llm import load_llm
10
- from rag_pipeline import answer
 
11
  from speech_io import transcribe_audio, synthesize_speech
12
 
13
  # =====================================================
14
- # INITIALISIERUNG (beim Start der Space einmalig)
15
  # =====================================================
16
 
17
- print("🔹 Lade Dokumente aus Supabase …")
18
  _docs = load_documents()
19
 
20
- print("🔹 Splitte Dokumente ")
21
  _chunks = split_documents(_docs)
22
 
23
- print("🔹 Baue VectorStore ")
24
  _vs = build_vectorstore(_chunks)
25
 
26
- print("🔹 Erzeuge Retriever ")
27
  _retriever = get_retriever(_vs)
28
 
29
- print("🔹 Lade OpenAI LLM ")
30
  _llm = load_llm()
31
 
 
 
 
 
32
  # =====================================================
33
- # Quellen formatieren – Markdown im Chat
34
  # =====================================================
35
 
36
  def format_sources_markdown(sources):
37
  if not sources:
38
  return ""
39
 
40
- lines = ["", "### 📚 Quellen (verwendete Dokumentstellen):"]
41
-
42
  for s in sources:
43
  sid = s["id"]
44
  src = s["source"]
@@ -46,17 +54,18 @@ def format_sources_markdown(sources):
46
  url = s["url"]
47
  snippet = s["snippet"]
48
 
49
- if page:
50
- title = f"Quelle {sid} – {src}, Seite {page}"
51
- else:
52
- title = f"Quelle {sid} – {src}"
53
 
54
  if url:
55
  base = f"- [{title}]({url})"
56
  else:
57
  base = f"- {title}"
58
 
 
 
 
59
  lines.append(base)
 
60
  if snippet:
61
  lines.append(f" > {snippet}")
62
 
@@ -77,11 +86,10 @@ def chatbot_text(user_message, history):
77
  )
78
 
79
  quellen_block = format_sources_markdown(sources)
80
- bot_msg = answer_text + "\n\n" + quellen_block
81
 
82
  history = history + [
83
  {"role": "user", "content": user_message},
84
- {"role": "assistant", "content": bot_msg},
85
  ]
86
 
87
  return history, ""
@@ -91,29 +99,32 @@ def chatbot_text(user_message, history):
91
  # =====================================================
92
 
93
  def chatbot_voice(audio_path, history):
 
94
  text = transcribe_audio(audio_path)
95
  if not text:
96
  return history, None, ""
97
 
 
98
  history = history + [{"role": "user", "content": text}]
99
 
 
100
  answer_text, sources = answer(
101
  question=text,
102
  retriever=_retriever,
103
  chat_model=_llm,
104
  )
105
-
106
  quellen_block = format_sources_markdown(sources)
107
- bot_msg = answer_text + "\n\n" + quellen_block
108
 
 
109
  history = history + [{"role": "assistant", "content": bot_msg}]
110
 
 
111
  audio = synthesize_speech(bot_msg)
112
 
113
  return history, audio, ""
114
 
115
  # =====================================================
116
- # Wieder-Vorlesen der letzten Antwort
117
  # =====================================================
118
 
119
  def read_last_answer(history):
@@ -123,78 +134,79 @@ def read_last_answer(history):
123
  for msg in reversed(history):
124
  if msg["role"] == "assistant":
125
  return synthesize_speech(msg["content"])
 
126
  return None
127
 
128
  # =====================================================
129
- # UI (Gradio)
130
  # =====================================================
131
 
132
- with gr.Blocks(title="Prüfungsrechts-Chatbot (Supabase + OpenAI)") as demo:
133
-
134
- gr.Markdown("# 🧑‍⚖️ Prüfungsrechts-Chatbot (Supabase RAG + OpenAI)")
135
  gr.Markdown(
136
- "Fragen zum Prüfungsrecht (Prüfungsordnung + Hochschulgesetz NRW). "
137
- "Antworten mit Quellenangabe und Direktlinks."
 
138
  )
139
 
140
  with gr.Row():
141
-
142
- # ---------- LINKER BEREICH: CHAT ----------
143
  with gr.Column(scale=2):
144
-
145
- chatbot = gr.Chatbot(
146
- label="Chat",
147
- height=550,
148
- )
149
 
150
  msg = gr.Textbox(
151
  label="Frage eingeben",
152
  placeholder="Stelle deine Frage zum Prüfungsrecht …",
153
- autofocus=True,
154
  )
155
- msg.submit(chatbot_text, [msg, chatbot], [chatbot, msg])
 
 
 
 
 
 
156
 
157
  send_btn = gr.Button("Senden (Text)")
158
- send_btn.click(chatbot_text, [msg, chatbot], [chatbot, msg])
 
 
 
 
159
 
 
160
  gr.Markdown("### 🎙️ Spracheingabe")
161
-
162
  voice_in = gr.Audio(sources=["microphone"], type="filepath")
163
  voice_out = gr.Audio(label="Vorgelesene Antwort", type="numpy")
164
 
165
- send_voice_btn = gr.Button("Sprechen & Senden")
166
- send_voice_btn.click(
167
  chatbot_voice,
168
  [voice_in, chatbot],
169
- [chatbot, voice_out, msg],
170
  )
171
 
172
- read_btn = gr.Button("Antwort erneut vorlesen")
173
- read_btn.click(read_last_answer, [chatbot], [voice_out])
 
 
 
 
174
 
175
- clear_btn = gr.Button("Chat löschen")
176
  clear_btn.click(lambda: [], None, chatbot)
177
 
178
- # ---------- RECHTER BEREICH: VIEWER ----------
179
- with gr.Column(scale=1):
 
180
 
 
181
  gr.Markdown("### 📄 Prüfungsordnung (PDF)")
182
- gr.HTML(
183
- f"""
184
- <iframe src="{PDF_URL}"
185
- style="width:100%; height:330px; border:none;">
186
- </iframe>
187
- """
188
- )
189
 
190
- gr.Markdown("### 📘 Hochschulgesetz NRW (Paragraph-Viewer)")
191
  gr.HTML(
192
- f"""
193
- <iframe src="{HG_HTML_URL}"
194
- style="width:100%; height:330px; border:none;">
195
- </iframe>
196
- """
197
  )
198
 
 
199
  if __name__ == "__main__":
200
  demo.queue().launch(ssr_mode=False, show_error=True)
 
1
+ - app.py:
2
+ # app.py – Prüfungsrechts-Chatbot (RAG + Sprachmodus)
3
+ # Version 26.11 – ohne Modi, stabil für Text + Voice
4
 
5
  import gradio as gr
6
+ from gradio_pdf import PDF
7
+ from huggingface_hub import hf_hub_download
8
 
9
+ from load_documents import load_documents, DATASET, PDF_FILE, HTML_FILE
10
  from split_documents import split_documents
11
  from vectorstore import build_vectorstore
12
  from retriever import get_retriever
13
  from llm import load_llm
14
+ from rag_pipeline import answer, PDF_BASE_URL, LAW_URL
15
+
16
  from speech_io import transcribe_audio, synthesize_speech
17
 
18
  # =====================================================
19
+ # INITIALISIERUNG (global)
20
  # =====================================================
21
 
22
+ print("🔹 Lade Dokumente ...")
23
  _docs = load_documents()
24
 
25
+ print("🔹 Splitte Dokumente ...")
26
  _chunks = split_documents(_docs)
27
 
28
+ print("🔹 Baue VectorStore (FAISS) ...")
29
  _vs = build_vectorstore(_chunks)
30
 
31
+ print("🔹 Erzeuge Retriever ...")
32
  _retriever = get_retriever(_vs)
33
 
34
+ print("🔹 Lade LLM ...")
35
  _llm = load_llm()
36
 
37
+ print("🔹 Lade Dateien für Viewer …")
38
+ _pdf_path = hf_hub_download(DATASET, PDF_FILE, repo_type="dataset")
39
+ _html_path = hf_hub_download(DATASET, HTML_FILE, repo_type="dataset")
40
+
41
  # =====================================================
42
+ # Quellen formatieren – Markdown für Chat
43
  # =====================================================
44
 
45
  def format_sources_markdown(sources):
46
  if not sources:
47
  return ""
48
 
49
+ lines = ["", "**📚 Quellen (genutzte Dokumentstellen):**"]
 
50
  for s in sources:
51
  sid = s["id"]
52
  src = s["source"]
 
54
  url = s["url"]
55
  snippet = s["snippet"]
56
 
57
+ title = f"Quelle {sid} – {src}"
 
 
 
58
 
59
  if url:
60
  base = f"- [{title}]({url})"
61
  else:
62
  base = f"- {title}"
63
 
64
+ if page and "Prüfungsordnung" in src:
65
+ base += f", Seite {page}"
66
+
67
  lines.append(base)
68
+
69
  if snippet:
70
  lines.append(f" > {snippet}")
71
 
 
86
  )
87
 
88
  quellen_block = format_sources_markdown(sources)
 
89
 
90
  history = history + [
91
  {"role": "user", "content": user_message},
92
+ {"role": "assistant", "content": answer_text + quellen_block},
93
  ]
94
 
95
  return history, ""
 
99
  # =====================================================
100
 
101
  def chatbot_voice(audio_path, history):
102
+ # 1. Speech → Text
103
  text = transcribe_audio(audio_path)
104
  if not text:
105
  return history, None, ""
106
 
107
+ # Append the transcribed user message to the chat history
108
  history = history + [{"role": "user", "content": text}]
109
 
110
+ # 2. Generate the answer via RAG
111
  answer_text, sources = answer(
112
  question=text,
113
  retriever=_retriever,
114
  chat_model=_llm,
115
  )
 
116
  quellen_block = format_sources_markdown(sources)
 
117
 
118
+ bot_msg = answer_text + quellen_block
119
  history = history + [{"role": "assistant", "content": bot_msg}]
120
 
121
+ # 3. Text → Speech
122
  audio = synthesize_speech(bot_msg)
123
 
124
  return history, audio, ""
125
 
126
  # =====================================================
127
+ # LAST ANSWER TTS
128
  # =====================================================
129
 
130
  def read_last_answer(history):
 
134
  for msg in reversed(history):
135
  if msg["role"] == "assistant":
136
  return synthesize_speech(msg["content"])
137
+
138
  return None
139
 
140
  # =====================================================
141
+ # UI – GRADIO
142
  # =====================================================
143
 
144
+ with gr.Blocks(title="Prüfungsrechts-Chatbot (RAG + Sprache)") as demo:
145
+ gr.Markdown("# 🧑‍⚖️ Prüfungsrechts-Chatbot")
 
146
  gr.Markdown(
147
+ "Dieser Chatbot beantwortet Fragen **ausschließlich** aus der "
148
+ "Prüfungsordnung (PDF) und dem Hochschulgesetz NRW (Website). "
149
+ "Du kannst Text eingeben oder direkt ins Mikrofon sprechen."
150
  )
151
 
152
  with gr.Row():
 
 
153
  with gr.Column(scale=2):
154
+ chatbot = gr.Chatbot(type="messages", label="Chat", height=500)
 
 
 
 
155
 
156
  msg = gr.Textbox(
157
  label="Frage eingeben",
158
  placeholder="Stelle deine Frage zum Prüfungsrecht …",
 
159
  )
160
+
161
+ # TEXT SENDEN
162
+ msg.submit(
163
+ chatbot_text,
164
+ [msg, chatbot],
165
+ [chatbot, msg]
166
+ )
167
 
168
  send_btn = gr.Button("Senden (Text)")
169
+ send_btn.click(
170
+ chatbot_text,
171
+ [msg, chatbot],
172
+ [chatbot, msg]
173
+ )
174
 
175
+ # SPRACHEINGABE
176
  gr.Markdown("### 🎙️ Spracheingabe")
 
177
  voice_in = gr.Audio(sources=["microphone"], type="filepath")
178
  voice_out = gr.Audio(label="Vorgelesene Antwort", type="numpy")
179
 
180
+ voice_btn = gr.Button("Sprechen & senden")
181
+ voice_btn.click(
182
  chatbot_voice,
183
  [voice_in, chatbot],
184
+ [chatbot, voice_out, msg]
185
  )
186
 
187
+ read_btn = gr.Button("🔁 Antwort erneut vorlesen")
188
+ read_btn.click(
189
+ read_last_answer,
190
+ [chatbot],
191
+ [voice_out]
192
+ )
193
 
194
+ clear_btn = gr.Button("Chat zurücksetzen")
195
  clear_btn.click(lambda: [], None, chatbot)
196
 
197
+ # =====================
198
+ # RECHTE SPALTE: Viewer
199
+ # =====================
200
 
201
+ with gr.Column(scale=1):
202
  gr.Markdown("### 📄 Prüfungsordnung (PDF)")
203
+ PDF(_pdf_path, height=350)
 
 
 
 
 
 
204
 
205
+ gr.Markdown("### 📘 Hochschulgesetz NRW (Website)")
206
  gr.HTML(
207
+ f'<iframe src="{LAW_URL}" style="width:100%;height:350px;border:none;"></iframe>'
 
 
 
 
208
  )
209
 
210
+
211
  if __name__ == "__main__":
212
  demo.queue().launch(ssr_mode=False, show_error=True)