Nguyen5 committed on
Commit
9534de3
·
1 Parent(s): 160e79a
Files changed (3) hide show
  1. app.py +85 -64
  2. ingest.py +49 -21
  3. rag_pipeline.py +34 -51
app.py CHANGED
@@ -1,133 +1,154 @@
1
- # app.py
2
  import os
3
- import base64
4
  import gradio as gr
5
  from openai import OpenAI
6
 
7
- from supabase_client import load_file_bytes
8
  from rag_pipeline import rag_answer
9
 
10
  client = OpenAI()
11
  BUCKET = os.environ["SUPABASE_BUCKET"]
12
 
13
- # ------------------------------------------
14
- # Public URLs để mở PDF/HTML khi nhấn Quelle
15
- # ------------------------------------------
16
- PDF_URL = f"{os.environ['SUPABASE_URL']}/storage/v1/object/public/{BUCKET}/pruefungsordnung.pdf"
17
- HG_URL = f"{os.environ['SUPABASE_URL']}/storage/v1/object/public/{BUCKET}/hochschulgesetz.html"
18
 
 
 
 
 
 
 
 
19
 
20
- # ------------------------------------------
21
- # Viewer PDF base64
22
- # ------------------------------------------
23
- def encode_pdf_src():
24
- pdf_bytes = load_file_bytes(BUCKET, "pruefungsordnung.pdf")
25
- b64 = base64.b64encode(pdf_bytes).decode("utf-8")
26
- return f"data:application/pdf;base64,{b64}"
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- # ------------------------------------------
30
- # HTML viewer
31
- # ------------------------------------------
32
- def encode_html():
33
- html_bytes = load_file_bytes(BUCKET, "hochschulgesetz.html")
34
- return html_bytes.decode("utf-8", errors="ignore")
35
 
 
36
 
37
- # ------------------------------------------
38
- # Speech-to-text FIXED
39
- # ------------------------------------------
40
- def transcribe(audio_path):
41
- if audio_path is None:
 
42
  return ""
43
  with open(audio_path, "rb") as f:
44
  result = client.audio.transcriptions.create(
45
  model="whisper-1",
46
  file=f,
47
- language="de", # ép tiếng Đức
48
- temperature=0.0 # ổn định kết quả
49
  )
50
  return (result.text or "").strip()
51
 
52
 
53
- # ------------------------------------------
54
- # MAIN CHAT FUNCTION
55
- # ------------------------------------------
56
  def chat_fn(text, audio, history):
57
  text = (text or "").strip()
58
 
59
- # 1) Ưu tiên text, không dùng audio nếu text có
60
  if text:
61
  question = text
62
  elif audio is not None:
63
  question = transcribe(audio)
64
  else:
65
- return history, "<p>Bitte Text oder Mikrofon benutzen.</p>", None
66
 
67
  if not question:
68
- return history, "<p>Spracherkennung fehlgeschlagen.</p>", None
69
 
70
- # 2) RAG
71
  answer, docs = rag_answer(question, history or [])
72
 
73
- # 3) Build Quellen (click được)
74
  html = "<ol>"
75
  for i, d in enumerate(docs):
76
- meta = d.get("metadata", {}) or {}
77
  src = meta.get("source", "?")
78
-
79
- if "Prüfungsordnung" in src:
80
- link = PDF_URL
81
- else:
82
- link = HG_URL
83
-
84
  page = meta.get("page", None)
85
  page_info = f"(Seite {page})" if page else ""
86
-
87
  snippet = (d.get("content") or "")[:200]
88
 
89
- html += f"""
90
- <li>
91
- <a href="{link}" target="_blank">
92
- <b>Quelle {i+1}: {src} {page_info}</b>
93
- </a><br>
94
- {snippet}...
95
- </li>
96
- """
 
 
 
 
 
97
  html += "</ol>"
98
 
99
- # 4) Gradio message history
100
  new_history = (history or []) + [
101
  {"role": "user", "content": question},
102
  {"role": "assistant", "content": answer},
103
  ]
104
 
105
- # Reset audio input
106
  return new_history, html, gr.update(value=None)
107
 
108
 
109
- # ------------------------------------------
110
- # UI LAYOUT
111
- # ------------------------------------------
112
  with gr.Blocks() as demo:
113
  gr.Markdown("# ⚖️ Sprachbasierter Chatbot für Prüfungsrecht")
114
 
115
  with gr.Row():
116
  with gr.Column(scale=3):
117
- chatbot = gr.Chatbot(label="Chat (RAG)")
118
- text_input = gr.Textbox(label="Text Eingabe")
119
- audio_input = gr.Audio(type="filepath", label="Spracheingabe (Mikrofon)")
 
 
 
 
 
 
120
  send_btn = gr.Button("Senden")
121
 
122
  with gr.Column(scale=2):
123
- gr.Markdown("### 📄 Prüfungsordnung PDF")
124
  gr.HTML(
125
- f"<iframe src='{encode_pdf_src()}' width='100%' height='250'></iframe>"
 
126
  )
127
 
128
- gr.Markdown("### 📜 Hochschulgesetz NRW")
129
  gr.HTML(
130
- f"<div style='overflow:auto;height:250px;'>{encode_html()}</div>"
 
131
  )
132
 
133
  sources_html = gr.HTML()
 
1
+ # app.py — UI mit klickbaren Quellen & Voice-Eingabe
2
  import os
 
3
  import gradio as gr
4
  from openai import OpenAI
5
 
6
+ from supabase_client import supabase
7
  from rag_pipeline import rag_answer
8
 
9
  client = OpenAI()
10
  BUCKET = os.environ["SUPABASE_BUCKET"]
11
 
 
 
 
 
 
12
 
13
+ # --------------------------------------------------------
14
+ # Viewer HTML aus Supabase-Dokumenten bauen
15
+ # --------------------------------------------------------
16
def build_viewer_html():
    """Build the two document viewers (Prüfungsordnung / Hochschulgesetz NRW)
    from the Supabase `documents` table.

    Each stored chunk becomes a <div>; when the chunk's metadata carries an
    `anchor_id`, the div gets that id so source links (#po_1, #hg_3, ...) can
    jump straight to it.

    Returns:
        (po_html, hg_html): two HTML strings, one per document viewer.
    """
    from html import escape  # escape document text so it cannot break the page markup

    resp = supabase.table("documents").select("content, metadata").limit(2000).execute()
    rows = resp.data or []

    po_blocks = []
    hg_blocks = []

    for row in rows:
        content = row.get("content") or ""
        meta = row.get("metadata") or {}
        src = meta.get("source", "")
        anchor_id = meta.get("anchor_id")
        page = meta.get("page", None)
        page_info = f"(Seite {page})" if page else ""

        # Only emit an id attribute when the chunk actually has an anchor;
        # previously a missing anchor_id produced the bogus id 'None' on
        # every unanchored div (duplicate, invalid ids).
        id_attr = f" id='{escape(str(anchor_id))}'" if anchor_id else ""
        block_html = (
            f"<div{id_attr} style='margin-bottom: 1rem;'>"
            f"<b>{escape(src)} {page_info}</b><br>{escape(content)}</div>"
        )

        if "Prüfungsordnung" in src:
            po_blocks.append(block_html)
        elif "Hochschulgesetz" in src:
            hg_blocks.append(block_html)

    po_html = "<h3>Prüfungsordnung</h3>" + "".join(po_blocks)
    hg_html = "<h3>Hochschulgesetz NRW</h3>" + "".join(hg_blocks)

    return po_html, hg_html
46
 
 
 
 
 
 
 
47
 
48
+ PO_HTML, HG_HTML = build_viewer_html()
49
 
50
+
51
+ # --------------------------------------------------------
52
+ # Speech-to-Text (Whisper, DE)
53
+ # --------------------------------------------------------
54
def transcribe(audio_path: str) -> str:
    """Transcribe a recorded audio file to German text using Whisper.

    Returns "" when no recording was supplied.
    """
    if not audio_path:
        return ""
    with open(audio_path, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
            language="de",
            temperature=0.0,
        )
    return (transcription.text or "").strip()
65
 
66
 
67
+ # --------------------------------------------------------
68
+ # Chat-Funktion
69
+ # --------------------------------------------------------
70
def chat_fn(text, audio, history):
    """Handle one chat turn: resolve the question (text wins over voice),
    run the RAG pipeline, and build a clickable list of sources.

    Args:
        text: typed question (may be empty/None).
        audio: filepath of a recorded question, or None.
        history: Gradio messages-format history (list of role/content dicts).

    Returns:
        (new_history, sources_html, audio_reset) for the Gradio outputs.
    """
    from html import escape  # local import: `html` below is a local variable

    text = (text or "").strip()

    # 1) Priority: text. Audio is only transcribed when no text was typed.
    if text:
        question = text
    elif audio is not None:
        question = transcribe(audio)
    else:
        return history, "<p>Bitte Text eingeben oder Mikrofon benutzen.</p>", None

    if not question:
        return history, "<p>Spracherkennung fehlgeschlagen. Bitte erneut sprechen.</p>", None

    # 2) RAG answer
    answer, docs = rag_answer(question, history or [])

    # 3) Source list with clickable anchors. Source names and snippets come
    #    from raw PDF/HTML text, so they are escaped to keep the markup valid.
    html = "<ol>"
    for i, d in enumerate(docs):
        meta = d.get("metadata") or {}
        src = escape(meta.get("source", "?"))
        page = meta.get("page", None)
        page_info = f"(Seite {page})" if page else ""
        anchor_id = meta.get("anchor_id")
        snippet = escape((d.get("content") or "")[:200])

        if anchor_id:
            link = f"#{escape(str(anchor_id))}"
            html += (
                f"<li>"
                f"<a href='{link}'><b>Quelle {i+1}: {src} {page_info}</b></a><br>"
                f"{snippet}..."
                f"</li>"
            )
        else:
            html += (
                f"<li><b>Quelle {i+1}: {src} {page_info}</b><br>"
                f"{snippet}...</li>"
            )
    html += "</ol>"

    # 4) Append the turn in Gradio messages format
    new_history = (history or []) + [
        {"role": "user", "content": question},
        {"role": "assistant", "content": answer},
    ]

    # Reset the audio widget after sending
    return new_history, html, gr.update(value=None)
120
 
121
 
122
+ # --------------------------------------------------------
123
+ # UI Layout
124
+ # --------------------------------------------------------
125
  with gr.Blocks() as demo:
126
  gr.Markdown("# ⚖️ Sprachbasierter Chatbot für Prüfungsrecht")
127
 
128
  with gr.Row():
129
  with gr.Column(scale=3):
130
+ chatbot = gr.Chatbot(label="Chat (Prüfungsrecht)")
131
+ text_input = gr.Textbox(
132
+ label="Text-Eingabe",
133
+ placeholder="Frage hier eintippen ..."
134
+ )
135
+ audio_input = gr.Audio(
136
+ type="filepath",
137
+ label="Spracheingabe (Mikrofon)"
138
+ )
139
  send_btn = gr.Button("Senden")
140
 
141
  with gr.Column(scale=2):
142
+ gr.Markdown("### 📄 Prüfungsordnung (mit Ankern)")
143
  gr.HTML(
144
+ f"<div style='overflow:auto; height:250px; "
145
+ f"border:1px solid #ccc; padding:10px;'>{PO_HTML}</div>"
146
  )
147
 
148
+ gr.Markdown("### 📜 Hochschulgesetz NRW (mit Ankern)")
149
  gr.HTML(
150
+ f"<div style='overflow:auto; height:250px; "
151
+ f"border:1px solid #ccc; padding:10px;'>{HG_HTML}</div>"
152
  )
153
 
154
  sources_html = gr.HTML()
ingest.py CHANGED
@@ -1,4 +1,4 @@
1
- # ingest.py
2
  import os
3
  from io import BytesIO
4
  from bs4 import BeautifulSoup
@@ -6,13 +6,14 @@ from pypdf import PdfReader
6
 
7
  from supabase_client import supabase, load_file_bytes
8
  from langchain_openai import OpenAIEmbeddings
9
- from langchain_community.vectorstores import SupabaseVectorStore
10
  from langchain_core.documents import Document
11
  from langchain_text_splitters import RecursiveCharacterTextSplitter
12
 
13
  BUCKET = os.environ["SUPABASE_BUCKET"]
14
 
 
15
  def load_pdf_docs():
 
16
  pdf_bytes = load_file_bytes(BUCKET, "pruefungsordnung.pdf")
17
  reader = PdfReader(BytesIO(pdf_bytes))
18
 
@@ -22,12 +23,17 @@ def load_pdf_docs():
22
  docs.append(
23
  Document(
24
  page_content=text,
25
- metadata={"source": "Prüfungsordnung", "page": i + 1},
 
 
 
26
  )
27
  )
28
  return docs
29
 
 
30
  def load_html_docs():
 
31
  html_bytes = load_file_bytes(BUCKET, "hochschulgesetz.html")
32
  html_str = html_bytes.decode("utf-8", errors="ignore")
33
  soup = BeautifulSoup(html_str, "html.parser")
@@ -40,34 +46,56 @@ def load_html_docs():
40
  )
41
  ]
42
 
 
43
  def chunk_docs(docs):
 
44
  splitter = RecursiveCharacterTextSplitter(
45
- chunk_size=1000,
46
- chunk_overlap=150
47
  )
48
  return splitter.split_documents(docs)
49
 
50
- def main():
 
 
51
  pdf_docs = load_pdf_docs()
52
- html_docs = load_html_docs()
53
- all_docs = pdf_docs + html_docs
54
 
 
55
  chunks = chunk_docs(all_docs)
 
56
 
57
- embeddings = OpenAIEmbeddings(
58
- model="text-embedding-3-small"
59
- )
 
 
 
 
 
 
 
 
60
 
61
- SupabaseVectorStore.from_documents(
62
- chunks,
63
- embeddings,
64
- client=supabase,
65
- table_name="documents",
66
- query_name="match_documents",
67
- chunk_size=200,
68
- )
 
 
 
 
 
 
 
 
 
69
 
70
- print("Ingest OK (no local files).")
71
 
72
  if __name__ == "__main__":
73
- main()
 
1
+ # ingest.py — Ingest mit anchor_id für jeden Absatz
2
  import os
3
  from io import BytesIO
4
  from bs4 import BeautifulSoup
 
6
 
7
  from supabase_client import supabase, load_file_bytes
8
  from langchain_openai import OpenAIEmbeddings
 
9
  from langchain_core.documents import Document
10
  from langchain_text_splitters import RecursiveCharacterTextSplitter
11
 
12
  BUCKET = os.environ["SUPABASE_BUCKET"]
13
 
14
+
15
  def load_pdf_docs():
16
+ """Lädt Prüfungsordnung.pdf aus Supabase (in-memory) und erzeugt pro Seite ein Document."""
17
  pdf_bytes = load_file_bytes(BUCKET, "pruefungsordnung.pdf")
18
  reader = PdfReader(BytesIO(pdf_bytes))
19
 
 
23
  docs.append(
24
  Document(
25
  page_content=text,
26
+ metadata={
27
+ "source": "Prüfungsordnung",
28
+ "page": i + 1,
29
+ },
30
  )
31
  )
32
  return docs
33
 
34
+
35
  def load_html_docs():
36
+ """Lädt hochschulgesetz.html aus Supabase und extrahiert reinen Text."""
37
  html_bytes = load_file_bytes(BUCKET, "hochschulgesetz.html")
38
  html_str = html_bytes.decode("utf-8", errors="ignore")
39
  soup = BeautifulSoup(html_str, "html.parser")
 
46
  )
47
  ]
48
 
49
+
50
  def chunk_docs(docs):
51
+ """Chunking in sinnvolle Absätze."""
52
  splitter = RecursiveCharacterTextSplitter(
53
+ chunk_size=800,
54
+ chunk_overlap=150,
55
  )
56
  return splitter.split_documents(docs)
57
 
58
+
59
+ def ingest():
60
+ print("📥 Lade Dokumente aus Supabase...")
61
  pdf_docs = load_pdf_docs()
62
+ hg_docs = load_html_docs()
63
+ all_docs = pdf_docs + hg_docs
64
 
65
+ print(f"📄 Rohdokumente geladen: {len(all_docs)}")
66
  chunks = chunk_docs(all_docs)
67
+ print(f"✂️ Zu Chunks gesplittet: {len(chunks)}")
68
 
69
+ # anchor_id vergeben
70
+ po_idx = 1
71
+ hg_idx = 1
72
+ for d in chunks:
73
+ src = d.metadata.get("source", "")
74
+ if "Prüfungsordnung" in src:
75
+ d.metadata["anchor_id"] = f"po_{po_idx}"
76
+ po_idx += 1
77
+ elif "Hochschulgesetz" in src:
78
+ d.metadata["anchor_id"] = f"hg_{hg_idx}"
79
+ hg_idx += 1
80
 
81
+ embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
82
+
83
+ print("🧠 Erzeuge Embeddings & schreibe nach Supabase (Tabelle documents)...")
84
+ for i, d in enumerate(chunks):
85
+ emb = embeddings.embed_query(d.page_content)
86
+ supabase.table("documents").insert(
87
+ {
88
+ "content": d.page_content,
89
+ "metadata": d.metadata,
90
+ "embedding": emb,
91
+ }
92
+ ).execute()
93
+
94
+ if (i + 1) % 50 == 0:
95
+ print(f" → {i+1}/{len(chunks)} Chunks gespeichert")
96
+
97
+ print("✅ Ingest abgeschlossen – Dokumente mit anchor_id in Supabase gespeichert.")
98
 
 
99
 
100
  if __name__ == "__main__":
101
+ ingest()
rag_pipeline.py CHANGED
@@ -1,4 +1,4 @@
1
- # rag_pipeline.py
2
  import os
3
  from datetime import date
4
  from typing import Any, List
@@ -11,28 +11,19 @@ client = OpenAI()
11
  embedder = OpenAIEmbeddings(model="text-embedding-3-small")
12
 
13
 
14
- # --------------------------------------------------------
15
- # Lấy tài liệu liên quan từ Supabase bằng RPC match_documents
16
- # --------------------------------------------------------
17
- def get_relevant_docs(query: str, k: int = 4) -> List[dict]:
18
  embedding = embedder.embed_query(query)
19
-
20
  resp = supabase.rpc(
21
  "match_documents",
22
- {
23
- "query_embedding": embedding,
24
- "filter": {}, # hiện tại không filter thêm
25
- },
26
  ).execute()
27
-
28
  data = resp.data or []
29
  return data[:k]
30
 
31
 
32
- # --------------------------------------------------------
33
- # Lưu lịch sử vào bảng chat_history
34
- # --------------------------------------------------------
35
  def save_message(role: str, message: str) -> None:
 
36
  today = date.today().isoformat()
37
  supabase.table("chat_history").insert(
38
  {
@@ -43,66 +34,59 @@ def save_message(role: str, message: str) -> None:
43
  ).execute()
44
 
45
 
46
- # --------------------------------------------------------
47
- # Chuyển history (list tuple / dict) thành text
48
- # --------------------------------------------------------
49
  def format_history(history: Any) -> str:
 
50
  if not history:
51
  return ""
52
- text = ""
53
  for turn in history:
54
- # tuple / list: (user, assistant)
55
- if isinstance(turn, (list, tuple)) and len(turn) >= 2:
56
- user_msg = str(turn[0])
57
- bot_msg = str(turn[1])
58
- text += f"User: {user_msg}\nAssistant: {bot_msg}\n"
59
- # dict kiểu {"role": "...", "content": "..."} – phòng khi Gradio thay đổi
60
- elif isinstance(turn, dict) and "role" in turn and "content" in turn:
61
- role = turn["role"]
62
- content = str(turn["content"])
63
- if role == "user":
64
- text += f"User: {content}\n"
65
- elif role == "assistant":
66
- text += f"Assistant: {content}\n"
67
- # các format khác bỏ qua
68
- return text
69
-
70
-
71
- # --------------------------------------------------------
72
- # Hàm RAG chính
73
- # --------------------------------------------------------
74
  def rag_answer(question: str, history: Any):
75
- # 1) Lấy tài liệu từ vectorstore Supabase
76
  docs = get_relevant_docs(question)
77
 
78
- # 2) Build context string
79
  context_parts = []
80
  for i, d in enumerate(docs):
81
  meta = d.get("metadata") or {}
82
  src = meta.get("source", "Quelle")
83
- page = meta.get("page", None)
84
- page_info = f"(Seite {page})" if page is not None else ""
85
  text = d.get("content") or ""
86
- context_parts.append(f"[Quelle {i+1}] {src} {page_info}\n{text}")
 
 
87
  context = "\n\n".join(context_parts) if context_parts else "Keine relevanten Dokumente gefunden."
88
 
89
- # 3) History text
90
  history_text = format_history(history)
91
 
92
- # 4) System + User prompt
93
  system_prompt = (
94
- "Du bist ein Sprachbasierter Chatbot für Prüfungsrecht an einer Hochschule. "
95
- "Du beantwortest Fragen ausschließlich auf Basis der bereitgestellten Dokumente "
96
  "(Prüfungsordnung, Hochschulgesetz NRW). "
97
- "Wenn die Dokumente keine Antwort liefern, sag ehrlich, dass du es nicht weißt. "
98
- "Zitiere relevante Stellen als [Quelle 1], [Quelle 2] usw."
99
  )
100
 
101
  user_content = (
102
  f"Frage: {question}\n\n"
103
  f"Bisheriger Chatverlauf:\n{history_text}\n\n"
104
  f"Relevante Auszüge aus den Dokumenten:\n{context}\n\n"
105
- "Bitte beantworte die Frage präzise und mit Quellenangaben."
 
 
 
106
  )
107
 
108
  messages = [
@@ -118,7 +102,6 @@ def rag_answer(question: str, history: Any):
118
 
119
  answer = completion.choices[0].message.content
120
 
121
- # 5) Lưu lịch sử vào Supabase
122
  save_message("user", question)
123
  save_message("assistant", answer)
124
 
 
1
+ # rag_pipeline.py — RAG mit Supabase RPC & anchor_id
2
  import os
3
  from datetime import date
4
  from typing import Any, List
 
11
  embedder = OpenAIEmbeddings(model="text-embedding-3-small")
12
 
13
 
14
def get_relevant_docs(query: str, k: int = 6) -> List[dict]:
    """Call the `match_documents` RPC in Supabase and return the top-k hits."""
    query_vector = embedder.embed_query(query)
    result = supabase.rpc(
        "match_documents",
        {"query_embedding": query_vector, "filter": {}},
    ).execute()
    hits = result.data or []
    return hits[:k]
23
 
24
 
 
 
 
25
  def save_message(role: str, message: str) -> None:
26
+ """Speichert Nachrichten nach Datum gruppiert in chat_history."""
27
  today = date.today().isoformat()
28
  supabase.table("chat_history").insert(
29
  {
 
34
  ).execute()
35
 
36
 
 
 
 
37
def format_history(history: Any) -> str:
    """Render chat history (role/content dicts or (user, bot) tuples) as
    plain "User: ... / Assistant: ..." text for the prompt."""
    if not history:
        return ""
    lines = []
    for entry in history:
        if isinstance(entry, dict) and "role" in entry and "content" in entry:
            role = entry["role"]
            content = str(entry["content"])
            if role == "user":
                lines.append(f"User: {content}\n")
            elif role == "assistant":
                lines.append(f"Assistant: {content}\n")
        elif isinstance(entry, (list, tuple)) and len(entry) >= 2:
            lines.append(f"User: {entry[0]}\nAssistant: {entry[1]}\n")
    return "".join(lines)
53
+
54
+
 
 
 
 
 
 
 
 
55
  def rag_answer(question: str, history: Any):
56
+ """Gibt (Antworttext, Liste von Dokumentdicts) zurück."""
57
  docs = get_relevant_docs(question)
58
 
59
+ # Kontext
60
  context_parts = []
61
  for i, d in enumerate(docs):
62
  meta = d.get("metadata") or {}
63
  src = meta.get("source", "Quelle")
64
+ page = meta.get("page")
65
+ page_info = f"(Seite {page})" if page else ""
66
  text = d.get("content") or ""
67
+ context_parts.append(
68
+ f"[Quelle {i+1}] {src} {page_info}\n{text}"
69
+ )
70
  context = "\n\n".join(context_parts) if context_parts else "Keine relevanten Dokumente gefunden."
71
 
 
72
  history_text = format_history(history)
73
 
 
74
  system_prompt = (
75
+ "Du bist ein spezialisierter Chatbot für Prüfungsrecht an einer Hochschule. "
76
+ "Du antwortest ausschließlich auf Basis der bereitgestellten Dokumente "
77
  "(Prüfungsordnung, Hochschulgesetz NRW). "
78
+ "Wenn die Dokumente keine klare Antwort liefern, sag ehrlich, dass es in den vorhandenen Unterlagen nicht eindeutig geregelt ist. "
79
+ "Zitiere Quellen immer im Format [Quelle X] und nenne, ob sie aus der Prüfungsordnung oder dem Hochschulgesetz stammen."
80
  )
81
 
82
  user_content = (
83
  f"Frage: {question}\n\n"
84
  f"Bisheriger Chatverlauf:\n{history_text}\n\n"
85
  f"Relevante Auszüge aus den Dokumenten:\n{context}\n\n"
86
+ "Formuliere eine klare, juristisch saubere Antwort. "
87
+ "Gib am Ende deiner Antwort eine Liste der verwendeten Quellen im Format:\n"
88
+ "[Quelle 1: Prüfungsordnung, Seite ..., ggf. Paragraph]\n"
89
+ "[Quelle 2: Hochschulgesetz NRW, Seite ..., ggf. Paragraph]\n"
90
  )
91
 
92
  messages = [
 
102
 
103
  answer = completion.choices[0].message.content
104
 
 
105
  save_message("user", question)
106
  save_message("assistant", answer)
107