Nguyen5 committed on
Commit
33b8aa6
·
1 Parent(s): 564cc72
Files changed (1) hide show
  1. app.py +100 -82
app.py CHANGED
@@ -1,124 +1,142 @@
1
  # app.py
2
- import gradio as gr
3
  import os
 
 
4
  from openai import OpenAI
 
 
5
  from rag_pipeline import rag_answer
6
- from supabase_client import supabase
7
 
8
  client = OpenAI()
 
9
 
 
 
 
 
 
10
 
11
- # -------------------------------------------------------
12
- # Build viewer HTML (Prüfungsordnung + HG NRW), in SAME DOM
13
- # -------------------------------------------------------
14
- def build_viewer_html():
15
- resp = supabase.table("documents").select("content, metadata").execute()
16
- items = resp.data or []
17
-
18
- blocks = []
19
-
20
- for row in items:
21
- meta = row["metadata"]
22
- anchor = meta.get("anchor_id")
23
- source = meta["source"]
24
- page = meta.get("page", "")
25
- page_info = f"(Seite {page})" if page else ""
26
- content = row["content"]
27
-
28
- block = (
29
- f"<div id='{anchor}' style='margin:20px 0;'>"
30
- f"<b>{source} {page_info}</b><br>"
31
- f"{content}"
32
- f"</div>"
33
- )
34
- blocks.append(block)
35
 
36
- return "<br>".join(blocks)
 
 
 
 
 
 
37
 
38
 
39
- DOCUMENTS_HTML = build_viewer_html()
 
 
 
 
 
40
 
41
 
42
- # -------------------------------------------------------
43
- # Whisper (German only)
44
- # -------------------------------------------------------
45
- def transcribe(audio):
46
- if audio is None:
47
- return ""
48
- try:
49
- with open(audio, "rb") as f:
50
- res = client.audio.transcriptions.create(
51
- model="whisper-1",
52
- file=f,
53
- language="de",
54
- temperature=0
55
- )
56
- return (res.text or "").strip()
57
- except:
58
  return ""
 
 
 
 
 
 
 
 
59
 
60
 
61
- # -------------------------------------------------------
62
- # Main Chat
63
- # -------------------------------------------------------
64
- def chat_fn(text, audio, history_md):
65
-
66
- text = text.strip() if text else ""
67
 
 
68
  if text:
69
  question = text
70
- else:
71
  question = transcribe(audio)
 
 
72
 
73
  if not question:
74
- return history_md, "<p>Bitte Text oder Mikrofon benutzen.</p>", gr.update(value=None)
75
 
76
- answer, docs = rag_answer(question, None)
 
77
 
78
- # Build sources with clickable links to anchors in SAME DOM
79
- sources_md = "### Quellen\n"
80
  for i, d in enumerate(docs):
81
- meta = d["metadata"]
82
- anchor = meta["anchor_id"]
83
- src = meta["source"]
84
- page = meta.get("page")
 
 
 
 
 
85
  page_info = f"(Seite {page})" if page else ""
86
 
87
- sources_md += (
88
- f"- [Quelle {i+1}: {src} {page_info}](#{anchor})\n"
89
- )
90
 
91
- answer_md = (
92
- f"### Frage\n{question}\n\n"
93
- f"### Antwort\n{answer}\n\n"
94
- f"{sources_md}\n"
95
- f"---\n\n"
96
- f"## Dokumente\n{DOCUMENTS_HTML}"
97
- )
 
 
98
 
99
- history_new = answer_md # always show latest answer only
 
 
 
 
100
 
101
- return history_new, answer_md, gr.update(value=None)
 
102
 
103
 
104
- # -------------------------------------------------------
105
- # UI Layout — chat + docs IN SAME COLUMN
106
- # -------------------------------------------------------
107
  with gr.Blocks() as demo:
108
  gr.Markdown("# ⚖️ Sprachbasierter Chatbot für Prüfungsrecht")
109
 
110
- chat_display = gr.Markdown("")
 
 
 
 
 
 
 
 
 
 
 
111
 
112
- text_input = gr.Textbox(label="Text Eingabe")
113
- audio_input = gr.Audio(type="filepath", label="Mikrofon")
114
- send_btn = gr.Button("Senden")
 
115
 
116
- answer_preview = gr.Markdown("")
117
 
118
  send_btn.click(
119
  chat_fn,
120
- inputs=[text_input, audio_input, chat_display],
121
- outputs=[chat_display, answer_preview, audio_input]
122
  )
123
 
124
- demo.launch(ssr_mode=False)
 
 
1
  # app.py
 
2
  import os
3
+ import base64
4
+ import gradio as gr
5
  from openai import OpenAI
6
+
7
+ from supabase_client import load_file_bytes
8
  from rag_pipeline import rag_answer
 
9
 
10
  client = OpenAI()
11
+ BUCKET = os.environ["SUPABASE_BUCKET"]
12
 
13
+ # ------------------------------------------
14
+ # Public URLs used to open the PDF/HTML when a Quelle (source) link is clicked
15
+ # ------------------------------------------
16
+ PDF_URL = f"{os.environ['SUPABASE_URL']}/storage/v1/object/public/{BUCKET}/pruefungsordnung.pdf"
17
+ HG_URL = f"{os.environ['SUPABASE_URL']}/storage/v1/object/public/{BUCKET}/hochschulgesetz.html"
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
+ # ------------------------------------------
21
+ # Viewer PDF base64
22
+ # ------------------------------------------
23
+ def encode_pdf_src():
24
+ pdf_bytes = load_file_bytes(BUCKET, "pruefungsordnung.pdf")
25
+ b64 = base64.b64encode(pdf_bytes).decode("utf-8")
26
+ return f"data:application/pdf;base64,{b64}"
27
 
28
 
29
+ # ------------------------------------------
30
+ # HTML viewer
31
+ # ------------------------------------------
32
+ def encode_html():
33
+ html_bytes = load_file_bytes(BUCKET, "hochschulgesetz.html")
34
+ return html_bytes.decode("utf-8", errors="ignore")
35
 
36
 
37
+ # ------------------------------------------
38
+ # Speech-to-text FIXED
39
+ # ------------------------------------------
40
+ def transcribe(audio_path):
41
+ if audio_path is None:
 
 
 
 
 
 
 
 
 
 
 
42
  return ""
43
+ with open(audio_path, "rb") as f:
44
+ result = client.audio.transcriptions.create(
45
+ model="whisper-1",
46
+ file=f,
47
+ language="de", # force German
48
+ temperature=0.0 # keep the result stable
49
+ )
50
+ return (result.text or "").strip()
51
 
52
 
53
+ # ------------------------------------------
54
+ # MAIN CHAT FUNCTION
55
+ # ------------------------------------------
56
+ def chat_fn(text, audio, history):
57
+ text = (text or "").strip()
 
58
 
59
+ # 1) Prefer text; do not use audio if text is present
60
  if text:
61
  question = text
62
+ elif audio is not None:
63
  question = transcribe(audio)
64
+ else:
65
+ return history, "<p>Bitte Text oder Mikrofon benutzen.</p>", None
66
 
67
  if not question:
68
+ return history, "<p>Spracherkennung fehlgeschlagen.</p>", None
69
 
70
+ # 2) RAG
71
+ answer, docs = rag_answer(question, history or [])
72
 
73
+ # 3) Build Quellen (clickable)
74
+ html = "<ol>"
75
  for i, d in enumerate(docs):
76
+ meta = d.get("metadata", {}) or {}
77
+ src = meta.get("source", "?")
78
+
79
+ if "Prüfungsordnung" in src:
80
+ link = PDF_URL
81
+ else:
82
+ link = HG_URL
83
+
84
+ page = meta.get("page", None)
85
  page_info = f"(Seite {page})" if page else ""
86
 
87
+ snippet = (d.get("content") or "")[:200]
 
 
88
 
89
+ html += f"""
90
+ <li>
91
+ <a href="{link}" target="_blank">
92
+ <b>Quelle {i+1}: {src} {page_info}</b>
93
+ </a><br>
94
+ {snippet}...
95
+ </li>
96
+ """
97
+ html += "</ol>"
98
 
99
+ # 4) Gradio message history
100
+ new_history = (history or []) + [
101
+ {"role": "user", "content": question},
102
+ {"role": "assistant", "content": answer},
103
+ ]
104
 
105
+ # Reset audio input
106
+ return new_history, html, gr.update(value=None)
107
 
108
 
109
+ # ------------------------------------------
110
+ # UI LAYOUT
111
+ # ------------------------------------------
112
  with gr.Blocks() as demo:
113
  gr.Markdown("# ⚖️ Sprachbasierter Chatbot für Prüfungsrecht")
114
 
115
+ with gr.Row():
116
+ with gr.Column(scale=3):
117
+ chatbot = gr.Chatbot(label="Chat (RAG)")
118
+ text_input = gr.Textbox(label="Text Eingabe")
119
+ audio_input = gr.Audio(type="filepath", label="Spracheingabe (Mikrofon)")
120
+ send_btn = gr.Button("Senden")
121
+
122
+ with gr.Column(scale=2):
123
+ gr.Markdown("### 📄 Prüfungsordnung PDF")
124
+ gr.HTML(
125
+ f"<iframe src='{encode_pdf_src()}' width='100%' height='250'></iframe>"
126
+ )
127
 
128
+ gr.Markdown("### 📜 Hochschulgesetz NRW")
129
+ gr.HTML(
130
+ f"<div style='overflow:auto;height:250px;'>{encode_html()}</div>"
131
+ )
132
 
133
+ sources_html = gr.HTML()
134
 
135
  send_btn.click(
136
  chat_fn,
137
+ inputs=[text_input, audio_input, chatbot],
138
+ outputs=[chatbot, sources_html, audio_input],
139
  )
140
 
141
+ if __name__ == "__main__":
142
+ demo.launch(ssr_mode=False)