Nguyen5 committed on
Commit
c517565
·
1 Parent(s): 8308ad9
Files changed (1) hide show
  1. app.py +98 -22
app.py CHANGED
@@ -7,72 +7,148 @@ from supabase_client import supabase
7
 
8
  client = OpenAI()
9
 
 
 
 
 
10
  def build_viewer():
 
 
 
 
11
  resp = supabase.table("documents").select("content, metadata").execute()
12
  items = resp.data or []
 
13
  po_html = []
14
  hg_html = []
 
15
  for row in items:
16
  meta = row["metadata"]
17
  src = meta["source"]
18
- anchor = meta["anchor_id"]
19
- page = meta.get("page", "")
20
- block_html = f"<div id='{anchor}'><b>{src} {page}</b><br>{row['content']}</div>"
 
 
 
 
 
 
 
21
  if "Prüfungsordnung" in src:
22
  po_html.append(block_html)
23
  else:
24
  hg_html.append(block_html)
 
25
  return "".join(po_html), "".join(hg_html)
26
 
27
# NOTE(review): computed once at import time — the viewers show a startup
# snapshot and will not reflect documents added while the app is running.
PO_HTML, HG_HTML = build_viewer()
28
 
 
 
 
29
def transcribe(audio):
    """Transcribe a recorded audio file to German text via Whisper.

    Args:
        audio: Filesystem path to the recording, or None when nothing was recorded.

    Returns:
        str: The stripped transcription — "" when there is no audio or the
        API call fails (best-effort: transcription errors must not crash the chat).
    """
    if audio is None:
        return ""
    try:
        with open(audio, "rb") as f:
            res = client.audio.transcriptions.create(
                model="whisper-1", file=f, language="de", temperature=0
            )
        # res.text can be None for silent clips; guard before .strip().
        return (res.text or "").strip()
    except Exception:
        # File/network/API errors degrade to empty input instead of crashing the UI.
        return ""
 
 
 
 
 
 
37
 
 
 
 
 
38
def chat_fn(text, audio, history):
    """Handle one chat turn: take text (preferred) or audio, run RAG, update the UI.

    Args:
        text: Typed question (may be None/empty).
        audio: Path to a recorded clip, used only when no text was typed.
        history: Chat history as a list of {"role", "content"} messages, or None.

    Returns:
        tuple: (new history, sources HTML, gr.update(value=None) to reset audio).
    """
    text = text.strip() if text else ""
    q = text if text else transcribe(audio)

    # Neither text nor usable audio: prompt the user instead of querying RAG with "".
    if not q:
        return history, "<p>Bitte Text oder Mikrofon benutzen.</p>", gr.update(value=None)

    answer, docs = rag_answer(q, history or [])

    # Source list with anchors linking into the document viewers.
    # Join a list instead of quadratic string += in the loop.
    parts = ["<ol>"]
    for i, d in enumerate(docs):
        meta = d["metadata"]
        anchor = meta.get("anchor_id", "")  # .get(): not every chunk has an anchor
        snippet = d["content"][:200]
        parts.append(
            f"<li><a href='#{anchor}'><b>Quelle {i+1}</b></a><br>{snippet}...</li>"
        )
    parts.append("</ol>")
    html = "".join(parts)

    new_hist = (history or []) + [
        {"role": "user", "content": q},
        {"role": "assistant", "content": answer},
    ]

    return new_hist, html, gr.update(value=None)  # reset audio widget
 
61
 
 
 
 
 
62
with gr.Blocks() as demo:
    # Input widgets: chat transcript, typed question, microphone, submit button.
    chatbot = gr.Chatbot()
    text_input = gr.Textbox(label="Text Eingabe")
    audio_input = gr.Audio(type="filepath", label="Mikrofon")
    send = gr.Button("Senden")

    # Scrollable viewers for the two document collections, plus the source list.
    viewer_style = "height:250px; overflow:auto"
    po_view = gr.HTML(f"<div style='{viewer_style}'>{PO_HTML}</div>")
    hg_view = gr.HTML(f"<div style='{viewer_style}'>{HG_HTML}</div>")
    sources = gr.HTML()

    # One chat turn per click: consumes text + audio + history, updates the
    # chat, the source list, and resets the audio widget.
    send.click(
        chat_fn,
        inputs=[text_input, audio_input, chatbot],
        outputs=[chatbot, sources, audio_input],
    )

demo.launch()
 
7
 
8
  client = OpenAI()
9
 
10
+
11
# -------------------------------------------
# DYNAMIC VIEWER — reloads from Supabase on every call
# -------------------------------------------
def build_viewer():
    """Build viewer HTML from the latest rows of the `documents` table.

    Queried on every call, so callers get up-to-date content rather than an
    import-time snapshot.

    Returns:
        tuple[str, str]: (po_html, hg_html) — HTML for documents whose source
        contains "Prüfungsordnung", and for all other documents.
    """
    resp = supabase.table("documents").select("content, metadata").execute()
    items = resp.data or []  # resp.data is None when the table is empty

    po_html = []
    hg_html = []

    for row in items:
        meta = row["metadata"]
        src = meta["source"]
        # Default to "" so a missing anchor doesn't render as id='None'.
        anchor = meta.get("anchor_id") or ""
        page = meta.get("page")
        page_info = f"(Seite {page})" if page else ""

        block_html = (
            f"<div id='{anchor}' style='margin-bottom:14px;'>"
            f"<b>{src} {page_info}</b><br>{row['content']}"
            f"</div>"
        )

        if "Prüfungsordnung" in src:
            po_html.append(block_html)
        else:
            hg_html.append(block_html)

    return "".join(po_html), "".join(hg_html)
44
 
 
45
 
46
# -------------------------------------------
# WHISPER — German speech-to-text, best-effort
# -------------------------------------------
def transcribe(audio):
    """Transcribe the recording at path `audio` to German text.

    Returns "" when there is no recording or the Whisper call fails, so a
    broken transcription never crashes the chat handler.
    """
    if audio is None:
        return ""

    try:
        with open(audio, "rb") as audio_file:
            result = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                language="de",
                temperature=0.0,
            )
            # result.text may be None for silent clips.
            return (result.text or "").strip()
    except Exception:
        # Best-effort: any file/API failure degrades to empty input.
        return ""
63
 
64
+
65
# -------------------------------------------
# MAIN CHAT FUNCTION
# -------------------------------------------
def chat_fn(text, audio, history):
    """Handle one chat turn.

    Typed text wins over audio; audio is transcribed only when no text was
    entered. Runs the RAG pipeline and renders its sources as an anchored list.

    Args:
        text: Typed question (may be None/empty).
        audio: Path to a recording, or None.
        history: Messages-format history (list of {"role", "content"}) or None.

    Returns:
        tuple: (updated history, sources HTML, gr.update(value=None) to reset
        the audio widget).
    """
    text = text.strip() if text else ""

    # Prefer typed text; fall back to transcribing the recording.
    q = text if text else transcribe(audio)

    if not q:
        return history, "<p>Bitte Text oder Mikrofon benutzen.</p>", gr.update(value=None)

    # RAG
    answer, docs = rag_answer(q, history or [])

    # Render sources; join a list instead of quadratic string += in the loop.
    parts = ["<ol>"]
    for i, d in enumerate(docs):
        meta = d["metadata"]
        anchor = meta.get("anchor_id") or ""  # avoid href='#None'
        snippet = d["content"][:200]
        parts.append(
            f"<li>"
            f"<a href='#{anchor}' style='font-weight:bold;'>"
            f"Quelle {i+1}"
            f"</a><br>"
            f"{snippet}..."
            f"</li>"
        )
    parts.append("</ol>")
    html = "".join(parts)

    # Gradio messages-format history.
    new_hist = (history or []) + [
        {"role": "user", "content": q},
        {"role": "assistant", "content": answer},
    ]

    # Fully reset the audio input.
    return new_hist, html, gr.update(value=None)
109
 
110
+
111
# -------------------------------------------
# UI
# -------------------------------------------
with gr.Blocks() as demo:

    gr.Markdown("# ⚖️ Sprachbasierter Chatbot für Prüfungsrecht")

    with gr.Row():
        # Left column: chat transcript, text/voice inputs, submit button.
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="Chat")

            text_input = gr.Textbox(label="Text Eingabe", placeholder="Frage hier eingeben...")

            audio_input = gr.Audio(
                type="filepath",
                label="Spracheingabe (Mikrofon)",
            )

            send_btn = gr.Button("Senden")

        # Right column: document viewers (loaded fresh at build time) + sources.
        with gr.Column(scale=2):
            po_html, hg_html = build_viewer()

            container = (
                "<div style='height:250px;overflow:auto;border:1px solid #ccc;"
                "padding:10px'>{}</div>"
            )
            po_view = gr.HTML(container.format(po_html))
            hg_view = gr.HTML(container.format(hg_html))

            sources = gr.HTML()

    # One chat turn per click: consumes text + audio + history, updates the
    # chat, the source list, and resets the audio widget.
    send_btn.click(
        chat_fn,
        inputs=[text_input, audio_input, chatbot],
        outputs=[chatbot, sources, audio_input],
    )

demo.launch(ssr_mode=False)