Nguyen5 committed on
Commit
85a2072
·
1 Parent(s): 33b8aa6
Files changed (1) hide show
  1. app.py +22 -14
app.py CHANGED
@@ -16,7 +16,6 @@ BUCKET = os.environ["SUPABASE_BUCKET"]
16
  PDF_URL = f"{os.environ['SUPABASE_URL']}/storage/v1/object/public/{BUCKET}/pruefungsordnung.pdf"
17
  HG_URL = f"{os.environ['SUPABASE_URL']}/storage/v1/object/public/{BUCKET}/hochschulgesetz.html"
18
 
19
-
20
  # ------------------------------------------
21
  # Viewer PDF base64
22
  # ------------------------------------------
@@ -25,7 +24,6 @@ def encode_pdf_src():
25
  b64 = base64.b64encode(pdf_bytes).decode("utf-8")
26
  return f"data:application/pdf;base64,{b64}"
27
 
28
-
29
  # ------------------------------------------
30
  # HTML viewer
31
  # ------------------------------------------
@@ -33,7 +31,6 @@ def encode_html():
33
  html_bytes = load_file_bytes(BUCKET, "hochschulgesetz.html")
34
  return html_bytes.decode("utf-8", errors="ignore")
35
 
36
-
37
  # ------------------------------------------
38
  # Speech-to-text FIXED
39
  # ------------------------------------------
@@ -49,7 +46,6 @@ def transcribe(audio_path):
49
  )
50
  return (result.text or "").strip()
51
 
52
-
53
  # ------------------------------------------
54
  # MAIN CHAT FUNCTION
55
  # ------------------------------------------
@@ -70,19 +66,29 @@ def chat_fn(text, audio, history):
70
  # 2) RAG
71
  answer, docs = rag_answer(question, history or [])
72
 
73
- # 3) Build Quellen (click được)
74
  html = "<ol>"
75
  for i, d in enumerate(docs):
76
  meta = d.get("metadata", {}) or {}
77
  src = meta.get("source", "?")
 
 
78
 
 
79
  if "Prüfungsordnung" in src:
80
- link = PDF_URL
 
 
 
 
 
81
  else:
82
- link = HG_URL
83
-
84
- page = meta.get("page", None)
85
- page_info = f"(Seite {page})" if page else ""
 
 
86
 
87
  snippet = (d.get("content") or "")[:200]
88
 
@@ -96,16 +102,15 @@ def chat_fn(text, audio, history):
96
  """
97
  html += "</ol>"
98
 
99
- # 4) Gradio message history
100
  new_history = (history or []) + [
101
  {"role": "user", "content": question},
102
  {"role": "assistant", "content": answer},
103
  ]
104
 
105
- # Reset audio input
106
  return new_history, html, gr.update(value=None)
107
 
108
-
109
  # ------------------------------------------
110
  # UI LAYOUT
111
  # ------------------------------------------
@@ -116,7 +121,10 @@ with gr.Blocks() as demo:
116
  with gr.Column(scale=3):
117
  chatbot = gr.Chatbot(label="Chat (RAG)")
118
  text_input = gr.Textbox(label="Text Eingabe")
119
- audio_input = gr.Audio(type="filepath", label="Spracheingabe (Mikrofon)")
 
 
 
120
  send_btn = gr.Button("Senden")
121
 
122
  with gr.Column(scale=2):
 
16
  PDF_URL = f"{os.environ['SUPABASE_URL']}/storage/v1/object/public/{BUCKET}/pruefungsordnung.pdf"
17
  HG_URL = f"{os.environ['SUPABASE_URL']}/storage/v1/object/public/{BUCKET}/hochschulgesetz.html"
18
 
 
19
  # ------------------------------------------
20
  # Viewer PDF base64
21
  # ------------------------------------------
 
24
  b64 = base64.b64encode(pdf_bytes).decode("utf-8")
25
  return f"data:application/pdf;base64,{b64}"
26
 
 
27
  # ------------------------------------------
28
  # HTML viewer
29
  # ------------------------------------------
 
31
  html_bytes = load_file_bytes(BUCKET, "hochschulgesetz.html")
32
  return html_bytes.decode("utf-8", errors="ignore")
33
 
 
34
  # ------------------------------------------
35
  # Speech-to-text FIXED
36
  # ------------------------------------------
 
46
  )
47
  return (result.text or "").strip()
48
 
 
49
  # ------------------------------------------
50
  # MAIN CHAT FUNCTION
51
  # ------------------------------------------
 
66
  # 2) RAG
67
  answer, docs = rag_answer(question, history or [])
68
 
69
+ # 3) Build the Quellen (sources) list: clickable links, distinguishing PDF from HTML
70
  html = "<ol>"
71
  for i, d in enumerate(docs):
72
  meta = d.get("metadata", {}) or {}
73
  src = meta.get("source", "?")
74
+ page = meta.get("page", None)
75
+ anchor_id = meta.get("anchor_id") # set by ingest.py
76
 
77
+ # Choose the link depending on the source
78
  if "Prüfungsordnung" in src:
79
+ # try to jump to the correct page (Seite)
80
+ if page:
81
+ link = f"{PDF_URL}#page={page}"
82
+ else:
83
+ link = PDF_URL
84
+ page_info = f"(Seite {page})" if page else ""
85
  else:
86
+ # Hochschulgesetz NRW – use the anchor_id inside hochschulgesetz.html
87
+ if anchor_id:
88
+ link = f"{HG_URL}#{anchor_id}"
89
+ else:
90
+ link = HG_URL
91
+ page_info = "" # HTML has no page numbers
92
 
93
  snippet = (d.get("content") or "")[:200]
94
 
 
102
  """
103
  html += "</ol>"
104
 
105
+ # 4) Gradio message history ("messages" format)
106
  new_history = (history or []) + [
107
  {"role": "user", "content": question},
108
  {"role": "assistant", "content": answer},
109
  ]
110
 
111
+ # Reset audio input (clear the old waveform)
112
  return new_history, html, gr.update(value=None)
113
 
 
114
  # ------------------------------------------
115
  # UI LAYOUT
116
  # ------------------------------------------
 
121
  with gr.Column(scale=3):
122
  chatbot = gr.Chatbot(label="Chat (RAG)")
123
  text_input = gr.Textbox(label="Text Eingabe")
124
+ audio_input = gr.Audio(
125
+ type="filepath",
126
+ label="Spracheingabe (Mikrofon)"
127
+ )
128
  send_btn = gr.Button("Senden")
129
 
130
  with gr.Column(scale=2):