kaburia committed on
Commit
c22f1bf
·
1 Parent(s): 18ecb0d
Files changed (2) hide show
  1. app.py +88 -22
  2. file_n.html +147 -0
app.py CHANGED
@@ -34,15 +34,16 @@ PERSISTED_HISTORY = load_history()
34
  # Default verbatim mode flag (quotes only, no generative summarization)
35
  VERBATIM_MODE_DEFAULT = True
36
 
37
- def _citation(meta):
38
  """Return a concise citation token for a metadata dict.
39
- Example: [EnergyPolicy2018 p.17]
 
40
  """
41
  src_raw = os.path.basename(meta.get('source', 'Unknown'))
42
- # Strip common extensions for brevity
43
- src = os.path.splitext(src_raw)[0]
44
  page = meta.get('page_label') or meta.get('page') or 'unknown'
45
- return f"[{src} p.{page}]"
46
 
47
  def _extract_quotes(query: str, docs, max_quotes: int = 12):
48
  import re, math
@@ -73,7 +74,7 @@ def _extract_quotes(query: str, docs, max_quotes: int = 12):
73
  if key in seen:
74
  continue
75
  seen.add(key)
76
- out.append(f" \"{s}\" {_citation(meta)}")
77
  if len(out) >= max_quotes:
78
  break
79
  return out
@@ -199,13 +200,25 @@ def _extract_enumerated_objectives(text: str):
199
  ordered.append((lab, body))
200
  return ordered
201
 
202
- def _format_objectives_markdown(objs, meta_docs):
203
  if not objs:
204
  return None
205
- hdr = "### Policy Objectives (Verbatim)\n" + f"Extracted {len(objs)} objective(s) from source document(s).\n\n"
206
- bullets = [f"({lab}) {txt}" for lab, txt in objs]
207
- src_note = "> Source documents: " + ", ".join(sorted(meta_docs)) if meta_docs else ""
208
- return hdr + "\n".join(bullets) + ("\n\n" + src_note if src_note else "")
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
  def chat_response(message, history, verbatim_mode=True):
211
  """
@@ -291,6 +304,7 @@ def chat_response(message, history, verbatim_mode=True):
291
  top_docs = consolidated
292
 
293
  if verbatim_mode:
 
294
  # Specialized extraction for enumerated objectives if user asks for objectives
295
  wants_objectives = 'objective' in message.lower()
296
  objectives_output = None
@@ -315,7 +329,7 @@ def chat_response(message, history, verbatim_mode=True):
315
  seen_lab.add(lab)
316
  dedup.append((lab, body))
317
  if len(dedup) >= 3: # threshold to treat as valid objective list
318
- md = _format_objectives_markdown(dedup, doc_names)
319
  if md:
320
  objectives_output = md
321
  yield md
@@ -329,11 +343,18 @@ def chat_response(message, history, verbatim_mode=True):
329
  rows = _extract_quote_records(message, top_docs)
330
  if not rows:
331
  return "Not found in sources."
 
 
 
 
 
 
332
  table_md = _quote_records_to_table(rows)
333
  doc_set = sorted({r['Document'] for r in rows})
334
- header = f"### Comparative Excerpts\nExtracted {len(rows)} objective-related sentence(s) from {len(doc_set)} document(s).\n\n"
335
- guidance = "> Columns: Document, Page, Excerpt (truncated), Citation."
336
- answer = header + guidance + "\n\n" + table_md
 
337
  yield answer
338
  try:
339
  log_exchange(message, answer, meta={"mode": "verbatim_compare", "docs": doc_set})
@@ -341,7 +362,36 @@ def chat_response(message, history, verbatim_mode=True):
341
  pass
342
  return
343
  else:
344
- quotes = _extract_quotes(message, top_docs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
  if not quotes:
346
  return "Not found in sources."
347
  # Summarize document + page coverage
@@ -355,14 +405,14 @@ def chat_response(message, history, verbatim_mode=True):
355
  pg = m.get('page_label') or m.get('page')
356
  if pg is not None:
357
  pages.add(str(pg))
358
- coverage = f"from {len(doc_ids)} document(s)"
359
  if want_page is not None:
360
  coverage += f" (page {want_page})"
361
  elif pages:
362
  coverage += f" across pages {', '.join(sorted(pages))}"
363
- header = f"### Verbatim Policy Excerpts\nFound {len(quotes)} excerpt(s) {coverage}.\n\n"
364
- formatting_note = "> Each bullet is an exact sentence-level excerpt with its source citation."
365
- answer = header + formatting_note + "\n\n" + "\n".join(quotes)
366
  yield answer
367
  try:
368
  log_exchange(message, answer, meta={"mode": "verbatim", "page": want_page, "docs": doc_ids})
@@ -404,11 +454,16 @@ def chat_response(message, history, verbatim_mode=True):
404
  heading_added = False
405
  for chunk in stream_llm_response(messages):
406
  if not heading_added:
407
- # Prepend a heading once for readability
408
- chunk = "### Answer\n" + chunk.lstrip()
409
  heading_added = True
410
  response += chunk
411
  yield response
 
 
 
 
 
 
412
  # After final response, log exchange persistently
413
  try:
414
  log_exchange(message, response, meta={"pages": [getattr(d.metadata,'page_label', None) if hasattr(d,'metadata') else None for d in top_docs]})
@@ -484,6 +539,17 @@ with gr.Blocks(title="Kenya Policy Assistant - Chat", theme=gr.themes.Soft()) as
484
  # 🏛️ Kenya Policy Assistant - Interactive Chat
485
  Ask questions about Kenya's policies and have a conversation! I can help you understand policy documents with sentiment and coherence analysis.
486
  """)
 
 
 
 
 
 
 
 
 
 
 
487
 
488
  with gr.Row():
489
  with gr.Column(scale=3):
 
34
  # Default verbatim mode flag (quotes only, no generative summarization)
35
  VERBATIM_MODE_DEFAULT = True
36
 
37
def _citation(meta, alias_map=None):
    """Return a concise citation token for a metadata dict.

    Example: [S1 p.17] where the S1 alias maps to the full document name in
    the Sources section. Falls back to the base filename (extension removed)
    when no alias_map is provided or the document has no alias in it.

    Args:
        meta: Metadata dict; the keys read are 'source', 'page_label' and
            'page'. Any of them may be absent.
        alias_map: Optional mapping of base filename -> short alias.

    Returns:
        A string of the form "[<label> p.<page>]". The page is 'unknown' when
        neither 'page_label' nor 'page' carries a usable value.
    """
    # `or` (not a .get default) so an explicit None source is also tolerated
    # instead of crashing inside os.path.basename.
    src_raw = os.path.basename(meta.get('source') or 'Unknown')
    base = os.path.splitext(src_raw)[0]
    label = alias_map.get(base, base) if alias_map else base

    # Prefer the human-readable label, then the numeric page. Only None and
    # the empty string count as "missing": a plain truthiness test would
    # discard a valid page value of 0.
    # NOTE(review): some loaders appear to store 'page' as a 0-based index -
    # confirm whether it should be shifted by one for display.
    page = 'unknown'
    for key in ('page_label', 'page'):
        value = meta.get(key)
        if value is not None and value != '':
            page = value
            break
    return f"[{label} p.{page}]"
47
 
48
  def _extract_quotes(query: str, docs, max_quotes: int = 12):
49
  import re, math
 
74
  if key in seen:
75
  continue
76
  seen.add(key)
77
+ out.append(f"- \"{s}\" {_citation(meta)}")
78
  if len(out) >= max_quotes:
79
  break
80
  return out
 
200
  ordered.append((lab, body))
201
  return ordered
202
 
203
def _format_objectives_markdown(objs, meta_docs, alias_map=None):
    """Render extracted objectives as a numbered markdown list.

    Args:
        objs: Sized sequence of (label, text) pairs, in display order.
        meta_docs: Iterable of source document names; listed in sorted order
            under a "Sources:" heading. May be empty or None.
        alias_map: Optional mapping of document name -> short alias. When a
            document has an alias it is listed as "alias: name" so the alias
            can be resolved back to the document; otherwise only the name is
            shown.

    Returns:
        The markdown string, or None when there are no objectives.
    """
    if not objs:
        return None
    # alias_map defaults to None; normalise once so the lookups below never
    # hit AttributeError when the caller omits it.
    aliases = alias_map or {}
    hdr = f"**Policy Objectives** ({len(objs)})\n"
    bullets = [
        f"{pos}. ({lab}) {txt}" for pos, (lab, txt) in enumerate(objs, start=1)
    ]
    src_note = ""
    if meta_docs:
        entries = []
        for doc in sorted(meta_docs):
            alias = aliases.get(doc)
            entries.append(f"- {alias}: {doc}" if alias else f"- {doc}")
        src_note = "\n\nSources:\n" + "\n".join(entries)
    return hdr + "\n".join(bullets) + src_note
210
+
211
def _build_alias_map(docs):
    """Assign short aliases (S1, S2, ...) to the distinct source documents.

    Each document's base name is the basename of its metadata 'source' with
    the extension removed. Aliases are numbered in order of first appearance
    in ``docs``.

    Args:
        docs: Iterable of objects that may expose a ``metadata`` dict. A
            missing or None ``metadata``, or a missing/None 'source', is
            treated as the document named 'Unknown'.

    Returns:
        Dict mapping base name -> alias string; empty when ``docs`` is empty.
    """
    alias_map = {}
    for d in docs:
        # `or {}` also covers a metadata attribute that exists but is None.
        meta = getattr(d, 'metadata', None) or {}
        source = meta.get('source') or 'Unknown'
        base = os.path.splitext(os.path.basename(source))[0]
        # Dicts keep insertion order, so the next alias number is simply the
        # current size + 1; membership is O(1) instead of a list scan.
        if base not in alias_map:
            alias_map[base] = f"S{len(alias_map) + 1}"
    return alias_map
222
 
223
  def chat_response(message, history, verbatim_mode=True):
224
  """
 
304
  top_docs = consolidated
305
 
306
  if verbatim_mode:
307
+ alias_map = _build_alias_map(top_docs)
308
  # Specialized extraction for enumerated objectives if user asks for objectives
309
  wants_objectives = 'objective' in message.lower()
310
  objectives_output = None
 
329
  seen_lab.add(lab)
330
  dedup.append((lab, body))
331
  if len(dedup) >= 3: # threshold to treat as valid objective list
332
+ md = _format_objectives_markdown(dedup, doc_names, alias_map=alias_map)
333
  if md:
334
  objectives_output = md
335
  yield md
 
343
  rows = _extract_quote_records(message, top_docs)
344
  if not rows:
345
  return "Not found in sources."
346
+ # Replace Document column with alias
347
+ alias_map_rows = _build_alias_map(top_docs)
348
+ for r in rows:
349
+ r['Document'] = alias_map_rows.get(r['Document'], r['Document'])
350
+ # Rebuild citation using alias
351
+ # Extract meta again not stored; citation already present keep as is for now
352
  table_md = _quote_records_to_table(rows)
353
  doc_set = sorted({r['Document'] for r in rows})
354
+ header = f"**Comparative Excerpts** ({len(rows)} sentences)\n"
355
+ guidance = "Columns: Document alias, Page, Excerpt, Citation."
356
+ sources_section = "\n\nSources:\n" + "\n".join([f"- {alias_map_rows.get(k,k)}: {k}" for k in sorted(alias_map_rows)])
357
+ answer = header + guidance + "\n\n" + table_md + sources_section
358
  yield answer
359
  try:
360
  log_exchange(message, answer, meta={"mode": "verbatim_compare", "docs": doc_set})
 
362
  pass
363
  return
364
  else:
365
+ # Rebuild quotes with alias citation for cleanliness
366
+ quotes_raw = []
367
+ import re, math
368
+ terms = [t.lower() for t in re.findall(r"[A-Za-z0-9]+", message) if len(t)>2]
369
+ term_set = set(terms)
370
+ scored=[]
371
+ for d in top_docs:
372
+ meta = getattr(d,'metadata',{})
373
+ sentences = re.split(r"(?<=[\.!?])\s+", d.page_content)
374
+ for sent in sentences:
375
+ s = sent.strip()
376
+ if not s:
377
+ continue
378
+ toks=[w.lower() for w in re.findall(r"[A-Za-z0-9]+", s)]
379
+ if not toks:
380
+ continue
381
+ overlap = len(term_set.intersection(toks))
382
+ if overlap==0:
383
+ continue
384
+ score = overlap / math.log(len(toks)+1,2)
385
+ scored.append((score,s,meta))
386
+ scored.sort(key=lambda x:x[0], reverse=True)
387
+ seen=set(); quotes=[]
388
+ for score, s, meta in scored:
389
+ key=(s, meta.get('source'), meta.get('page_label'))
390
+ if key in seen: continue
391
+ seen.add(key)
392
+ quotes.append(f"- \"{s}\" {_citation(meta, alias_map)}")
393
+ if len(quotes)>=12: break
394
+ quotes = quotes
395
  if not quotes:
396
  return "Not found in sources."
397
  # Summarize document + page coverage
 
405
  pg = m.get('page_label') or m.get('page')
406
  if pg is not None:
407
  pages.add(str(pg))
408
+ coverage = f"{len(quotes)} excerpt(s) from {len(doc_ids)} document(s)"
409
  if want_page is not None:
410
  coverage += f" (page {want_page})"
411
  elif pages:
412
  coverage += f" across pages {', '.join(sorted(pages))}"
413
+ header = f"**Verbatim Excerpts** ({coverage})\n"
414
+ sources_section = "\n\nSources:\n" + "\n".join([f"- {_citation({'source': sid}, alias_map).split()[0][1:-1]}: {sid}" for sid in doc_ids])
415
+ answer = header + "\n".join(quotes) + sources_section
416
  yield answer
417
  try:
418
  log_exchange(message, answer, meta={"mode": "verbatim", "page": want_page, "docs": doc_ids})
 
454
  heading_added = False
455
  for chunk in stream_llm_response(messages):
456
  if not heading_added:
457
+ chunk = "**Answer**\n" + chunk.lstrip()
 
458
  heading_added = True
459
  response += chunk
460
  yield response
461
+ # Append sources block (non-streamed) for clarity
462
+ alias_map_final = _build_alias_map(top_docs)
463
+ if alias_map_final:
464
+ sources_block = "\n\nSources:\n" + "\n".join([f"- {a}: {doc}" for doc,a in {v:k for k,v in alias_map_final.items()}.items()])
465
+ response += sources_block
466
+ yield response
467
  # After final response, log exchange persistently
468
  try:
469
  log_exchange(message, response, meta={"pages": [getattr(d.metadata,'page_label', None) if hasattr(d,'metadata') else None for d in top_docs]})
 
539
  # 🏛️ Kenya Policy Assistant - Interactive Chat
540
  Ask questions about Kenya's policies and have a conversation! I can help you understand policy documents with sentiment and coherence analysis.
541
  """)
542
+ # Embedded external policy-agent chatbot widget (as requested)
543
+ gr.HTML('''<script async
544
+ src="https://q77iuwf7ncfemoonbzon2iyd.agents.do-ai.run/static/chatbot/widget.js"
545
+ data-agent-id="fcad9141-8590-11f0-b074-4e013e2ddde4"
546
+ data-chatbot-id="oTQKgtWMkQLbLVw7CIHkbxw25Pu9jekn"
547
+ data-name="policy-agent Chatbot"
548
+ data-primary-color="#031B4E"
549
+ data-secondary-color="#E5E8ED"
550
+ data-button-background-color="#0061EB"
551
+ data-starting-message="Hello! I am your policy analysis bot made to help you comb through the policies."
552
+ data-logo="/static/chatbot/icons/default-agent.svg"></script>''')
553
 
554
  with gr.Row():
555
  with gr.Column(scale=3):
file_n.html ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8" />
5
+ <meta name="viewport" content="width=device-width,initial-scale=1" />
6
+ <title>Policy-Agent Chatbot – Single-File Demo</title>
7
+ <meta name="description" content="Drop-in demo page embedding the Policy-Agent chatbot widget." />
8
+
9
+ <style>
10
+ :root{
11
+ --brand:#031B4E;
12
+ --accent:#0061EB;
13
+ --muted:#E5E8ED;
14
+ --ink:#0b1426;
15
+ --bg:#f7f9fc;
16
+ }
17
+ *{box-sizing:border-box}
18
+ html,body{height:100%}
19
+ body{
20
+ margin:0;
21
+ font:16px/1.5 system-ui,-apple-system,Segoe UI,Roboto,Inter,Arial,sans-serif;
22
+ color:var(--ink);
23
+ background:var(--bg);
24
+ }
25
+ header{
26
+ background:linear-gradient(135deg,var(--brand),#0b3a9d 65%);
27
+ color:#fff;
28
+ padding:28px 20px;
29
+ }
30
+ header .wrap{max-width:1000px;margin:0 auto;display:flex;align-items:center;gap:16px}
31
+ .logo{
32
+ width:40px;height:40px;border-radius:10px;background:#fff;display:grid;place-items:center;
33
+ color:var(--brand);font-weight:700
34
+ }
35
+ h1{margin:0;font-size:1.5rem}
36
+ main{max-width:1000px;margin:30px auto;padding:0 20px}
37
+ .card{
38
+ background:#fff;border:1px solid var(--muted);border-radius:14px;
39
+ box-shadow:0 6px 16px rgba(3,27,78,.06);overflow:hidden
40
+ }
41
+ .card .hero{
42
+ padding:24px 24px 8px 24px;display:flex;flex-wrap:wrap;gap:18px;align-items:center
43
+ }
44
+ .hero h2{flex:1 1 320px;margin:0;font-size:1.35rem}
45
+ .pill{
46
+ display:inline-flex;align-items:center;gap:8px;
47
+ background:var(--muted);color:#334;
48
+ padding:8px 12px;border-radius:999px;font-size:.9rem
49
+ }
50
+ .body{padding:12px 24px 24px 24px;color:#425466}
51
+ .cta{
52
+ display:flex;gap:12px;flex-wrap:wrap;margin-top:12px
53
+ }
54
+ .btn{
55
+ appearance:none;border:0;cursor:pointer;
56
+ background:var(--accent);color:#fff;padding:12px 16px;border-radius:10px;
57
+ font-weight:600;box-shadow:0 4px 12px rgba(0,97,235,.25);transition:transform .06s ease
58
+ }
59
+ .btn:active{transform:translateY(1px)}
60
+ .btn.secondary{
61
+ background:#fff;color:#0b3a9d;border:1px solid var(--muted);box-shadow:none
62
+ }
63
+ footer{color:#6b7280;text-align:center;font-size:.85rem;margin:28px 0}
64
+ code{background:#f2f4f8;padding:2px 6px;border-radius:6px}
65
+ .support{margin-top:8px;font-size:.9rem;color:#667}
66
+ </style>
67
+ </head>
68
+ <body>
69
+ <header>
70
+ <div class="wrap">
71
+ <div class="logo">PA</div>
72
+ <h1>Policy-Agent Chatbot – Embedded Widget</h1>
73
+ </div>
74
+ </header>
75
+
76
+ <main>
77
+ <section class="card">
78
+ <div class="hero">
79
+ <h2>Turn-key conversational policy analysis, embedded on a single page.</h2>
80
+ <span class="pill">Agent: <strong>policy-agent</strong></span>
81
+ <span class="pill">Mode: Widget</span>
82
+ </div>
83
+ <div class="body">
84
+ <p>
85
+ This page demonstrates how to embed your DigitalOcean-hosted agent. The floating chat
86
+ launcher will appear once the widget script loads. Everything—HTML, CSS, and JS—is in this file.
87
+ </p>
88
+ <div class="cta">
89
+ <button id="openChat" class="btn">Open Chatbot</button>
90
+ <button id="closeChat" class="btn secondary">Close Chatbot</button>
91
+ </div>
92
+ <p class="support">
93
+ If you don’t see the launcher, check your network/CSP and that the
94
+ <code>data-agent-id</code> is correct.
95
+ </p>
96
+ </div>
97
+ </section>
98
+ <footer>
99
+ © <span id="y"></span> Policy-Agent Demo. All rights reserved.
100
+ </footer>
101
+ </main>
102
+
103
+ <!-- Your chatbot widget: embed once, near the end of <body> -->
104
+ <script async
105
+ src="https://q77iuwf7ncfemoonbzon2iyd.agents.do-ai.run/static/chatbot/widget.js"
106
+ data-agent-id="fcad9141-8590-11f0-b074-4e013e2ddde4"
107
+ data-chatbot-id="oTQKgtWMkQLbLVw7CIHkbxw25Pu9jekn"
108
+ data-name="policy-agent Chatbot"
109
+ data-primary-color="#031B4E"
110
+ data-secondary-color="#E5E8ED"
111
+ data-button-background-color="#0061EB"
112
+ data-starting-message="Hello! I am your policy analysis bot made to help you comb through the policies."
113
+ data-logo="/static/chatbot/icons/default-agent.svg">
114
+ </script>
115
+
116
+ <!-- Small helper script: tries to open/close the widget if a public API is exposed -->
117
+ <script>
118
+ // Footer year
119
+ document.getElementById('y').textContent = new Date().getFullYear();
120
+
121
+ // Some widgets expose a global with open()/close(). We try politely; if not, we no-op.
122
+ function getWidget(){
123
+ // Common patterns; adjust to your vendor if they document an API.
124
+ return window.PolicyAgentWidget || window.ChatbotWidget || window.AgentWidget || null;
125
+ }
126
+
127
+ document.getElementById('openChat').addEventListener('click', () => {
128
+ const w = getWidget();
129
+ if (w && typeof w.open === 'function') { w.open(); }
130
+ // If no API, the user can click the floating launcher; this is just progressive enhancement.
131
+ });
132
+
133
+ document.getElementById('closeChat').addEventListener('click', () => {
134
+ const w = getWidget();
135
+ if (w && typeof w.close === 'function') { w.close(); }
136
+ });
137
+
138
+ // Basic CSP hint (optional): log if the script fails to load
139
+ window.addEventListener('error', (e) => {
140
+ if (e.target && e.target.tagName === 'SCRIPT' &&
141
+ String(e.target.src).includes('/static/chatbot/widget.js')) {
142
+ console.warn('Chatbot widget failed to load. Check CSP and network allowlists.');
143
+ }
144
+ }, true);
145
+ </script>
146
+ </body>
147
+ </html>