Spaces:

kaburia
/

policy-analysis

Paused

App Files Files Community

kaburia committed on Aug 30, 2025

Commit

c22f1bf

1 Parent(s): 18ecb0d

agent

Browse files

Files changed (2) hide show

app.py +88 -22
file_n.html +147 -0

app.py CHANGED Viewed

@@ -34,15 +34,16 @@ PERSISTED_HISTORY = load_history()
 # Default verbatim mode flag (quotes only, no generative summarization)
 VERBATIM_MODE_DEFAULT = True
-def _citation(meta):
     """Return a concise citation token for a metadata dict.
-    Example: [EnergyPolicy2018 p.17]
     """
     src_raw = os.path.basename(meta.get('source', 'Unknown'))
-    # Strip common extensions for brevity
-    src = os.path.splitext(src_raw)[0]
     page = meta.get('page_label') or meta.get('page') or 'unknown'
-    return f"[{src} p.{page}]"
 def _extract_quotes(query: str, docs, max_quotes: int = 12):
     import re, math
@@ -73,7 +74,7 @@ def _extract_quotes(query: str, docs, max_quotes: int = 12):
         if key in seen:
             continue
         seen.add(key)
-        out.append(f"• \"{s}\" {_citation(meta)}")
         if len(out) >= max_quotes:
             break
     return out
@@ -199,13 +200,25 @@ def _extract_enumerated_objectives(text: str):
         ordered.append((lab, body))
     return ordered
-def _format_objectives_markdown(objs, meta_docs):
     if not objs:
         return None
-    hdr = "### Policy Objectives (Verbatim)\n" + f"Extracted {len(objs)} objective(s) from source document(s).\n\n"
-    bullets = [f"({lab}) {txt}" for lab, txt in objs]
-    src_note = "> Source documents: " + ", ".join(sorted(meta_docs)) if meta_docs else ""
-    return hdr + "\n".join(bullets) + ("\n\n" + src_note if src_note else "")
 def chat_response(message, history, verbatim_mode=True):
     """
@@ -291,6 +304,7 @@ def chat_response(message, history, verbatim_mode=True):
                 top_docs = consolidated
         if verbatim_mode:
             # Specialized extraction for enumerated objectives if user asks for objectives
             wants_objectives = 'objective' in message.lower()
             objectives_output = None
@@ -315,7 +329,7 @@ def chat_response(message, history, verbatim_mode=True):
                     seen_lab.add(lab)
                     dedup.append((lab, body))
                 if len(dedup) >= 3:  # threshold to treat as valid objective list
-                    md = _format_objectives_markdown(dedup, doc_names)
                     if md:
                         objectives_output = md
                         yield md
@@ -329,11 +343,18 @@ def chat_response(message, history, verbatim_mode=True):
                 rows = _extract_quote_records(message, top_docs)
                 if not rows:
                     return "Not found in sources."
                 table_md = _quote_records_to_table(rows)
                 doc_set = sorted({r['Document'] for r in rows})
-                header = f"### Comparative Excerpts\nExtracted {len(rows)} objective-related sentence(s) from {len(doc_set)} document(s).\n\n"
-                guidance = "> Columns: Document, Page, Excerpt (truncated), Citation."
-                answer = header + guidance + "\n\n" + table_md
                 yield answer
                 try:
                     log_exchange(message, answer, meta={"mode": "verbatim_compare", "docs": doc_set})
@@ -341,7 +362,36 @@ def chat_response(message, history, verbatim_mode=True):
                     pass
                 return
             else:
-                quotes = _extract_quotes(message, top_docs)
                 if not quotes:
                     return "Not found in sources."
                 # Summarize document + page coverage
@@ -355,14 +405,14 @@ def chat_response(message, history, verbatim_mode=True):
                     pg = m.get('page_label') or m.get('page')
                     if pg is not None:
                         pages.add(str(pg))
-                coverage = f"from {len(doc_ids)} document(s)"
                 if want_page is not None:
                     coverage += f" (page {want_page})"
                 elif pages:
                     coverage += f" across pages {', '.join(sorted(pages))}"
-                header = f"### Verbatim Policy Excerpts\nFound {len(quotes)} excerpt(s) {coverage}.\n\n"
-                formatting_note = "> Each bullet is an exact sentence-level excerpt with its source citation."
-                answer = header + formatting_note + "\n\n" + "\n".join(quotes)
                 yield answer
                 try:
                     log_exchange(message, answer, meta={"mode": "verbatim", "page": want_page, "docs": doc_ids})
@@ -404,11 +454,16 @@ def chat_response(message, history, verbatim_mode=True):
         heading_added = False
         for chunk in stream_llm_response(messages):
             if not heading_added:
-                # Prepend a heading once for readability
-                chunk = "### Answer\n" + chunk.lstrip()
                 heading_added = True
             response += chunk
             yield response
         # After final response, log exchange persistently
         try:
             log_exchange(message, response, meta={"pages": [getattr(d.metadata,'page_label', None) if hasattr(d,'metadata') else None for d in top_docs]})
@@ -484,6 +539,17 @@ with gr.Blocks(title="Kenya Policy Assistant - Chat", theme=gr.themes.Soft()) as
     # 🏛️ Kenya Policy Assistant - Interactive Chat
     Ask questions about Kenya's policies and have a conversation! I can help you understand policy documents with sentiment and coherence analysis.
     """)
     with gr.Row():
         with gr.Column(scale=3):

 # Default verbatim mode flag (quotes only, no generative summarization)
 VERBATIM_MODE_DEFAULT = True
+def _citation(meta, alias_map=None):
     """Return a concise citation token for a metadata dict.
+    Example: [S1 p.17] where S1 alias maps to full doc name in Sources section.
+    Falls back to base filename when no alias_map provided.
     """
     src_raw = os.path.basename(meta.get('source', 'Unknown'))
+    base = os.path.splitext(src_raw)[0]
+    label = alias_map.get(base, base) if alias_map else base
     page = meta.get('page_label') or meta.get('page') or 'unknown'
+    return f"[{label} p.{page}]"
 def _extract_quotes(query: str, docs, max_quotes: int = 12):
     import re, math
         if key in seen:
             continue
         seen.add(key)
+        out.append(f"- \"{s}\" {_citation(meta)}")
         if len(out) >= max_quotes:
             break
     return out
         ordered.append((lab, body))
     return ordered
+def _format_objectives_markdown(objs, meta_docs, alias_map=None):
     if not objs:
         return None
+    hdr = f"**Policy Objectives** ({len(objs)})\n"
+    bullets = [f"{i+1}. ({lab}) {txt}" for i,(lab, txt) in enumerate(objs)]
+    src_note = "\n\nSources:\n" + "\n".join([f"- {alias_map.get(d,d)}" for d in sorted(meta_docs)]) if meta_docs else ""
+    return hdr + "\n".join(bullets) + src_note
+def _build_alias_map(docs):
+    bases = []
+    for d in docs:
+        meta = getattr(d,'metadata',{})
+        base = os.path.splitext(os.path.basename(meta.get('source','Unknown')))[0]
+        if base not in bases:
+            bases.append(base)
+    alias_map = {}
+    for idx, b in enumerate(bases, start=1):
+        alias_map[b] = f"S{idx}"
+    return alias_map
 def chat_response(message, history, verbatim_mode=True):
     """
                 top_docs = consolidated
         if verbatim_mode:
+            alias_map = _build_alias_map(top_docs)
             # Specialized extraction for enumerated objectives if user asks for objectives
             wants_objectives = 'objective' in message.lower()
             objectives_output = None
                     seen_lab.add(lab)
                     dedup.append((lab, body))
                 if len(dedup) >= 3:  # threshold to treat as valid objective list
+                    md = _format_objectives_markdown(dedup, doc_names, alias_map=alias_map)
                     if md:
                         objectives_output = md
                         yield md
                 rows = _extract_quote_records(message, top_docs)
                 if not rows:
                     return "Not found in sources."
+                # Replace Document column with alias
+                alias_map_rows = _build_alias_map(top_docs)
+                for r in rows:
+                    r['Document'] = alias_map_rows.get(r['Document'], r['Document'])
+                    # Rebuild citation using alias
+                    # Extract meta again not stored; citation already present keep as is for now
                 table_md = _quote_records_to_table(rows)
                 doc_set = sorted({r['Document'] for r in rows})
+                header = f"**Comparative Excerpts** ({len(rows)} sentences)\n"
+                guidance = "Columns: Document alias, Page, Excerpt, Citation."
+                sources_section = "\n\nSources:\n" + "\n".join([f"- {alias_map_rows.get(k,k)}: {k}" for k in sorted(alias_map_rows)])
+                answer = header + guidance + "\n\n" + table_md + sources_section
                 yield answer
                 try:
                     log_exchange(message, answer, meta={"mode": "verbatim_compare", "docs": doc_set})
                     pass
                 return
             else:
+                # Rebuild quotes with alias citation for cleanliness
+                quotes_raw = []
+                import re, math
+                terms = [t.lower() for t in re.findall(r"[A-Za-z0-9]+", message) if len(t)>2]
+                term_set = set(terms)
+                scored=[]
+                for d in top_docs:
+                    meta = getattr(d,'metadata',{})
+                    sentences = re.split(r"(?<=[\.!?])\s+", d.page_content)
+                    for sent in sentences:
+                        s = sent.strip()
+                        if not s:
+                            continue
+                        toks=[w.lower() for w in re.findall(r"[A-Za-z0-9]+", s)]
+                        if not toks:
+                            continue
+                        overlap = len(term_set.intersection(toks))
+                        if overlap==0:
+                            continue
+                        score = overlap / math.log(len(toks)+1,2)
+                        scored.append((score,s,meta))
+                scored.sort(key=lambda x:x[0], reverse=True)
+                seen=set(); quotes=[]
+                for score, s, meta in scored:
+                    key=(s, meta.get('source'), meta.get('page_label'))
+                    if key in seen: continue
+                    seen.add(key)
+                    quotes.append(f"- \"{s}\" {_citation(meta, alias_map)}")
+                    if len(quotes)>=12: break
+                quotes = quotes
                 if not quotes:
                     return "Not found in sources."
                 # Summarize document + page coverage
                     pg = m.get('page_label') or m.get('page')
                     if pg is not None:
                         pages.add(str(pg))
+                coverage = f"{len(quotes)} excerpt(s) from {len(doc_ids)} document(s)"
                 if want_page is not None:
                     coverage += f" (page {want_page})"
                 elif pages:
                     coverage += f" across pages {', '.join(sorted(pages))}"
+                header = f"**Verbatim Excerpts** ({coverage})\n"
+                sources_section = "\n\nSources:\n" + "\n".join([f"- {_citation({'source': sid}, alias_map).split()[0][1:-1]}: {sid}" for sid in doc_ids])
+                answer = header + "\n".join(quotes) + sources_section
                 yield answer
                 try:
                     log_exchange(message, answer, meta={"mode": "verbatim", "page": want_page, "docs": doc_ids})
         heading_added = False
         for chunk in stream_llm_response(messages):
             if not heading_added:
+                chunk = "**Answer**\n" + chunk.lstrip()
                 heading_added = True
             response += chunk
             yield response
+        # Append sources block (non-streamed) for clarity
+        alias_map_final = _build_alias_map(top_docs)
+        if alias_map_final:
+            sources_block = "\n\nSources:\n" + "\n".join([f"- {a}: {doc}" for doc,a in {v:k for k,v in alias_map_final.items()}.items()])
+            response += sources_block
+            yield response
         # After final response, log exchange persistently
         try:
             log_exchange(message, response, meta={"pages": [getattr(d.metadata,'page_label', None) if hasattr(d,'metadata') else None for d in top_docs]})
     # 🏛️ Kenya Policy Assistant - Interactive Chat
     Ask questions about Kenya's policies and have a conversation! I can help you understand policy documents with sentiment and coherence analysis.
     """)
+    # Embedded external policy-agent chatbot widget (as requested)
+    gr.HTML('''<script async
+    src="https://q77iuwf7ncfemoonbzon2iyd.agents.do-ai.run/static/chatbot/widget.js"
+    data-agent-id="fcad9141-8590-11f0-b074-4e013e2ddde4"
+    data-chatbot-id="oTQKgtWMkQLbLVw7CIHkbxw25Pu9jekn"
+    data-name="policy-agent Chatbot"
+    data-primary-color="#031B4E"
+    data-secondary-color="#E5E8ED"
+    data-button-background-color="#0061EB"
+    data-starting-message="Hello! I am your policy analysis bot made to help you comb through the policies."
+    data-logo="/static/chatbot/icons/default-agent.svg"></script>''')
     with gr.Row():
         with gr.Column(scale=3):

file_n.html ADDED Viewed

	@@ -0,0 +1,147 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width,initial-scale=1" />
+  <title>Policy-Agent Chatbot – Single-File Demo</title>
+  <meta name="description" content="Drop-in demo page embedding the Policy-Agent chatbot widget." />
+  <style>
+    :root{
+      --brand:#031B4E;
+      --accent:#0061EB;
+      --muted:#E5E8ED;
+      --ink:#0b1426;
+      --bg:#f7f9fc;
+    }
+    *{box-sizing:border-box}
+    html,body{height:100%}
+    body{
+      margin:0;
+      font:16px/1.5 system-ui,-apple-system,Segoe UI,Roboto,Inter,Arial,sans-serif;
+      color:var(--ink);
+      background:var(--bg);
+    }
+    header{
+      background:linear-gradient(135deg,var(--brand),#0b3a9d 65%);
+      color:#fff;
+      padding:28px 20px;
+    }
+    header .wrap{max-width:1000px;margin:0 auto;display:flex;align-items:center;gap:16px}
+    .logo{
+      width:40px;height:40px;border-radius:10px;background:#fff;display:grid;place-items:center;
+      color:var(--brand);font-weight:700
+    }
+    h1{margin:0;font-size:1.5rem}
+    main{max-width:1000px;margin:30px auto;padding:0 20px}
+    .card{
+      background:#fff;border:1px solid var(--muted);border-radius:14px;
+      box-shadow:0 6px 16px rgba(3,27,78,.06);overflow:hidden
+    }
+    .card .hero{
+      padding:24px 24px 8px 24px;display:flex;flex-wrap:wrap;gap:18px;align-items:center
+    }
+    .hero h2{flex:1 1 320px;margin:0;font-size:1.35rem}
+    .pill{
+      display:inline-flex;align-items:center;gap:8px;
+      background:var(--muted);color:#334;
+      padding:8px 12px;border-radius:999px;font-size:.9rem
+    }
+    .body{padding:12px 24px 24px 24px;color:#425466}
+    .cta{
+      display:flex;gap:12px;flex-wrap:wrap;margin-top:12px
+    }
+    .btn{
+      appearance:none;border:0;cursor:pointer;
+      background:var(--accent);color:#fff;padding:12px 16px;border-radius:10px;
+      font-weight:600;box-shadow:0 4px 12px rgba(0,97,235,.25);transition:transform .06s ease
+    }
+    .btn:active{transform:translateY(1px)}
+    .btn.secondary{
+      background:#fff;color:#0b3a9d;border:1px solid var(--muted);box-shadow:none
+    }
+    footer{color:#6b7280;text-align:center;font-size:.85rem;margin:28px 0}
+    code{background:#f2f4f8;padding:2px 6px;border-radius:6px}
+    .support{margin-top:8px;font-size:.9rem;color:#667}
+  </style>
+</head>
+<body>
+  <header>
+    <div class="wrap">
+      <div class="logo">PA</div>
+      <h1>Policy-Agent Chatbot – Embedded Widget</h1>
+    </div>
+  </header>
+  <main>
+    <section class="card">
+      <div class="hero">
+        <h2>Turn-key conversational policy analysis, embedded on a single page.</h2>
+        <span class="pill">Agent: <strong>policy-agent</strong></span>
+        <span class="pill">Mode: Widget</span>
+      </div>
+      <div class="body">
+        <p>
+          This page demonstrates how to embed your DigitalOcean-hosted agent. The floating chat
+          launcher will appear once the widget script loads. Everything—HTML, CSS, and JS—is in this file.
+        </p>
+        <div class="cta">
+          <button id="openChat" class="btn">Open Chatbot</button>
+          <button id="closeChat" class="btn secondary">Close Chatbot</button>
+        </div>
+        <p class="support">
+          If you don’t see the launcher, check your network/CSP and that the
+          <code>data-agent-id</code> is correct.
+        </p>
+      </div>
+    </section>
+    <footer>
+      © <span id="y"></span> Policy-Agent Demo. All rights reserved.
+    </footer>
+  </main>
+  <!-- Your chatbot widget: embed once, near the end of <body> -->
+  <script async
+    src="https://q77iuwf7ncfemoonbzon2iyd.agents.do-ai.run/static/chatbot/widget.js"
+    data-agent-id="fcad9141-8590-11f0-b074-4e013e2ddde4"
+    data-chatbot-id="oTQKgtWMkQLbLVw7CIHkbxw25Pu9jekn"
+    data-name="policy-agent Chatbot"
+    data-primary-color="#031B4E"
+    data-secondary-color="#E5E8ED"
+    data-button-background-color="#0061EB"
+    data-starting-message="Hello! I am your policy analysis bot made to help you comb through the policies."
+    data-logo="/static/chatbot/icons/default-agent.svg">
+  </script>
+  <!-- Small helper script: tries to open/close the widget if a public API is exposed -->
+  <script>
+    // Footer year
+    document.getElementById('y').textContent = new Date().getFullYear();
+    // Some widgets expose a global with open()/close(). We try politely; if not, we no-op.
+    function getWidget(){
+      // Common patterns; adjust to your vendor if they document an API.
+      return window.PolicyAgentWidget || window.ChatbotWidget || window.AgentWidget || null;
+    }
+    document.getElementById('openChat').addEventListener('click', () => {
+      const w = getWidget();
+      if (w && typeof w.open === 'function') { w.open(); }
+      // If no API, the user can click the floating launcher; this is just progressive enhancement.
+    });
+    document.getElementById('closeChat').addEventListener('click', () => {
+      const w = getWidget();
+      if (w && typeof w.close === 'function') { w.close(); }
+    });
+    // Basic CSP hint (optional): log if the script fails to load
+    window.addEventListener('error', (e) => {
+      if (e.target && e.target.tagName === 'SCRIPT' &&
+          String(e.target.src).includes('/static/chatbot/widget.js')) {
+        console.warn('Chatbot widget failed to load. Check CSP and network allowlists.');
+      }
+    }, true);
+  </script>
+</body>
+</html>