Spaces:

dipsikha25
/

chatbot

Running

App Files Files Community

dipsikha25 committed 12 days ago

Commit

723d44c

verified ·

1 Parent(s): 4953f43

Update app.py

Browse files

Files changed (1) hide show

app.py +1017 -149

app.py CHANGED Viewed

@@ -1,231 +1,1099 @@
-# =========================================================
-# AUTO-INSTALL PACKAGES
-# =========================================================
-import importlib.util
-import subprocess
 import sys
-def ensure_package(pkg):
-    if importlib.util.find_spec(pkg) is None:
-        subprocess.check_call([sys.executable, "-m", "pip", "install", pkg])
-ensure_package("openpyxl")
-# =========================================================
-# IMPORTS
-# =========================================================
-import re
-import unicodedata
-from pathlib import Path
-from difflib import SequenceMatcher
-import pandas as pd
-import gradio as gr
 from langchain_community.document_loaders import PyPDFLoader
-# =========================================================
-# CONFIG
-# =========================================================
-PDF_FILE = "data.pdf"
-DEFAULT_KPI_EXCEL = "CIA Consolidated KPIs_MetricsGovernance (1).xlsx"
 # =========================================================
-# TEXT HELPERS
 # =========================================================
-def normalize_loose(text: str):
     if not text:
-        return ""
-    text = unicodedata.normalize("NFKC", text.lower())
     text = re.sub(r'[^a-z0-9]+', ' ', text)
-    return re.sub(r'\s+', ' ', text).strip()
-def is_truthy_excel_value(value):
-    if pd.isna(value):
-        return False
-    return str(value).strip().lower() in ['yes','y','true','1','x']
-# =========================================================
-# ✅ EXCEL LOADER (FIXED WITH YOUR HEADERS)
-# =========================================================
-def load_kpi_excel_mapping(excel_path: str):
-    if not Path(excel_path).exists():
-        print(f"❌ Excel not found: {excel_path}")
-        return {}
-    df = pd.read_excel(excel_path, sheet_name='KPI Glossary', engine='openpyxl')
-    df.columns = [str(c).strip() for c in df.columns]
-    print("✅ Columns detected:", df.columns.tolist())
-    mapping = {}
-    for idx, row in df.iterrows():
-        # ✅ USE YOUR REAL COLUMN NAMES
-        kpi = str(row.get("KPI_Name", "")).strip()
-        definition = str(row.get("KPI_Definitions", "")).strip()
-        measure = str(row.get("PowerBI Field/Measure", "")).strip()
-        formula = str(row.get("KPI_DAX", "")).strip()
-        if not kpi:
-            continue
-        record = {
-            "kpi_name": kpi,
-            "measure_name": measure,
-            "formula": formula,
-            "business_logic": definition,
-            "report_sources": []
-        }
-        # ✅ Automatically capture all YES columns
-        for col in df.columns:
-            if is_truthy_excel_value(row[col]):
-                record["report_sources"].append(col)
-        mapping[normalize_loose(kpi)] = record
-    print(f"✅ Loaded {len(mapping)} KPIs from Excel")
-    return mapping
 # =========================================================
-# ✅ SMART FUZZY SEARCH (FIXES YOUR ISSUE)
 # =========================================================
-def similarity(a, b):
-    return SequenceMatcher(None, a, b).ratio()
-def lookup_excel(query, mapping):
-    query_key = normalize_loose(query)
-    best_match = None
-    best_score = 0
-    for key, value in mapping.items():
-        score = similarity(query_key, key)
-        query_tokens = set(query_key.split())
-        key_tokens = set(key.split())
-        overlap = len(query_tokens & key_tokens) / max(len(query_tokens), 1)
-        final_score = (score * 0.6) + (overlap * 0.4)
-        if final_score > best_score:
-            best_score = final_score
-            best_match = value
-    print(f"🔎 Best match score: {best_score}")
-    if best_score > 0.6:
-        return best_match
-    return None
-# =========================================================
-# PDF FALLBACK
-# =========================================================
-def load_pdf():
-    if not Path(PDF_FILE).exists():
-        return []
-    return PyPDFLoader(PDF_FILE).load()
-PDF_DOCS = load_pdf()
-def extract_from_pdf(query):
-    q = normalize_loose(query)
-    for doc in PDF_DOCS:
-        text = normalize_loose(doc.page_content)
-        if q in text:
-            return {
-                "kpi_name": query,
-                "business_logic": doc.page_content[:500],
-                "formula": "Extracted from PDF",
-                "report_sources": []
-            }
-    return None
 # =========================================================
-# FORMAT OUTPUT
 # =========================================================
-def render_badges(sources):
-    if not sources:
-        return "Not mapped"
-    return " | ".join(sources[:15])  # limit long lists
 # =========================================================
-# MAIN ANSWER FUNCTION
 # =========================================================
-def get_answer(question, excel_mapping):
-    if not question or not question.strip():
-        return "Enter KPI", "-", "-", "-"
-    # ✅ 1. Excel search
-    excel_data = lookup_excel(question, excel_mapping)
-    if excel_data:
         return (
-            f"✅ KPI: {excel_data['kpi_name']}",
-            excel_data["business_logic"] or "-",
-            excel_data["formula"] or "-",
-            render_badges(excel_data["report_sources"])
         )
-    # ✅ 2. PDF fallback
-    pdf_data = extract_from_pdf(question)
-    if pdf_data:
         return (
-            f"📄 KPI (PDF): {pdf_data['kpi_name']}",
-            pdf_data["business_logic"],
-            pdf_data["formula"],
-            "Not mapped"
         )
-    return "❌ KPI not found", "-", "-", "-"
-# =========================================================
-# LOAD EXCEL
-# =========================================================
-EXCEL_MAPPING = load_kpi_excel_mapping(DEFAULT_KPI_EXCEL)
 # =========================================================
-# UI
 # =========================================================
-with gr.Blocks() as demo:
-    gr.Markdown("# 💊 Pharma KPI Copilot (Excel Driven)")
-    question = gr.Textbox(label="Enter KPI Name")
-    btn = gr.Button("Search")
-    kpi = gr.Textbox(label="KPI")
-    logic = gr.Textbox(label="Business Logic")
-    formula = gr.Textbox(label="Formula")
-    reports = gr.Textbox(label="Reports / Usage")
-    def run(q):
-        return get_answer(q, EXCEL_MAPPING)
-    btn.click(run, inputs=[question],
-              outputs=[kpi, logic, formula, reports])
 demo.launch()

+# app.py
+# Pharma KPI Copilot
+# - Auto-loads KPI Glossary Excel from same folder as app.py
+# - Reads PDF for KPI definition / formula / notes
+# - Fixes Excel mapping so report names show instead of "Not mapped"
+# - Displays report / offering values as colored badges
+# - Installs openpyxl automatically if missing
+import os
+import re
 import sys
+import subprocess
+import importlib.util
+import unicodedata
+from pathlib import Path
+from difflib import SequenceMatcher
+def ensure_package(package_name: str):
+    """Pip-install `package_name` into the running interpreter when no import spec is found for it."""
+    already_available = importlib.util.find_spec(package_name) is not None
+    if already_available:
+        return
+    print(f"Package '{package_name}' not found. Installing...")
+    pip_command = [sys.executable, '-m', 'pip', 'install', package_name]
+    subprocess.check_call(pip_command)
+    print(f"Package '{package_name}' installed successfully.")
+# Required for pandas Excel engine
+ensure_package('openpyxl')
+import gradio as gr
+import pandas as pd
 from langchain_community.document_loaders import PyPDFLoader
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+os.environ['TOKENIZERS_PARALLELISM'] = 'false'
+SERVICENOW_INCIDENT_URL = os.getenv(
+    'SERVICENOW_INCIDENT_URL',
+    'https://sanofiservices.service-now.com/onesupport?id=sc_cat_item&sys_id=a5c743d39761b19cbb28fa871153afc3',
+)
+PDF_FILE = 'data.pdf'
+DEFAULT_KPI_EXCEL = 'CIA Consolidated KPIs_MetricsGovernance (1).xlsx'
+REPORT_FLAG_COLUMNS = [
+    'SFE', 'B360', 'OMNICHANNEL', 'C360', 'E&C', 'AC',
+    'Field Reporting', 'Content Reporting', 'Above Country', 'Country'
+]
+EXTRA_INFO_COLUMNS = [
+    'Placement in Offering', 'Calculated at:', 'Domain', 'Interaction', 'Channels', 'PowerBI Field/Measure'
+]
+MANUAL_ALIAS_MAP = {
+    # 'hcp reach in occp': 'HCPs in OCCP',
+}
 # =========================================================
+# 1) TEXT HELPERS
 # =========================================================
+def fix_pdf_text(text: str) -> str:
+    """Return `text` NFKC-normalised with common PDF extraction artefacts repaired.
+    Ligatures, curly quotes and long dashes become ASCII, soft hyphens are dropped, and a
+    theta-like glyph sitting between two word characters is rewritten as 'ti'.
+    Falsy input (None or '') yields ''.
+    """
+    if not text:
+        return ''
+    # Every key is a single character and no replacement contains a key, so one
+    # translate pass gives the same result as replacing the keys one after another.
+    char_fixes = str.maketrans({
+        'ﬁ': 'fi', 'ﬂ': 'fl', '“': '"', '”': '"', '’': "'", '‘': "'", '–': '-', '—': '-', '\u00ad': '',
+    })
+    cleaned = unicodedata.normalize('NFKC', text).translate(char_fixes)
+    return re.sub(r'(?<=\w)[θΘϑϴƟɵ](?=\w)', 'ti', cleaned)
+def normalize_exact(text: str) -> str:
+    """Lower-case and trim the repaired text, collapsing every whitespace run to a single space."""
+    repaired = fix_pdf_text(text or '')
+    lowered = repaired.lower().strip()
+    return re.sub(r'\s+', ' ', lowered)
+def singularize_token(token: str) -> str:
+    """Crudely singularise one token: '...ies' becomes '...y', a trailing 's' is dropped (but not 'ss')."""
+    word = token.strip().lower()
+    if word.endswith('ies') and len(word) > 4:
+        return f'{word[:-3]}y'
+    drops_plural_s = len(word) > 3 and word.endswith('s') and not word.endswith('ss')
+    return word[:-1] if drops_plural_s else word
+def normalize_loose(text: str) -> str:
+    """Build the loose matching key: lower-case alphanumeric tokens, each singularised, joined by single spaces."""
+    lowered = fix_pdf_text(text or '').lower().strip()
+    for symbol in ('#', '%'):
+        lowered = lowered.replace(symbol, ' ')
+    alnum_only = re.sub(r'[^a-z0-9]+', ' ', lowered)
+    collapsed = re.sub(r'\s+', ' ', alnum_only).strip()
+    if not collapsed:
+        return ''
+    return ' '.join(map(singularize_token, collapsed.split()))
+def tokenize_loose(text: str):
+    """Split the loose-normalised form of `text` into tokens (an empty list when nothing survives)."""
+    # str.split() on an empty string already yields [], so no separate empty check is needed.
+    return normalize_loose(text).split()
+STOPWORDS = {
+    'a', 'an', 'the', 'in', 'of', 'with', 'and', 'or', 'for', 'to', 'by', 'on',
+    'this', 'that', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
+    'what', 'how', 'why', 'show', 'give', 'tell', 'me', 'please', 'explain',
+    'search', 'find', 'calculated', 'computed', 'measured', 'formula', 'mean', 'important',
+}
+def significant_tokens(text: str):
+    """Return the loose tokens of `text` minus STOPWORDS, or all tokens if that would leave nothing."""
+    all_tokens = tokenize_loose(text)
+    meaningful = [tok for tok in all_tokens if tok not in STOPWORDS]
+    return meaningful or all_tokens
+def clean_user_query(text: str) -> str:
+    """Reduce a free-text question to the bare KPI phrase, lower-cased.
+    Trailing question marks are removed, then leading request phrases ("what is", "what's",
+    "show me", ...) are stripped. Prefixes are tried in list order, each at most once.
+    Falsy input yields ''.
+    """
+    text = fix_pdf_text(text or '').strip()
+    text = re.sub(r'[?]+$', '', text).strip()
+    patterns = [
+        # fix_pdf_text keeps the apostrophe, so the contraction pattern must accept
+        # "what's" and "whats" as well as the space-separated "what s".
+        r'^what is\s+', r"^what\s*'?s\s+", r'^show me\s+', r'^give me\s+', r'^tell me\s+',
+        r'^explain\s+', r'^find\s+', r'^search\s+for\s+', r'^how is\s+', r'^why is\s+',
+    ]
+    lowered = text.lower()
+    for pat in patterns:
+        lowered = re.sub(pat, '', lowered).strip()
+    return lowered.strip()
+def clean_formula_text(text: str) -> str:
+    """Canonicalise a formula for comparison: lower-case, drop '--' through end of line, remove all whitespace."""
+    lowered = fix_pdf_text(text or '').lower()
+    without_comments = re.sub(r'--.*', '', lowered)
+    return re.sub(r'\s+', '', without_comments)
+def html_escape(text: str) -> str:
+    """Escape &, <, > and double quotes in str(text) for HTML output; None becomes ''."""
+    if text is None:
+        return ''
+    escaped = str(text)
+    # '&' goes first so the entities produced afterwards are not escaped a second time.
+    for raw, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'), ('"', '&quot;')):
+        escaped = escaped.replace(raw, entity)
+    return escaped
+def nl2br(text: str) -> str:
+    """HTML-escape the repaired text and turn each newline into a <br> tag."""
+    safe_html = html_escape(fix_pdf_text(text))
+    return safe_html.replace('\n', '<br>')
+def is_generic_followup_question(text: str) -> bool:
+    """Tell whether `text` opens with a context-free follow-up such as "how is this calculated" or "show formula"."""
+    question = normalize_exact(text)
+    generic_patterns = (
+        r'^how is this calculated', r'^how is this computed', r'^how is this measured',
+        r'^what is the formula', r'^show formula', r'^show the formula', r'^give formula',
+        r'^why is this important', r'^explain this', r'^what does this mean',
+    )
+    for pattern in generic_patterns:
+        if re.search(pattern, question):
+            return True
+    return False
+def extract_kpi_name_from_notes(notes_text: str) -> str:
+    """Pull the value after the '**KPI Name:**' label out of a notes string, or '' if the label is absent."""
+    if not notes_text:
+        return ''
+    found = re.search(r'\*\*KPI Name:\*\*\s*(.+)', notes_text)
+    if found is None:
+        return ''
+    return found.group(1).strip()
+def resolve_alias(user_query: str):
+    """Map a user query onto a MANUAL_ALIAS_MAP target when its loose form is a known alias.
+    Returns (query_to_use, matched_alias_key, canonical_term). The last two are None when no alias
+    applies; a query that normalises to nothing returns the original `user_query` unchanged.
+    """
+    cleaned = clean_user_query(user_query)
+    loose_query = normalize_loose(cleaned)
+    if not loose_query:
+        return user_query, None, None
+    aliases = {}
+    for alias, canonical in MANUAL_ALIAS_MAP.items():
+        aliases[normalize_loose(alias)] = canonical
+    if loose_query not in aliases:
+        return cleaned, None, None
+    canonical_term = aliases[loose_query]
+    return canonical_term, loose_query, canonical_term
 # =========================================================
+# 2) EXCEL LOADING AND MAPPING
 # =========================================================
+def is_truthy_excel_value(value):
+    """Treat a cell as a 'yes' flag when its trimmed, lower-cased text is yes/y/true/1/x; missing values are False."""
+    if pd.isna(value):
+        return False
+    flag_text = str(value).strip().lower()
+    return flag_text in {'yes', 'y', 'true', '1', 'x'}
+def detect_glossary_header_row(raw_df: pd.DataFrame):
+    """Find the real KPI Glossary header row.
+    Scans at most the first 60 rows and returns the positional index of the first row whose
+    normalised cells mention 'metrics kpis' together with 'powerbi field measure' or
+    'definitions'. Returns None when no such row exists.
+    """
+    scan_limit = min(len(raw_df), 60)
+    for row_pos in range(scan_limit):
+        cells = [normalize_exact(str(cell)).replace('/', ' ') for cell in raw_df.iloc[row_pos].tolist()]
+        row_text = ' | '.join(cells)
+        # A row holding both labels as whole cells also satisfies this substring test,
+        # so one check covers the exact-cell case and the looser one.
+        has_kpi_label = 'metrics kpis' in row_text
+        has_other_label = 'powerbi field measure' in row_text or 'definitions' in row_text
+        if has_kpi_label and has_other_label:
+            return row_pos
+    return None
+def build_glossary_dataframe(excel_path: str):
+    """Read the 'KPI Glossary' sheet and rebuild it with its detected header row as column names.
+    Returns (data, header_row), or (None, None) when no header row is detected.
+    """
+    # Read without a header so the header row can be located by its content.
+    raw = pd.read_excel(excel_path, sheet_name='KPI Glossary', header=None, engine='openpyxl')
+    header_row = detect_glossary_header_row(raw)
+    if header_row is None:
+        return None, None
+    header = [str(x).strip() for x in raw.iloc[header_row].tolist()]
+    # Everything below the header row is treated as data.
+    data = raw.iloc[header_row + 1:].copy().reset_index(drop=True)
+    data.columns = header
+    data = data.dropna(how='all')
+    # Drop columns whose header text is blank or the string 'nan' (what str() gives for an empty cell).
+    keep_cols = [str(c).strip() != '' and str(c).strip().lower() != 'nan' for c in data.columns]
+    data = data.loc[:, keep_cols]
+    data.columns = [str(c).strip() for c in data.columns]
+    return data, header_row
+def merge_excel_record(a: dict, b: dict):
+    """Combine two Excel records into one; a falsy side simply yields the other.
+    Names prefer `a`, report sources and row ids are unioned and sorted, and each
+    EXTRA_INFO_COLUMNS entry becomes the distinct non-empty values joined with ' | '.
+    """
+    if not a:
+        return b
+    if not b:
+        return a
+    extra_info = {}
+    for col in EXTRA_INFO_COLUMNS:
+        distinct = []
+        for rec in (a, b):
+            val = rec.get('extra_info', {}).get(col)
+            if val and val not in distinct:
+                distinct.append(val)
+        if distinct:
+            extra_info[col] = ' | '.join(distinct)
+    def union_sorted(field):
+        return sorted(set(a.get(field, [])) | set(b.get(field, [])))
+    return {
+        'kpi_name': a.get('kpi_name') or b.get('kpi_name', ''),
+        'measure_name': a.get('measure_name') or b.get('measure_name', ''),
+        'report_sources': union_sorted('report_sources'),
+        'extra_info': extra_info,
+        'row_ids': union_sorted('row_ids'),
+    }
+def add_record_to_mapping(mapping: dict, key: str, record: dict):
+    """Store `record` under `key`, merging with any record already there; an empty key is ignored."""
+    if not key:
+        return
+    if key in mapping:
+        mapping[key] = merge_excel_record(mapping[key], record)
+    else:
+        mapping[key] = record
+def _cell_text(value) -> str:
+    """Return the trimmed text of an Excel cell, or '' when the cell is missing (NaN/None)."""
+    if pd.isna(value):
+        return ''
+    return str(value).strip()
+def load_kpi_excel_mapping(excel_path: str):
+    """Build the lookup from loose KPI / measure names to KPI Glossary records.
+    Each record holds kpi_name, measure_name, report_sources (REPORT_FLAG_COLUMNS whose cell is
+    truthy), extra_info (non-empty EXTRA_INFO_COLUMNS values) and row_ids. A row is indexed under
+    the loose form of its KPI name and of its measure name; records sharing a key are merged.
+    Returns {} when the file is missing, the sheet cannot be read, no header row is detected,
+    or neither name column exists.
+    """
+    if not excel_path or not Path(excel_path).exists():
+        print(f'Excel not found: {excel_path}')
+        return {}
+    try:
+        df, header_row = build_glossary_dataframe(excel_path)
+    except Exception as e:
+        # Best effort: an unreadable workbook disables the Excel enrichment instead of stopping the app.
+        print(f'Could not read KPI Glossary sheet: {e}')
+        return {}
+    if df is None or df.empty:
+        print('Could not detect KPI Glossary header row or data is empty.')
+        return {}
+    print(f'KPI Glossary header row detected at: {header_row}')
+    print(f'KPI Glossary columns detected: {list(df.columns)[:20]}')
+    kpi_col = 'Metrics/KPIs' if 'Metrics/KPIs' in df.columns else None
+    measure_col = 'PowerBI Field/Measure' if 'PowerBI Field/Measure' in df.columns else None
+    if not kpi_col and not measure_col:
+        print('Metrics/KPIs and PowerBI Field/Measure columns not found.')
+        return {}
+    mapping = {}
+    for idx, row in df.iterrows():
+        # str(NaN) is 'nan', so blank cells are mapped to '' before the emptiness check;
+        # otherwise rows without a name would be indexed under the key 'nan'.
+        kpi_name = _cell_text(row.get(kpi_col)) if kpi_col else ''
+        measure_name = _cell_text(row.get(measure_col)) if measure_col else ''
+        if not kpi_name and not measure_name:
+            continue
+        report_sources = [col for col in REPORT_FLAG_COLUMNS if col in df.columns and is_truthy_excel_value(row.get(col))]
+        extra_info = {}
+        for col in EXTRA_INFO_COLUMNS:
+            if col in df.columns:
+                text = _cell_text(row.get(col))
+                if text:
+                    extra_info[col] = text
+        record = {
+            'kpi_name': kpi_name,
+            'measure_name': measure_name,
+            'report_sources': sorted(set(report_sources)),
+            'extra_info': extra_info,
+            'row_ids': [int(idx)],
+        }
+        if kpi_name:
+            add_record_to_mapping(mapping, normalize_loose(kpi_name), record)
+        if measure_name:
+            add_record_to_mapping(mapping, normalize_loose(measure_name), record)
+    print(f'Final mapped KPI keys: {len(mapping)}')
+    return mapping
+def excel_candidate_keys(*texts):
+    """Return the distinct, non-empty loose keys of the given texts, in first-seen order."""
+    loose_keys = (normalize_loose(t) for t in texts if t)
+    # dict.fromkeys de-duplicates while keeping insertion order.
+    return list(dict.fromkeys(k for k in loose_keys if k))
+def excel_token_coverage_score(query_key: str, candidate_key: str):
+    """Return (share of query tokens found in the candidate, number of shared tokens).
+    Works on significant tokens; (0.0, 0) when either side has none.
+    """
+    query_set = set(significant_tokens(query_key))
+    candidate_set = set(significant_tokens(candidate_key))
+    if not query_set or not candidate_set:
+        return 0.0, 0
+    shared = len(query_set & candidate_set)
+    return shared / max(len(query_set), 1), shared
+def lookup_kpi_excel_info(kpi_name: str, measure_name: str, excel_mapping: dict, query_text: str = None):
+    """Find the Excel record for a KPI: exact loose-key lookup first, fuzzy match second.
+    Candidate keys come from query_text, kpi_name and measure_name, in that order, and all exact
+    hits are merged. Otherwise a mapping key qualifies when it contains every significant query
+    token, or its SequenceMatcher ratio is >= 0.84, or it shares >= 2 tokens with ratio >= 0.70;
+    the qualifying key with the highest ratio wins. Returns None when nothing matches or the
+    mapping is empty.
+    """
+    if not excel_mapping:
+        return None
+    candidate_keys = excel_candidate_keys(query_text, kpi_name, measure_name)
+    exact = None
+    for key in candidate_keys:
+        if key not in excel_mapping:
+            continue
+        exact = merge_excel_record(exact, excel_mapping[key]) if exact else excel_mapping[key]
+    if exact:
+        return exact
+    winner, winner_ratio = None, 0.0
+    for query_key in candidate_keys:
+        for mapped_key in excel_mapping:
+            coverage, shared = excel_token_coverage_score(query_key, mapped_key)
+            ratio = SequenceMatcher(None, query_key, mapped_key).ratio()
+            qualifies = coverage >= 1.0 or ratio >= 0.84 or (shared >= 2 and ratio >= 0.70)
+            if qualifies and ratio > winner_ratio:
+                winner, winner_ratio = mapped_key, ratio
+    return excel_mapping.get(winner) if winner else None
+def load_default_excel_if_present():
+    """Load the mapping from DEFAULT_KPI_EXCEL when that file exists, otherwise return {}."""
+    if not Path(DEFAULT_KPI_EXCEL).exists():
+        return {}
+    return load_kpi_excel_mapping(DEFAULT_KPI_EXCEL)
+# =========================================================
+# 3) PDF LOAD / PARSE
+# =========================================================
+# Load the PDF only when it is present, so a missing data.pdf does not stop the app at import.
+# page_docs and chunk_docs are then empty and the PDF lookups simply find nothing.
+if Path(PDF_FILE).exists():
+    loader = PyPDFLoader(PDF_FILE)
+    page_docs = loader.load()
+else:
+    print(f'PDF not found: {PDF_FILE}')
+    loader = None
+    page_docs = []
+for d in page_docs:
+    d.page_content = fix_pdf_text(d.page_content)
+splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=220)
+chunk_docs = splitter.split_documents(page_docs)
+def normalize_lines(text: str):
+    """Split the repaired text into trimmed lines, dropping the blank ones."""
+    stripped = (raw.strip() for raw in fix_pdf_text(text).splitlines())
+    return [line for line in stripped if line]
+def is_metadata_line(line: str) -> bool:
+    """Report whether the loose-normalised line is one of the known field labels (name, kpi id, description, ...)."""
+    label = normalize_loose(line)
+    patterns = (
+        r'^name$', r'^kpi id', r'^measure name', r'^description$', r'^definition$',
+        r'^business meaning$', r'^category$', r'^owner$', r'^source$', r'^dashboard$', r'^glossary$',
+    )
+    for pattern in patterns:
+        if re.search(pattern, label):
+            return True
+    return False
+def looks_like_formula_start(line: str) -> bool:
+    """Heuristically flag a line as formula text.
+    True when the line contains one of the listed function/keyword fragments (calculate(, sum(,
+    var , return, ...), or both '[' and ']', or an '=' sign.
+    """
+    repaired = fix_pdf_text(line)
+    lowered = repaired.lower().strip()
+    formula_starts = (
+        'calculate(', 'sum(', 'count(', 'distinctcount(', 'divide(', 'if(', 'filter(',
+        'removefilters(', 'all(', 'average(', 'var ', 'return', 'switch(', 'countrows(',
+        'summarize(', 'lookupvalue(', 'selectedvalue(',
+    )
+    has_keyword = any(marker in lowered for marker in formula_starts)
+    has_brackets = '[' in repaired and ']' in repaired
+    return has_keyword or has_brackets or '=' in repaired
+def extract_named_field(lines, labels):
+    """Return the line following the first line that matches any label (loose comparison), or '' if none does."""
+    wanted = {normalize_loose(label) for label in labels}
+    # Pairing each line with its successor skips a label on the very last line,
+    # which has no value line after it.
+    for label_line, value_line in zip(lines, lines[1:]):
+        if normalize_loose(label_line) in wanted:
+            return fix_pdf_text(value_line.strip())
+    return ''
+def extract_label_block(lines, labels):
+    """Collect the lines after the first matching label, up to the next different metadata label, joined by spaces."""
+    wanted = [normalize_loose(label) for label in labels]
+    label_pos = next((pos for pos, line in enumerate(lines) if normalize_loose(line) in wanted), None)
+    if label_pos is None:
+        return ''
+    block = []
+    for raw in lines[label_pos + 1:]:
+        current = fix_pdf_text(raw.strip())
+        # A repeat of one of the wanted labels does not end the block; any other label does.
+        if is_metadata_line(current) and normalize_loose(current) not in wanted:
+            break
+        block.append(current)
+    return ' '.join(block).strip()
+def extract_formula(lines):
+    """Extract the formula text from a list of lines, joined with newlines ('' when none is found).
+    Collection starts at the first line accepted by looks_like_formula_start and continues until a
+    metadata label is reached while the parentheses seen so far are balanced.
+    """
+    formula_lines = []
+    in_formula = False
+    # Running count of '(' minus ')' over the collected lines.
+    paren_balance = 0
+    for i, line in enumerate(lines):
+        line = fix_pdf_text(line.strip())
+        if not in_formula and looks_like_formula_start(line):
+            in_formula = True
+            formula_lines.append(line)
+            paren_balance += line.count('(') - line.count(')')
+            continue
+        if in_formula:
+            # A metadata label only ends the formula when no parenthesis is still open.
+            if is_metadata_line(line) and paren_balance <= 0:
+                break
+            formula_lines.append(line)
+            paren_balance += line.count('(') - line.count(')')
+            if paren_balance <= 0:
+                # Look one line ahead so the formula stops right before a following label.
+                next_line = fix_pdf_text(lines[i + 1].strip()) if i + 1 < len(lines) else ''
+                if next_line and is_metadata_line(next_line):
+                    break
+    return '\n'.join(formula_lines).strip()
+def remove_formula_lines(lines, formula_text):
+    """Drop every line whose repaired, trimmed text equals a line of `formula_text`.
+    With no formula text, `lines` is returned as is.
+    """
+    if not formula_text:
+        return lines
+    formula_set = set()
+    for piece in formula_text.splitlines():
+        if piece.strip():
+            formula_set.add(fix_pdf_text(piece.strip()))
+    return [line for line in lines if fix_pdf_text(line.strip()) not in formula_set]
+def build_business_meaning(audience, kpi_name, measure_name):
+    """Compose the audience-specific business-meaning sentence.
+    The sentence names the measure, else the KPI, else 'This KPI'. 'Leadership' and
+    'Analytics User' have their own wording; any other audience gets the business-user wording.
+    """
+    base_name = fix_pdf_text(measure_name or kpi_name or 'This KPI')
+    if audience == 'Leadership':
+        sentence = f"{base_name} helps leadership monitor performance and coverage trends for decision-making."
+    elif audience == 'Analytics User':
+        sentence = f"{base_name} is used in reporting and should be interpreted with source logic, filters, and exclusions."
+    else:
+        sentence = f"{base_name} helps business users understand what is being tracked and why it matters."
+    return sentence
+def parse_doc_entry(doc, audience, match_info=None, forced_kpi_name=None, excel_mapping=None, query_text=None):
+    """Parse one PDF page or chunk into a KPI entry dict and enrich it from the Excel mapping.
+    Args:
+        doc: document exposing `page_content` and a `metadata` dict.
+        audience: passed to build_business_meaning to choose the wording.
+        match_info: optional description of how the entry was found; shown in the notes.
+        forced_kpi_name: name to use when no KPI name can be read from the text.
+        excel_mapping: mapping built by load_kpi_excel_mapping; None or {} disables the Excel lookup.
+        query_text: extra text used as an Excel lookup key.
+    Returns:
+        dict with keys doc, page, context, kpi_name, kpi_id, measure_name, definition, business,
+        formula, notes, report_sources and excel_info.
+    """
+    context = fix_pdf_text(doc.page_content)
+    lines = normalize_lines(context)
+    formula = extract_formula(lines)
+    non_formula_lines = remove_formula_lines(lines, formula)
+    kpi_name = extract_named_field(non_formula_lines, ['Name'])
+    kpi_id = extract_named_field(non_formula_lines, ['KPI ID from KPI Glossary', 'KPI ID'])
+    measure_name = extract_named_field(non_formula_lines, ['Measure name in the PBI', 'Measure Name'])
+    if forced_kpi_name and (not kpi_name or normalize_loose(kpi_name) == 'not found'):
+        kpi_name = forced_kpi_name
+    definition = extract_label_block(non_formula_lines, ['Description', 'Definition'])
+    if not definition:
+        # No labelled definition: fall back to the first lines that read like one.
+        heur = []
+        for line in non_formula_lines:
+            low = line.lower()
+            if any(x in low for x in ['number of', 'count of', 'unique', '%', 'percent', 'rate of', 'ratio of', 'calculated as']):
+                heur.append(fix_pdf_text(line))
+        definition = ' '.join(heur[:3]).strip() or 'Definition not found clearly in the source extract.'
+    if not formula:
+        formula = 'Formula not found in source extract.'
+    excel_info = lookup_kpi_excel_info(kpi_name, measure_name, excel_mapping or {}, query_text=query_text)
+    report_sources = excel_info.get('report_sources', []) if excel_info else []
+    extra_excel_info = excel_info.get('extra_info', {}) if excel_info else {}
+    matched_rows = excel_info.get('row_ids', []) if excel_info else []
+    # Every note is emitted only when its own value exists. With the guards commented out, the
+    # appends ran under the preceding `if`, which raised KeyError for Excel columns missing from
+    # extra_info and printed an empty KPI ID or the text 'None' for the match info.
+    notes = []
+    if kpi_name:
+        notes.append(f"**KPI Name:** {fix_pdf_text(kpi_name)}")
+    if kpi_id:
+        notes.append(f"**KPI ID:** {fix_pdf_text(kpi_id)}")
+    if measure_name:
+        notes.append(f"**Power BI Measure:** {fix_pdf_text(measure_name)}")
+    if report_sources:
+        notes.append(f"**Report / Offering Presence (Yes columns):** {', '.join(report_sources)}")
+    if matched_rows:
+        notes.append(f"**Matched Excel Row Count:** {len(matched_rows)}")
+    if extra_excel_info.get('Placement in Offering'):
+        notes.append(f"**Placement in Offering:** {extra_excel_info['Placement in Offering']}")
+    if extra_excel_info.get('Calculated at:'):
+        notes.append(f"**Calculated at:** {extra_excel_info['Calculated at:']}")
+    if extra_excel_info.get('Domain'):
+        notes.append(f"**Domain:** {extra_excel_info['Domain']}")
+    if extra_excel_info.get('Interaction'):
+        notes.append(f"**Interaction:** {extra_excel_info['Interaction']}")
+    if extra_excel_info.get('Channels'):
+        notes.append(f"**Channels:** {extra_excel_info['Channels']}")
+    page_number = doc.metadata.get('page')
+    if page_number is not None:
+        # Shown as page + 1, as in the original note.
+        notes.append(f"**Page:** {page_number + 1}")
+    if match_info:
+        notes.append(f"**Primary Search Match:** {match_info}")
+    return {
+        'doc': doc,
+        'page': doc.metadata.get('page'),
+        'context': context,
+        'kpi_name': fix_pdf_text(kpi_name) or 'Not found',
+        'kpi_id': fix_pdf_text(kpi_id) or 'Not found',
+        'measure_name': fix_pdf_text(measure_name) or 'Not found',
+        'definition': fix_pdf_text(definition),
+        'business': build_business_meaning(audience, kpi_name, measure_name),
+        'formula': fix_pdf_text(formula),
+        'notes': '\n\n'.join(notes) if notes else 'No additional notes found.',
+        'report_sources': report_sources,
+        'excel_info': extra_excel_info,
+    }
+PARSED_CHUNKS = [parse_doc_entry(doc, 'Business User') for doc in chunk_docs]
+def entry_key(entry):
+    """Build the de-duplication key of a parsed entry.
+    The key is the exact-normalised KPI name, measure name and first 300 context characters.
+    """
+    name_key = normalize_exact(entry['kpi_name'])
+    measure_key = normalize_exact(entry['measure_name'])
+    context_key = normalize_exact(entry['context'][:300])
+    return name_key, measure_key, context_key
+def build_indices(entries):
+    """Index de-duplicated entries by exact and loose KPI name and measure name.
+    Returns (kpi_exact, measure_exact, kpi_loose, measure_loose); each maps a normalised name to
+    the list of entries carrying it. Empty names and the 'not found' placeholder are not indexed.
+    """
+    kpi_exact_index, measure_exact_index, kpi_loose_index, measure_loose_index = {}, {}, {}, {}
+    seen = set()
+    for entry in entries:
+        key = entry_key(entry)
+        if key in seen:
+            continue
+        seen.add(key)
+        targets = (
+            (kpi_exact_index, normalize_exact(entry['kpi_name'])),
+            (measure_exact_index, normalize_exact(entry['measure_name'])),
+            (kpi_loose_index, normalize_loose(entry['kpi_name'])),
+            (measure_loose_index, normalize_loose(entry['measure_name'])),
+        )
+        for index, name in targets:
+            if name and name != 'not found':
+                index.setdefault(name, []).append(entry)
+    return kpi_exact_index, measure_exact_index, kpi_loose_index, measure_loose_index
+EXACT_KPI_INDEX, EXACT_MEASURE_INDEX, LOOSE_KPI_INDEX, LOOSE_MEASURE_INDEX = build_indices(PARSED_CHUNKS)
+ALL_LOOSE_KPI_NAMES = sorted(LOOSE_KPI_INDEX.keys())
+ALL_LOOSE_MEASURE_NAMES = sorted(LOOSE_MEASURE_INDEX.keys())
+def token_overlap_score(query_text: str, candidate_text: str):
+    """Return (query coverage, shared token count, candidate token count) over significant tokens.
+    Gives (0.0, 0, 0) when either text has no tokens.
+    """
+    query_set = set(significant_tokens(query_text))
+    candidate_set = set(significant_tokens(candidate_text))
+    if not (query_set and candidate_set):
+        return 0.0, 0, 0
+    shared = len(query_set & candidate_set)
+    return shared / max(len(query_set), 1), shared, len(candidate_set)
+def find_best_exact_like_name(query_text: str):
+    """Resolve a query to an indexed KPI or measure name.
+    Tries, in order, the exact KPI, exact measure, loose KPI and loose measure indices. Failing
+    those, it considers loose names containing every significant query token (at least two
+    shared), preferring more shared tokens and fewer extra ones; KPI names are scanned before
+    measure names and a tie keeps the earlier candidate.
+    Returns (match_type, name), or (None, None) when nothing qualifies.
+    """
+    q_exact = normalize_exact(query_text)
+    q_loose = normalize_loose(query_text)
+    if not q_loose:
+        return None, None
+    direct_lookups = (
+        ('kpi_exact', q_exact, EXACT_KPI_INDEX),
+        ('measure_exact', q_exact, EXACT_MEASURE_INDEX),
+        ('kpi_loose', q_loose, LOOSE_KPI_INDEX),
+        ('measure_loose', q_loose, LOOSE_MEASURE_INDEX),
+    )
+    for match_type, key, index in direct_lookups:
+        if key in index:
+            return match_type, key
+    best, best_score = None, -1.0
+    for match_type, names in (('kpi_loose', ALL_LOOSE_KPI_NAMES), ('measure_loose', ALL_LOOSE_MEASURE_NAMES)):
+        for name in names:
+            coverage, overlap_count, candidate_size = token_overlap_score(q_loose, name)
+            if coverage != 1.0 or overlap_count < 2:
+                continue
+            score = overlap_count * 10 - max(candidate_size - overlap_count, 0)
+            if score > best_score:
+                best_score, best = score, (match_type, name)
+    return best if best else (None, None)
+def doc_contains_exact_text(doc, search_text: str) -> bool:
+    """Check whether the loose form of `search_text` occurs inside the loose form of the document text."""
+    needle = normalize_loose(search_text)
+    haystack = normalize_loose(doc.page_content)
+    return needle in haystack
 # =========================================================
+# 4) SEARCH
 # =========================================================
+def choose_primary_entry(query: str, audience: str, excel_mapping=None):
+    """Pick the best PDF entry for a query and return (parsed_entry, score), or (None, None).
+    Index matches score 100 / 95 / 90 / 88 (exact KPI, exact measure, loose KPI, loose measure).
+    Otherwise the first chunk containing the query text scores 75, then the first page scores 70.
+    """
+    if not clean_user_query(query):
+        return None, None
+    resolved_query, _, canonical_term = resolve_alias(query)
+    effective_query = canonical_term or resolved_query
+    match_type, canonical_name = find_best_exact_like_name(effective_query)
+    index_routes = {
+        'kpi_exact': (EXACT_KPI_INDEX, 'Exact KPI name match', 100.0),
+        'measure_exact': (EXACT_MEASURE_INDEX, 'Exact PBI measure match', 95.0),
+        'kpi_loose': (LOOSE_KPI_INDEX, 'Normalized KPI name match', 90.0),
+        'measure_loose': (LOOSE_MEASURE_INDEX, 'Normalized PBI measure match', 88.0),
+    }
+    if match_type in index_routes:
+        index, label, score = index_routes[match_type]
+        chosen = index[canonical_name][0]
+        return parse_doc_entry(chosen['doc'], audience, match_info=label, excel_mapping=excel_mapping, query_text=effective_query), score
+    raw_routes = (
+        (chunk_docs, 'Exact raw text found in PDF chunk', 75.0),
+        (page_docs, 'Exact raw text found in PDF page', 70.0),
+    )
+    for docs, label, score in raw_routes:
+        hits = [doc for doc in docs if doc_contains_exact_text(doc, effective_query)]
+        if hits:
+            return parse_doc_entry(hits[0], audience, match_info=label, forced_kpi_name=effective_query, excel_mapping=excel_mapping, query_text=effective_query), score
+    return None, None
+def find_second_same_occurrence(primary_entry, audience: str, excel_mapping=None):
+    """Find another occurrence of the primary entry's KPI whose first 400 context characters differ.
+    Indexed entries with the same loose KPI name are preferred (lowest page first); otherwise the
+    first chunk, then the first page, containing the loose name is used. Returns the parsed entry,
+    or None when the KPI name is missing or no other occurrence exists.
+    """
+    kpi_name = primary_entry['kpi_name']
+    target_name_loose = normalize_loose(kpi_name)
+    if not target_name_loose or target_name_loose == 'not found':
+        return None
+    primary_context = normalize_exact(primary_entry['context'][:400])
+    if target_name_loose in LOOSE_KPI_INDEX:
+        others = [e for e in LOOSE_KPI_INDEX[target_name_loose] if normalize_exact(e['context'][:400]) != primary_context]
+        if others:
+            # min() keeps the first of equally ranked entries, like a stable sort followed by [0].
+            earliest = min(others, key=lambda e: e['page'] if e['page'] is not None else 99999)
+            return parse_doc_entry(earliest['doc'], audience, excel_mapping=excel_mapping, query_text=kpi_name)
+    for docs in (chunk_docs, page_docs):
+        for doc in docs:
+            if target_name_loose in normalize_loose(doc.page_content) and normalize_exact(doc.page_content[:400]) != primary_context:
+                return parse_doc_entry(doc, audience, forced_kpi_name=kpi_name, excel_mapping=excel_mapping, query_text=kpi_name)
+    return None
 # =========================================================
+# 5) UI HELPERS
 # =========================================================
+def compare_same(value1, value2, formula=False):
+    return clean_formula_text(value1) == clean_formula_text(value2) if formula else normalize_loose(value1) == normalize_loose(value2)
+def render_badges(sources):
+    if not sources:
+        return "<span class='pill neutral'>Not mapped</span>"
+    colors = ['info', 'success', 'warning', 'neutral']
+    pills = []
+    for i, src in enumerate(sources):
+        color = colors[i % len(colors)]
+        pills.append(f"<span class='pill {color}'>{html_escape(src)}</span>")
+    return ' '.join(pills)
+def field_diff_html(left_text, right_text, formula=False):
+    left_text = fix_pdf_text(left_text or '')
+    right_text = fix_pdf_text(right_text or '')
+    if compare_same(left_text, right_text, formula=formula):
+        return "<div class='diff-box same'>No difference. Both occurrences match for this field.</div>"
+    left_lines = [ln for ln in left_text.splitlines() if ln.strip()] or ['Not found']
+    right_lines = [ln for ln in right_text.splitlines() if ln.strip()] or ['Not found']
+    removed = [x for x in left_lines if x not in right_lines]
+    added = [x for x in right_lines if x not in left_lines]
+    removed_html = ''.join(f"<li>{html_escape(line)}</li>" for line in removed[:12]) or '<li>No unique lines found.</li>'
+    added_html = ''.join(f"<li>{html_escape(line)}</li>" for line in added[:12]) or '<li>No unique lines found.</li>'
+    return f"""
+    <div class='diff-box different'>
+        <div class='diff-title'>What differs</div>
+        <div class='diff-grid'>
+            <div class='diff-col'><div class='diff-col-title'>Only in Occurrence 1</div><ul>{removed_html}</ul></div>
+            <div class='diff-col'><div class='diff-col-title'>Only in Occurrence 2</div><ul>{added_html}</ul></div>
+        </div>
+    </div>
+    """
+def build_summary_cards(entry1, entry2=None, retrieval_score=None):
+    def badge(text, kind='default'):
+        return f"<span class='pill {kind}'>{html_escape(text)}</span>"
+    page1 = f"Page {entry1['page'] + 1}" if entry1 and entry1['page'] is not None else 'Page not found'
+    report_badges = render_badges(entry1.get('report_sources', []))
+    cards = [
+        f"<div class='summary-card'><div class='summary-label'>KPI Name</div><div class='summary-value'>{html_escape(entry1['kpi_name'])}</div><div class='summary-sub'>{badge(page1, 'info')}</div></div>",
+        f"<div class='summary-card'><div class='summary-label'>KPI ID</div><div class='summary-value'>{html_escape(entry1['kpi_id'])}</div><div class='summary-sub'>{badge('Glossary reference', 'neutral')}</div></div>",
+        f"<div class='summary-card'><div class='summary-label'>PBI Measure</div><div class='summary-value'>{html_escape(entry1['measure_name'])}</div><div class='summary-sub'>{badge('Primary result', 'success')}</div></div>",
+        f"<div class='summary-card'><div class='summary-label'>Report / Offering</div><div class='summary-value badge-wrap'>{report_badges}</div><div class='summary-sub'>{badge('Yes columns from Excel', 'neutral')}</div></div>",
+    ]
+    compare_hint = 'One occurrence found'
+    compare_kind = 'neutral'
+    if entry2:
+        same_all = (
+            compare_same(entry1['kpi_name'], entry2['kpi_name']) and
+            compare_same(entry1['kpi_id'], entry2['kpi_id']) and
+            compare_same(entry1['measure_name'], entry2['measure_name']) and
+            compare_same(entry1['definition'], entry2['definition']) and
+            compare_same(entry1['formula'], entry2['formula'], formula=True)
+        )
+        compare_hint = 'Exact name match found' if same_all else 'Exact name match found (differences detected)'
+        compare_kind = 'success' if same_all else 'warning'
+    checked_text = '2 exact-name matches checked' if entry2 else 'No second exact-name match'
+    if retrieval_score is not None:
+        checked_text = f"search score {retrieval_score:.1f}"
+    cards.append(
+        f"<div class='summary-card'><div class='summary-label'>Comparison Status</div><div class='summary-value'>{html_escape(compare_hint)}</div><div class='summary-sub'>{badge(checked_text, compare_kind)}</div></div>"
+    )
+    return "<div class='summary-grid'>" + ''.join(cards) + "</div>"
+def build_side_by_side_comparison(entry1, entry2):
+    if not entry1 and not entry2:
+        return "<div class='empty-state'>No relevant KPI entry found.</div>"
+    if entry1 and not entry2:
+        page_text = f"Page {entry1['page'] + 1}" if entry1['page'] is not None else 'Unknown page'
+        kpi_text = html_escape(entry1['kpi_name'])
+        return f"<div class='compare-wrap single'><div class='compare-banner neutral'>Primary result shown for <b>{kpi_text}</b> ({html_escape(page_text)}). No second occurrence with the <b>exact same KPI name</b> was found.</div></div>"
+    same_all = (
+        compare_same(entry1['kpi_name'], entry2['kpi_name']) and
+        compare_same(entry1['kpi_id'], entry2['kpi_id']) and
+        compare_same(entry1['measure_name'], entry2['measure_name']) and
+        compare_same(entry1['definition'], entry2['definition']) and
+        compare_same(entry1['formula'], entry2['formula'], formula=True)
+    )
+    overall_class = 'success' if same_all else 'warning'
+    overall_text = 'Exact same KPI name found in two places' if same_all else 'Exact same KPI name found in two places, but details differ'
+    page1 = f"Page {entry1['page'] + 1}" if entry1['page'] is not None else 'Unknown'
+    page2 = f"Page {entry2['page'] + 1}" if entry2['page'] is not None else 'Unknown'
+    rows = []
+    fields = [
+        ('KPI Name', entry1['kpi_name'], entry2['kpi_name'], False),
+        ('KPI ID', entry1['kpi_id'], entry2['kpi_id'], False),
+        ('Power BI Measure', entry1['measure_name'], entry2['measure_name'], False),
+        ('Definition', entry1['definition'], entry2['definition'], False),
+        ('Formula', entry1['formula'], entry2['formula'], True),
+    ]
+    for label, left_val, right_val, is_formula in fields:
+        left_val, right_val = fix_pdf_text(left_val or 'Not found'), fix_pdf_text(right_val or 'Not found')
+        status = 'same' if compare_same(left_val, right_val, formula=is_formula) else 'different'
+        diff_panel = field_diff_html(left_val, right_val, formula=is_formula)
+        code_class = 'code-block' if is_formula else ''
+        rows.append(f"""
+            <div class='compare-row {status}'>
+                <div class='compare-field'><div class='field-name'>{html_escape(label)}</div><div class='field-status {status}'>{'SAME' if status == 'same' else 'DIFFERENT'}</div></div>
+                <div class='compare-cell'><div class='cell-title'>Occurrence 1</div><div class='cell-content {code_class}'>{nl2br(left_val)}</div></div>
+                <div class='compare-cell'><div class='cell-title'>Occurrence 2</div><div class='cell-content {code_class}'>{nl2br(right_val)}</div></div>
+            </div>
+            <div class='diff-row'>{diff_panel}</div>
+        """)
+    return f"""
+    <div class='compare-wrap'>
+        <div class='compare-banner {overall_class}'>{html_escape(overall_text)}</div>
+        <div class='compare-head'>
+            <div class='head-card'><div class='head-label'>Occurrence 1</div><div class='head-page'>{html_escape(page1)}</div><div class='head-name'>{html_escape(entry1['kpi_name'])}</div></div>
+            <div class='head-card'><div class='head-label'>Occurrence 2</div><div class='head-page'>{html_escape(page2)}</div><div class='head-name'>{html_escape(entry2['kpi_name'])}</div></div>
+        </div>
+        <div class='compare-table'>{''.join(rows)}</div>
+    </div>
+    """
+# =========================================================
+# 6) FEEDBACK FLOW
+# =========================================================
+def run_search_and_prepare_feedback(question, audience, excel_mapping):
+    results = get_answer(question, audience, excel_mapping=excel_mapping)
+    current_kpi_name = ''
+    if isinstance(results, tuple) and len(results) >= 5:
+        current_kpi_name = extract_kpi_name_from_notes(results[4] or '')
+    return results + (
+        current_kpi_name,
+        gr.update(visible=True), gr.update(value=None, visible=True),
+        gr.update(visible=False), gr.update(value=None), gr.update(value='', visible=False),
+        gr.update(visible=False), gr.update(value=''), gr.update(visible=False), gr.update(value=None),
+        gr.update(value='', visible=False), gr.update(value='', visible=False),
+    )
+def clear_feedback_only():
+    return (
+        gr.update(visible=False), gr.update(value=None, visible=False),
+        gr.update(visible=False), gr.update(value=None), gr.update(value='', visible=False),
+        gr.update(visible=False), gr.update(value=''), gr.update(visible=False), gr.update(value=None),
+        gr.update(value='', visible=False), gr.update(value='', visible=False),
+    )
+def on_satisfaction_change(choice):
+    if choice == 'Yes':
         return (
+            gr.update(visible=True), gr.update(visible=False), gr.update(visible=False),
+            gr.update(value='', visible=False), gr.update(value='Please rate the definition from 1 to 5.', visible=True),
         )
+    if choice == 'No':
+        return (
+            gr.update(visible=False), gr.update(visible=True), gr.update(visible=False),
+            gr.update(value='', visible=False), gr.update(value='Please ask more so the app can try again.', visible=True),
+        )
+    return (
+        gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),
+        gr.update(value='', visible=False), gr.update(value='', visible=False),
+    )
+def submit_rating(rating):
+    if rating is None:
+        return gr.update(value='Please select a rating from 1 to 5.', visible=True)
+    return gr.update(value=f"Thanks for the feedback. You rated the definition **{rating}/5**.", visible=True)
+def run_followup_search(followup_question, audience, current_kpi_name, excel_mapping):
+    if not followup_question or not followup_question.strip():
         return (
+            gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
+            gr.update(value=current_kpi_name), gr.update(visible=True), gr.update(value='No', visible=True),
+            gr.update(visible=False), gr.update(value=None), gr.update(value='', visible=False),
+            gr.update(visible=True), gr.update(value=''), gr.update(visible=True), gr.update(value=None),
+            gr.update(value='Please type a follow-up question before submitting.', visible=True), gr.update(value='', visible=False),
         )
+    effective_followup = current_kpi_name if current_kpi_name and is_generic_followup_question(followup_question) else followup_question
+    used_context = effective_followup != followup_question
+    results = get_answer(effective_followup, audience, excel_mapping=excel_mapping)
+    new_current_kpi = current_kpi_name or ''
+    if isinstance(results, tuple) and len(results) >= 5:
+        extracted = extract_kpi_name_from_notes(results[4] or '')
+        if extracted:
+            new_current_kpi = extracted
+    helper_message = 'If you are still not satisfied, choose below to raise an incident.'
+    if used_context and current_kpi_name:
+        helper_message = f"Used KPI context from the previous result: **{current_kpi_name}**. If you are still not satisfied, choose below to raise an incident."
+    return results + (
+        new_current_kpi, gr.update(visible=True), gr.update(value='No', visible=True),
+        gr.update(visible=False), gr.update(value=None), gr.update(value='', visible=False),
+        gr.update(visible=True), gr.update(value=followup_question), gr.update(visible=True), gr.update(value=None),
+        gr.update(value=helper_message, visible=True), gr.update(value='', visible=False),
+    )
+def on_still_not_satisfied_change(choice):
+    if choice == 'Yes':
+        html = f"<div class='incident-box'><div class='incident-title'>Still not satisfied?</div><div class='incident-text'>You can raise an incident in ServiceNow for further help.</div><a class='incident-link' href='{html_escape(SERVICENOW_INCIDENT_URL)}' target='_blank' rel='noopener noreferrer'>Raise Incident in ServiceNow</a></div>"
+        return gr.update(value=html, visible=True), gr.update(value='You selected to raise an incident for further support.', visible=True)
+    if choice == 'No':
+        return gr.update(value='', visible=False), gr.update(value='Glad the follow-up helped.', visible=True)
+    return gr.update(value='', visible=False), gr.update(value='', visible=False)
 # =========================================================
+# 7) MAIN ANSWER
 # =========================================================
+def get_answer(question, audience, excel_mapping=None):
+    if not question or not question.strip():
+        return ('<div class="empty-state">Ask a KPI question to see the summary cards.</div>', 'Please enter a KPI question.', '', '', '', '<div class="empty-state">No comparison available.</div>')
+    primary_entry, best_score = choose_primary_entry(question, audience, excel_mapping=excel_mapping)
+    if primary_entry is None:
+        workbook_note = DEFAULT_KPI_EXCEL if Path(DEFAULT_KPI_EXCEL).exists() else f"{DEFAULT_KPI_EXCEL} not found next to the app file"
+        return (
+            '<div class="empty-state">No KPI found. The app auto-loads the KPI Glossary Excel and should print the Yes columns for the matching KPI row, but this KPI could not be matched safely.</div>',
+            'No KPI found for the searched text.', '', '',
+            f"**Search Tried:** `{fix_pdf_text(clean_user_query(question))}`\n\n**Excel Auto-load:** {workbook_note}\n\nIf the KPI text is present visually in the PDF but still not found, the PDF extraction may be breaking the text across lines/chunks.",
+            '<div class="empty-state">No comparison available because the primary KPI was not found.</div>',
+        )
+    second_entry = find_second_same_occurrence(primary_entry, audience, excel_mapping=excel_mapping)
+    summary_html = build_summary_cards(primary_entry, second_entry, retrieval_score=best_score)
+    comparison_html = build_side_by_side_comparison(primary_entry, second_entry)
+    return summary_html, primary_entry['definition'], primary_entry['business'], primary_entry['formula'], primary_entry['notes'], comparison_html
+def clear_all(default_mapping):
+    return (
+        '', 'Business User', '<div class="empty-state">Ask a KPI question to see the summary cards.</div>',
+        '', '', '', '', '<div class="empty-state">Comparison results will appear here.</div>',
+        default_mapping, '', *clear_feedback_only(),
+    )
+# =========================================================
+# 8) UI
+# =========================================================
+# Stylesheet for the whole app, wrapped in a <style> tag so it can be injected
+# with gr.HTML(CUSTOM_CSS). It styles the class names emitted by the HTML
+# builders (summary cards, pills, comparison table, diff boxes, feedback and
+# incident boxes, empty states) and ends with responsive grid breakpoints.
+CUSTOM_CSS = """
+<style>
+:root {
+  --bg1: #f6f8ff; --bg2: #fafdff; --bg3: #eef4ff; --card: rgba(255,255,255,0.82);
+  --card-strong: rgba(255,255,255,0.94); --stroke: rgba(99, 102, 241, 0.14); --text: #14213d;
+  --muted: #667085; --primary: #5b5bd6; --primary-2: #7c4dff; --success-bg: #ecfdf3;
+  --success-text: #067647; --warning-bg: #fff7ed; --warning-text: #c2410c; --neutral-bg: #f8fafc;
+  --neutral-text: #475467; --shadow: 0 18px 40px rgba(34, 55, 110, 0.10);
+}
+body, .gradio-container { background: linear-gradient(135deg, var(--bg1) 0%, var(--bg2) 45%, var(--bg3) 100%) !important; }
+.gradio-container { max-width: 1500px !important; padding-top: 18px !important; }
+.hero { background: linear-gradient(135deg, rgba(91,91,214,0.14), rgba(124,77,255,0.08), rgba(59,130,246,0.06)); border: 1px solid rgba(124,77,255,0.14); box-shadow: var(--shadow); border-radius: 26px; padding: 26px 30px; margin-bottom: 18px; backdrop-filter: blur(10px); }
+.hero-title { font-size: 34px; font-weight: 800; color: var(--text); margin: 0 0 8px 0; }
+.hero-subtitle { font-size: 15px; color: var(--muted); margin: 0; line-height: 1.65; }
+.panel { background: var(--card) !important; border: 1px solid var(--stroke) !important; border-radius: 22px !important; box-shadow: var(--shadow) !important; padding: 16px !important; backdrop-filter: blur(12px); }
+textarea, input, .gr-textbox, .gr-dropdown, .gr-radio { border-radius: 16px !important; }
+button.primary, button[class*='primary'] { background: linear-gradient(135deg, var(--primary), var(--primary-2)) !important; border: none !important; color: white !important; border-radius: 16px !important; box-shadow: 0 10px 22px rgba(91,91,214,0.22) !important; }
+button.secondary { border-radius: 16px !important; }
+button[role='tab'][aria-selected='true'] { color: var(--primary) !important; border-bottom: 3px solid var(--primary) !important; }
+.kpi-note { background: rgba(255,255,255,0.68); border: 1px dashed rgba(91,91,214,0.18); border-radius: 16px; padding: 12px 14px; color: var(--muted); font-size: 13px; margin-top: 8px; }
+.summary-grid { display: grid; grid-template-columns: repeat(5, minmax(0, 1fr)); gap: 14px; margin-bottom: 16px; }
+.summary-card { background: linear-gradient(180deg, var(--card-strong), rgba(255,255,255,0.72)); border: 1px solid rgba(91,91,214,0.12); border-radius: 20px; padding: 16px; box-shadow: 0 12px 28px rgba(56,72,122,0.08); min-height: 122px; }
+.summary-label { color: var(--muted); font-size: 12px; font-weight: 700; letter-spacing: .04em; text-transform: uppercase; margin-bottom: 10px; }
+.summary-value { color: var(--text); font-size: 20px; font-weight: 800; line-height: 1.25; word-break: break-word; }
+.summary-sub { margin-top: 14px; }
+.badge-wrap { display:flex; flex-wrap:wrap; gap:8px; align-items:flex-start; }
+.pill { display:inline-flex; align-items:center; gap:6px; padding:7px 11px; border-radius:999px; font-size:12px; font-weight:700; }
+.pill.info { background: rgba(59,130,246,0.12); color:#1d4ed8; }
+.pill.success { background: rgba(16,185,129,0.14); color:#047857; }
+.pill.warning { background: rgba(245,158,11,0.16); color:#b45309; }
+.pill.neutral { background: rgba(100,116,139,0.12); color:#475467; }
+.compare-wrap { display:flex; flex-direction:column; gap:14px; }
+.compare-banner { padding:14px 16px; border-radius:16px; font-weight:800; font-size:14px; border:1px solid transparent; }
+.compare-banner.success { background: var(--success-bg); color: var(--success-text); }
+.compare-banner.warning { background: var(--warning-bg); color: var(--warning-text); }
+.compare-banner.neutral { background: var(--neutral-bg); color: var(--neutral-text); }
+.compare-head { display:grid; grid-template-columns: repeat(2, minmax(0,1fr)); gap:14px; }
+.head-card { background: rgba(255,255,255,0.82); border:1px solid rgba(99,102,241,0.12); border-radius:18px; padding:16px; }
+.head-label { color: var(--muted); font-size:12px; font-weight:700; text-transform:uppercase; letter-spacing:.04em; }
+.head-page { color: var(--primary); font-size:13px; font-weight:700; margin-top:6px; }
+.head-name { color: var(--text); font-size:18px; font-weight:800; margin-top:8px; }
+.compare-table { display:flex; flex-direction:column; gap:12px; }
+.compare-row { display:grid; grid-template-columns:220px 1fr 1fr; gap:12px; align-items:stretch; }
+.compare-field, .compare-cell { background: rgba(255,255,255,0.82); border:1px solid rgba(99,102,241,0.10); border-radius:18px; padding:14px; }
+.compare-row.same .compare-field { background: linear-gradient(180deg, #f0fdf4, #ffffff); }
+.compare-row.different .compare-field { background: linear-gradient(180deg, #fff7ed, #ffffff); }
+.field-name { color: var(--text); font-weight:800; font-size:15px; }
+.field-status { display:inline-block; margin-top:12px; padding:6px 10px; border-radius:999px; font-size:11px; font-weight:800; letter-spacing:.05em; }
+.field-status.same { background: rgba(16,185,129,0.14); color:#047857; }
+.field-status.different { background: rgba(245,158,11,0.16); color:#b45309; }
+.cell-title { color: var(--muted); font-size:12px; font-weight:700; text-transform:uppercase; letter-spacing:.04em; margin-bottom:8px; }
+.cell-content { color: var(--text); font-size:14px; line-height:1.6; white-space:normal; word-break:break-word; }
+.code-block { font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, 'Liberation Mono', monospace; background:#f8fafc; border:1px solid rgba(148,163,184,0.16); border-radius:14px; padding:12px; white-space:pre-wrap; }
+.diff-box { background: rgba(255,255,255,0.76); border:1px solid rgba(99,102,241,0.10); border-radius:18px; padding:14px; }
+.diff-box.same { color:#047857; background: rgba(236,253,243,0.82); }
+.diff-box.different { background: rgba(255,247,237,0.78); }
+.diff-title { font-size:13px; font-weight:800; color: var(--text); margin-bottom:10px; }
+.diff-grid { display:grid; grid-template-columns: repeat(2, minmax(0,1fr)); gap:12px; }
+.diff-col { background: rgba(255,255,255,0.85); border-radius:14px; padding:12px; border:1px dashed rgba(99,102,241,0.12); }
+.diff-col-title { font-size:12px; font-weight:800; color: var(--muted); margin-bottom:8px; text-transform:uppercase; }
+.diff-col ul { margin:0; padding-left:18px; }
+.diff-col li { margin:6px 0; color: var(--text); font-size:13px; }
+.feedback-box { background: rgba(255,255,255,0.76); border:1px solid rgba(99,102,241,0.10); border-radius:18px; padding:16px; margin-top:14px; }
+.feedback-title { font-size:16px; font-weight:800; color: var(--text); margin-bottom:8px; }
+.incident-box { background: rgba(255,247,237,0.78); border:1px solid rgba(245,158,11,0.22); border-radius:16px; padding:14px; margin-top:10px; }
+.incident-title { font-weight:800; color:#9a3412; margin-bottom:6px; }
+.incident-text { color:#7c2d12; margin-bottom:10px; }
+.incident-link { display:inline-block; padding:10px 14px; border-radius:12px; background:#7c3aed; color:white !important; text-decoration:none; font-weight:700; }
+.empty-state { background: rgba(255,255,255,0.74); border:1px dashed rgba(91,91,214,0.20); border-radius:18px; padding:18px; color: var(--muted); }
+@media (max-width:1300px){ .summary-grid{grid-template-columns:repeat(3,minmax(0,1fr));} }
+@media (max-width:1100px){ .summary-grid{grid-template-columns:repeat(2,minmax(0,1fr));} .compare-row{grid-template-columns:1fr;} .compare-head{grid-template-columns:1fr;} .diff-grid{grid-template-columns:1fr;} }
+@media (max-width:700px){ .summary-grid{grid-template-columns:1fr;} }
+</style>
+"""
+DEFAULT_MAPPING = load_default_excel_if_present()
+DEFAULT_STATUS = (
+    f"Auto-loaded Excel: {DEFAULT_KPI_EXCEL} | mapped KPI keys: {len(DEFAULT_MAPPING)}" if Path(DEFAULT_KPI_EXCEL).exists() else
+    f"Auto-load Excel not found: place '{DEFAULT_KPI_EXCEL}' next to app.py"
+)
+with gr.Blocks() as demo:
+    gr.HTML(CUSTOM_CSS)
+    gr.HTML("""
+    <div class='hero'>
+        <div class='hero-title'>💊 Pharma KPI Chatbot</div>
+        <p class='hero-subtitle'>
+        This is an AI-powered solution for OneCI applications that helps business users quickly access, analyze, and understand key KPIs through natural language queries.
+        It enables faster business insights, improved decision-making, and a more efficient reporting experience across OneCI platforms.
+        </p>
+    </div>
+    """)
+    with gr.Row():
+        with gr.Column(scale=4, elem_classes=['panel']):
+            question = gr.Textbox(label='Ask KPI question', placeholder='e.g. OCCP Interactions', lines=2)
+            audience = gr.Dropdown(choices=['Business User', 'Analytics User', 'Leadership'], value='Business User', label='Explain for')
+            excel_status = gr.Markdown(DEFAULT_STATUS)
+            submit_btn = gr.Button('Submit', variant='primary')
+            clear_btn = gr.Button('Clear')
+        with gr.Column(scale=8, elem_classes=['panel']):
+            summary_cards = gr.HTML('<div class="empty-state">Ask a KPI question to see the summary cards.</div>')
+            with gr.Tab('Definition'):
+                definition = gr.Markdown()
+            with gr.Tab('Business Meaning'):
+                business = gr.Markdown()
+            with gr.Tab('Formula'):
+                formula = gr.Textbox(label='Formula', lines=14)
+            with gr.Tab('Notes'):
+                notes = gr.Markdown()
+            with gr.Tab('Comparison'):
+                comparison = gr.HTML('<div class="empty-state">Comparison results will appear here.</div>')
+            excel_mapping_state = gr.State(DEFAULT_MAPPING)
+            current_kpi_state = gr.State('')
+            with gr.Group(visible=False) as feedback_panel:
+                gr.HTML("<div class='feedback-box'><div class='feedback-title'>Are you satisfied with the definition?</div></div>")
+                satisfied_choice = gr.Radio(choices=['Yes', 'No'], label='Was the definition satisfactory?', visible=True)
+                with gr.Row(visible=False) as rating_row:
+                    rating_value = gr.Radio(choices=['1', '2', '3', '4', '5'], label='Rate the definition (1 to 5)')
+                    rating_submit_btn = gr.Button('Submit Rating')
+                rating_status = gr.Markdown(visible=False)
+                with gr.Column(visible=False) as followup_row:
+                    followup_question = gr.Textbox(label='Ask more', placeholder='Please ask your follow-up question here', lines=3)
+                    followup_submit_btn = gr.Button('Ask More', variant='primary')
+                with gr.Row(visible=False) as still_not_satisfied_row:
+                    still_not_satisfied_choice = gr.Radio(choices=['Yes', 'No'], label='Still not satisfied after the follow-up?')
+                feedback_status = gr.Markdown(visible=False)
+                incident_html = gr.HTML(visible=False)
+    submit_btn.click(
+        fn=run_search_and_prepare_feedback,
+        inputs=[question, audience, excel_mapping_state],
+        outputs=[
+            summary_cards, definition, business, formula, notes, comparison,
+            current_kpi_state,
+            feedback_panel, satisfied_choice, rating_row, rating_value,
+            rating_status, followup_row, followup_question,
+            still_not_satisfied_row, still_not_satisfied_choice,
+            feedback_status, incident_html,
+        ],
+    )
+    satisfied_choice.change(fn=on_satisfaction_change, inputs=[satisfied_choice], outputs=[rating_row, followup_row, still_not_satisfied_row, incident_html, feedback_status])
+    rating_submit_btn.click(fn=submit_rating, inputs=[rating_value], outputs=[rating_status])
+    followup_submit_btn.click(
+        fn=run_followup_search,
+        inputs=[followup_question, audience, current_kpi_state, excel_mapping_state],
+        outputs=[
+            summary_cards, definition, business, formula, notes, comparison,
+            current_kpi_state,
+            feedback_panel, satisfied_choice, rating_row, rating_value,
+            rating_status, followup_row, followup_question,
+            still_not_satisfied_row, still_not_satisfied_choice,
+            feedback_status, incident_html,
+        ],
+    )
+    still_not_satisfied_choice.change(fn=on_still_not_satisfied_change, inputs=[still_not_satisfied_choice], outputs=[incident_html, feedback_status])
+    clear_btn.click(
+        fn=clear_all,
+        inputs=[excel_mapping_state],
+        outputs=[
+            question, audience, summary_cards, definition, business, formula, notes, comparison,
+            excel_mapping_state, current_kpi_state,
+            feedback_panel, satisfied_choice, rating_row, rating_value,
+            rating_status, followup_row, followup_question,
+            still_not_satisfied_row, still_not_satisfied_choice,
+            feedback_status, incident_html,
+        ],
+    )
 demo.launch()