Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,231 +1,1099 @@
|
|
| 1 |
-
#
|
| 2 |
-
#
|
| 3 |
-
#
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
import sys
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
def ensure_package(pkg):
|
| 10 |
-
if importlib.util.find_spec(pkg) is None:
|
| 11 |
-
subprocess.check_call([sys.executable, "-m", "pip", "install", pkg])
|
| 12 |
|
| 13 |
-
ensure_package(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
-
# =========================================================
|
| 16 |
-
# IMPORTS
|
| 17 |
-
# =========================================================
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
from pathlib import Path
|
| 22 |
-
from difflib import SequenceMatcher
|
| 23 |
-
import pandas as pd
|
| 24 |
-
import gradio as gr
|
| 25 |
|
|
|
|
|
|
|
| 26 |
from langchain_community.document_loaders import PyPDFLoader
|
|
|
|
| 27 |
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
-
PDF_FILE = "data.pdf"
|
| 33 |
-
DEFAULT_KPI_EXCEL = "CIA Consolidated KPIs_MetricsGovernance (1).xlsx"
|
| 34 |
|
| 35 |
# =========================================================
|
| 36 |
-
# TEXT HELPERS
|
| 37 |
# =========================================================
|
| 38 |
-
|
| 39 |
-
def normalize_loose(text: str):
|
| 40 |
if not text:
|
| 41 |
-
return
|
| 42 |
-
text = unicodedata.normalize(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
text = re.sub(r'[^a-z0-9]+', ' ', text)
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
-
def is_truthy_excel_value(value):
|
| 47 |
-
if pd.isna(value):
|
| 48 |
-
return False
|
| 49 |
-
return str(value).strip().lower() in ['yes','y','true','1','x']
|
| 50 |
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
|
| 55 |
-
def load_kpi_excel_mapping(excel_path: str):
|
| 56 |
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
-
df = pd.read_excel(excel_path, sheet_name='KPI Glossary', engine='openpyxl')
|
| 62 |
-
df.columns = [str(c).strip() for c in df.columns]
|
| 63 |
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
-
mapping = {}
|
| 67 |
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
-
# ✅ USE YOUR REAL COLUMN NAMES
|
| 71 |
-
kpi = str(row.get("KPI_Name", "")).strip()
|
| 72 |
-
definition = str(row.get("KPI_Definitions", "")).strip()
|
| 73 |
-
measure = str(row.get("PowerBI Field/Measure", "")).strip()
|
| 74 |
-
formula = str(row.get("KPI_DAX", "")).strip()
|
| 75 |
|
| 76 |
-
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
-
record = {
|
| 80 |
-
"kpi_name": kpi,
|
| 81 |
-
"measure_name": measure,
|
| 82 |
-
"formula": formula,
|
| 83 |
-
"business_logic": definition,
|
| 84 |
-
"report_sources": []
|
| 85 |
-
}
|
| 86 |
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
-
mapping[normalize_loose(kpi)] = record
|
| 93 |
|
| 94 |
-
|
| 95 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
|
| 98 |
# =========================================================
|
| 99 |
-
#
|
| 100 |
# =========================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
-
def similarity(a, b):
|
| 103 |
-
return SequenceMatcher(None, a, b).ratio()
|
| 104 |
|
| 105 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
-
query_key = normalize_loose(query)
|
| 108 |
|
| 109 |
-
|
| 110 |
-
|
|
|
|
|
|
|
| 111 |
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
-
|
|
|
|
|
|
|
| 115 |
|
| 116 |
-
|
| 117 |
-
|
| 118 |
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
|
| 123 |
-
if
|
| 124 |
-
best_score = final_score
|
| 125 |
-
best_match = value
|
| 126 |
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
|
| 129 |
-
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
| 133 |
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
# =========================================================
|
| 137 |
|
| 138 |
-
def load_pdf():
|
| 139 |
-
if not Path(PDF_FILE).exists():
|
| 140 |
-
return []
|
| 141 |
-
return PyPDFLoader(PDF_FILE).load()
|
| 142 |
|
| 143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
|
| 145 |
-
def extract_from_pdf(query):
|
| 146 |
-
q = normalize_loose(query)
|
| 147 |
|
| 148 |
-
|
| 149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
|
| 151 |
-
if q in text:
|
| 152 |
-
return {
|
| 153 |
-
"kpi_name": query,
|
| 154 |
-
"business_logic": doc.page_content[:500],
|
| 155 |
-
"formula": "Extracted from PDF",
|
| 156 |
-
"report_sources": []
|
| 157 |
-
}
|
| 158 |
-
return None
|
| 159 |
|
| 160 |
# =========================================================
|
| 161 |
-
#
|
| 162 |
# =========================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
|
| 164 |
-
def render_badges(sources):
|
| 165 |
-
if not sources:
|
| 166 |
-
return "Not mapped"
|
| 167 |
-
return " | ".join(sources[:15]) # limit long lists
|
| 168 |
|
| 169 |
# =========================================================
|
| 170 |
-
#
|
| 171 |
# =========================================================
|
|
|
|
|
|
|
| 172 |
|
| 173 |
-
def get_answer(question, excel_mapping):
|
| 174 |
|
| 175 |
-
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
|
| 178 |
-
# ✅ 1. Excel search
|
| 179 |
-
excel_data = lookup_excel(question, excel_mapping)
|
| 180 |
|
| 181 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
return (
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
excel_data["formula"] or "-",
|
| 186 |
-
render_badges(excel_data["report_sources"])
|
| 187 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
|
| 189 |
-
|
| 190 |
-
|
|
|
|
|
|
|
| 191 |
|
| 192 |
-
|
|
|
|
|
|
|
| 193 |
return (
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
|
|
|
| 198 |
)
|
| 199 |
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
|
| 206 |
-
EXCEL_MAPPING = load_kpi_excel_mapping(DEFAULT_KPI_EXCEL)
|
| 207 |
|
| 208 |
# =========================================================
|
| 209 |
-
#
|
| 210 |
# =========================================================
|
|
|
|
|
|
|
|
|
|
| 211 |
|
| 212 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
|
| 214 |
-
gr.Markdown("# 💊 Pharma KPI Copilot (Excel Driven)")
|
| 215 |
|
| 216 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
-
btn = gr.Button("Search")
|
| 219 |
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
|
| 225 |
-
def run(q):
|
| 226 |
-
return get_answer(q, EXCEL_MAPPING)
|
| 227 |
|
| 228 |
-
|
| 229 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
|
| 231 |
demo.launch()
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
+
# Pharma KPI Copilot
|
| 3 |
+
# - Auto-loads KPI Glossary Excel from same folder as app.py
|
| 4 |
+
# - Reads PDF for KPI definition / formula / notes
|
| 5 |
+
# - Fixes Excel mapping so report names show instead of "Not mapped"
|
| 6 |
+
# - Displays report / offering values as colored badges
|
| 7 |
+
# - Installs openpyxl automatically if missing
|
| 8 |
+
|
| 9 |
+
import os
|
| 10 |
+
import re
|
| 11 |
import sys
|
| 12 |
+
import subprocess
|
| 13 |
+
import importlib.util
|
| 14 |
+
import unicodedata
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
from difflib import SequenceMatcher
|
| 17 |
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
+
def ensure_package(package_name: str):
    """Install ``package_name`` with pip when it cannot be imported.

    Args:
        package_name: Module name to probe with ``importlib``; the same
            string is passed to ``pip install``.

    Raises:
        subprocess.CalledProcessError: If the pip command exits non-zero.
    """
    already_available = importlib.util.find_spec(package_name) is not None
    if already_available:
        return

    print(f"Package '{package_name}' not found. Installing...")
    pip_command = [sys.executable, '-m', 'pip', 'install', package_name]
    subprocess.check_call(pip_command)
    print(f"Package '{package_name}' installed successfully.")
|
| 24 |
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
+
# Required for pandas Excel engine
|
| 27 |
+
ensure_package('openpyxl')
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
+
import gradio as gr
|
| 30 |
+
import pandas as pd
|
| 31 |
from langchain_community.document_loaders import PyPDFLoader
|
| 32 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 33 |
|
| 34 |
+
os.environ['TOKENIZERS_PARALLELISM'] = 'false'
|
| 35 |
+
|
| 36 |
+
SERVICENOW_INCIDENT_URL = os.getenv(
|
| 37 |
+
'SERVICENOW_INCIDENT_URL',
|
| 38 |
+
'https://sanofiservices.service-now.com/onesupport?id=sc_cat_item&sys_id=a5c743d39761b19cbb28fa871153afc3',
|
| 39 |
+
)
|
| 40 |
+
PDF_FILE = 'data.pdf'
|
| 41 |
+
DEFAULT_KPI_EXCEL = 'CIA Consolidated KPIs_MetricsGovernance (1).xlsx'
|
| 42 |
+
|
| 43 |
+
REPORT_FLAG_COLUMNS = [
|
| 44 |
+
'SFE', 'B360', 'OMNICHANNEL', 'C360', 'E&C', 'AC',
|
| 45 |
+
'Field Reporting', 'Content Reporting', 'Above Country', 'Country'
|
| 46 |
+
]
|
| 47 |
+
|
| 48 |
+
EXTRA_INFO_COLUMNS = [
|
| 49 |
+
'Placement in Offering', 'Calculated at:', 'Domain', 'Interaction', 'Channels', 'PowerBI Field/Measure'
|
| 50 |
+
]
|
| 51 |
+
|
| 52 |
+
MANUAL_ALIAS_MAP = {
|
| 53 |
+
# 'hcp reach in occp': 'HCPs in OCCP',
|
| 54 |
+
}
|
| 55 |
|
|
|
|
|
|
|
| 56 |
|
| 57 |
# =========================================================
|
| 58 |
+
# 1) TEXT HELPERS
|
| 59 |
# =========================================================
|
| 60 |
+
# Single-character clean-ups applied after NFKC normalisation. Ligatures such
# as U+FB01/U+FB02 are already expanded by NFKC, so they need no entry here.
_PDF_CHAR_FIXES = str.maketrans({
    '\u201c': '"',   # left double quotation mark
    '\u201d': '"',   # right double quotation mark
    '\u2019': "'",   # right single quotation mark
    '\u2018': "'",   # left single quotation mark
    '\u2013': '-',   # en dash
    '\u2014': '-',   # em dash
    '\u00ad': '',    # soft hyphen is dropped entirely
})


def fix_pdf_text(text: str) -> str:
    """Normalise text extracted from a PDF.

    Applies Unicode NFKC normalisation, maps typographic quotes and dashes
    to ASCII, removes soft hyphens, and rewrites a theta-like glyph that
    sits between two word characters as ``ti``.

    Args:
        text: Raw text; ``None`` or an empty string is accepted.

    Returns:
        The cleaned text, or ``''`` when ``text`` is falsy.
    """
    if not text:
        return ''
    text = unicodedata.normalize('NFKC', text).translate(_PDF_CHAR_FIXES)
    # Only rewrite the glyph inside a word so standalone Greek letters survive.
    return re.sub(r'(?<=\w)[θΘϑϴƟɵ](?=\w)', 'ti', text)
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def normalize_exact(text: str) -> str:
    """Return ``text`` cleaned, lower-cased, trimmed and whitespace-collapsed.

    A falsy ``text`` is treated as the empty string. Punctuation is kept;
    only runs of whitespace are replaced by a single space.
    """
    cleaned = fix_pdf_text(text or '')
    lowered = cleaned.lower().strip()
    return re.sub(r'\s+', ' ', lowered)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def singularize_token(token: str) -> str:
    """Reduce a single token to a naive singular form.

    The token is trimmed and lower-cased first. Words longer than four
    characters ending in ``ies`` become ``...y``; words longer than three
    characters ending in a single ``s`` (not ``ss``) lose that ``s``.
    Anything else is returned trimmed and lower-cased.
    """
    word = token.strip().lower()
    if word.endswith('ies') and len(word) > 4:
        return word[:-3] + 'y'
    plain_plural = word.endswith('s') and not word.endswith('ss')
    if plain_plural and len(word) > 3:
        return word[:-1]
    return word
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def normalize_loose(text: str) -> str:
    """Build a forgiving comparison key from ``text``.

    The text is cleaned, lower-cased, stripped of everything except
    ``a-z`` and ``0-9`` (replaced by spaces), whitespace-collapsed, and
    each remaining token is singularised. Returns ``''`` when nothing
    is left.
    """
    lowered = fix_pdf_text(text or '').lower().strip()
    lowered = lowered.replace('#', ' ').replace('%', ' ')
    alnum_only = re.sub(r'[^a-z0-9]+', ' ', lowered)
    collapsed = re.sub(r'\s+', ' ', alnum_only).strip()
    # An empty string splits into no tokens, which joins back to ''.
    return ' '.join(map(singularize_token, collapsed.split()))
|
| 95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
+
def tokenize_loose(text: str):
    """Return the loosely normalised tokens of ``text`` as a list.

    An input that normalises to the empty string yields an empty list.
    """
    # str.split() on '' already returns [], so no explicit empty check is needed.
    return normalize_loose(text).split()
|
| 100 |
|
|
|
|
| 101 |
|
| 102 |
+
STOPWORDS = {
|
| 103 |
+
'a', 'an', 'the', 'in', 'of', 'with', 'and', 'or', 'for', 'to', 'by', 'on',
|
| 104 |
+
'this', 'that', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
|
| 105 |
+
'what', 'how', 'why', 'show', 'give', 'tell', 'me', 'please', 'explain',
|
| 106 |
+
'search', 'find', 'calculated', 'computed', 'measured', 'formula', 'mean', 'important',
|
| 107 |
+
}
|
| 108 |
|
|
|
|
|
|
|
| 109 |
|
| 110 |
+
def significant_tokens(text: str):
    """Return the loose tokens of ``text`` with stopwords removed.

    When every token is a stopword the unfiltered token list is returned
    instead, so a non-empty input never collapses to nothing.
    """
    tokens = tokenize_loose(text)
    meaningful = [tok for tok in tokens if tok not in STOPWORDS]
    return meaningful or tokens
|
| 114 |
|
|
|
|
| 115 |
|
| 116 |
+
def clean_user_query(text: str) -> str:
    """Strip question framing from a user query.

    The text is cleaned, trailing question marks are removed, the result
    is lower-cased, and each known lead-in phrase is removed from the
    start in turn.

    Returns:
        The lower-cased remainder of the query.
    """
    question = re.sub(r'[?]+$', '', fix_pdf_text(text or '').strip()).strip()
    lead_ins = (
        r'^what is\s+', r'^what s\s+', r'^show me\s+', r'^give me\s+', r'^tell me\s+',
        r'^explain\s+', r'^find\s+', r'^search\s+for\s+', r'^how is\s+', r'^why is\s+',
    )
    remainder = question.lower()
    # Applied sequentially: stripping one lead-in can expose another at the start.
    for lead_in in lead_ins:
        remainder = re.sub(lead_in, '', remainder).strip()
    return remainder.strip()
|
| 127 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
|
| 129 |
+
def clean_formula_text(text: str) -> str:
    """Reduce formula text to a compact form for comparison.

    Lower-cases the cleaned text, drops everything from ``--`` to the end
    of each line, and removes all whitespace.
    """
    lowered = fix_pdf_text(text or '').lower()
    # '.' does not cross newlines, so this trims each '--' comment per line.
    without_comments = re.sub(r'--.*', '', lowered)
    return re.sub(r'\s+', '', without_comments)
|
| 134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
+
def html_escape(text: str) -> str:
    """Escape ``text`` for safe inclusion in HTML markup.

    Replaces ``&``, ``<``, ``>`` and ``"`` with their HTML entities.
    Single quotes are left unchanged.

    Args:
        text: Any value; it is converted with ``str()``. ``None`` is accepted.

    Returns:
        The escaped string, or ``''`` when ``text`` is ``None``.
    """
    if text is None:
        return ''
    return (
        str(text)
        # '&' must be handled first so the entities added below are not re-escaped.
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
    )
|
| 146 |
|
|
|
|
| 147 |
|
| 148 |
+
def nl2br(text: str) -> str:
    """Clean and HTML-escape ``text``, then turn newlines into ``<br>`` tags."""
    escaped = html_escape(fix_pdf_text(text))
    return escaped.replace('\n', '<br>')
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
def is_generic_followup_question(text: str) -> bool:
    """Tell whether ``text`` opens with a generic follow-up phrase.

    The text is normalised with ``normalize_exact`` and then tested
    against a fixed list of start-anchored patterns.
    """
    question = normalize_exact(text)
    generic_patterns = [
        r'^how is this calculated', r'^how is this computed', r'^how is this measured',
        r'^what is the formula', r'^show formula', r'^show the formula', r'^give formula',
        r'^why is this important', r'^explain this', r'^what does this mean',
    ]
    for pattern in generic_patterns:
        if re.search(pattern, question):
            return True
    return False
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def extract_kpi_name_from_notes(notes_text: str) -> str:
    """Pull the value following the ``**KPI Name:**`` marker out of notes text.

    Returns:
        The trimmed text after the first marker, or ``''`` when the notes
        are empty or contain no marker.
    """
    if not notes_text:
        return ''
    found = re.search(r'\*\*KPI Name:\*\*\s*(.+)', notes_text)
    if found is None:
        return ''
    return found.group(1).strip()
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def resolve_alias(user_query: str):
    """Map a user query onto a manually configured alias target, if any.

    Returns:
        A 3-tuple ``(query_to_use, matched_alias_key, alias_target)``.
        With an alias hit, the first and third items are the alias target
        and the second is the normalised query. Without a hit, the first
        item is the cleaned query (or the original query when it
        normalises to nothing) and the other two are ``None``.
    """
    cleaned = clean_user_query(user_query)
    lookup_key = normalize_loose(cleaned)
    if not lookup_key:
        return user_query, None, None

    normalized_aliases = {}
    for alias, target in MANUAL_ALIAS_MAP.items():
        normalized_aliases[normalize_loose(alias)] = target

    if lookup_key not in normalized_aliases:
        return cleaned, None, None
    target = normalized_aliases[lookup_key]
    return target, lookup_key, target
|
| 178 |
|
| 179 |
|
| 180 |
# =========================================================
|
| 181 |
+
# 2) EXCEL LOADING AND MAPPING
|
| 182 |
# =========================================================
|
| 183 |
+
def is_truthy_excel_value(value):
    """Tell whether an Excel cell value marks a flag as set.

    Missing values (as judged by ``pd.isna``) are never truthy. Otherwise
    the value is stringified, trimmed and lower-cased, then compared with
    the accepted markers.

    Args:
        value: A scalar cell value.

    Returns:
        ``True`` for ``yes``, ``y``, ``true``, ``1``, ``1.0`` or ``x``
        (case-insensitive), ``False`` otherwise.
    """
    if pd.isna(value):
        return False
    marker = str(value).strip().lower()
    # '1.0' covers a numeric flag that arrives as a float, whose str() is
    # '1.0' rather than '1'.
    return marker in {'yes', 'y', 'true', '1', '1.0', 'x'}
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
def detect_glossary_header_row(raw_df: pd.DataFrame):
    """Find the real KPI Glossary header row.

    Scans at most the first 60 rows. Each cell is stringified, normalised
    with ``normalize_exact`` and has ``/`` replaced by a space. A row is
    accepted when it holds both header cells exactly, or when its joined
    text contains the KPI header together with either the measure header
    or the word ``definitions``.

    Returns:
        The positional index of the first matching row, or ``None``.
    """
    scan_limit = min(len(raw_df), 60)
    for idx in range(scan_limit):
        cells = [
            normalize_exact(str(cell)).replace('/', ' ')
            for cell in raw_df.iloc[idx].tolist()
        ]
        exact_hit = 'metrics kpis' in cells and 'powerbi field measure' in cells
        if exact_hit:
            return idx

        flat = ' | '.join(cells)
        if 'metrics kpis' not in flat:
            continue
        if 'powerbi field measure' in flat or 'definitions' in flat:
            return idx
    return None
|
| 199 |
|
|
|
|
|
|
|
| 200 |
|
| 201 |
+
def build_glossary_dataframe(excel_path: str):
    """Read the ``KPI Glossary`` sheet and re-base it on its detected header row.

    The sheet is read without a header, the header row is located, and the
    rows below it become the data. Fully empty rows are dropped, as are
    columns whose header is blank or ``nan``.

    Returns:
        ``(dataframe, header_row_index)``, or ``(None, None)`` when no
        header row is detected.
    """
    sheet = pd.read_excel(excel_path, sheet_name='KPI Glossary', header=None, engine='openpyxl')
    header_idx = detect_glossary_header_row(sheet)
    if header_idx is None:
        return None, None

    labels = [str(cell).strip() for cell in sheet.iloc[header_idx].tolist()]
    body = sheet.iloc[header_idx + 1:].copy().reset_index(drop=True)
    body.columns = labels
    body = body.dropna(how='all')

    # Boolean mask: keep only columns that carry a real header label.
    stripped_labels = (str(col).strip() for col in body.columns)
    usable = [label != '' and label.lower() != 'nan' for label in stripped_labels]
    body = body.loc[:, usable]
    body.columns = [str(col).strip() for col in body.columns]
    return body, header_idx
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
def merge_excel_record(a: dict, b: dict):
    """Combine two Excel KPI records into one.

    If either record is falsy the other is returned unchanged. Otherwise
    a new record is built: names prefer ``a``, ``report_sources`` and
    ``row_ids`` are sorted unions, and each ``extra_info`` column holds
    the distinct non-empty values from both records joined by `` | ``.
    """
    if not a:
        return b
    if not b:
        return a

    combined_extra = {}
    for col in EXTRA_INFO_COLUMNS:
        distinct = []
        for rec in (a, b):
            val = rec.get('extra_info', {}).get(col)
            if val and val not in distinct:
                distinct.append(val)
        if distinct:
            combined_extra[col] = ' | '.join(distinct)

    sources = set(a.get('report_sources', [])) | set(b.get('report_sources', []))
    rows = set(a.get('row_ids', [])) | set(b.get('row_ids', []))
    return {
        'kpi_name': a.get('kpi_name') or b.get('kpi_name', ''),
        'measure_name': a.get('measure_name') or b.get('measure_name', ''),
        'report_sources': sorted(sources),
        'extra_info': combined_extra,
        'row_ids': sorted(rows),
    }
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
def add_record_to_mapping(mapping: dict, key: str, record: dict):
    """Store ``record`` under ``key`` in ``mapping``, merging with any existing entry.

    A falsy ``key`` is ignored. The mapping is modified in place and
    nothing is returned.
    """
    if not key:
        return
    if key in mapping:
        mapping[key] = merge_excel_record(mapping.get(key), record)
    else:
        mapping[key] = record
|
| 244 |
|
|
|
|
| 245 |
|
| 246 |
+
def _cell_text(value) -> str:
    """Return an Excel cell as a stripped string, mapping missing cells to ''."""
    if value is None:
        return ''
    try:
        if pd.isna(value):
            return ''
    except (TypeError, ValueError):
        # Non-scalar values have no single truth value for isna(); fall
        # through and stringify them.
        pass
    return str(value).strip()


def load_kpi_excel_mapping(excel_path: str):
    """Build a lookup of KPI records from the KPI Glossary workbook.

    Each data row yields a record with the KPI name, the Power BI measure
    name, the flagged report sources, extra info columns and the row id.
    The record is registered under the loosely normalised KPI name and
    under the loosely normalised measure name; records sharing a key are
    merged. Missing cells count as empty, so a blank name never produces
    a ``'nan'`` key.

    Args:
        excel_path: Path to the workbook.

    Returns:
        A dict mapping normalised keys to records. Empty when the file is
        missing, cannot be read, has no detectable header row, or has
        neither of the expected name columns.
    """
    if not excel_path or not Path(excel_path).exists():
        print(f'Excel not found: {excel_path}')
        return {}

    try:
        df, header_row = build_glossary_dataframe(excel_path)
    except Exception as e:  # boundary: any read/parse failure degrades to "no mapping"
        print(f'Could not read KPI Glossary sheet: {e}')
        return {}

    if df is None or df.empty:
        print('Could not detect KPI Glossary header row or data is empty.')
        return {}

    print(f'KPI Glossary header row detected at: {header_row}')
    print(f'KPI Glossary columns detected: {list(df.columns)[:20]}')

    kpi_col = 'Metrics/KPIs' if 'Metrics/KPIs' in df.columns else None
    measure_col = 'PowerBI Field/Measure' if 'PowerBI Field/Measure' in df.columns else None
    if not kpi_col and not measure_col:
        print('Metrics/KPIs and PowerBI Field/Measure columns not found.')
        return {}

    # Column presence does not change per row, so resolve it once.
    flag_cols = [col for col in REPORT_FLAG_COLUMNS if col in df.columns]
    extra_cols = [col for col in EXTRA_INFO_COLUMNS if col in df.columns]

    mapping = {}
    for idx, row in df.iterrows():
        kpi_name = _cell_text(row.get(kpi_col)) if kpi_col else ''
        measure_name = _cell_text(row.get(measure_col)) if measure_col else ''
        if not kpi_name and not measure_name:
            continue

        report_sources = [col for col in flag_cols if is_truthy_excel_value(row.get(col))]

        extra_info = {}
        for col in extra_cols:
            text = _cell_text(row.get(col))
            if text:
                extra_info[col] = text

        record = {
            'kpi_name': kpi_name,
            'measure_name': measure_name,
            'report_sources': sorted(set(report_sources)),
            'extra_info': extra_info,
            'row_ids': [int(idx)],
        }

        if kpi_name:
            add_record_to_mapping(mapping, normalize_loose(kpi_name), record)
        if measure_name:
            add_record_to_mapping(mapping, normalize_loose(measure_name), record)

    print(f'Final mapped KPI keys: {len(mapping)}')
    return mapping
|
|
|
|
| 301 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
|
| 303 |
+
def excel_candidate_keys(*texts):
    """Return the distinct loose keys for the given texts, in first-seen order.

    Falsy texts and texts that normalise to an empty key are skipped.
    """
    normalized = (normalize_loose(t) for t in texts if t)
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(key for key in normalized if key))
|
| 312 |
+
|
| 313 |
+
|
| 314 |
+
def excel_token_coverage_score(query_key: str, candidate_key: str):
    """Measure how much of the query's significant tokens a candidate covers.

    Returns:
        ``(coverage, overlap)`` where ``overlap`` is the number of distinct
        significant tokens shared by both keys and ``coverage`` is that
        count divided by the number of distinct query tokens. Returns
        ``(0.0, 0)`` when either side has no tokens.
    """
    query_tokens = set(significant_tokens(query_key))
    candidate_tokens = set(significant_tokens(candidate_key))
    if not query_tokens or not candidate_tokens:
        return 0.0, 0
    shared = len(query_tokens & candidate_tokens)
    return shared / max(len(query_tokens), 1), shared
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
def lookup_kpi_excel_info(kpi_name: str, measure_name: str, excel_mapping: dict, query_text: str = None):
    """Find the Excel record that best matches a KPI.

    Candidate keys are derived from ``query_text``, ``kpi_name`` and
    ``measure_name`` (in that order). Exact key hits are merged and
    returned. If there are none, a fuzzy pass picks the mapping key with
    the highest ``SequenceMatcher`` ratio among those that fully cover the
    query tokens, reach ratio 0.84, or share at least two tokens with
    ratio 0.70 or more.

    Returns:
        The matching record, or ``None`` when the mapping is empty or
        nothing qualifies.
    """
    if not excel_mapping:
        return None

    candidate_keys = excel_candidate_keys(query_text, kpi_name, measure_name)

    # Pass 1: exact key hits, merged together.
    merged_hit = None
    for key in candidate_keys:
        if key not in excel_mapping:
            continue
        entry = excel_mapping[key]
        merged_hit = merge_excel_record(merged_hit, entry) if merged_hit else entry
    if merged_hit:
        return merged_hit

    # Pass 2: fuzzy fallback; ties keep the earliest qualifying key.
    winner, winner_ratio = None, 0.0
    for probe in candidate_keys:
        for known_key in excel_mapping:
            coverage, shared = excel_token_coverage_score(probe, known_key)
            ratio = SequenceMatcher(None, probe, known_key).ratio()
            qualifies = coverage >= 1.0 or ratio >= 0.84 or (shared >= 2 and ratio >= 0.70)
            if qualifies and ratio > winner_ratio:
                winner, winner_ratio = known_key, ratio
    return excel_mapping.get(winner) if winner else None
|
| 349 |
+
|
| 350 |
+
|
| 351 |
+
def load_default_excel_if_present():
    """Load the mapping from ``DEFAULT_KPI_EXCEL``, or return ``{}`` if that file is absent."""
    if not Path(DEFAULT_KPI_EXCEL).exists():
        return {}
    return load_kpi_excel_mapping(DEFAULT_KPI_EXCEL)
|
| 353 |
|
|
|
|
|
|
|
| 354 |
|
| 355 |
+
# =========================================================
|
| 356 |
+
# 3) PDF LOAD / PARSE
|
| 357 |
+
# =========================================================
|
| 358 |
+
# Load the source PDF once at import time. When the file is absent the
# document lists are left empty (mirroring how a missing Excel workbook
# yields an empty mapping) rather than letting start-up fail in the loader.
if Path(PDF_FILE).exists():
    loader = PyPDFLoader(PDF_FILE)
    page_docs = loader.load()
else:
    print(f'PDF not found: {PDF_FILE}')
    loader = None
    page_docs = []

# Clean every page's text before chunking so chunks inherit the fixes.
for d in page_docs:
    d.page_content = fix_pdf_text(d.page_content)

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=220)
chunk_docs = splitter.split_documents(page_docs)
|
| 365 |
+
|
| 366 |
+
|
| 367 |
+
def normalize_lines(text: str):
    """Split cleaned ``text`` into trimmed lines, dropping blank ones."""
    trimmed = (raw.strip() for raw in fix_pdf_text(text).splitlines())
    return [entry for entry in trimmed if entry]
|
| 369 |
+
|
| 370 |
+
|
| 371 |
+
def is_metadata_line(line: str) -> bool:
    """Tell whether ``line`` is one of the known field-label lines.

    The line is loosely normalised and tested against a fixed list of
    start-anchored patterns.
    """
    key = normalize_loose(line)
    patterns = [
        r'^name$', r'^kpi id', r'^measure name', r'^description$', r'^definition$',
        r'^business meaning$', r'^category$', r'^owner$', r'^source$', r'^dashboard$', r'^glossary$',
    ]
    for pattern in patterns:
        if re.search(pattern, key):
            return True
    return False
|
| 378 |
+
|
| 379 |
+
|
| 380 |
+
def looks_like_formula_start(line: str) -> bool:
    """Heuristically decide whether ``line`` could begin a formula.

    True when the lower-cased line contains one of the known function or
    keyword fragments, when it contains both ``[`` and ``]``, or when it
    contains ``=``.
    """
    cleaned = fix_pdf_text(line)
    lowered = cleaned.lower().strip()
    formula_starts = [
        'calculate(', 'sum(', 'count(', 'distinctcount(', 'divide(', 'if(', 'filter(',
        'removefilters(', 'all(', 'average(', 'var ', 'return', 'switch(', 'countrows(',
        'summarize(', 'lookupvalue(', 'selectedvalue(',
    ]
    has_keyword = any(fragment in lowered for fragment in formula_starts)
    has_brackets = '[' in cleaned and ']' in cleaned
    return has_keyword or has_brackets or '=' in cleaned
|
| 395 |
+
|
| 396 |
+
|
| 397 |
+
def extract_named_field(lines, labels):
    """Return the line that directly follows the first matching label line.

    Lines and labels are compared by their loosely normalised form. A
    label on the very last line has no following value and is ignored.

    Returns:
        The cleaned, trimmed following line, or ``''`` when no label matches.
    """
    targets = [normalize_loose(label) for label in labels]
    # Pairing each line with its successor skips the last line automatically.
    for label_line, value_line in zip(lines, lines[1:]):
        if normalize_loose(label_line) in targets:
            return fix_pdf_text(value_line.strip())
    return ''
|
| 403 |
+
|
| 404 |
+
|
| 405 |
+
def extract_label_block(lines, labels):
    """Collect the text block that follows the first matching label line.

    Collection starts on the line after the label and stops at the first
    metadata line that is not itself one of the wanted labels.

    Returns:
        The collected lines joined by single spaces, or ``''`` when no
        label matches.
    """
    targets = [normalize_loose(label) for label in labels]
    start = next(
        (pos + 1 for pos, entry in enumerate(lines) if normalize_loose(entry) in targets),
        None,
    )
    if start is None:
        return ''

    pieces = []
    for entry in lines[start:]:
        cleaned = fix_pdf_text(entry.strip())
        if is_metadata_line(cleaned) and normalize_loose(cleaned) not in targets:
            break
        pieces.append(cleaned)
    return ' '.join(pieces).strip()
|
| 421 |
+
|
| 422 |
+
|
| 423 |
+
def extract_formula(lines):
    """Extract the first formula-looking block from ``lines``.

    Collection begins at the first line accepted by
    ``looks_like_formula_start`` and tracks the running balance of ``(``
    versus ``)``. It stops when, with parentheses balanced, the current
    or the next line is a metadata line.

    Returns:
        The collected lines joined by newlines, or ``''`` when no formula
        start is found.
    """
    collected = []
    depth = 0
    started = False
    total = len(lines)
    for pos, raw in enumerate(lines):
        current = fix_pdf_text(raw.strip())
        if not started:
            if looks_like_formula_start(current):
                started = True
                collected.append(current)
                depth += current.count('(') - current.count(')')
            continue

        if depth <= 0 and is_metadata_line(current):
            break
        collected.append(current)
        depth += current.count('(') - current.count(')')

        # Look ahead only once the parentheses are balanced and a next line exists.
        if depth > 0 or pos + 1 >= total:
            continue
        upcoming = fix_pdf_text(lines[pos + 1].strip())
        if upcoming and is_metadata_line(upcoming):
            break
    return '\n'.join(collected).strip()
|
| 444 |
+
|
| 445 |
+
|
| 446 |
+
def remove_formula_lines(lines, formula_text):
    """Return ``lines`` without those that appear in ``formula_text``.

    Lines are compared after trimming and cleaning. When ``formula_text``
    is falsy the original ``lines`` object is returned unchanged.
    """
    if not formula_text:
        return lines
    banned = set()
    for piece in formula_text.splitlines():
        if piece.strip():
            banned.add(fix_pdf_text(piece.strip()))
    return [entry for entry in lines if fix_pdf_text(entry.strip()) not in banned]
|
| 451 |
+
|
| 452 |
+
|
| 453 |
+
def build_business_meaning(audience, kpi_name, measure_name):
    """Compose an audience-specific sentence describing why a KPI matters.

    The sentence names the measure, falling back to the KPI name and then
    to ``'This KPI'``. ``'Leadership'`` and ``'Analytics User'`` get
    dedicated wording; any other audience gets the generic wording.
    """
    label = fix_pdf_text(measure_name or kpi_name or 'This KPI')
    if audience == 'Leadership':
        message = f"{label} helps leadership monitor performance and coverage trends for decision-making."
    elif audience == 'Analytics User':
        message = f"{label} is used in reporting and should be interpreted with source logic, filters, and exclusions."
    else:
        message = f"{label} helps business users understand what is being tracked and why it matters."
    return message
|
| 460 |
+
|
| 461 |
+
|
| 462 |
+
def parse_doc_entry(doc, audience, match_info=None, forced_kpi_name=None, excel_mapping=None, query_text=None):
    """Parse one PDF document/chunk into a structured KPI entry.

    The page text is cleaned with ``fix_pdf_text``, split with
    ``normalize_lines``, and the formula is separated from the remaining
    lines before the named fields are extracted.

    Args:
        doc: Object exposing ``page_content`` (text) and ``metadata`` (dict).
        audience: Audience label forwarded to ``build_business_meaning``.
        match_info: Optional description of how the entry was matched; added
            to the notes when given.
        forced_kpi_name: Name to use when no usable KPI name is extracted.
        excel_mapping: Mapping handed to ``lookup_kpi_excel_info``; ``None``
            is treated as an empty mapping.
        query_text: Original query text forwarded to the Excel lookup.

    Returns:
        A dict with the keys ``doc``, ``page``, ``context``, ``kpi_name``,
        ``kpi_id``, ``measure_name``, ``definition``, ``business``,
        ``formula``, ``notes``, ``report_sources`` and ``excel_info``.
    """
    context = fix_pdf_text(doc.page_content)
    lines = normalize_lines(context)
    formula = extract_formula(lines)
    non_formula_lines = remove_formula_lines(lines, formula)

    kpi_name = extract_named_field(non_formula_lines, ['Name'])
    kpi_id = extract_named_field(non_formula_lines, ['KPI ID from KPI Glossary', 'KPI ID'])
    measure_name = extract_named_field(non_formula_lines, ['Measure name in the PBI', 'Measure Name'])
    if forced_kpi_name and (not kpi_name or normalize_loose(kpi_name) == 'not found'):
        kpi_name = forced_kpi_name

    definition = extract_label_block(non_formula_lines, ['Description', 'Definition'])
    if not definition:
        # No labelled definition: fall back to the first lines that read like one.
        hints = ['number of', 'count of', 'unique', '%', 'percent', 'rate of', 'ratio of', 'calculated as']
        heur = [
            fix_pdf_text(line)
            for line in non_formula_lines
            if any(hint in line.lower() for hint in hints)
        ]
        definition = ' '.join(heur[:3]).strip() or 'Definition not found clearly in the source extract.'
    if not formula:
        formula = 'Formula not found in source extract.'

    excel_info = lookup_kpi_excel_info(kpi_name, measure_name, excel_mapping or {}, query_text=query_text)
    report_sources = excel_info.get('report_sources', []) if excel_info else []
    extra_excel_info = excel_info.get('extra_info', {}) if excel_info else {}
    matched_rows = excel_info.get('row_ids', []) if excel_info else []

    # Each note is emitted only when its own value is present. The guards
    # were previously commented out, leaving the appends either unconditional
    # (KeyError on missing keys) or tied to an unrelated preceding condition.
    notes = []
    if kpi_name:
        notes.append(f"**KPI Name:** {fix_pdf_text(kpi_name)}")
    if kpi_id:
        notes.append(f"**KPI ID:** {fix_pdf_text(kpi_id)}")
    if measure_name:
        notes.append(f"**Power BI Measure:** {fix_pdf_text(measure_name)}")
    if report_sources:
        notes.append(f"**Report / Offering Presence (Yes columns):** {', '.join(report_sources)}")
    if matched_rows:
        notes.append(f"**Matched Excel Row Count:** {len(matched_rows)}")
    # (displayed label, key in the Excel extra-info dict)
    excel_note_fields = (
        ('Placement in Offering', 'Placement in Offering'),
        ('Calculated at', 'Calculated at:'),
        ('Domain', 'Domain'),
        ('Interaction', 'Interaction'),
        ('Channels', 'Channels'),
    )
    for label, key in excel_note_fields:
        value = extra_excel_info.get(key)
        if value:
            notes.append(f"**{label}:** {value}")
    page = doc.metadata.get('page')
    if page is not None:
        # The stored page is shown incremented by one for display.
        notes.append(f"**Page:** {page + 1}")
    if match_info:
        notes.append(f"**Primary Search Match:** {match_info}")

    return {
        'doc': doc,
        'page': page,
        'context': context,
        'kpi_name': fix_pdf_text(kpi_name) or 'Not found',
        'kpi_id': fix_pdf_text(kpi_id) or 'Not found',
        'measure_name': fix_pdf_text(measure_name) or 'Not found',
        'definition': fix_pdf_text(definition),
        'business': build_business_meaning(audience, kpi_name, measure_name),
        'formula': fix_pdf_text(formula),
        'notes': '\n\n'.join(notes) if notes else 'No additional notes found.',
        'report_sources': report_sources,
        'excel_info': extra_excel_info,
    }
|
| 530 |
+
|
| 531 |
+
|
| 532 |
+
# Parse every chunk once at import time, using the 'Business User' audience
# and no Excel mapping; build_indices() consumes this list below.
PARSED_CHUNKS = [parse_doc_entry(doc, 'Business User') for doc in chunk_docs]
|
| 533 |
+
|
| 534 |
+
|
| 535 |
+
def entry_key(entry):
    """Build the de-duplication key for a parsed entry.

    Returns:
        A 3-tuple of ``normalize_exact`` applied to the KPI name, the measure
        name, and the first 300 characters of the entry context.
    """
    name_part = normalize_exact(entry['kpi_name'])
    measure_part = normalize_exact(entry['measure_name'])
    context_part = normalize_exact(entry['context'][:300])
    return name_part, measure_part, context_part
|
| 541 |
+
|
| 542 |
+
|
| 543 |
+
def build_indices(entries):
    """Index parsed entries by exact and loose KPI / measure names.

    Entries sharing the same ``entry_key`` are indexed only once. Empty
    names and the placeholder ``'not found'`` are never used as keys.

    Args:
        entries: Iterable of dicts as produced by ``parse_doc_entry``.

    Returns:
        Four dicts mapping a normalized name to a list of entries, in the
        order: exact KPI, exact measure, loose KPI, loose measure.
    """
    kpi_exact, measure_exact, kpi_loose, measure_loose = {}, {}, {}, {}
    processed = set()
    for item in entries:
        fingerprint = entry_key(item)
        if fingerprint in processed:
            continue
        processed.add(fingerprint)

        targets = (
            (kpi_exact, normalize_exact(item['kpi_name'])),
            (measure_exact, normalize_exact(item['measure_name'])),
            (kpi_loose, normalize_loose(item['kpi_name'])),
            (measure_loose, normalize_loose(item['measure_name'])),
        )
        for index, name in targets:
            if name and name != 'not found':
                index.setdefault(name, []).append(item)
    return kpi_exact, measure_exact, kpi_loose, measure_loose
|
| 564 |
+
|
| 565 |
+
|
| 566 |
+
# Module-level lookup tables built once from the parsed chunks.
(
    EXACT_KPI_INDEX,
    EXACT_MEASURE_INDEX,
    LOOSE_KPI_INDEX,
    LOOSE_MEASURE_INDEX,
) = build_indices(PARSED_CHUNKS)
# Sorted loose names, iterated by the token-overlap fallback search.
ALL_LOOSE_KPI_NAMES = sorted(LOOSE_KPI_INDEX)
ALL_LOOSE_MEASURE_NAMES = sorted(LOOSE_MEASURE_INDEX)
|
| 569 |
+
|
| 570 |
+
|
| 571 |
+
def token_overlap_score(query_text: str, candidate_text: str):
    """Measure how many significant query tokens appear in a candidate name.

    Tokens come from ``significant_tokens`` and are compared as sets.

    Returns:
        ``(coverage, overlap_count, candidate_size)`` where ``coverage`` is
        the share of distinct query tokens found in the candidate. Returns
        ``(0.0, 0, 0)`` when either side has no significant tokens.
    """
    query_tokens = significant_tokens(query_text)
    candidate_tokens = significant_tokens(candidate_text)
    if not (query_tokens and candidate_tokens):
        return 0.0, 0, 0

    unique_query = set(query_tokens)
    unique_candidate = set(candidate_tokens)
    shared_count = len(unique_query & unique_candidate)
    fraction_covered = shared_count / max(len(unique_query), 1)
    return fraction_covered, shared_count, len(unique_candidate)
|
| 580 |
+
|
| 581 |
+
|
| 582 |
+
def find_best_exact_like_name(query_text: str):
    """Resolve a query to an indexed KPI or measure name.

    Direct index hits are tried first, in the order exact KPI, exact measure,
    loose KPI, loose measure. If none hit, every loose KPI name and then
    every loose measure name is scored by token overlap. Only candidates that
    contain all query tokens (at least two) qualify; shorter candidates
    score higher and the first best score wins.

    Returns:
        ``(match_type, name)`` with ``match_type`` one of ``'kpi_exact'``,
        ``'measure_exact'``, ``'kpi_loose'``, ``'measure_loose'``; or
        ``(None, None)`` when nothing matches.
    """
    exact_form = normalize_exact(query_text)
    loose_form = normalize_loose(query_text)
    if not loose_form:
        return None, None

    direct_lookups = (
        ('kpi_exact', exact_form, EXACT_KPI_INDEX),
        ('measure_exact', exact_form, EXACT_MEASURE_INDEX),
        ('kpi_loose', loose_form, LOOSE_KPI_INDEX),
        ('measure_loose', loose_form, LOOSE_MEASURE_INDEX),
    )
    for label, key, index in direct_lookups:
        if key in index:
            return label, key

    winner = None
    top_score = -1.0
    pools = (
        ('kpi_loose', ALL_LOOSE_KPI_NAMES),
        ('measure_loose', ALL_LOOSE_MEASURE_NAMES),
    )
    for label, names in pools:
        for candidate in names:
            coverage, shared, size = token_overlap_score(loose_form, candidate)
            if coverage != 1.0 or shared < 2:
                continue
            # Reward shared tokens, penalize extra tokens in the candidate.
            score = shared * 10 - max(size - shared, 0)
            if score > top_score:
                top_score = score
                winner = (label, candidate)
    return winner if winner else (None, None)
|
| 610 |
+
|
| 611 |
+
|
| 612 |
+
def doc_contains_exact_text(doc, search_text: str) -> bool:
    """Return True when the loosely normalized search text occurs in the doc.

    Both the search text and ``doc.page_content`` are passed through
    ``normalize_loose`` before the substring test.

    A search text that normalizes to an empty string never matches.
    Without this guard ``'' in text`` is always true, so such a query would
    "match" every document.
    """
    needle = normalize_loose(search_text)
    if not needle:
        return False
    return needle in normalize_loose(doc.page_content)
|
| 614 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 615 |
|
| 616 |
# =========================================================
|
| 617 |
+
# 4) SEARCH
|
| 618 |
# =========================================================
|
| 619 |
+
def choose_primary_entry(query: str, audience: str, excel_mapping=None):
    """Pick the best KPI entry for a user query.

    The query is resolved through ``resolve_alias`` (the canonical term wins
    when present) and matched against the name indices. If no index matches,
    the raw text of the chunks and then of the pages is searched.

    Returns:
        ``(entry, score)`` where ``entry`` is a freshly parsed entry for the
        requested audience and ``score`` is a fixed confidence value per
        match type; ``(None, None)`` when the query is empty after cleaning
        or nothing matches.
    """
    if not clean_user_query(query):
        return None, None

    resolved_query, _, canonical_term = resolve_alias(query)
    effective_query = canonical_term or resolved_query
    match_type, canonical_name = find_best_exact_like_name(effective_query)

    # match type -> (index to read, note shown to the user, confidence score)
    indexed_routes = {
        'kpi_exact': (EXACT_KPI_INDEX, 'Exact KPI name match', 100.0),
        'measure_exact': (EXACT_MEASURE_INDEX, 'Exact PBI measure match', 95.0),
        'kpi_loose': (LOOSE_KPI_INDEX, 'Normalized KPI name match', 90.0),
        'measure_loose': (LOOSE_MEASURE_INDEX, 'Normalized PBI measure match', 88.0),
    }
    route = indexed_routes.get(match_type)
    if route is not None:
        index, label, score = route
        hit = index[canonical_name][0]
        entry = parse_doc_entry(
            hit['doc'],
            audience,
            match_info=label,
            excel_mapping=excel_mapping,
            query_text=effective_query,
        )
        return entry, score

    # Fall back to a raw text search: chunks first, then whole pages.
    raw_routes = (
        (chunk_docs, 'Exact raw text found in PDF chunk', 75.0),
        (page_docs, 'Exact raw text found in PDF page', 70.0),
    )
    for docs, label, score in raw_routes:
        hits = [doc for doc in docs if doc_contains_exact_text(doc, effective_query)]
        if hits:
            entry = parse_doc_entry(
                hits[0],
                audience,
                match_info=label,
                forced_kpi_name=effective_query,
                excel_mapping=excel_mapping,
                query_text=effective_query,
            )
            return entry, score
    return None, None
|
| 650 |
+
|
| 651 |
+
|
| 652 |
+
def find_second_same_occurrence(primary_entry, audience: str, excel_mapping=None):
    """Find another place in the PDF that carries the same KPI name.

    Indexed entries with the same loose KPI name are preferred (lowest page
    first). Otherwise the raw chunk texts and then the raw page texts are
    searched for the loose name. A candidate is rejected when the first 400
    characters of its cleaned text equal those of the primary entry.

    Args:
        primary_entry: Entry dict as returned by ``parse_doc_entry``.
        audience: Audience label used when parsing the second occurrence.
        excel_mapping: Optional mapping forwarded to ``parse_doc_entry``.

    Returns:
        A parsed entry for the second occurrence, or ``None``.
    """
    target_name_loose = normalize_loose(primary_entry['kpi_name'])
    if not target_name_loose or target_name_loose == 'not found':
        return None
    primary_context = normalize_exact(primary_entry['context'][:400])
    primary_doc = primary_entry.get('doc')
    primary_name = primary_entry['kpi_name']

    candidates = [
        entry
        for entry in LOOSE_KPI_INDEX.get(target_name_loose, [])
        if normalize_exact(entry['context'][:400]) != primary_context
    ]
    if candidates:
        # min() keeps the first entry among equal pages, like a stable sort.
        earliest = min(candidates, key=lambda e: e['page'] if e['page'] is not None else 99999)
        return parse_doc_entry(earliest['doc'], audience, excel_mapping=excel_mapping, query_text=primary_name)

    for docs in (chunk_docs, page_docs):
        for doc in docs:
            if doc is primary_doc:
                continue
            if target_name_loose not in normalize_loose(doc.page_content):
                continue
            # Clean the text exactly as parse_doc_entry does for 'context';
            # comparing the raw text would let the primary document slip
            # through and be reported as its own second occurrence.
            if normalize_exact(fix_pdf_text(doc.page_content)[:400]) == primary_context:
                continue
            return parse_doc_entry(
                doc,
                audience,
                forced_kpi_name=primary_name,
                excel_mapping=excel_mapping,
                query_text=primary_name,
            )
    return None
|
| 671 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 672 |
|
| 673 |
# =========================================================
|
| 674 |
+
# 5) UI HELPERS
|
| 675 |
# =========================================================
|
| 676 |
+
def compare_same(value1, value2, formula=False):
    """Tell whether two field values are equal after normalization.

    Formulas are compared through ``clean_formula_text``; every other field
    through ``normalize_loose``.
    """
    if formula:
        return clean_formula_text(value1) == clean_formula_text(value2)
    return normalize_loose(value1) == normalize_loose(value2)
|
| 678 |
|
|
|
|
| 679 |
|
| 680 |
+
def render_badges(sources):
    """Render report/offering names as a row of coloured pill badges.

    Colours cycle through info, success, warning and neutral. An empty or
    missing list yields a single neutral "Not mapped" pill.

    Returns:
        An HTML string of ``<span>`` elements separated by spaces.
    """
    if not sources:
        return "<span class='pill neutral'>Not mapped</span>"

    palette = ['info', 'success', 'warning', 'neutral']
    return ' '.join(
        f"<span class='pill {palette[position % len(palette)]}'>{html_escape(label)}</span>"
        for position, label in enumerate(sources)
    )
|
| 689 |
+
|
| 690 |
+
|
| 691 |
+
def field_diff_html(left_text, right_text, formula=False):
    """Render an HTML panel describing how two field values differ.

    Both values are cleaned with ``fix_pdf_text``. When ``compare_same``
    considers them equal a "no difference" box is returned. Otherwise the
    non-blank lines unique to each side are listed, at most 12 per side.

    Args:
        left_text: Value from occurrence 1 (may be ``None``).
        right_text: Value from occurrence 2 (may be ``None``).
        formula: Compare with formula normalization when True.

    Returns:
        An HTML string.
    """
    def meaningful_lines(text):
        # Non-blank lines, or a placeholder when there are none.
        return [ln for ln in text.splitlines() if ln.strip()] or ['Not found']

    def as_list_items(lines):
        items = ''.join(f"<li>{html_escape(line)}</li>" for line in lines[:12])
        return items or '<li>No unique lines found.</li>'

    left_text = fix_pdf_text(left_text or '')
    right_text = fix_pdf_text(right_text or '')
    if compare_same(left_text, right_text, formula=formula):
        return "<div class='diff-box same'>No difference. Both occurrences match for this field.</div>"

    left_lines = meaningful_lines(left_text)
    right_lines = meaningful_lines(right_text)
    removed_html = as_list_items([ln for ln in left_lines if ln not in right_lines])
    added_html = as_list_items([ln for ln in right_lines if ln not in left_lines])
    return f"""
    <div class='diff-box different'>
    <div class='diff-title'>What differs</div>
    <div class='diff-grid'>
    <div class='diff-col'><div class='diff-col-title'>Only in Occurrence 1</div><ul>{removed_html}</ul></div>
    <div class='diff-col'><div class='diff-col-title'>Only in Occurrence 2</div><ul>{added_html}</ul></div>
    </div>
    </div>
    """
|
| 711 |
+
|
| 712 |
+
|
| 713 |
+
def build_summary_cards(entry1, entry2=None, retrieval_score=None):
    """Render the summary card grid shown above the search result.

    Four cards describe the primary entry (KPI name, KPI ID, PBI measure,
    report/offering badges). A fifth card reports whether a second
    occurrence was found and whether all compared fields match.

    Args:
        entry1: Primary entry dict.
        entry2: Optional second-occurrence entry dict.
        retrieval_score: Optional score; when given it replaces the
            "matches checked" text on the comparison card.

    Returns:
        An HTML string wrapping all cards in a ``summary-grid`` div.
    """
    def badge(text, kind='default'):
        return f"<span class='pill {kind}'>{html_escape(text)}</span>"

    def card(label, value_html, sub_html, value_classes='summary-value'):
        return (
            f"<div class='summary-card'><div class='summary-label'>{label}</div>"
            f"<div class='{value_classes}'>{value_html}</div>"
            f"<div class='summary-sub'>{sub_html}</div></div>"
        )

    if entry1 and entry1['page'] is not None:
        page1 = f"Page {entry1['page'] + 1}"
    else:
        page1 = 'Page not found'
    report_badges = render_badges(entry1.get('report_sources', []))

    cards = [
        card('KPI Name', html_escape(entry1['kpi_name']), badge(page1, 'info')),
        card('KPI ID', html_escape(entry1['kpi_id']), badge('Glossary reference', 'neutral')),
        card('PBI Measure', html_escape(entry1['measure_name']), badge('Primary result', 'success')),
        card(
            'Report / Offering',
            report_badges,
            badge('Yes columns from Excel', 'neutral'),
            value_classes='summary-value badge-wrap',
        ),
    ]

    compare_hint = 'One occurrence found'
    compare_kind = 'neutral'
    if entry2:
        compared_fields = ('kpi_name', 'kpi_id', 'measure_name', 'definition', 'formula')
        same_all = all(
            compare_same(entry1[key], entry2[key], formula=(key == 'formula'))
            for key in compared_fields
        )
        if same_all:
            compare_hint, compare_kind = 'Exact name match found', 'success'
        else:
            compare_hint, compare_kind = 'Exact name match found (differences detected)', 'warning'

    if retrieval_score is not None:
        checked_text = f"search score {retrieval_score:.1f}"
    elif entry2:
        checked_text = '2 exact-name matches checked'
    else:
        checked_text = 'No second exact-name match'

    cards.append(card('Comparison Status', html_escape(compare_hint), badge(checked_text, compare_kind)))
    return "<div class='summary-grid'>" + ''.join(cards) + "</div>"
|
| 748 |
+
|
| 749 |
+
|
| 750 |
+
def build_side_by_side_comparison(entry1, entry2):
    """Render the side-by-side comparison of two occurrences of a KPI.

    With no entries an empty-state box is returned; with only the primary
    entry a banner says no second occurrence was found. Otherwise KPI name,
    KPI ID, Power BI measure, definition and formula are shown in two
    columns, each followed by a diff panel from ``field_diff_html``.

    Returns:
        An HTML string.
    """
    if not entry2:
        if not entry1:
            return "<div class='empty-state'>No relevant KPI entry found.</div>"
        page_text = f"Page {entry1['page'] + 1}" if entry1['page'] is not None else 'Unknown page'
        kpi_text = html_escape(entry1['kpi_name'])
        return f"<div class='compare-wrap single'><div class='compare-banner neutral'>Primary result shown for <b>{kpi_text}</b> ({html_escape(page_text)}). No second occurrence with the <b>exact same KPI name</b> was found.</div></div>"

    # (displayed label, entry key, compare as formula?)
    field_specs = (
        ('KPI Name', 'kpi_name', False),
        ('KPI ID', 'kpi_id', False),
        ('Power BI Measure', 'measure_name', False),
        ('Definition', 'definition', False),
        ('Formula', 'formula', True),
    )

    same_all = all(
        compare_same(entry1[key], entry2[key], formula=is_formula)
        for _, key, is_formula in field_specs
    )
    if same_all:
        overall_class = 'success'
        overall_text = 'Exact same KPI name found in two places'
    else:
        overall_class = 'warning'
        overall_text = 'Exact same KPI name found in two places, but details differ'
    page1 = f"Page {entry1['page'] + 1}" if entry1['page'] is not None else 'Unknown'
    page2 = f"Page {entry2['page'] + 1}" if entry2['page'] is not None else 'Unknown'

    def render_row(label, left_val, right_val, is_formula):
        left_val = fix_pdf_text(left_val or 'Not found')
        right_val = fix_pdf_text(right_val or 'Not found')
        status = 'same' if compare_same(left_val, right_val, formula=is_formula) else 'different'
        status_text = 'SAME' if status == 'same' else 'DIFFERENT'
        diff_panel = field_diff_html(left_val, right_val, formula=is_formula)
        code_class = 'code-block' if is_formula else ''
        return f"""
        <div class='compare-row {status}'>
        <div class='compare-field'><div class='field-name'>{html_escape(label)}</div><div class='field-status {status}'>{status_text}</div></div>
        <div class='compare-cell'><div class='cell-title'>Occurrence 1</div><div class='cell-content {code_class}'>{nl2br(left_val)}</div></div>
        <div class='compare-cell'><div class='cell-title'>Occurrence 2</div><div class='cell-content {code_class}'>{nl2br(right_val)}</div></div>
        </div>
        <div class='diff-row'>{diff_panel}</div>
        """

    rows = [
        render_row(label, entry1[key], entry2[key], is_formula)
        for label, key, is_formula in field_specs
    ]
    return f"""
    <div class='compare-wrap'>
    <div class='compare-banner {overall_class}'>{html_escape(overall_text)}</div>
    <div class='compare-head'>
    <div class='head-card'><div class='head-label'>Occurrence 1</div><div class='head-page'>{html_escape(page1)}</div><div class='head-name'>{html_escape(entry1['kpi_name'])}</div></div>
    <div class='head-card'><div class='head-label'>Occurrence 2</div><div class='head-page'>{html_escape(page2)}</div><div class='head-name'>{html_escape(entry2['kpi_name'])}</div></div>
    </div>
    <div class='compare-table'>{''.join(rows)}</div>
    </div>
    """
|
| 801 |
|
|
|
|
|
|
|
| 802 |
|
| 803 |
+
# =========================================================
|
| 804 |
+
# 6) FEEDBACK FLOW
|
| 805 |
+
# =========================================================
|
| 806 |
+
def run_search_and_prepare_feedback(question, audience, excel_mapping):
    """Run a search and append the initial state of the feedback widgets.

    Returns:
        The ``get_answer`` result tuple followed by the KPI name extracted
        from the notes (empty string when unavailable) and eleven
        ``gr.update`` objects for the feedback components.
    """
    results = get_answer(question, audience, excel_mapping=excel_mapping)

    current_kpi_name = ''
    has_notes = isinstance(results, tuple) and len(results) >= 5
    if has_notes:
        # Index 4 of the result tuple holds the notes text.
        current_kpi_name = extract_kpi_name_from_notes(results[4] or '')

    feedback_specs = (
        {'visible': True},
        {'value': None, 'visible': True},
        {'visible': False},
        {'value': None},
        {'value': '', 'visible': False},
        {'visible': False},
        {'value': ''},
        {'visible': False},
        {'value': None},
        {'value': '', 'visible': False},
        {'value': '', 'visible': False},
    )
    feedback_updates = tuple(gr.update(**spec) for spec in feedback_specs)
    return results + (current_kpi_name,) + feedback_updates
|
| 818 |
+
|
| 819 |
+
|
| 820 |
+
def clear_feedback_only():
    """Return the eleven ``gr.update`` objects that reset the feedback widgets."""
    reset_specs = (
        {'visible': False},
        {'value': None, 'visible': False},
        {'visible': False},
        {'value': None},
        {'value': '', 'visible': False},
        {'visible': False},
        {'value': ''},
        {'visible': False},
        {'value': None},
        {'value': '', 'visible': False},
        {'value': '', 'visible': False},
    )
    return tuple(gr.update(**spec) for spec in reset_specs)
|
| 827 |
+
|
| 828 |
+
|
| 829 |
+
def on_satisfaction_change(choice):
    """React to the "satisfied?" choice.

    ``'Yes'`` makes the first output visible and asks for a rating; ``'No'``
    makes the second output visible and asks for a follow-up question; any
    other value hides everything.

    Returns:
        A 5-tuple of ``gr.update`` objects.
    """
    wants_rating = choice == 'Yes'
    wants_followup = choice == 'No'

    if wants_rating:
        prompt = 'Please rate the definition from 1 to 5.'
    elif wants_followup:
        prompt = 'Please ask more so the app can try again.'
    else:
        prompt = ''

    return (
        gr.update(visible=wants_rating),
        gr.update(visible=wants_followup),
        gr.update(visible=False),
        gr.update(value='', visible=False),
        gr.update(value=prompt, visible=wants_rating or wants_followup),
    )
|
| 844 |
+
|
| 845 |
|
| 846 |
+
def submit_rating(rating):
    """Acknowledge a rating, or ask for one when none was selected.

    Returns:
        A visible ``gr.update`` carrying the message text.
    """
    if rating is None:
        message = 'Please select a rating from 1 to 5.'
    else:
        message = f"Thanks for the feedback. You rated the definition **{rating}/5**."
    return gr.update(value=message, visible=True)
|
| 850 |
|
| 851 |
+
|
| 852 |
+
def run_followup_search(followup_question, audience, current_kpi_name, excel_mapping):
    """Run a follow-up search, reusing the previous KPI for generic questions.

    A blank follow-up leaves the six result outputs untouched and shows a
    prompt. Otherwise, when a previous KPI name exists and
    ``is_generic_followup_question`` accepts the question, that KPI name is
    searched instead of the question text.

    Returns:
        An 18-item tuple: six result outputs, the current KPI name slot, and
        eleven ``gr.update`` objects for the feedback widgets.
    """
    def feedback_tail(kpi_slot, followup_value, helper_text):
        # The twelve trailing outputs, identical in both branches except for
        # the three arguments.
        return (
            kpi_slot,
            gr.update(visible=True),
            gr.update(value='No', visible=True),
            gr.update(visible=False),
            gr.update(value=None),
            gr.update(value='', visible=False),
            gr.update(visible=True),
            gr.update(value=followup_value),
            gr.update(visible=True),
            gr.update(value=None),
            gr.update(value=helper_text, visible=True),
            gr.update(value='', visible=False),
        )

    if not (followup_question and followup_question.strip()):
        untouched = tuple(gr.update() for _ in range(6))
        return untouched + feedback_tail(
            gr.update(value=current_kpi_name),
            '',
            'Please type a follow-up question before submitting.',
        )

    if current_kpi_name and is_generic_followup_question(followup_question):
        effective_followup = current_kpi_name
    else:
        effective_followup = followup_question
    used_context = effective_followup != followup_question

    results = get_answer(effective_followup, audience, excel_mapping=excel_mapping)

    new_current_kpi = current_kpi_name or ''
    if isinstance(results, tuple) and len(results) >= 5:
        # Index 4 of the result tuple holds the notes text.
        extracted = extract_kpi_name_from_notes(results[4] or '')
        if extracted:
            new_current_kpi = extracted

    if used_context and current_kpi_name:
        helper_message = f"Used KPI context from the previous result: **{current_kpi_name}**. If you are still not satisfied, choose below to raise an incident."
    else:
        helper_message = 'If you are still not satisfied, choose below to raise an incident.'

    return results + feedback_tail(new_current_kpi, followup_question, helper_message)
|
| 879 |
+
|
| 880 |
+
|
| 881 |
+
def on_still_not_satisfied_change(choice):
    """React to the "still not satisfied?" choice.

    ``'Yes'`` shows a box linking to ``SERVICENOW_INCIDENT_URL``; ``'No'``
    shows a short thank-you message; anything else hides both outputs.

    Returns:
        A 2-tuple of ``gr.update`` objects (HTML box, status message).
    """
    if choice == 'Yes':
        incident_markup = (
            "<div class='incident-box'>"
            "<div class='incident-title'>Still not satisfied?</div>"
            "<div class='incident-text'>You can raise an incident in ServiceNow for further help.</div>"
            f"<a class='incident-link' href='{html_escape(SERVICENOW_INCIDENT_URL)}' target='_blank' rel='noopener noreferrer'>Raise Incident in ServiceNow</a>"
            "</div>"
        )
        return (
            gr.update(value=incident_markup, visible=True),
            gr.update(value='You selected to raise an incident for further support.', visible=True),
        )

    declined = choice == 'No'
    status_text = 'Glad the follow-up helped.' if declined else ''
    return gr.update(value='', visible=False), gr.update(value=status_text, visible=declined)
|
| 888 |
|
|
|
|
| 889 |
|
| 890 |
# =========================================================
|
| 891 |
+
# 7) MAIN ANSWER
|
| 892 |
# =========================================================
|
| 893 |
+
def get_answer(question, audience, excel_mapping=None):
    """Answer a KPI question.

    Returns:
        A 6-tuple ``(summary_html, definition, business_meaning, formula,
        notes, comparison_html)``. For a blank question or an unmatched KPI
        the tuple carries explanatory placeholder texts instead.
    """
    if not (question and question.strip()):
        return (
            '<div class="empty-state">Ask a KPI question to see the summary cards.</div>',
            'Please enter a KPI question.',
            '',
            '',
            '',
            '<div class="empty-state">No comparison available.</div>',
        )

    primary_entry, best_score = choose_primary_entry(question, audience, excel_mapping=excel_mapping)

    if primary_entry is None:
        if Path(DEFAULT_KPI_EXCEL).exists():
            workbook_note = DEFAULT_KPI_EXCEL
        else:
            workbook_note = f"{DEFAULT_KPI_EXCEL} not found next to the app file"
        tried = fix_pdf_text(clean_user_query(question))
        diagnostics = f"**Search Tried:** `{tried}`\n\n**Excel Auto-load:** {workbook_note}\n\nIf the KPI text is present visually in the PDF but still not found, the PDF extraction may be breaking the text across lines/chunks."
        return (
            '<div class="empty-state">No KPI found. The app auto-loads the KPI Glossary Excel and should print the Yes columns for the matching KPI row, but this KPI could not be matched safely.</div>',
            'No KPI found for the searched text.',
            '',
            '',
            diagnostics,
            '<div class="empty-state">No comparison available because the primary KPI was not found.</div>',
        )

    second_entry = find_second_same_occurrence(primary_entry, audience, excel_mapping=excel_mapping)
    summary_html = build_summary_cards(primary_entry, second_entry, retrieval_score=best_score)
    comparison_html = build_side_by_side_comparison(primary_entry, second_entry)
    return (
        summary_html,
        primary_entry['definition'],
        primary_entry['business'],
        primary_entry['formula'],
        primary_entry['notes'],
        comparison_html,
    )
|
| 911 |
|
|
|
|
| 912 |
|
| 913 |
+
def clear_all(default_mapping):
    """Return the values that reset every output to its initial state.

    Args:
        default_mapping: Value placed back into the mapping slot unchanged.

    Returns:
        Ten fixed reset values followed by everything returned by
        ``clear_feedback_only``.
    """
    cleared_outputs = (
        '',
        'Business User',
        '<div class="empty-state">Ask a KPI question to see the summary cards.</div>',
        '',
        '',
        '',
        '',
        '<div class="empty-state">Comparison results will appear here.</div>',
        default_mapping,
        '',
    )
    return cleared_outputs + tuple(clear_feedback_only())
|
| 919 |
|
|
|
|
| 920 |
|
| 921 |
+
# =========================================================
|
| 922 |
+
# 8) UI
|
| 923 |
+
# =========================================================
|
| 924 |
+
CUSTOM_CSS = """
|
| 925 |
+
<style>
|
| 926 |
+
:root {
|
| 927 |
+
--bg1: #f6f8ff; --bg2: #fafdff; --bg3: #eef4ff; --card: rgba(255,255,255,0.82);
|
| 928 |
+
--card-strong: rgba(255,255,255,0.94); --stroke: rgba(99, 102, 241, 0.14); --text: #14213d;
|
| 929 |
+
--muted: #667085; --primary: #5b5bd6; --primary-2: #7c4dff; --success-bg: #ecfdf3;
|
| 930 |
+
--success-text: #067647; --warning-bg: #fff7ed; --warning-text: #c2410c; --neutral-bg: #f8fafc;
|
| 931 |
+
--neutral-text: #475467; --shadow: 0 18px 40px rgba(34, 55, 110, 0.10);
|
| 932 |
+
}
|
| 933 |
+
body, .gradio-container { background: linear-gradient(135deg, var(--bg1) 0%, var(--bg2) 45%, var(--bg3) 100%) !important; }
|
| 934 |
+
.gradio-container { max-width: 1500px !important; padding-top: 18px !important; }
|
| 935 |
+
.hero { background: linear-gradient(135deg, rgba(91,91,214,0.14), rgba(124,77,255,0.08), rgba(59,130,246,0.06)); border: 1px solid rgba(124,77,255,0.14); box-shadow: var(--shadow); border-radius: 26px; padding: 26px 30px; margin-bottom: 18px; backdrop-filter: blur(10px); }
|
| 936 |
+
.hero-title { font-size: 34px; font-weight: 800; color: var(--text); margin: 0 0 8px 0; }
|
| 937 |
+
.hero-subtitle { font-size: 15px; color: var(--muted); margin: 0; line-height: 1.65; }
|
| 938 |
+
.panel { background: var(--card) !important; border: 1px solid var(--stroke) !important; border-radius: 22px !important; box-shadow: var(--shadow) !important; padding: 16px !important; backdrop-filter: blur(12px); }
|
| 939 |
+
textarea, input, .gr-textbox, .gr-dropdown, .gr-radio { border-radius: 16px !important; }
|
| 940 |
+
button.primary, button[class*='primary'] { background: linear-gradient(135deg, var(--primary), var(--primary-2)) !important; border: none !important; color: white !important; border-radius: 16px !important; box-shadow: 0 10px 22px rgba(91,91,214,0.22) !important; }
|
| 941 |
+
button.secondary { border-radius: 16px !important; }
|
| 942 |
+
button[role='tab'][aria-selected='true'] { color: var(--primary) !important; border-bottom: 3px solid var(--primary) !important; }
|
| 943 |
+
.kpi-note { background: rgba(255,255,255,0.68); border: 1px dashed rgba(91,91,214,0.18); border-radius: 16px; padding: 12px 14px; color: var(--muted); font-size: 13px; margin-top: 8px; }
|
| 944 |
+
.summary-grid { display: grid; grid-template-columns: repeat(5, minmax(0, 1fr)); gap: 14px; margin-bottom: 16px; }
|
| 945 |
+
.summary-card { background: linear-gradient(180deg, var(--card-strong), rgba(255,255,255,0.72)); border: 1px solid rgba(91,91,214,0.12); border-radius: 20px; padding: 16px; box-shadow: 0 12px 28px rgba(56,72,122,0.08); min-height: 122px; }
|
| 946 |
+
.summary-label { color: var(--muted); font-size: 12px; font-weight: 700; letter-spacing: .04em; text-transform: uppercase; margin-bottom: 10px; }
|
| 947 |
+
.summary-value { color: var(--text); font-size: 20px; font-weight: 800; line-height: 1.25; word-break: break-word; }
|
| 948 |
+
.summary-sub { margin-top: 14px; }
|
| 949 |
+
.badge-wrap { display:flex; flex-wrap:wrap; gap:8px; align-items:flex-start; }
|
| 950 |
+
.pill { display:inline-flex; align-items:center; gap:6px; padding:7px 11px; border-radius:999px; font-size:12px; font-weight:700; }
|
| 951 |
+
.pill.info { background: rgba(59,130,246,0.12); color:#1d4ed8; }
|
| 952 |
+
.pill.success { background: rgba(16,185,129,0.14); color:#047857; }
|
| 953 |
+
.pill.warning { background: rgba(245,158,11,0.16); color:#b45309; }
|
| 954 |
+
.pill.neutral { background: rgba(100,116,139,0.12); color:#475467; }
|
| 955 |
+
.compare-wrap { display:flex; flex-direction:column; gap:14px; }
|
| 956 |
+
.compare-banner { padding:14px 16px; border-radius:16px; font-weight:800; font-size:14px; border:1px solid transparent; }
|
| 957 |
+
.compare-banner.success { background: var(--success-bg); color: var(--success-text); }
|
| 958 |
+
.compare-banner.warning { background: var(--warning-bg); color: var(--warning-text); }
|
| 959 |
+
.compare-banner.neutral { background: var(--neutral-bg); color: var(--neutral-text); }
|
| 960 |
+
.compare-head { display:grid; grid-template-columns: repeat(2, minmax(0,1fr)); gap:14px; }
|
| 961 |
+
.head-card { background: rgba(255,255,255,0.82); border:1px solid rgba(99,102,241,0.12); border-radius:18px; padding:16px; }
|
| 962 |
+
.head-label { color: var(--muted); font-size:12px; font-weight:700; text-transform:uppercase; letter-spacing:.04em; }
|
| 963 |
+
.head-page { color: var(--primary); font-size:13px; font-weight:700; margin-top:6px; }
|
| 964 |
+
.head-name { color: var(--text); font-size:18px; font-weight:800; margin-top:8px; }
|
| 965 |
+
.compare-table { display:flex; flex-direction:column; gap:12px; }
|
| 966 |
+
.compare-row { display:grid; grid-template-columns:220px 1fr 1fr; gap:12px; align-items:stretch; }
|
| 967 |
+
.compare-field, .compare-cell { background: rgba(255,255,255,0.82); border:1px solid rgba(99,102,241,0.10); border-radius:18px; padding:14px; }
|
| 968 |
+
.compare-row.same .compare-field { background: linear-gradient(180deg, #f0fdf4, #ffffff); }
|
| 969 |
+
.compare-row.different .compare-field { background: linear-gradient(180deg, #fff7ed, #ffffff); }
|
| 970 |
+
.field-name { color: var(--text); font-weight:800; font-size:15px; }
|
| 971 |
+
.field-status { display:inline-block; margin-top:12px; padding:6px 10px; border-radius:999px; font-size:11px; font-weight:800; letter-spacing:.05em; }
|
| 972 |
+
.field-status.same { background: rgba(16,185,129,0.14); color:#047857; }
|
| 973 |
+
.field-status.different { background: rgba(245,158,11,0.16); color:#b45309; }
|
| 974 |
+
.cell-title { color: var(--muted); font-size:12px; font-weight:700; text-transform:uppercase; letter-spacing:.04em; margin-bottom:8px; }
|
| 975 |
+
.cell-content { color: var(--text); font-size:14px; line-height:1.6; white-space:normal; word-break:break-word; }
|
| 976 |
+
.code-block { font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, 'Liberation Mono', monospace; background:#f8fafc; border:1px solid rgba(148,163,184,0.16); border-radius:14px; padding:12px; white-space:pre-wrap; }
|
| 977 |
+
.diff-box { background: rgba(255,255,255,0.76); border:1px solid rgba(99,102,241,0.10); border-radius:18px; padding:14px; }
|
| 978 |
+
.diff-box.same { color:#047857; background: rgba(236,253,243,0.82); }
|
| 979 |
+
.diff-box.different { background: rgba(255,247,237,0.78); }
|
| 980 |
+
.diff-title { font-size:13px; font-weight:800; color: var(--text); margin-bottom:10px; }
|
| 981 |
+
.diff-grid { display:grid; grid-template-columns: repeat(2, minmax(0,1fr)); gap:12px; }
|
| 982 |
+
.diff-col { background: rgba(255,255,255,0.85); border-radius:14px; padding:12px; border:1px dashed rgba(99,102,241,0.12); }
|
| 983 |
+
.diff-col-title { font-size:12px; font-weight:800; color: var(--muted); margin-bottom:8px; text-transform:uppercase; }
|
| 984 |
+
.diff-col ul { margin:0; padding-left:18px; }
|
| 985 |
+
.diff-col li { margin:6px 0; color: var(--text); font-size:13px; }
|
| 986 |
+
.feedback-box { background: rgba(255,255,255,0.76); border:1px solid rgba(99,102,241,0.10); border-radius:18px; padding:16px; margin-top:14px; }
|
| 987 |
+
.feedback-title { font-size:16px; font-weight:800; color: var(--text); margin-bottom:8px; }
|
| 988 |
+
.incident-box { background: rgba(255,247,237,0.78); border:1px solid rgba(245,158,11,0.22); border-radius:16px; padding:14px; margin-top:10px; }
|
| 989 |
+
.incident-title { font-weight:800; color:#9a3412; margin-bottom:6px; }
|
| 990 |
+
.incident-text { color:#7c2d12; margin-bottom:10px; }
|
| 991 |
+
.incident-link { display:inline-block; padding:10px 14px; border-radius:12px; background:#7c3aed; color:white !important; text-decoration:none; font-weight:700; }
|
| 992 |
+
.empty-state { background: rgba(255,255,255,0.74); border:1px dashed rgba(91,91,214,0.20); border-radius:18px; padding:18px; color: var(--muted); }
|
| 993 |
+
@media (max-width:1300px){ .summary-grid{grid-template-columns:repeat(3,minmax(0,1fr));} }
|
| 994 |
+
@media (max-width:1100px){ .summary-grid{grid-template-columns:repeat(2,minmax(0,1fr));} .compare-row{grid-template-columns:1fr;} .compare-head{grid-template-columns:1fr;} .diff-grid{grid-template-columns:1fr;} }
|
| 995 |
+
@media (max-width:700px){ .summary-grid{grid-template-columns:1fr;} }
|
| 996 |
+
</style>
|
| 997 |
+
"""
|
| 998 |
+
|
| 999 |
+
# Startup defaults: the KPI mapping is loaded once at import time and a
# human-readable status line describes whether the default workbook was found.
# NOTE(review): load_default_excel_if_present() is defined elsewhere in this
# file; its result must support len() because the status line reports its size.
DEFAULT_MAPPING = load_default_excel_if_present()

# The status text depends only on whether the workbook exists on disk.
if Path(DEFAULT_KPI_EXCEL).exists():
    DEFAULT_STATUS = f"Auto-loaded Excel: {DEFAULT_KPI_EXCEL} | mapped KPI keys: {len(DEFAULT_MAPPING)}"
else:
    DEFAULT_STATUS = f"Auto-load Excel not found: place '{DEFAULT_KPI_EXCEL}' next to app.py"
|
| 1004 |
|
|
|
|
|
|
|
| 1005 |
|
| 1006 |
+
# Gradio UI definition: builds the layout (question panel, result tabs,
# feedback flow) and wires the buttons / radios to their callbacks.
# NOTE(review): the source this was recovered from had its indentation
# stripped, so the nesting of the `with` containers below is reconstructed
# from blank-line grouping -- confirm against the deployed layout.
with gr.Blocks() as demo:
    # The stylesheet is injected as a raw HTML component.
    gr.HTML(CUSTOM_CSS)
    gr.HTML("""
<div class='hero'>
<div class='hero-title'>💊 Pharma KPI Chatbot</div>
<p class='hero-subtitle'>
This is an AI-powered solution for OneCI applications that helps business users quickly access, analyze, and understand key KPIs through natural language queries.
It enables faster business insights, improved decision-making, and a more efficient reporting experience across OneCI platforms.
</p>
</div>
""")

    with gr.Row():
        # Left panel: question input and actions.
        with gr.Column(scale=4, elem_classes=["panel"]):
            question = gr.Textbox(
                label="Ask KPI question",
                placeholder="e.g. OCCP Interactions",
                lines=2,
            )
            audience = gr.Dropdown(
                choices=["Business User", "Analytics User", "Leadership"],
                value="Business User",
                label="Explain for",
            )
            excel_status = gr.Markdown(DEFAULT_STATUS)
            submit_btn = gr.Button("Submit", variant="primary")
            clear_btn = gr.Button("Clear")

        # Right panel: summary cards plus one tab per answer section.
        with gr.Column(scale=8, elem_classes=["panel"]):
            summary_cards = gr.HTML(
                '<div class="empty-state">Ask a KPI question to see the summary cards.</div>'
            )
            with gr.Tab("Definition"):
                definition = gr.Markdown()
            with gr.Tab("Business Meaning"):
                business = gr.Markdown()
            with gr.Tab("Formula"):
                formula = gr.Textbox(label="Formula", lines=14)
            with gr.Tab("Notes"):
                notes = gr.Markdown()
            with gr.Tab("Comparison"):
                comparison = gr.HTML(
                    '<div class="empty-state">Comparison results will appear here.</div>'
                )

    # State holders passed to the callbacks as inputs/outputs.
    excel_mapping_state = gr.State(DEFAULT_MAPPING)
    current_kpi_state = gr.State("")

    # Feedback flow; every container starts hidden and is revealed by callbacks.
    with gr.Group(visible=False) as feedback_panel:
        gr.HTML("<div class='feedback-box'><div class='feedback-title'>Are you satisfied with the definition?</div></div>")
        satisfied_choice = gr.Radio(
            choices=["Yes", "No"],
            label="Was the definition satisfactory?",
            visible=True,
        )
        with gr.Row(visible=False) as rating_row:
            rating_value = gr.Radio(
                choices=["1", "2", "3", "4", "5"],
                label="Rate the definition (1 to 5)",
            )
            rating_submit_btn = gr.Button("Submit Rating")
        rating_status = gr.Markdown(visible=False)
        with gr.Column(visible=False) as followup_row:
            followup_question = gr.Textbox(
                label="Ask more",
                placeholder="Please ask your follow-up question here",
                lines=3,
            )
            followup_submit_btn = gr.Button("Ask More", variant="primary")
        with gr.Row(visible=False) as still_not_satisfied_row:
            still_not_satisfied_choice = gr.Radio(
                choices=["Yes", "No"],
                label="Still not satisfied after the follow-up?",
            )
        feedback_status = gr.Markdown(visible=False)
        incident_html = gr.HTML(visible=False)

    # Shared output groups. The order of components in each list is part of
    # the contract with the callbacks (they return values positionally).
    _answer_outputs = [
        summary_cards,
        definition,
        business,
        formula,
        notes,
        comparison,
    ]
    _feedback_outputs = [
        feedback_panel,
        satisfied_choice,
        rating_row,
        rating_value,
        rating_status,
        followup_row,
        followup_question,
        still_not_satisfied_row,
        still_not_satisfied_choice,
        feedback_status,
        incident_html,
    ]
    _search_outputs = _answer_outputs + [current_kpi_state] + _feedback_outputs
    _clear_outputs = (
        [question, audience]
        + _answer_outputs
        + [excel_mapping_state, current_kpi_state]
        + _feedback_outputs
    )

    # Primary search.
    submit_btn.click(
        fn=run_search_and_prepare_feedback,
        inputs=[question, audience, excel_mapping_state],
        outputs=_search_outputs,
    )

    # Satisfaction radio toggles the rating / follow-up sections.
    satisfied_choice.change(
        fn=on_satisfaction_change,
        inputs=[satisfied_choice],
        outputs=[
            rating_row,
            followup_row,
            still_not_satisfied_row,
            incident_html,
            feedback_status,
        ],
    )
    rating_submit_btn.click(
        fn=submit_rating,
        inputs=[rating_value],
        outputs=[rating_status],
    )

    # Follow-up search writes to the same outputs as the primary search.
    followup_submit_btn.click(
        fn=run_followup_search,
        inputs=[followup_question, audience, current_kpi_state, excel_mapping_state],
        outputs=_search_outputs,
    )
    still_not_satisfied_choice.change(
        fn=on_still_not_satisfied_change,
        inputs=[still_not_satisfied_choice],
        outputs=[incident_html, feedback_status],
    )

    # Clear button: outputs cover the inputs, results, states and feedback widgets.
    clear_btn.click(
        fn=clear_all,
        inputs=[excel_mapping_state],
        outputs=_clear_outputs,
    )
|
| 1098 |
|
| 1099 |
# Start the Gradio server only when this file is executed as a script, so that
# importing the module (e.g. from tests or tooling) does not launch the app.
if __name__ == "__main__":
    demo.launch()
|