Barisha committed on
Commit
a8608f2
Β·
verified Β·
1 Parent(s): e1be192

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +329 -68
app.py CHANGED
@@ -1,83 +1,344 @@
 
 
 
1
  import gradio as gr
2
  import numpy as np
3
  import pandas as pd
 
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
- import torch
6
-
7
- MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
8
-
9
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
10
- model = AutoModelForCausalLM.from_pretrained(
11
- MODEL_NAME,
12
- torch_dtype=torch.float32,
13
- device_map="cpu"
14
- )
15
-
16
- # ----- Trend detection -----
17
- def detect_trend(values):
18
- diffs = np.diff(values)
19
- if all(d > 0 for d in diffs):
20
- return "INCREASING"
21
- elif all(d < 0 for d in diffs):
22
- return "DECREASING"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  else:
24
- return "MIXED"
25
-
26
- # ----- Anomaly detection -----
27
- def detect_anomaly(values):
28
- mean = np.mean(values)
29
- std = np.std(values)
30
- anomalies = [(i, v) for i, v in enumerate(values) if abs(v - mean) > 2 * std]
31
- return "No anomalies detected" if len(anomalies) == 0 else str(anomalies)
32
-
33
- # ----- LLM explanation -----
34
- def explanation(entity, values, trend):
35
- prompt = f"""
36
- You are a KPI analysis expert.
37
- The entity is: {entity}
38
- The values are: {values}
39
- The detected trend is: {trend}
40
- Explain in simple words why the trend is {trend}.
41
- """
42
- inputs = tokenizer(prompt, return_tensors="pt")
43
- outputs = model.generate(**inputs, max_new_tokens=150)
44
- return tokenizer.decode(outputs[0], skip_special_tokens=True)
45
-
46
- # ----- Wrapper with loading message -----
47
- def analyze_with_spinner(entity, value_string):
48
- yield "⏳ **Analyzing... please wait...**"
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  try:
50
- values = [float(x.strip()) for x in value_string.split(",")]
51
- except:
52
- yield "❌ Error: Please enter numbers separated by commas"
53
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
- trend = detect_trend(values)
56
- anomaly = detect_anomaly(values)
57
- exp = explanation(entity, values, trend)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
- yield f"""
60
- πŸ“Œ **Entity:** {entity}
61
- πŸ“‰ **Trend:** {trend}
62
- ⚠️ **Anomalies:** {anomaly}
63
- 🧠 **Explanation:**
64
- {exp}
65
- """
 
 
 
 
 
 
66
 
67
- # ----- UI -----
68
  with gr.Blocks() as demo:
69
- gr.Markdown("# πŸ“ˆ KPI Analyzer")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
- entity = gr.Textbox(label="KPI Name (example: volte_rate)")
72
- values = gr.Textbox(label="Values (comma separated, example: 10,11,13,14,15)")
73
- output = gr.Markdown()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
- run_btn = gr.Button("Analyze")
76
 
77
- run_btn.click(
78
- fn=analyze_with_spinner,
79
- inputs=[entity, values],
80
- outputs=output,
81
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  demo.launch()
 
1
+ import io
2
+ import math
3
+ import time
4
  import gradio as gr
5
  import numpy as np
6
  import pandas as pd
7
+ import matplotlib.pyplot as plt
8
  from transformers import AutoTokenizer, AutoModelForCausalLM
9
+
10
+ # ---------- CONFIG ----------
11
+ # CPU-friendly model for optional explanations
12
+ LLM_NAME = "microsoft/Phi-3-mini-4k-instruct" # works in free HF Spaces
13
+ # LLM will be loaded lazily only if user requests explanation
14
+
15
+ # Globals for lazy LLM load
16
+ _llm_tokenizer = None
17
+ _llm_model = None
18
+ _llm_loaded = False
19
+
20
def load_llm():
    """Load the explanation LLM into the module-level cache (idempotent).

    A repeated call is a no-op; otherwise the tokenizer and a CPU-mapped
    model for ``LLM_NAME`` are fetched and the loaded flag is set.
    """
    global _llm_tokenizer, _llm_model, _llm_loaded
    if not _llm_loaded:
        _llm_tokenizer = AutoTokenizer.from_pretrained(LLM_NAME)
        _llm_model = AutoModelForCausalLM.from_pretrained(
            LLM_NAME,
            device_map="cpu"
        )
        _llm_loaded = True
30
+
31
+ # ---------- DATA HELPERS ----------
32
def try_parse_dates(df):
    """Find and parse a date-like column in ``df``.

    Pass 1 tries columns with conventional names (date/day/timestamp/time);
    pass 2 falls back to the first column pandas can parse to at least one
    real datetime. The matched column is converted in place.

    Returns:
        (column_name, df) on success, or (None, df) when nothing parses.
    """
    # Pass 1: conventionally named columns. str() guards non-string labels
    # (a purely numeric header would otherwise crash on .lower()).
    for col in df.columns:
        if str(col).lower() in ["date", "day", "timestamp", "time"]:
            try:
                df[col] = pd.to_datetime(df[col])
                return col, df
            except Exception:
                continue
    # Pass 2: first column that parses to at least one non-NaT datetime.
    for col in df.columns:
        try:
            parsed = pd.to_datetime(df[col])
            # ensure the parse produced something usable
            if parsed.notna().sum() > 0:
                df[col] = parsed
                return col, df
        except Exception:
            continue
    return None, df
52
+
53
def numeric_kpis(df, date_col=None):
    """Return the names of numeric columns, excluding ``date_col`` if given."""
    frame = df.drop(columns=[date_col]) if date_col else df
    return frame.select_dtypes(include=[np.number]).columns.tolist()
59
+
60
+ # ---------- ANALYSIS METRICS ----------
61
def calc_metrics(series, dates=None):
    """Compute trend metrics for one KPI series.

    Args:
        series: pandas Series of values in time order; NaNs are dropped.
        dates: accepted for interface compatibility but unused — the slope
            is fit against the integer position index, which is robust to
            irregular date spacing.

    Returns:
        dict with keys ``trend`` (INCREASING/DECREASING/MIXED), ``slope``,
        ``std``, ``pct_change`` (first→last relative change, inf when the
        first value is 0), and ``score`` (change magnitude vs noisiness).
    """
    clean = series.dropna().astype(float)
    # Too few points for a slope: report neutral metrics.
    if len(clean) < 2:
        return {
            "trend": "MIXED",
            "slope": 0.0,
            "std": float(np.std(clean)) if len(clean) > 0 else 0.0,
            "pct_change": 0.0,
            "score": 0.0
        }
    y = clean.values
    positions = np.arange(len(clean))
    slope = np.polyfit(positions, y, 1)[0]
    noise = float(np.std(y))
    first, last = float(y[0]), float(y[-1])
    if first == 0:
        pct_change = 0.0 if last == 0 else float("inf")
    else:
        pct_change = (last - first) / abs(first)
    # Label the trend: strictly monotone steps, else MIXED.
    steps = np.diff(y)
    if np.all(steps > 0):
        trend = "INCREASING"
    elif np.all(steps < 0):
        trend = "DECREASING"
    else:
        trend = "MIXED"
    # Score rises with absolute slope and percent change, falls with noise;
    # the epsilons avoid division by zero on constant series.
    score = float(abs(slope) * (abs(pct_change) + 1e-6) / (noise + 1e-6))
    return {
        "trend": trend,
        "slope": float(slope),
        "std": noise,
        "pct_change": pct_change,
        "score": score
    }
101
+
102
def detect_anomalies(series, threshold_sigma=2.0):
    """Return [(index, value), ...] for points beyond threshold_sigma
    standard deviations from the mean.

    Empty or constant (zero-std) input yields an empty list.
    """
    values = series.dropna().astype(float).values
    if len(values) == 0:
        return []
    center = np.mean(values)
    spread = np.std(values)
    # Constant series: no deviation can exceed any positive threshold.
    if spread == 0:
        return []
    cutoff = threshold_sigma * spread
    return [(i, float(v)) for i, v in enumerate(values) if abs(v - center) > cutoff]
115
+
116
+ # ---------- PLOTTING ----------
117
def plot_top_scores(df_scores, top_k=5):
    """Render a bar chart of the ``top_k`` highest-scoring KPIs.

    Args:
        df_scores: DataFrame with at least ``kpi`` and ``score`` columns.
        top_k: number of top rows (by score, descending) to chart.

    Returns:
        io.BytesIO containing the PNG, rewound to position 0.
    """
    ranked = df_scores.sort_values("score", ascending=False).head(top_k)
    fig, ax = plt.subplots(figsize=(6, 3.5))
    ax.bar(ranked["kpi"], ranked["score"])
    ax.set_title(f"Top {top_k} KPIs by change score")
    ax.set_ylabel("Score")
    ax.set_xlabel("KPI")
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()
    png = io.BytesIO()
    fig.savefig(png, format="png")
    plt.close(fig)
    png.seek(0)
    return png
131
+
132
def plot_time_series_with_anomalies(series):
    """Plot a KPI series with its >2-sigma outliers highlighted in red.

    Falls back to a "No numeric data" placeholder figure when the series
    is empty after dropping NaNs.

    Returns:
        io.BytesIO containing the PNG, rewound to position 0.
    """
    clean = series.dropna().astype(float)
    if clean.empty:
        fig, ax = plt.subplots(figsize=(6,3))
        ax.text(0.5, 0.5, "No numeric data", ha="center")
    else:
        fig, ax = plt.subplots(figsize=(6,3.5))
        ax.plot(clean.index, clean.values, marker="o")
        outliers = detect_anomalies(clean)
        if outliers:
            positions = [p for p, _ in outliers]
            heights = [v for _, v in outliers]
            # anomaly positions are integer offsets; translate them to the
            # series' own index labels so the scatter lines up with the plot
            ax.scatter(clean.index[positions], heights, color='red', zorder=5)
    ax.set_title("Time series (with anomalies in red)")
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()
    png = io.BytesIO()
    fig.savefig(png, format="png")
    plt.close(fig)
    png.seek(0)
    return png
155
+
156
+ # ---------- LLM EXPLANATION ----------
157
def llm_explain(kpi_name, values_list, trend_label):
    """Generate a short LLM explanation for a KPI's detected trend.

    Loads the model lazily on first use, then prompts it with the KPI
    name, its values, and the trend label. Returns the decoded text,
    stripped of surrounding whitespace.
    """
    # lazy load (load_llm is itself idempotent)
    if not _llm_loaded:
        load_llm()
    prompt = f"""You are a concise KPI analytics assistant.
KPI: {kpi_name}
Values (in order): {values_list}
Detected trend: {trend_label}

Give a 2-3 sentence explanation of what likely happened and possible reasons (short).
Also provide a one-line suggestion to check further."""
    encoded = _llm_tokenizer(prompt, return_tensors="pt")
    generated = _llm_model.generate(**encoded, max_new_tokens=120)
    decoded = _llm_tokenizer.decode(generated[0], skip_special_tokens=True)
    return decoded.strip()
172
+
173
+ # ---------- MAIN ANALYSIS FUNCTION ----------
174
def analyze_csv(file_obj, date_col_choice, selected_kpis, top_k=5, explanation=False):
    """Score each selected KPI column of an uploaded CSV by trend strength.

    Args:
        file_obj: uploaded file object exposing a ``.name`` path (gradio File).
        date_col_choice: optional date column name; when present the frame is
            sorted by it and it becomes the series index for plotting.
        selected_kpis: list of KPI column names to analyze.
        top_k: number of top-scoring KPIs to chart/explain.
        explanation: when True, generate an LLM explanation per top KPI.

    Returns:
        dict with ``score_df``, ``top_kpis``, ``top_chart`` (PNG BytesIO),
        ``explanations`` and ``raw_df_head`` — or ``{"error": message}``.
    """
    try:
        df = pd.read_csv(file_obj.name)
    except Exception as e:
        return {"error": f"Failed to read CSV: {e}"}
    # Guard inputs that would otherwise raise deep inside the scoring loop
    # (empty selection -> KeyError on sort; unknown column -> KeyError on df[kpi]).
    if not selected_kpis:
        return {"error": "No KPI columns selected"}
    missing = [k for k in selected_kpis if k not in df.columns]
    if missing:
        return {"error": f"KPI column(s) not found in CSV: {missing}"}
    # Parse and sort by the date column when the user provided a valid one.
    if date_col_choice and date_col_choice in df.columns:
        try:
            df[date_col_choice] = pd.to_datetime(df[date_col_choice])
        except Exception:
            # Unparseable dates: keep raw values; sorting still proceeds.
            pass
        df = df.sort_values(by=date_col_choice).reset_index(drop=True)
    # Build metrics for each KPI.
    scores = []
    for kpi in selected_kpis:
        series = df[kpi]
        # Use the date column as index so downstream plots get time labels.
        if date_col_choice and date_col_choice in df.columns:
            series = series.copy()
            series.index = df[date_col_choice]
        metrics = calc_metrics(series)
        anomalies = detect_anomalies(series)
        scores.append({
            "kpi": kpi,
            "trend": metrics["trend"],
            "slope": metrics["slope"],
            "std": metrics["std"],
            "pct_change": metrics["pct_change"],
            "score": metrics["score"],
            "anomalies": anomalies,
            "values": series.tolist() if hasattr(series, "tolist") else list(series)
        })
    score_df = pd.DataFrame(scores)
    score_df = score_df.sort_values("score", ascending=False).reset_index(drop=True)
    # Top-K bar chart and the corresponding KPI names.
    fig_buf = plot_top_scores(score_df, top_k=top_k)
    top_kpis = score_df.head(top_k)["kpi"].tolist()
    explanations = {}
    if explanation:
        # LLM explanations are slow; only generated on request, per top KPI.
        for r in score_df.head(top_k).itertuples():
            try:
                expl = llm_explain(r.kpi, r.values, r.trend)
            except Exception as e:
                expl = f"LLM error: {e}"
            explanations[r.kpi] = expl
    return {
        "score_df": score_df,
        "top_kpis": top_kpis,
        "top_chart": fig_buf,
        "explanations": explanations,
        "raw_df_head": df.head().to_csv(index=False)
    }
231
 
232
+ # ---------- GRADIO UI ----------
233
def on_upload(file):
    """Inspect an uploaded CSV and suggest date/KPI columns for the UI.

    Returns a 4-tuple: (error-textbox update, numeric column names,
    default KPI selection of up to 10 columns, detected date column or "").
    """
    try:
        df = pd.read_csv(file.name)
    except Exception as e:
        # BUG FIX: the error path previously returned 3 values while the
        # success path returned 4, breaking the gradio output mapping.
        return gr.update(visible=True, value=f"Failed to read CSV: {e}"), [], [], ""
    date_col, df = try_parse_dates(df)
    numeric = numeric_kpis(df, date_col)
    # default select the first numeric columns (up to 10)
    default_selected = numeric[:10]
    return gr.update(visible=False, value=""), numeric, default_selected, date_col or ""
245
+
246
def run_analysis(file, date_col, selected_kpis, top_k, explanation_toggle):
    """Run analyze_csv and shape the result for the Gradio outputs.

    Returns a 5-tuple: (status markdown, display DataFrame, chart buffer,
    explanations dict, raw CSV preview). On error the trailing four values
    are None and the status carries the message.
    """
    if file is None:
        return "❌ Upload a CSV first.", None, None, None, None
    result = analyze_csv(file, date_col, selected_kpis, top_k=top_k, explanation=explanation_toggle)
    if "error" in result:
        return f"❌ {result['error']}", None, None, None, None
    # Work on a copy so the analysis frame keeps its numeric columns.
    display = result["score_df"].copy()
    # Human-readable percent; 'inf' covers a zero first value.
    display["pct_change"] = display["pct_change"].apply(lambda x: f"{x*100:.2f}%" if np.isfinite(x) else "inf")
    display["score"] = display["score"].round(4)
    return (
        "βœ… Analysis complete.",
        display,
        result["top_chart"],
        result["explanations"],
        result["raw_df_head"],
    )
262
 
263
def show_kpi_detail(file, date_col, kpi_name):
    """Plot one KPI's time series with anomalies and summarize them.

    Returns (PNG BytesIO or None, status/summary text).
    """
    if file is None or kpi_name is None:
        return None, "Upload CSV and select a KPI"
    # Robustness: a bad file or unknown column previously crashed the handler.
    try:
        df = pd.read_csv(file.name)
    except Exception as e:
        return None, f"Failed to read CSV: {e}"
    if kpi_name not in df.columns:
        return None, f"Column '{kpi_name}' not found in CSV"
    if date_col and date_col in df.columns:
        try:
            df[date_col] = pd.to_datetime(df[date_col])
        except Exception:
            # Unparseable dates: still index by the raw column values.
            pass
        series = df.set_index(date_col)[kpi_name]
    else:
        series = df[kpi_name]
    imgbuf = plot_time_series_with_anomalies(series)
    anomalies = detect_anomalies(series)
    text = f"Anomalies (index, value): {anomalies}" if anomalies else "No anomalies detected"
    return imgbuf, text
276
 
 
277
# Gradio UI wiring. NOTE(review): left byte-identical; several hookups look
# suspect and are flagged inline rather than changed, since the correct fix
# depends on the gradio version in use.
with gr.Blocks() as demo:
    gr.Markdown("## πŸ“Š KPI Multi-Column Trend Analyzer & Ranker")
    gr.Markdown("Upload a CSV (date column optional). Select KPI columns to analyze, pick Top-K, and (optionally) ask for LLM explanations.")
    with gr.Row():
        csv_in = gr.File(label="Upload CSV (required)")
        upload_msg = gr.Textbox(value="", interactive=False, visible=False)
    # NOTE(review): inline gr.State() outputs discard on_upload's numeric /
    # default-selection / date-column return values — presumably these were
    # meant to target real components; confirm intended wiring.
    csv_in.change(fn=on_upload, inputs=[csv_in], outputs=[upload_msg, gr.State(), gr.State(), gr.State()], api_name="on_upload")
    # We'll call on_upload logic directly inside run call to populate choices: simpler approach below
    with gr.Row():
        date_col = gr.Textbox(label="Date column (leave empty to auto-detect)", placeholder="e.g. date")
        kpi_choices = gr.Dropdown(choices=[], multiselect=True, label="Select KPI columns (numeric)", info="Pick KPI columns to include in analysis")
    top_k = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Top K KPIs to show")
    explanation_toggle = gr.Checkbox(label="Generate LLM explanations for Top-K KPIs (slower)", value=False)
    analyze_btn = gr.Button("Run Analysis")
    status = gr.Markdown("", visible=True)
    result_table = gr.Dataframe(headers=["kpi","trend","slope","std","pct_change","score","anomalies"], label="Scores (sorted)")
    # NOTE(review): type="pil" but run_all passes a BytesIO buffer from
    # plot_top_scores — confirm gradio accepts this, or return PIL images.
    chart_output = gr.Image(type="pil", label="Top-K Score Chart")
    explanations_out = gr.Textbox(label="LLM Explanations (Top-K)", lines=6)
    raw_preview = gr.Textbox(label="CSV preview (first rows)", lines=6)

    # populate kpi choices when the file changes: we do it by running a tiny helper on file change
    def populate_choices(file, date_guess):
        # Returns (all numeric column names, default selection of up to 10).
        if file is None:
            return [], []
        try:
            df = pd.read_csv(file.name)
        except Exception as e:
            return [], []
        guessed_date, df = try_parse_dates(df)
        # Prefer the user-typed date column when it exists in the CSV.
        if date_guess and date_guess in df.columns:
            used_date = date_guess
        else:
            used_date = guessed_date
        numeric = numeric_kpis(df, used_date)
        # default select up to 10
        default = numeric[:10]
        return numeric, default

    # NOTE(review): the same component appears twice as output — the second
    # return value was presumably meant to set the Dropdown's selected value
    # (e.g. via gr.update(choices=..., value=...)); confirm and fix.
    csv_in.change(fn=populate_choices, inputs=[csv_in, date_col], outputs=[kpi_choices, kpi_choices])

    def run_all(file, date_col_text, kpi_list, top_k_val, explanation_flag):
        # Validates inputs, delegates to run_analysis, and flattens the
        # explanations dict into a displayable text block.
        # populate error if no file or no kpis
        if file is None:
            return "❌ Upload CSV first", None, None, None, None
        if not kpi_list:
            return "❌ Select at least one KPI column", None, None, None, None
        status_text, score_df_display, chart_buf, explanations, raw_csv = run_analysis(file, date_col_text, kpi_list, top_k_val, explanation_flag)
        # explanations dict -> string
        expl_text = "\n\n".join([f"{k}:\n{v}" for k, v in (explanations or {}).items()])
        # chart_buf is BytesIO
        chart_img = None
        if chart_buf is not None:
            chart_img = chart_buf
        return status_text, score_df_display, chart_img, expl_text, raw_csv

    analyze_btn.click(fn=run_all, inputs=[csv_in, date_col, kpi_choices, top_k, explanation_toggle], outputs=[status, result_table, chart_output, explanations_out, raw_preview])

    # KPI detail UI
    gr.Markdown("### Per-KPI detail (select KPI name and click Show)")
    detail_kpi = gr.Dropdown(choices=[], label="Pick KPI to inspect (use results table to pick)")
    # NOTE(review): clears detail_kpi on upload but never repopulates its
    # choices, so the user has no options to pick from — confirm intended UX.
    csv_in.change(lambda f: [], inputs=[csv_in], outputs=[detail_kpi]) # placeholder to refresh UI state
    show_btn = gr.Button("Show KPI detail")
    detail_plot = gr.Image(type="pil", label="Time series + anomalies")
    detail_text = gr.Textbox(label="Anomaly summary", lines=3)
    # when result_table updates, populate detail_kpi choices from it (we can't directly get it; user picks)
    show_btn.click(fn=show_kpi_detail, inputs=[csv_in, date_col, detail_kpi], outputs=[detail_plot, detail_text])

demo.launch()