Barisha committed on
Commit
a8608f2
Β·
verified Β·
1 Parent(s): e1be192

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +329 -68
app.py CHANGED
@@ -1,83 +1,344 @@
 
 
 
1
  import gradio as gr
2
  import numpy as np
3
  import pandas as pd
 
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
- import torch
6
-
7
- MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
8
-
9
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
10
- model = AutoModelForCausalLM.from_pretrained(
11
- MODEL_NAME,
12
- torch_dtype=torch.float32,
13
- device_map="cpu"
14
- )
15
-
16
- # ----- Trend detection -----
17
- def detect_trend(values):
18
- diffs = np.diff(values)
19
- if all(d > 0 for d in diffs):
20
- return "INCREASING"
21
- elif all(d < 0 for d in diffs):
22
- return "DECREASING"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  else:
24
- return "MIXED"
25
-
26
- # ----- Anomaly detection -----
27
- def detect_anomaly(values):
28
- mean = np.mean(values)
29
- std = np.std(values)
30
- anomalies = [(i, v) for i, v in enumerate(values) if abs(v - mean) > 2 * std]
31
- return "No anomalies detected" if len(anomalies) == 0 else str(anomalies)
32
-
33
- # ----- LLM explanation -----
34
- def explanation(entity, values, trend):
35
- prompt = f"""
36
- You are a KPI analysis expert.
37
- The entity is: {entity}
38
- The values are: {values}
39
- The detected trend is: {trend}
40
- Explain in simple words why the trend is {trend}.
41
- """
42
- inputs = tokenizer(prompt, return_tensors="pt")
43
- outputs = model.generate(**inputs, max_new_tokens=150)
44
- return tokenizer.decode(outputs[0], skip_special_tokens=True)
45
-
46
- # ----- Wrapper with loading message -----
47
- def analyze_with_spinner(entity, value_string):
48
- yield "⏳ **Analyzing... please wait...**"
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  try:
50
- values = [float(x.strip()) for x in value_string.split(",")]
51
- except:
52
- yield "❌ Error: Please enter numbers separated by commas"
53
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
- trend = detect_trend(values)
56
- anomaly = detect_anomaly(values)
57
- exp = explanation(entity, values, trend)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
- yield f"""
60
- πŸ“Œ **Entity:** {entity}
61
- πŸ“‰ **Trend:** {trend}
62
- ⚠️ **Anomalies:** {anomaly}
63
- 🧠 **Explanation:**
64
- {exp}
65
- """
 
 
 
 
 
 
66
 
67
- # ----- UI -----
68
  with gr.Blocks() as demo:
69
- gr.Markdown("# πŸ“ˆ KPI Analyzer")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
- entity = gr.Textbox(label="KPI Name (example: volte_rate)")
72
- values = gr.Textbox(label="Values (comma separated, example: 10,11,13,14,15)")
73
- output = gr.Markdown()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
- run_btn = gr.Button("Analyze")
76
 
77
- run_btn.click(
78
- fn=analyze_with_spinner,
79
- inputs=[entity, values],
80
- outputs=output,
81
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  demo.launch()
 
1
+ import io
2
+ import math
3
+ import time
4
  import gradio as gr
5
  import numpy as np
6
  import pandas as pd
7
+ import matplotlib.pyplot as plt
8
  from transformers import AutoTokenizer, AutoModelForCausalLM
9
+
10
+ # ---------- CONFIG ----------
11
+ # CPU-friendly model for optional explanations
12
+ LLM_NAME = "microsoft/Phi-3-mini-4k-instruct" # works in free HF Spaces
13
+ # LLM will be loaded lazily only if user requests explanation
14
+
15
+ # Globals for lazy LLM load
16
+ _llm_tokenizer = None
17
+ _llm_model = None
18
+ _llm_loaded = False
19
+
20
def load_llm():
    """Load the explanation LLM into the module-level cache (idempotent).

    A repeated call is a no-op; otherwise the tokenizer and a CPU-mapped
    model for ``LLM_NAME`` are fetched and the loaded flag is set.
    """
    global _llm_tokenizer, _llm_model, _llm_loaded
    if not _llm_loaded:
        _llm_tokenizer = AutoTokenizer.from_pretrained(LLM_NAME)
        _llm_model = AutoModelForCausalLM.from_pretrained(
            LLM_NAME,
            device_map="cpu"
        )
        _llm_loaded = True
30
+
31
+ # ---------- DATA HELPERS ----------
32
def try_parse_dates(df):
    """Find and parse a date-like column in ``df``.

    Pass 1 tries columns with conventional names (date/day/timestamp/time);
    pass 2 falls back to the first column pandas can parse to at least one
    real datetime. The matched column is converted in place.

    Returns:
        (column_name, df) on success, or (None, df) when nothing parses.
    """
    # Pass 1: conventionally named columns. str() guards non-string labels
    # (a purely numeric header would otherwise crash on .lower()).
    for col in df.columns:
        if str(col).lower() in ["date", "day", "timestamp", "time"]:
            try:
                df[col] = pd.to_datetime(df[col])
                return col, df
            except Exception:
                continue
    # Pass 2: first column that parses to at least one non-NaT datetime.
    for col in df.columns:
        try:
            parsed = pd.to_datetime(df[col])
            # ensure the parse produced something usable
            if parsed.notna().sum() > 0:
                df[col] = parsed
                return col, df
        except Exception:
            continue
    return None, df
52
+
53
def numeric_kpis(df, date_col=None):
    """Return the names of numeric columns, excluding ``date_col`` if given."""
    frame = df.drop(columns=[date_col]) if date_col else df
    return frame.select_dtypes(include=[np.number]).columns.tolist()
59
+
60
+ # ---------- ANALYSIS METRICS ----------
61
def calc_metrics(series, dates=None):
    """Compute trend metrics for one KPI series.

    Args:
        series: pandas Series of values in time order; NaNs are dropped.
        dates: accepted for interface compatibility but unused — the slope
            is fit against the integer position index, which is robust to
            irregular date spacing.

    Returns:
        dict with keys ``trend`` (INCREASING/DECREASING/MIXED), ``slope``,
        ``std``, ``pct_change`` (first→last relative change, inf when the
        first value is 0), and ``score`` (change magnitude vs noisiness).
    """
    clean = series.dropna().astype(float)
    # Too few points for a slope: report neutral metrics.
    if len(clean) < 2:
        return {
            "trend": "MIXED",
            "slope": 0.0,
            "std": float(np.std(clean)) if len(clean) > 0 else 0.0,
            "pct_change": 0.0,
            "score": 0.0
        }
    y = clean.values
    positions = np.arange(len(clean))
    slope = np.polyfit(positions, y, 1)[0]
    noise = float(np.std(y))
    first, last = float(y[0]), float(y[-1])
    if first == 0:
        pct_change = 0.0 if last == 0 else float("inf")
    else:
        pct_change = (last - first) / abs(first)
    # Label the trend: strictly monotone steps, else MIXED.
    steps = np.diff(y)
    if np.all(steps > 0):
        trend = "INCREASING"
    elif np.all(steps < 0):
        trend = "DECREASING"
    else:
        trend = "MIXED"
    # Score rises with absolute slope and percent change, falls with noise;
    # the epsilons avoid division by zero on constant series.
    score = float(abs(slope) * (abs(pct_change) + 1e-6) / (noise + 1e-6))
    return {
        "trend": trend,
        "slope": float(slope),
        "std": noise,
        "pct_change": pct_change,
        "score": score
    }
101
+
102
def detect_anomalies(series, threshold_sigma=2.0):
    """Return [(index, value), ...] for points beyond threshold_sigma
    standard deviations from the mean.

    Empty or constant (zero-std) input yields an empty list.
    """
    values = series.dropna().astype(float).values
    if len(values) == 0:
        return []
    center = np.mean(values)
    spread = np.std(values)
    # Constant series: no deviation can exceed any positive threshold.
    if spread == 0:
        return []
    cutoff = threshold_sigma * spread
    return [(i, float(v)) for i, v in enumerate(values) if abs(v - center) > cutoff]
115
+
116
+ # ---------- PLOTTING ----------
117
def plot_top_scores(df_scores, top_k=5):
    """Render a bar chart of the ``top_k`` highest-scoring KPIs.

    Args:
        df_scores: DataFrame with at least ``kpi`` and ``score`` columns.
        top_k: number of top rows (by score, descending) to chart.

    Returns:
        io.BytesIO containing the PNG, rewound to position 0.
    """
    ranked = df_scores.sort_values("score", ascending=False).head(top_k)
    fig, ax = plt.subplots(figsize=(6, 3.5))
    ax.bar(ranked["kpi"], ranked["score"])
    ax.set_title(f"Top {top_k} KPIs by change score")
    ax.set_ylabel("Score")
    ax.set_xlabel("KPI")
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()
    png = io.BytesIO()
    fig.savefig(png, format="png")
    plt.close(fig)
    png.seek(0)
    return png
131
+
132
def plot_time_series_with_anomalies(series):
    """Plot a KPI series with its >2-sigma outliers highlighted in red.

    Falls back to a "No numeric data" placeholder figure when the series
    is empty after dropping NaNs.

    Returns:
        io.BytesIO containing the PNG, rewound to position 0.
    """
    clean = series.dropna().astype(float)
    if clean.empty:
        fig, ax = plt.subplots(figsize=(6,3))
        ax.text(0.5, 0.5, "No numeric data", ha="center")
    else:
        fig, ax = plt.subplots(figsize=(6,3.5))
        ax.plot(clean.index, clean.values, marker="o")
        outliers = detect_anomalies(clean)
        if outliers:
            positions = [p for p, _ in outliers]
            heights = [v for _, v in outliers]
            # anomaly positions are integer offsets; translate them to the
            # series' own index labels so the scatter lines up with the plot
            ax.scatter(clean.index[positions], heights, color='red', zorder=5)
    ax.set_title("Time series (with anomalies in red)")
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()
    png = io.BytesIO()
    fig.savefig(png, format="png")
    plt.close(fig)
    png.seek(0)
    return png
155
+
156
+ # ---------- LLM EXPLANATION ----------
157
def llm_explain(kpi_name, values_list, trend_label):
    """Generate a short LLM explanation for a KPI's detected trend.

    Loads the model lazily on first use, then prompts it with the KPI
    name, its values, and the trend label. Returns the decoded text,
    stripped of surrounding whitespace.
    """
    # lazy load (load_llm is itself idempotent)
    if not _llm_loaded:
        load_llm()
    prompt = f"""You are a concise KPI analytics assistant.
KPI: {kpi_name}
Values (in order): {values_list}
Detected trend: {trend_label}

Give a 2-3 sentence explanation of what likely happened and possible reasons (short).
Also provide a one-line suggestion to check further."""
    encoded = _llm_tokenizer(prompt, return_tensors="pt")
    generated = _llm_model.generate(**encoded, max_new_tokens=120)
    decoded = _llm_tokenizer.decode(generated[0], skip_special_tokens=True)
    return decoded.strip()
172
+
173
+ # ---------- MAIN ANALYSIS FUNCTION ----------
174
def analyze_csv(file_obj, date_col_choice, selected_kpis, top_k=5, explanation=False):
    """Score each selected KPI column of an uploaded CSV by trend strength.

    Args:
        file_obj: uploaded file object exposing a ``.name`` path (gradio File).
        date_col_choice: optional date column name; when present the frame is
            sorted by it and it becomes the series index for plotting.
        selected_kpis: list of KPI column names to analyze.
        top_k: number of top-scoring KPIs to chart/explain.
        explanation: when True, generate an LLM explanation per top KPI.

    Returns:
        dict with ``score_df``, ``top_kpis``, ``top_chart`` (PNG BytesIO),
        ``explanations`` and ``raw_df_head`` — or ``{"error": message}``.
    """
    try:
        df = pd.read_csv(file_obj.name)
    except Exception as e:
        return {"error": f"Failed to read CSV: {e}"}
    # Guard inputs that would otherwise raise deep inside the scoring loop
    # (empty selection -> KeyError on sort; unknown column -> KeyError on df[kpi]).
    if not selected_kpis:
        return {"error": "No KPI columns selected"}
    missing = [k for k in selected_kpis if k not in df.columns]
    if missing:
        return {"error": f"KPI column(s) not found in CSV: {missing}"}
    # Parse and sort by the date column when the user provided a valid one.
    if date_col_choice and date_col_choice in df.columns:
        try:
            df[date_col_choice] = pd.to_datetime(df[date_col_choice])
        except Exception:
            # Unparseable dates: keep raw values; sorting still proceeds.
            pass
        df = df.sort_values(by=date_col_choice).reset_index(drop=True)
    # Build metrics for each KPI.
    scores = []
    for kpi in selected_kpis:
        series = df[kpi]
        # Use the date column as index so downstream plots get time labels.
        if date_col_choice and date_col_choice in df.columns:
            series = series.copy()
            series.index = df[date_col_choice]
        metrics = calc_metrics(series)
        anomalies = detect_anomalies(series)
        scores.append({
            "kpi": kpi,
            "trend": metrics["trend"],
            "slope": metrics["slope"],
            "std": metrics["std"],
            "pct_change": metrics["pct_change"],
            "score": metrics["score"],
            "anomalies": anomalies,
            "values": series.tolist() if hasattr(series, "tolist") else list(series)
        })
    score_df = pd.DataFrame(scores)
    score_df = score_df.sort_values("score", ascending=False).reset_index(drop=True)
    # Top-K bar chart and the corresponding KPI names.
    fig_buf = plot_top_scores(score_df, top_k=top_k)
    top_kpis = score_df.head(top_k)["kpi"].tolist()
    explanations = {}
    if explanation:
        # LLM explanations are slow; only generated on request, per top KPI.
        for r in score_df.head(top_k).itertuples():
            try:
                expl = llm_explain(r.kpi, r.values, r.trend)
            except Exception as e:
                expl = f"LLM error: {e}"
            explanations[r.kpi] = expl
    return {
        "score_df": score_df,
        "top_kpis": top_kpis,
        "top_chart": fig_buf,
        "explanations": explanations,
        "raw_df_head": df.head().to_csv(index=False)
    }
231
 
232
+ # ---------- GRADIO UI ----------
233
def on_upload(file):
    """Inspect an uploaded CSV and suggest date/KPI columns for the UI.

    Returns a 4-tuple: (error-textbox update, numeric column names,
    default KPI selection of up to 10 columns, detected date column or "").
    """
    try:
        df = pd.read_csv(file.name)
    except Exception as e:
        # BUG FIX: the error path previously returned 3 values while the
        # success path returned 4, breaking the gradio output mapping.
        return gr.update(visible=True, value=f"Failed to read CSV: {e}"), [], [], ""
    date_col, df = try_parse_dates(df)
    numeric = numeric_kpis(df, date_col)
    # default select the first numeric columns (up to 10)
    default_selected = numeric[:10]
    return gr.update(visible=False, value=""), numeric, default_selected, date_col or ""
245
+
246
def run_analysis(file, date_col, selected_kpis, top_k, explanation_toggle):
    """Run analyze_csv and shape the result for the Gradio outputs.

    Returns a 5-tuple: (status markdown, display DataFrame, chart buffer,
    explanations dict, raw CSV preview). On error the trailing four values
    are None and the status carries the message.
    """
    if file is None:
        return "❌ Upload a CSV first.", None, None, None, None
    result = analyze_csv(file, date_col, selected_kpis, top_k=top_k, explanation=explanation_toggle)
    if "error" in result:
        return f"❌ {result['error']}", None, None, None, None
    # Work on a copy so the analysis frame keeps its numeric columns.
    display = result["score_df"].copy()
    # Human-readable percent; 'inf' covers a zero first value.
    display["pct_change"] = display["pct_change"].apply(lambda x: f"{x*100:.2f}%" if np.isfinite(x) else "inf")
    display["score"] = display["score"].round(4)
    return (
        "βœ… Analysis complete.",
        display,
        result["top_chart"],
        result["explanations"],
        result["raw_df_head"],
    )
262
 
263
def show_kpi_detail(file, date_col, kpi_name):
    """Plot one KPI's time series with anomalies and summarize them.

    Returns (PNG BytesIO or None, status/summary text).
    """
    if file is None or kpi_name is None:
        return None, "Upload CSV and select a KPI"
    # Robustness: a bad file or unknown column previously crashed the handler.
    try:
        df = pd.read_csv(file.name)
    except Exception as e:
        return None, f"Failed to read CSV: {e}"
    if kpi_name not in df.columns:
        return None, f"Column '{kpi_name}' not found in CSV"
    if date_col and date_col in df.columns:
        try:
            df[date_col] = pd.to_datetime(df[date_col])
        except Exception:
            # Unparseable dates: still index by the raw column values.
            pass
        series = df.set_index(date_col)[kpi_name]
    else:
        series = df[kpi_name]
    imgbuf = plot_time_series_with_anomalies(series)
    anomalies = detect_anomalies(series)
    text = f"Anomalies (index, value): {anomalies}" if anomalies else "No anomalies detected"
    return imgbuf, text
276
 
 
277
# Gradio UI wiring. NOTE(review): left byte-identical; several hookups look
# suspect and are flagged inline rather than changed, since the correct fix
# depends on the gradio version in use.
with gr.Blocks() as demo:
    gr.Markdown("## πŸ“Š KPI Multi-Column Trend Analyzer & Ranker")
    gr.Markdown("Upload a CSV (date column optional). Select KPI columns to analyze, pick Top-K, and (optionally) ask for LLM explanations.")
    with gr.Row():
        csv_in = gr.File(label="Upload CSV (required)")
        upload_msg = gr.Textbox(value="", interactive=False, visible=False)
    # NOTE(review): inline gr.State() outputs discard on_upload's numeric /
    # default-selection / date-column return values — presumably these were
    # meant to target real components; confirm intended wiring.
    csv_in.change(fn=on_upload, inputs=[csv_in], outputs=[upload_msg, gr.State(), gr.State(), gr.State()], api_name="on_upload")
    # We'll call on_upload logic directly inside run call to populate choices: simpler approach below
    with gr.Row():
        date_col = gr.Textbox(label="Date column (leave empty to auto-detect)", placeholder="e.g. date")
        kpi_choices = gr.Dropdown(choices=[], multiselect=True, label="Select KPI columns (numeric)", info="Pick KPI columns to include in analysis")
    top_k = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Top K KPIs to show")
    explanation_toggle = gr.Checkbox(label="Generate LLM explanations for Top-K KPIs (slower)", value=False)
    analyze_btn = gr.Button("Run Analysis")
    status = gr.Markdown("", visible=True)
    result_table = gr.Dataframe(headers=["kpi","trend","slope","std","pct_change","score","anomalies"], label="Scores (sorted)")
    # NOTE(review): type="pil" but run_all passes a BytesIO buffer from
    # plot_top_scores — confirm gradio accepts this, or return PIL images.
    chart_output = gr.Image(type="pil", label="Top-K Score Chart")
    explanations_out = gr.Textbox(label="LLM Explanations (Top-K)", lines=6)
    raw_preview = gr.Textbox(label="CSV preview (first rows)", lines=6)

    # populate kpi choices when the file changes: we do it by running a tiny helper on file change
    def populate_choices(file, date_guess):
        # Returns (all numeric column names, default selection of up to 10).
        if file is None:
            return [], []
        try:
            df = pd.read_csv(file.name)
        except Exception as e:
            return [], []
        guessed_date, df = try_parse_dates(df)
        # Prefer the user-typed date column when it exists in the CSV.
        if date_guess and date_guess in df.columns:
            used_date = date_guess
        else:
            used_date = guessed_date
        numeric = numeric_kpis(df, used_date)
        # default select up to 10
        default = numeric[:10]
        return numeric, default

    # NOTE(review): the same component appears twice as output — the second
    # return value was presumably meant to set the Dropdown's selected value
    # (e.g. via gr.update(choices=..., value=...)); confirm and fix.
    csv_in.change(fn=populate_choices, inputs=[csv_in, date_col], outputs=[kpi_choices, kpi_choices])

    def run_all(file, date_col_text, kpi_list, top_k_val, explanation_flag):
        # Validates inputs, delegates to run_analysis, and flattens the
        # explanations dict into a displayable text block.
        # populate error if no file or no kpis
        if file is None:
            return "❌ Upload CSV first", None, None, None, None
        if not kpi_list:
            return "❌ Select at least one KPI column", None, None, None, None
        status_text, score_df_display, chart_buf, explanations, raw_csv = run_analysis(file, date_col_text, kpi_list, top_k_val, explanation_flag)
        # explanations dict -> string
        expl_text = "\n\n".join([f"{k}:\n{v}" for k, v in (explanations or {}).items()])
        # chart_buf is BytesIO
        chart_img = None
        if chart_buf is not None:
            chart_img = chart_buf
        return status_text, score_df_display, chart_img, expl_text, raw_csv

    analyze_btn.click(fn=run_all, inputs=[csv_in, date_col, kpi_choices, top_k, explanation_toggle], outputs=[status, result_table, chart_output, explanations_out, raw_preview])

    # KPI detail UI
    gr.Markdown("### Per-KPI detail (select KPI name and click Show)")
    detail_kpi = gr.Dropdown(choices=[], label="Pick KPI to inspect (use results table to pick)")
    # NOTE(review): clears detail_kpi on upload but never repopulates its
    # choices, so the user has no options to pick from — confirm intended UX.
    csv_in.change(lambda f: [], inputs=[csv_in], outputs=[detail_kpi]) # placeholder to refresh UI state
    show_btn = gr.Button("Show KPI detail")
    detail_plot = gr.Image(type="pil", label="Time series + anomalies")
    detail_text = gr.Textbox(label="Anomaly summary", lines=3)
    # when result_table updates, populate detail_kpi choices from it (we can't directly get it; user picks)
    show_btn.click(fn=show_kpi_detail, inputs=[csv_in, date_col, detail_kpi], outputs=[detail_plot, detail_text])

demo.launch()