Barisha committed on
Commit
62cf6c0
·
verified ·
1 Parent(s): e6b58e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -72
app.py CHANGED
@@ -3,51 +3,33 @@ import pandas as pd
3
  from transformers import pipeline
4
 
5
  # ------------------------------------------------
6
- # Load Qwen-3B (polishing only)
7
  # ------------------------------------------------
8
# Text-generation pipeline shared by every request; it is built once at import
# time so the model is not reloaded per call.
# NOTE(review): trust_remote_code=True permits executing code shipped with the
# model repository - confirm it is actually required for this checkpoint.
generator = pipeline(
    "text-generation",
    model="Qwen/Qwen2.5-3B-Instruct",
    device_map="auto",
    trust_remote_code=True,
)
14
 
15
  # ------------------------------------------------
16
- # Generic helpers (pure statistics → language)
17
  # ------------------------------------------------
18
def trend_word(diff):
    """Return the verb describing the sign of a period-over-period difference.

    Args:
        diff: Signed numeric difference (current value minus previous value).

    Returns:
        "increased" for a positive difference, "decreased" for a negative one
        and "remained unchanged" when the difference is exactly zero.
    """
    if diff > 0:
        return "increased"
    if diff < 0:
        return "decreased"
    # A zero difference used to fall into the "decreased" branch, which
    # reported a movement that never happened.
    return "remained unchanged"
20
-
21
def magnitude_word(abs_diff):
    """Return the adverb that qualifies how large an absolute change is.

    Args:
        abs_diff: Non-negative size of the change.

    Returns:
        "marginally" below 0.05, "slightly" below 0.2, otherwise "noticeably".
    """
    # Exclusive upper bounds, checked in ascending order.
    for upper_bound, adverb in ((0.05, "marginally"), (0.2, "slightly")):
        if abs_diff < upper_bound:
            return adverb
    return "noticeably"
28
 
29
def unit_word(kpi):
    """Return the unit label implied by a KPI name.

    Args:
        kpi: KPI name; a "%" anywhere in it marks a percentage metric.

    Returns:
        "percentage points" for percentage metrics, otherwise an empty string.
    """
    if "%" in kpi:
        return "percentage points"
    return ""
31
-
32
def summarize_secondary(diffs):
    """Describe the combined movement of the non-primary KPIs in one sentence.

    Args:
        diffs: pandas Series of signed period-over-period differences.

    Returns:
        A sentence without trailing period. It combines the average size of
        the changes with the share of increases, or states that nothing was
        available when ``diffs`` holds no usable values.
    """
    # Missing differences carry no information; left in place they would be
    # counted as "not an increase" and skew the direction label.
    diffs = diffs.dropna()
    if diffs.empty:
        # mean() of an empty Series is NaN, which previously fell through
        # every comparison and yielded "notable movement with mixed movement".
        return "No remaining indicators were available for comparison"

    avg_change = diffs.abs().mean()
    inc_ratio = (diffs > 0).mean()

    if avg_change < 0.05:
        magnitude = "minor variation"
    elif avg_change < 0.2:
        magnitude = "moderate movement"
    else:
        magnitude = "notable movement"

    if inc_ratio > 0.7:
        direction = "mostly increased"
    elif inc_ratio < 0.3:
        direction = "mostly decreased"
    else:
        direction = "mixed movement"

    return f"Remaining indicators showed {magnitude} with {direction}"
51
 
52
  # ------------------------------------------------
53
  # Core logic
@@ -55,69 +37,79 @@ def summarize_secondary(diffs):
55
def analyze_kpi(csv_file, top_n):
    """Rank KPIs by their latest change and generate a short summary.

    The last two columns of the CSV are compared; every column after the
    first is treated as a value column and a ``Kpi`` column supplies the names.
    (Presumably the columns are reporting dates in chronological order -
    verify against the data producer.)

    Args:
        csv_file: Uploaded file, either a path string or an object exposing
            the path as ``.name``.
        top_n: Number of largest changes to keep.

    Returns:
        Tuple of (DataFrame with the ``Kpi`` and ``Change`` columns of the
        top rows, text produced by the generator).

    Raises:
        ValueError: If no file was supplied, the CSV has fewer than two value
            columns, or no row has a measurable change.
    """
    if csv_file is None:
        raise ValueError("Please upload a CSV file first.")

    # Depending on the Gradio version/configuration the upload arrives as a
    # plain path or as a temp-file wrapper; accept both.
    csv_path = getattr(csv_file, "name", csv_file)
    df = pd.read_csv(csv_path)

    date_cols = df.columns[1:]
    if len(date_cols) < 2:
        raise ValueError(
            "The CSV needs at least two value columns after the KPI column."
        )
    prev_date, curr_date = date_cols[-2], date_cols[-1]

    df["Diff"] = df[curr_date] - df[prev_date]
    df["Change"] = df["Diff"].abs()

    # Rows missing either value have no measurable change and must not be
    # ranked; int() guards against a float slider value.
    top_kpis = (
        df.dropna(subset=["Change"])
        .sort_values("Change", ascending=False)
        .head(int(top_n))
    )
    if top_kpis.empty:
        raise ValueError("The CSV contains no KPI rows with comparable values.")

    # -------------------------------
    # Primary KPI sentence
    # -------------------------------
    primary = top_kpis.iloc[0]
    abs_diff = abs(primary["Diff"])

    primary_sentence = (
        f"{primary['Kpi']} "
        f"{trend_word(primary['Diff'])} "
        f"{magnitude_word(abs_diff)} "
        f"by {abs_diff:.2f} {unit_word(primary['Kpi'])}".strip()
    )

    # -------------------------------
    # Secondary KPI sentence
    # -------------------------------
    secondary_diffs = top_kpis.iloc[1:]["Diff"]

    # With a single ranked KPI there is nothing to summarise; skipping the
    # sentence avoids describing averages of an empty selection.
    model_input = primary_sentence + "."
    if not secondary_diffs.empty:
        model_input += " " + summarize_secondary(secondary_diffs) + "."

    # Qwen generation (polishing only)
    output = generator(
        model_input,
        max_new_tokens=60,
        do_sample=False,
    )[0]["generated_text"]

    return top_kpis[["Kpi", "Change"]], output
98
 
99
  # ------------------------------------------------
100
- # Gradio UI (HF Space)
101
  # ------------------------------------------------
102
# Page layout: heading, upload + slider inputs, a trigger button and the two
# outputs filled by analyze_kpi.
with gr.Blocks(title="KPI Change Newsletter") as demo:
    gr.Markdown("## 📰 KPI Change Newsletter Generator")
    gr.Markdown(
        "Upload a KPI CSV file to rank changes and generate "
        "a short, data-driven summary."
    )

    # Inputs
    csv_input = gr.File(label="Upload CSV", file_types=[".csv"])
    top_n_input = gr.Slider(minimum=3, maximum=5, value=3, step=1, label="Top KPIs")

    generate_btn = gr.Button("Generate Summary")

    # Outputs
    table_output = gr.Dataframe(label="Top KPI Changes")
    summary_output = gr.Textbox(label="Newsletter Summary", lines=2)

    # The button runs the analysis and routes its two return values to the
    # table and the text box, in that order.
    generate_btn.click(
        fn=analyze_kpi,
        inputs=[csv_input, top_n_input],
        outputs=[table_output, summary_output],
    )

demo.launch()
 
3
  from transformers import pipeline
4
 
5
  # ------------------------------------------------
6
+ # Load Qwen 3B
7
  # ------------------------------------------------
8
# Text-generation pipeline shared by every request; it is built once at import
# time so the model is not reloaded per call.
# NOTE(review): trust_remote_code=True permits executing code shipped with the
# model repository - confirm it is actually required for this checkpoint.
generator = pipeline(
    task="text-generation",
    model="Qwen/Qwen2.5-3B-Instruct",
    device_map="auto",
    trust_remote_code=True,
)
14
 
15
  # ------------------------------------------------
16
+ # Quantization helpers (FACTS only)
17
  # ------------------------------------------------
18
def magnitude_bucket(x):
    """Classify the size of a change into a coarse bucket.

    Args:
        x: Numeric change. Only its absolute size matters, so signed
            differences are accepted as well.

    Returns:
        "low" below 0.05, "medium" below 0.2, "high" otherwise, or "unknown"
        when ``x`` is NaN.
    """
    import math

    # NaN fails every "<" comparison and used to be reported as "high",
    # i.e. a fabricated fact; label it explicitly instead.
    if math.isnan(x):
        return "unknown"

    # Bucket by size: a negative difference used to satisfy "x < 0.05" and
    # was always labelled "low" no matter how large it was.
    size = abs(x)
    if size < 0.05:
        return "low"
    if size < 0.2:
        return "medium"
    return "high"
25
 
26
def direction_bucket(diff):
    """Map a signed difference to a direction label.

    Args:
        diff: Signed numeric difference (current value minus previous value).

    Returns:
        "increase" for a positive value, "decrease" for a negative value,
        otherwise "no_change".
    """
    if diff > 0:
        return "increase"
    if diff < 0:
        return "decrease"
    return "no_change"
 
 
33
 
34
  # ------------------------------------------------
35
  # Core logic
 
37
def analyze_kpi(csv_file, top_n):
    """Rank KPIs by their latest change and generate a summary from the facts.

    The last two columns of the CSV are compared; every column after the
    first is treated as a value column and a ``Kpi`` column supplies the names.
    (Presumably the columns are reporting dates in chronological order -
    verify against the data producer.)

    Args:
        csv_file: Uploaded file, either a path string or an object exposing
            the path as ``.name``.
        top_n: Number of largest changes to keep.

    Returns:
        Tuple of (DataFrame with the ``Kpi`` and ``abs_diff`` columns of the
        top rows, newly generated summary text).

    Raises:
        ValueError: If no file was supplied, the CSV has fewer than two value
            columns, or no row has a measurable change.
    """
    if csv_file is None:
        raise ValueError("Please upload a CSV file first.")

    # Depending on the Gradio version/configuration the upload arrives as a
    # plain path or as a temp-file wrapper; accept both.
    csv_path = getattr(csv_file, "name", csv_file)
    df = pd.read_csv(csv_path)

    dates = df.columns[1:]
    if len(dates) < 2:
        raise ValueError(
            "The CSV needs at least two value columns after the KPI column."
        )
    prev_date, curr_date = dates[-2], dates[-1]

    df["diff"] = df[curr_date] - df[prev_date]
    df["abs_diff"] = df["diff"].abs()

    # Rows missing either value have no measurable change and must not be
    # ranked; int() guards against a float slider value.
    ranked = (
        df.dropna(subset=["abs_diff"])
        .sort_values("abs_diff", ascending=False)
        .head(int(top_n))
    )
    if ranked.empty:
        raise ValueError("The CSV contains no KPI rows with comparable values.")

    # -------------------------------
    # Primary KPI facts
    # -------------------------------
    primary = ranked.iloc[0]

    # The dicts below are rendered into the prompt via their repr, so numeric
    # values are cast to plain floats; NumPy scalars would otherwise appear
    # as "np.float64(...)" under NumPy 2.
    primary_facts = {
        "PRIMARY_KPI": primary["Kpi"],
        "PRIMARY_DIRECTION": direction_bucket(primary["diff"]),
        "PRIMARY_CHANGE": round(float(primary["abs_diff"]), 2),
        "PRIMARY_MAGNITUDE": magnitude_bucket(primary["abs_diff"]),
        # str() keeps the membership test valid for non-string KPI labels.
        "PRIMARY_UNIT": (
            "percentage points" if "%" in str(primary["Kpi"]) else "units"
        ),
    }

    # -------------------------------
    # Secondary KPI facts
    # -------------------------------
    secondary = ranked.iloc[1:]

    if secondary.empty:
        # Averages over an empty selection are NaN and would be presented to
        # the model as facts; report only that nothing else was ranked.
        secondary_facts = {"SECONDARY_COUNT": 0}
    else:
        avg_change = float(secondary["abs_diff"].mean())
        increase_share = float((secondary["diff"] > 0).mean())

        if increase_share > 0.7:
            balance = "mostly_increase"
        elif increase_share < 0.3:
            balance = "mostly_decrease"
        else:
            balance = "mixed"

        secondary_facts = {
            "SECONDARY_COUNT": len(secondary),
            "SECONDARY_AVG_CHANGE": round(avg_change, 2),
            "SECONDARY_MAGNITUDE": magnitude_bucket(avg_change),
            "SECONDARY_DIRECTION_BALANCE": balance,
        }

    # -------------------------------
    # Model input = FACT BLOCK
    # -------------------------------
    model_input = (
        "Generate a short operational summary from the following facts.\n\n"
        f"{primary_facts}\n"
        f"{secondary_facts}"
    )

    # return_full_text=False keeps the instruction and the raw fact dicts out
    # of the summary; by default the pipeline returns prompt + continuation.
    output = generator(
        model_input,
        max_new_tokens=80,
        do_sample=False,
        return_full_text=False,
    )[0]["generated_text"]

    return ranked[["Kpi", "abs_diff"]], output.strip()
93
 
94
  # ------------------------------------------------
95
+ # UI
96
  # ------------------------------------------------
97
# Page layout: heading, upload + slider inputs, a trigger button and the two
# outputs filled by analyze_kpi.
with gr.Blocks(title="KPI Summary Generator") as demo:
    gr.Markdown("## KPI Change Summary")
    gr.Markdown("Upload CSV. Summary is generated strictly from data-derived facts.")

    # Inputs
    csv_input = gr.File(file_types=[".csv"])
    top_n_input = gr.Slider(minimum=3, maximum=5, value=3, step=1)

    btn = gr.Button("Generate")

    # Outputs
    table = gr.Dataframe()
    summary = gr.Textbox(lines=3)

    # The button runs the analysis and routes its two return values to the
    # table and the text box, in that order.
    btn.click(
        fn=analyze_kpi,
        inputs=[csv_input, top_n_input],
        outputs=[table, summary],
    )

demo.launch()