DavMelchi commited on
Commit
4a2530d
·
1 Parent(s): eb773b0

Add multi-RAT combined sheet generation to the KPI health check Excel export, with time-based site aggregation. The change normalizes time keys for hourly/daily granularity (detecting `period_start`/`date_only` columns), builds a unified sheet that merges 2G/3G/LTE/TWAMP KPIs by `site_code` and time using RAT-prefixed columns, extracts and merges geographic coordinates from all RATs, and falls back to per-RAT sheets when combined generation fails.

Browse files
process_kpi/kpi_health_check/export.py CHANGED
@@ -3,6 +3,153 @@ import pandas as pd
3
  from panel_app.convert_to_excel_panel import write_dfs_to_excel
4
 
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  def build_export_bytes(
7
  datasets_df: pd.DataFrame | None,
8
  rules_df: pd.DataFrame | None,
@@ -41,20 +188,35 @@ def build_export_bytes(
41
  if daily_by_rat and isinstance(daily_by_rat, dict):
42
  g = str(granularity or "Daily").strip().lower()
43
  prefix = "Hourly" if (g.startswith("hour") or g.startswith("h")) else "Daily"
44
- for rat, df in daily_by_rat.items():
45
- if not isinstance(df, pd.DataFrame):
46
- continue
47
- base = f"{prefix}_All_{str(rat)}"
48
- if len(df) <= max_data_rows:
49
- dfs.append(df)
50
  sheet_names.append(base[:31])
51
  else:
52
  part = 1
53
- for start in range(0, len(df), max_data_rows):
54
- end = min(start + max_data_rows, len(df))
55
- dfs.append(df.iloc[start:end].copy())
56
  sheet_names.append(f"{base}_p{part}"[:31])
57
  part += 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  dfs.extend(
60
  [
 
3
  from panel_app.convert_to_excel_panel import write_dfs_to_excel
4
 
5
 
6
+ def _normalize_time_key(
7
+ df: pd.DataFrame, granularity: str
8
+ ) -> tuple[str, pd.Series] | None:
9
+ if df is None or df.empty:
10
+ return None
11
+ g = str(granularity or "Daily").strip().lower()
12
+ is_hourly = g.startswith("hour") or g.startswith("h")
13
+ if is_hourly:
14
+ time_col = "period_start" if "period_start" in df.columns else "date_only"
15
+ t = pd.to_datetime(df.get(time_col), errors="coerce").dt.floor("h")
16
+ return time_col, t
17
+
18
+ time_col = "date_only" if "date_only" in df.columns else "period_start"
19
+ t = pd.to_datetime(df.get(time_col), errors="coerce").dt.date
20
+ return time_col, t
21
+
22
+
23
+ def _build_all_tech_sheet(
24
+ daily_by_rat: dict[str, pd.DataFrame],
25
+ granularity: str,
26
+ ) -> tuple[str, pd.DataFrame] | None:
27
+ if not daily_by_rat or not isinstance(daily_by_rat, dict):
28
+ return None
29
+
30
+ g = str(granularity or "Daily").strip().lower()
31
+ prefix = "Hourly" if (g.startswith("hour") or g.startswith("h")) else "Daily"
32
+
33
+ ordered_rats = ["2G", "3G", "LTE", "TWAMP"]
34
+ present = [r for r in ordered_rats if r in daily_by_rat]
35
+ if not present:
36
+ present = [str(r) for r in daily_by_rat.keys()]
37
+
38
+ time_col = None
39
+ keys = []
40
+ coords_parts = []
41
+
42
+ for rat in present:
43
+ df = daily_by_rat.get(rat)
44
+ if not isinstance(df, pd.DataFrame) or df.empty:
45
+ continue
46
+
47
+ nt = _normalize_time_key(df, granularity)
48
+ if nt is None:
49
+ continue
50
+ tc, tkey = nt
51
+ if time_col is None:
52
+ time_col = tc
53
+
54
+ tmp = pd.DataFrame(
55
+ {"site_code": pd.to_numeric(df.get("site_code"), errors="coerce"), tc: tkey}
56
+ )
57
+ tmp = tmp.dropna(subset=["site_code", tc]).copy()
58
+ tmp["site_code"] = tmp["site_code"].astype(int)
59
+ keys.append(tmp[["site_code", tc]])
60
+
61
+ cols = [
62
+ c for c in ["site_code", "City", "Longitude", "Latitude"] if c in df.columns
63
+ ]
64
+ if cols:
65
+ cp = df[cols].copy()
66
+ cp["site_code"] = pd.to_numeric(cp["site_code"], errors="coerce")
67
+ cp = cp.dropna(subset=["site_code"]).copy()
68
+ cp["site_code"] = cp["site_code"].astype(int)
69
+ coords_parts.append(cp)
70
+
71
+ if not keys or time_col is None:
72
+ return None
73
+
74
+ base = pd.concat(keys, ignore_index=True).drop_duplicates(
75
+ subset=["site_code", time_col]
76
+ )
77
+
78
+ coords = None
79
+ if coords_parts:
80
+ coords_all = pd.concat(coords_parts, ignore_index=True)
81
+ coords_all = coords_all.drop_duplicates(subset=["site_code"])
82
+ keep = [
83
+ c
84
+ for c in ["site_code", "City", "Longitude", "Latitude"]
85
+ if c in coords_all.columns
86
+ ]
87
+ coords = coords_all[keep].copy() if keep else None
88
+
89
+ if isinstance(coords, pd.DataFrame) and not coords.empty:
90
+ base = pd.merge(base, coords, on="site_code", how="left")
91
+
92
+ base["ID"] = base[time_col].astype(str) + "_" + base["site_code"].astype(str)
93
+
94
+ meta_cols = {
95
+ "site_code",
96
+ "period_start",
97
+ "date_only",
98
+ "Longitude",
99
+ "Latitude",
100
+ "City",
101
+ "RAT",
102
+ "ID",
103
+ }
104
+
105
+ out = base
106
+ for rat in present:
107
+ df = daily_by_rat.get(rat)
108
+ if not isinstance(df, pd.DataFrame) or df.empty:
109
+ continue
110
+
111
+ nt = _normalize_time_key(df, granularity)
112
+ if nt is None:
113
+ continue
114
+ tc, tkey = nt
115
+
116
+ tmp = df.copy()
117
+ tmp["site_code"] = pd.to_numeric(tmp.get("site_code"), errors="coerce")
118
+ tmp = tmp.dropna(subset=["site_code"]).copy()
119
+ tmp["site_code"] = tmp["site_code"].astype(int)
120
+ tmp[tc] = tkey
121
+ tmp = tmp.dropna(subset=[tc]).copy()
122
+
123
+ kpi_cols = [c for c in tmp.columns if c not in meta_cols]
124
+ keep_cols = ["site_code", tc] + kpi_cols
125
+ tmp2 = tmp[keep_cols].copy()
126
+ rename = {c: f"{rat}_{c}" for c in kpi_cols}
127
+ tmp2 = tmp2.rename(columns=rename)
128
+ out = pd.merge(
129
+ out,
130
+ tmp2,
131
+ left_on=["site_code", time_col],
132
+ right_on=["site_code", tc],
133
+ how="left",
134
+ )
135
+ if tc != time_col and tc in out.columns:
136
+ out = out.drop(columns=[tc], errors="ignore")
137
+
138
+ first_cols = [
139
+ c
140
+ for c in ["ID", time_col, "site_code", "City", "Longitude", "Latitude"]
141
+ if c in out.columns
142
+ ]
143
+ rest = [c for c in out.columns if c not in first_cols]
144
+ out = out[first_cols + rest]
145
+ try:
146
+ out = out.sort_values(by=[time_col, "site_code"], ascending=[True, True])
147
+ except Exception:
148
+ pass
149
+
150
+ return f"{prefix}_All", out
151
+
152
+
153
  def build_export_bytes(
154
  datasets_df: pd.DataFrame | None,
155
  rules_df: pd.DataFrame | None,
 
188
  if daily_by_rat and isinstance(daily_by_rat, dict):
189
  g = str(granularity or "Daily").strip().lower()
190
  prefix = "Hourly" if (g.startswith("hour") or g.startswith("h")) else "Daily"
191
+
192
+ combined = _build_all_tech_sheet(daily_by_rat, granularity)
193
+ if combined is not None:
194
+ base, df_all = combined
195
+ if len(df_all) <= max_data_rows:
196
+ dfs.append(df_all)
197
  sheet_names.append(base[:31])
198
  else:
199
  part = 1
200
+ for start in range(0, len(df_all), max_data_rows):
201
+ end = min(start + max_data_rows, len(df_all))
202
+ dfs.append(df_all.iloc[start:end].copy())
203
  sheet_names.append(f"{base}_p{part}"[:31])
204
  part += 1
205
+ else:
206
+ for rat, df in daily_by_rat.items():
207
+ if not isinstance(df, pd.DataFrame):
208
+ continue
209
+ base = f"{prefix}_All_{str(rat)}"
210
+ if len(df) <= max_data_rows:
211
+ dfs.append(df)
212
+ sheet_names.append(base[:31])
213
+ else:
214
+ part = 1
215
+ for start in range(0, len(df), max_data_rows):
216
+ end = min(start + max_data_rows, len(df))
217
+ dfs.append(df.iloc[start:end].copy())
218
+ sheet_names.append(f"{base}_p{part}"[:31])
219
+ part += 1
220
 
221
  dfs.extend(
222
  [