farmentano12 committed on
Commit
7b6d2b4
·
verified ·
1 Parent(s): 55c55d0

Make validation optional

Browse files
Files changed (1) hide show
  1. app.py +72 -34
app.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import gradio as gr
3
  import pandas as pd
4
  from io import BytesIO
@@ -9,10 +8,11 @@ APP_TITLE = "Cruce CLIENTE × MMP por EVENTO"
9
  APP_DESC = """
10
  **Pasos**
11
  **1)** Subí **CLIENTE** (validación) y **MMP** (xlsx/csv), luego presioná **Cargar columnas**.
12
- **2)** Elegí **ID CLIENTE**, **ID MMP**, **columna de validación (CLIENTE)** *(sugerimos Advertising ID/Status)* y **métrica del MMP** (opcional).
13
  **3)** Elegí la **columna de EVENTO (MMP)** y mapeá los **eventos por los que el cliente paga**.
14
- **4)** Cargá los **valores de validación** (CLIENTE) y marcá cuáles significan **VALIDADO**.
15
  **5)** Generá tablas. Por cada **EVENTO** se crea una tabla con **Cliente, MMP, %** y, si definiste **métrica**, se suma **sólo en filas validadas**.
 
16
  **% = (Cliente / MMP) × 100** (1 decimal).
17
  **Hoja 1:** tablas apiladas por EVENTO. **Hoja 2:** `raw_merge` con todas las filas de CLIENTE (left join).
18
  """
@@ -54,6 +54,7 @@ def _guess(cols, candidates):
54
  return lower_map[cand.lower()]
55
  return cols[0] if cols else None
56
 
 
57
  def load_columns(cliente_file, mmp_file):
58
  try:
59
  df_c = _safe_read(cliente_file) if cliente_file else None
@@ -66,19 +67,21 @@ def load_columns(cliente_file, mmp_file):
66
 
67
  id_c_guess = _guess(cliente_cols, ["Advertising ID","advertising id","advertising_id","User Id","Transaction Id","ID","Id"])
68
  id_m_guess = _guess(mmp_cols, ["Advertising ID","advertising id","advertising_id","User Id","Transaction Id","ID","Id"])
69
- validation_guess = _guess(cliente_cols, ["Advertising ID","advertising id","advertising_id","Validado","Validation","Status","Estado"])
 
70
  metric_guess = _guess(mmp_cols, ["Event Revenue","Revenue","Amount","Value"])
71
  event_guess = _guess(mmp_cols, ["Event Name","event_name","Evento","EVENTO","Event"])
72
 
73
  return (
74
- gr.update(choices=cliente_cols, value=id_c_guess), # id_cliente_col
75
- gr.update(choices=mmp_cols, value=id_m_guess), # id_mmp_col
76
- gr.update(choices=cliente_cols, value=validation_guess),# validation_col_client
77
- gr.update(choices=mmp_cols, value=metric_guess), # metric_col_mmp
78
- gr.update(choices=mmp_cols, value=event_guess), # mmp_event_col
79
- "Columnas cargadas. Completá el Paso 2 y luego mapeá eventos."
80
  )
81
 
 
82
  def load_event_values(mmp_file, event_col):
83
  try:
84
  df_m = _safe_read(mmp_file) if mmp_file else None
@@ -90,6 +93,7 @@ def load_event_values(mmp_file, event_col):
90
  vals = sorted(pd.Series(df_m[event_col].unique(), dtype="object").astype(str).fillna(""))
91
  return gr.update(choices=vals, value=vals), f"{len(vals)} eventos encontrados (pre-seleccionados)."
92
 
 
93
  def load_validation_values(cliente_file, validation_col):
94
  try:
95
  df_c = _safe_read(cliente_file) if cliente_file else None
@@ -97,10 +101,11 @@ def load_validation_values(cliente_file, validation_col):
97
  return gr.update(choices=[], value=[]), f"Error al leer CLIENTE: {e}"
98
 
99
  if df_c is None or not validation_col or validation_col not in df_c.columns:
100
- return gr.update(choices=[], value=[]), "Subí CLIENTE y elegí la columna de validación (CLIENTE)."
101
  vals = sorted(pd.Series(df_c[validation_col].unique(), dtype="object").astype(str).fillna(""))
102
  return gr.update(choices=vals, value=[]), f"{len(vals)} valores posibles de validación."
103
 
 
104
  def compute(cliente_file, mmp_file,
105
  id_cliente_col, id_mmp_col,
106
  validation_col_client, metric_col_mmp,
@@ -114,14 +119,15 @@ def compute(cliente_file, mmp_file,
114
  except Exception as e:
115
  return None, None, f"Error al leer archivos: {e}"
116
 
 
117
  for name, col, df in [
118
  ("ID CLIENTE", id_cliente_col, df_c),
119
  ("ID MMP", id_mmp_col, df_m),
120
- ("Validación (CLIENTE)", validation_col_client, df_c),
121
  ]:
122
  if not col or col not in df.columns:
123
  return None, None, f"Columna inválida: {name} = {col}"
124
 
 
125
  try:
126
  merged = df_c.merge(
127
  df_m, left_on=id_cliente_col, right_on=id_mmp_col, how="left",
@@ -133,45 +139,73 @@ def compute(cliente_file, mmp_file,
133
  if merged.empty:
134
  return None, None, "El cruce no arrojó filas."
135
 
 
136
  event_in_merged = event_col if (event_col and event_col in merged.columns) else (f"{event_col}_MMP" if event_col else None)
137
  if not event_in_merged or event_in_merged not in merged.columns:
138
  return None, None, "Elegí la columna de EVENTO en el Paso 3."
139
 
140
- validation_in_merged = validation_col_client if validation_col_client in merged.columns else f"{validation_col_client}_CLIENTE"
141
- if validation_in_merged not in merged.columns:
142
- return None, None, f"No se encuentra '{validation_col_client}' en merged."
 
 
 
 
 
 
 
143
 
 
144
  metric_in_merged = None
145
  if metric_col_mmp and len(str(metric_col_mmp)) > 0:
146
- metric_in_merged = metric_col_mmp if metric_col_mmp in merged.columns else f"{metric_col_mmp}_MMP"
147
- if metric_in_merged not in merged.columns:
148
- metric_in_merged = None
 
149
 
 
150
  if not selected_events:
151
- selected_events = sorted(pd.Series(merged[event_in_merged].dropna().unique(), dtype="object").astype(str))
152
 
 
 
 
 
153
  tables_by_event = {}
154
  for ev in selected_events:
155
- sub = merged[merged[event_in_merged].astype(str) == str(ev)]
 
 
 
 
 
 
156
  if sub.empty:
157
- row = {"Cliente": 0, "MMP": 0, "%": 0.0}
158
  if metric_in_merged:
159
  row[f"MMP_{metric_in_merged}_suma_validado"] = 0.0
160
  tables_by_event[ev] = pd.DataFrame([row])
161
  continue
162
 
163
- mmp_count = len(sub)
164
- valid_mask = sub[validation_in_merged].astype(str).isin([str(v) for v in (validation_values or [])])
 
 
 
 
165
  cliente_count = int(valid_mask.sum())
166
- pct = round((cliente_count / mmp_count * 100), 1) if mmp_count else 0.0
 
 
167
 
168
- row = {"Cliente": cliente_count, "MMP": mmp_count, "%": pct}
169
  if metric_in_merged:
170
  vals = pd.to_numeric(sub.loc[valid_mask, metric_in_merged], errors="coerce")
171
  row[f"MMP_{metric_in_merged}_suma_validado"] = float(vals.sum()) if cliente_count else 0.0
172
 
173
  tables_by_event[ev] = pd.DataFrame([row])
174
 
 
175
  xls_bytes = BytesIO()
176
  with pd.ExcelWriter(xls_bytes, engine="xlsxwriter") as writer:
177
  sheet_name = "tablas_por_EVENTO"
@@ -182,10 +216,13 @@ def compute(cliente_file, mmp_file,
182
  table_df.to_excel(writer, sheet_name=sheet_name, startrow=start_row, index=False)
183
  start_row += len(table_df) + 2
184
 
 
185
  cols_keep = []
186
- for col in [id_cliente_col, id_mmp_col if id_mmp_col in merged.columns else f"{id_mmp_col}_MMP", event_in_merged, validation_in_merged]:
187
  if col in merged.columns and col not in cols_keep:
188
  cols_keep.append(col)
 
 
189
  if metric_in_merged and metric_in_merged in merged.columns and metric_in_merged not in cols_keep:
190
  cols_keep.append(metric_in_merged)
191
  cols_rest = [c for c in merged.columns if c not in cols_keep]
@@ -202,8 +239,9 @@ def compute(cliente_file, mmp_file,
202
  first_ev = list(tables_by_event.keys())[0]
203
  preview = tables_by_event[first_ev]
204
 
205
- return preview, download_path, "Listo ✅"
206
 
 
207
  with gr.Blocks(title=APP_TITLE) as demo:
208
  gr.Markdown(f"# {APP_TITLE}\n\n{APP_DESC}")
209
 
@@ -215,13 +253,13 @@ with gr.Blocks(title=APP_TITLE) as demo:
215
  step1_btn = gr.Button("Paso 1: Cargar columnas")
216
 
217
  # Paso 2
218
- gr.Markdown("## Paso 2: Elegir columnas de ID, validación (CLIENTE) y métrica MMP (opcional)")
219
  with gr.Row():
220
  id_cliente_col = gr.Dropdown(choices=[], label="ID en CLIENTE (para cruce)")
221
  id_mmp_col = gr.Dropdown(choices=[], label="ID en MMP (para cruce)")
222
  with gr.Row():
223
- validation_col_client = gr.Dropdown(choices=[], label="Columna de validación (CLIENTE) — sugerimos 'Advertising ID' o 'Status'")
224
- metric_col_mmp = gr.Dropdown(choices=[], label="Columna de métrica en MMP (opcional)")
225
  mmp_event_col = gr.Dropdown(choices=[], label="(Se usará en el Paso 3) Columna de EVENTO en MMP")
226
  step1_btn.click(
227
  load_columns,
@@ -229,7 +267,7 @@ with gr.Blocks(title=APP_TITLE) as demo:
229
  outputs=[id_cliente_col, id_mmp_col, validation_col_client, metric_col_mmp, mmp_event_col, gr.Markdown()]
230
  )
231
 
232
- # Botón de mapeo (reubicado arriba del Paso 3)
233
  map_events_btn = gr.Button("Mapear eventos desde columna de eventos de MMP")
234
 
235
  # Paso 3
@@ -241,9 +279,9 @@ with gr.Blocks(title=APP_TITLE) as demo:
241
  outputs=[event_vals, gr.Markdown()]
242
  )
243
 
244
- # Paso 4: mover el BOTÓN arriba del título
245
- load_valid_btn = gr.Button("Paso 4: Cargar valores de validación (CLIENTE)")
246
- gr.Markdown("## Paso 4: Cargar valores de validación (CLIENTE) y elegirlos")
247
  valid_vals = gr.CheckboxGroup(choices=[], label="Valores que significan VALIDADO (CLIENTE)")
248
  load_valid_btn.click(
249
  load_validation_values,
 
 
1
  import gradio as gr
2
  import pandas as pd
3
  from io import BytesIO
 
8
  APP_DESC = """
9
  **Pasos**
10
  **1)** Subí **CLIENTE** (validación) y **MMP** (xlsx/csv), luego presioná **Cargar columnas**.
11
+ **2)** Elegí **ID CLIENTE**, **ID MMP**, **columna de validación (CLIENTE) [opcional]** y **métrica del MMP** (opcional).
12
  **3)** Elegí la **columna de EVENTO (MMP)** y mapeá los **eventos por los que el cliente paga**.
13
+ **4)** (Opcional) Cargá los **valores de validación** (CLIENTE) y marcá cuáles significan **VALIDADO**.
14
  **5)** Generá tablas. Por cada **EVENTO** se crea una tabla con **Cliente, MMP, %** y, si definiste **métrica**, se suma **sólo en filas validadas**.
15
+ - Si **no** elegís columna/valores de validación, se considera **validado = cruce de IDs** (todas las filas de MMP del evento cuentan como Cliente).
16
  **% = (Cliente / MMP) × 100** (1 decimal).
17
  **Hoja 1:** tablas apiladas por EVENTO. **Hoja 2:** `raw_merge` con todas las filas de CLIENTE (left join).
18
  """
 
54
  return lower_map[cand.lower()]
55
  return cols[0] if cols else None
56
 
57
+ # --------------------- Load columns (Step 1) ---------------------
58
  def load_columns(cliente_file, mmp_file):
59
  try:
60
  df_c = _safe_read(cliente_file) if cliente_file else None
 
67
 
68
  id_c_guess = _guess(cliente_cols, ["Advertising ID","advertising id","advertising_id","User Id","Transaction Id","ID","Id"])
69
  id_m_guess = _guess(mmp_cols, ["Advertising ID","advertising id","advertising_id","User Id","Transaction Id","ID","Id"])
70
+ # Validación OPCIONAL: no preseleccionamos valor por defecto
71
+ validation_guess = None
72
  metric_guess = _guess(mmp_cols, ["Event Revenue","Revenue","Amount","Value"])
73
  event_guess = _guess(mmp_cols, ["Event Name","event_name","Evento","EVENTO","Event"])
74
 
75
  return (
76
+ gr.update(choices=cliente_cols, value=id_c_guess), # id_cliente_col
77
+ gr.update(choices=mmp_cols, value=id_m_guess), # id_mmp_col
78
+ gr.update(choices=cliente_cols, value=validation_guess),# validation_col_client (opcional)
79
+ gr.update(choices=mmp_cols, value=metric_guess), # metric_col_mmp (opcional)
80
+ gr.update(choices=mmp_cols, value=event_guess), # mmp_event_col
81
+ "Columnas cargadas. La validación es opcional (si la omitis, se usa cruce de IDs)."
82
  )
83
 
84
+ # --------------------- Load event values (Step 3) ---------------------
85
  def load_event_values(mmp_file, event_col):
86
  try:
87
  df_m = _safe_read(mmp_file) if mmp_file else None
 
93
  vals = sorted(pd.Series(df_m[event_col].unique(), dtype="object").astype(str).fillna(""))
94
  return gr.update(choices=vals, value=vals), f"{len(vals)} eventos encontrados (pre-seleccionados)."
95
 
96
+ # --------------------- Load validation values (Step 4, opcional) ---------------------
97
  def load_validation_values(cliente_file, validation_col):
98
  try:
99
  df_c = _safe_read(cliente_file) if cliente_file else None
 
101
  return gr.update(choices=[], value=[]), f"Error al leer CLIENTE: {e}"
102
 
103
  if df_c is None or not validation_col or validation_col not in df_c.columns:
104
+ return gr.update(choices=[], value=[]), "Omitido: sin columna de validación (se usará cruce de IDs)."
105
  vals = sorted(pd.Series(df_c[validation_col].unique(), dtype="object").astype(str).fillna(""))
106
  return gr.update(choices=vals, value=[]), f"{len(vals)} valores posibles de validación."
107
 
108
+ # --------------------- Compute (Step 5) ---------------------
109
  def compute(cliente_file, mmp_file,
110
  id_cliente_col, id_mmp_col,
111
  validation_col_client, metric_col_mmp,
 
119
  except Exception as e:
120
  return None, None, f"Error al leer archivos: {e}"
121
 
122
+ # Validaciones base (validación puede omitirse)
123
  for name, col, df in [
124
  ("ID CLIENTE", id_cliente_col, df_c),
125
  ("ID MMP", id_mmp_col, df_m),
 
126
  ]:
127
  if not col or col not in df.columns:
128
  return None, None, f"Columna inválida: {name} = {col}"
129
 
130
+ # Left join para mantener todas las filas de CLIENTE
131
  try:
132
  merged = df_c.merge(
133
  df_m, left_on=id_cliente_col, right_on=id_mmp_col, how="left",
 
139
  if merged.empty:
140
  return None, None, "El cruce no arrojó filas."
141
 
142
+ # Resolver nombres en merged (para filtrar por EVENTO en merged) y en df_m (para contar MMP)
143
  event_in_merged = event_col if (event_col and event_col in merged.columns) else (f"{event_col}_MMP" if event_col else None)
144
  if not event_in_merged or event_in_merged not in merged.columns:
145
  return None, None, "Elegí la columna de EVENTO en el Paso 3."
146
 
147
+ if event_col not in df_m.columns:
148
+ return None, None, f"No se encuentra '{event_col}' en el reporte MMP."
149
+
150
+ # Validación opcional
151
+ validation_in_merged = None
152
+ if validation_col_client:
153
+ if validation_col_client in merged.columns:
154
+ validation_in_merged = validation_col_client
155
+ elif f"{validation_col_client}_CLIENTE" in merged.columns:
156
+ validation_in_merged = f"{validation_col_client}_CLIENTE"
157
 
158
+ # Métrica opcional
159
  metric_in_merged = None
160
  if metric_col_mmp and len(str(metric_col_mmp)) > 0:
161
+ if metric_col_mmp in merged.columns:
162
+ metric_in_merged = metric_col_mmp
163
+ elif f"{metric_col_mmp}_MMP" in merged.columns:
164
+ metric_in_merged = f"{metric_col_mmp}_MMP"
165
 
166
+ # Eventos por defecto: tomar SIEMPRE de MMP (no del merged)
167
  if not selected_events:
168
+ selected_events = sorted(pd.Series(df_m[event_col].dropna().unique(), dtype="object").astype(str))
169
 
170
+ # Precontar filas de MMP por evento (base del denominador y de la columna "MMP")
171
+ mmp_counts_map = df_m[event_col].astype(str).value_counts(dropna=False).to_dict()
172
+
173
+ # Construcción de tablas
174
  tables_by_event = {}
175
  for ev in selected_events:
176
+ ev_str = str(ev)
177
+ # Subconjunto del merged SOLO para el EVENTO (para contar "Cliente" y sumar métricas)
178
+ sub = merged[merged[event_in_merged].astype(str) == ev_str]
179
+
180
+ # Denominador y columna "MMP": contar directamente en el archivo MMP
181
+ mmp_total = int(mmp_counts_map.get(ev_str, 0))
182
+
183
  if sub.empty:
184
+ row = {"Cliente": 0, "MMP": mmp_total, "%": (0.0 if mmp_total == 0 else round(0 / mmp_total * 100, 1))}
185
  if metric_in_merged:
186
  row[f"MMP_{metric_in_merged}_suma_validado"] = 0.0
187
  tables_by_event[ev] = pd.DataFrame([row])
188
  continue
189
 
190
+ # valid_mask: si hay validación y valores → usar; si no → todo True (cruce de IDs)
191
+ if validation_in_merged and validation_values:
192
+ valid_mask = sub[validation_in_merged].astype(str).isin([str(v) for v in validation_values])
193
+ else:
194
+ valid_mask = pd.Series(True, index=sub.index)
195
+
196
  cliente_count = int(valid_mask.sum())
197
+ pct = round((cliente_count / mmp_total * 100), 1) if mmp_total else 0.0
198
+
199
+ row = {"Cliente": cliente_count, "MMP": mmp_total, "%": pct}
200
 
201
+ # Métrica: suma sólo en las filas "válidas" del merged (si validación omitida → todas)
202
  if metric_in_merged:
203
  vals = pd.to_numeric(sub.loc[valid_mask, metric_in_merged], errors="coerce")
204
  row[f"MMP_{metric_in_merged}_suma_validado"] = float(vals.sum()) if cliente_count else 0.0
205
 
206
  tables_by_event[ev] = pd.DataFrame([row])
207
 
208
+ # Excel
209
  xls_bytes = BytesIO()
210
  with pd.ExcelWriter(xls_bytes, engine="xlsxwriter") as writer:
211
  sheet_name = "tablas_por_EVENTO"
 
216
  table_df.to_excel(writer, sheet_name=sheet_name, startrow=start_row, index=False)
217
  start_row += len(table_df) + 2
218
 
219
+ # raw_merge (cols clave primero)
220
  cols_keep = []
221
+ for col in [id_cliente_col, id_mmp_col if id_mmp_col in merged.columns else f"{id_mmp_col}_MMP", event_in_merged]:
222
  if col in merged.columns and col not in cols_keep:
223
  cols_keep.append(col)
224
+ if validation_in_merged and validation_in_merged in merged.columns and validation_in_merged not in cols_keep:
225
+ cols_keep.append(validation_in_merged)
226
  if metric_in_merged and metric_in_merged in merged.columns and metric_in_merged not in cols_keep:
227
  cols_keep.append(metric_in_merged)
228
  cols_rest = [c for c in merged.columns if c not in cols_keep]
 
239
  first_ev = list(tables_by_event.keys())[0]
240
  preview = tables_by_event[first_ev]
241
 
242
+ return preview, download_path, "Listo ✅ (MMP como denominador real)"
243
 
244
+ # --------------------- App (Pasos) ---------------------
245
  with gr.Blocks(title=APP_TITLE) as demo:
246
  gr.Markdown(f"# {APP_TITLE}\n\n{APP_DESC}")
247
 
 
253
  step1_btn = gr.Button("Paso 1: Cargar columnas")
254
 
255
  # Paso 2
256
+ gr.Markdown("## Paso 2: Elegir columnas de ID, validación (CLIENTE) [opcional] y métrica MMP (opcional)")
257
  with gr.Row():
258
  id_cliente_col = gr.Dropdown(choices=[], label="ID en CLIENTE (para cruce)")
259
  id_mmp_col = gr.Dropdown(choices=[], label="ID en MMP (para cruce)")
260
  with gr.Row():
261
+ validation_col_client = gr.Dropdown(choices=[], value=None, label="Columna de validación (CLIENTE) — opcional")
262
+ metric_col_mmp = gr.Dropdown(choices=[], value=None, label="Columna de métrica en MMP opcional")
263
  mmp_event_col = gr.Dropdown(choices=[], label="(Se usará en el Paso 3) Columna de EVENTO en MMP")
264
  step1_btn.click(
265
  load_columns,
 
267
  outputs=[id_cliente_col, id_mmp_col, validation_col_client, metric_col_mmp, mmp_event_col, gr.Markdown()]
268
  )
269
 
270
+ # Mapeo de eventos (antes del Paso 3)
271
  map_events_btn = gr.Button("Mapear eventos desde columna de eventos de MMP")
272
 
273
  # Paso 3
 
279
  outputs=[event_vals, gr.Markdown()]
280
  )
281
 
282
+ # Paso 4 (opcional)
283
+ load_valid_btn = gr.Button("Paso 4 (opcional): Cargar valores de validación (CLIENTE)")
284
+ gr.Markdown("## Paso 4 (opcional): Cargar valores de validación (CLIENTE) y elegirlos")
285
  valid_vals = gr.CheckboxGroup(choices=[], label="Valores que significan VALIDADO (CLIENTE)")
286
  load_valid_btn.click(
287
  load_validation_values,