farmentano12 committed on
Commit
8d81e17
·
verified ·
1 Parent(s): 3c237a7

Back to the good one

Browse files
Files changed (1) hide show
  1. app.py +216 -468
app.py CHANGED
@@ -1,50 +1,21 @@
1
  import gradio as gr
2
  import pandas as pd
3
  from io import BytesIO
 
4
  import tempfile
5
- import os, json
6
- from google.cloud import bigquery
7
- from google.oauth2 import service_account
8
-
9
- # BigQuery (optional)
10
- try:
11
- from google.cloud import bigquery
12
- _HAS_BQ = True
13
- except Exception:
14
- _HAS_BQ = False
15
-
16
- # Optional: ensure db_dtypes is available for BigQuery -> pandas
17
- try:
18
- import db_dtypes # noqa: F401
19
- _HAS_DB_DTYPES = True
20
- except Exception:
21
- _HAS_DB_DTYPES = False
22
-
23
- APP_TITLE = "Cruce CLIENTE × MMP por EVENTO (archivo o BigQuery)"
24
  APP_DESC = """
25
- ### Fuente 1: MMP
26
- **BigQuery (tabla única)**: `plasma-bison-438415-t8.connector_appsflyer_raw_data.appsflyer_raw_data_daily_report`
27
- Pasos BQ:
28
- 1) Ingresá **App ID** y **rango de fechas** (YYYY-MM-DD).
29
- 2) **Obtener columnas (schema)** sugiere **columna temporal (event_time)**, **evento (event_name)**, **ID en MMP (appsflyer_id)** y **App ID columna** (app_id).
30
- 3) **Listar eventos por rango** (usa App ID + fechas + columna de evento).
31
- 4) **Consultar y cargar MMP** → genera CSV temporal, preview y descarga.
32
-
33
- **Archivo**: subir archivo, detectar columnas y (opcional) **listar eventos** para filtrar. No hace falta App ID ni fechas.
34
-
35
- ### Fuente 2: CLIENTE
36
- 1) Subir **CLIENTE** → **Obtener mapeo de columnas**.
37
- 2) Elegir **ID en CLIENTE**.
38
- 3) **Columna de validación (opcional)** y **valores** (opcional).
39
- 4) **Columna de métrica (CLIENTE) (opcional)**.
40
- 5) **Columna de EVENTO (CLIENTE)**.
41
-
42
- ### Final
43
- - Por cada **evento** (de MMP), **Cliente, MMP, %** con `% = Cliente / MMP × 100` (1 decimal).
44
- - Excel: **Hoja 1** tablas por evento; **Hoja 2** `raw_merge`.
45
  """
46
 
47
- # -------------------------- Helpers --------------------------
48
  def _read_excel(pathlike):
49
  return pd.read_excel(pathlike, engine="openpyxl")
50
 
@@ -54,10 +25,10 @@ def _read_csv_with_fallbacks(pathlike):
54
  except Exception:
55
  return pd.read_csv(pathlike, sep=None, engine="python", on_bad_lines="skip", encoding="latin-1")
56
 
57
- def _safe_read(fileobj_or_path):
58
- if fileobj_or_path is None or (isinstance(fileobj_or_path, str) and not fileobj_or_path.strip()):
59
  return None
60
- path = fileobj_or_path.name if hasattr(fileobj_or_path, "name") else fileobj_or_path
61
  ext = os.path.splitext(str(path))[-1].lower()
62
  try:
63
  if ext in [".xlsx", ".xlsm", ".xltx", ".xltm"]:
@@ -75,481 +46,258 @@ def _safe_read(fileobj_or_path):
75
  except Exception as e:
76
  raise RuntimeError(f"No se pudo leer '{os.path.basename(str(path))}': {e}")
77
 
78
- def _guess(cols, candidates):
79
- lower_map = {c.lower(): c for c in cols}
80
- for cand in candidates:
81
- if cand.lower() in lower_map:
82
- return lower_map[cand.lower()]
83
- return cols[0] if cols else None
84
-
85
- def _safe_file_output(path):
86
- if path and isinstance(path, str) and os.path.isfile(path):
87
- return path
88
- return None
89
-
90
- # -------------------------- BQ helpers (tabla fija) --------------------------
91
- BQ_PROJECT = "plasma-bison-438415-t8"
92
- BQ_TABLE_FQN = "plasma-bison-438415-t8.connector_appsflyer_raw_data.appsflyer_raw_data_daily_report"
93
-
94
- #def _need_bq_client():
95
- # if not _HAS_BQ:
96
- # raise RuntimeError("Falta dependencia 'google-cloud-bigquery'.")
97
- # if not os.getenv("GOOGLE_APPLICATION_CREDENTIALS"):
98
- # raise RuntimeError("GOOGLE_APPLICATION_CREDENTIALS no seteado.")
99
- # return bigquery.Client(project=BQ_PROJECT)
100
-
101
- def _need_bq_client():
102
- sa_json = os.getenv("GCP_SA_JSON")
103
- if sa_json:
104
- info = json.loads(sa_json)
105
- creds = service_account.Credentials.from_service_account_info(info)
106
- project = info.get("project_id") or PROJECT_DEFAULT
107
- return bigquery.Client(project=project, credentials=creds)
108
-
109
- # Fallbacks: local file via GOOGLE_APPLICATION_CREDENTIALS or metadata if running on GCP
110
- return bigquery.Client(project=PROJECT_DEFAULT)
111
-
112
- def bq_get_columns_fixed():
113
- client = _need_bq_client()
114
- table = client.get_table(BQ_TABLE_FQN)
115
- cols = [sch.name for sch in table.schema]
116
- time_guess = _guess(cols, ["event_time","event_date","event_datetime","timestamp","date"])
117
- event_guess = _guess(cols, ["event_name","Event Name","evento","event"])
118
- id_guess = _guess(cols, ["appsflyer_id","advertising_id","adid","idfa","ID","Id"])
119
- appid_guess = _guess(cols, ["app_id","bundle_id","app","appId"])
120
- return cols, time_guess, event_guess, id_guess, appid_guess
121
-
122
- def bq_list_events_fixed(event_col, time_col, app_id_col, app_id_value, start_date, end_date, limit=500):
123
- client = _need_bq_client()
124
- cols, t_guess, e_guess, _, a_guess = bq_get_columns_fixed()
125
- event_col = event_col or e_guess
126
- time_col = time_col or t_guess
127
- app_id_col = app_id_col or a_guess
128
- if not (event_col and time_col and app_id_col and app_id_value and start_date and end_date):
129
- return [], "Completá App ID, fechas y columnas (evento/fecha/App ID)."
130
- sql = f"""
131
- SELECT DISTINCT CAST({event_col} AS STRING) AS ev
132
- FROM `{BQ_TABLE_FQN}`
133
- WHERE DATE({time_col}) BETWEEN @sd AND @ed
134
- AND CAST({app_id_col} AS STRING) = @app_id
135
- ORDER BY ev
136
- LIMIT {int(limit)}
137
- """
138
- job = client.query(sql, job_config=bigquery.QueryJobConfig(
139
- query_parameters=[
140
- bigquery.ScalarQueryParameter("sd", "DATE", str(start_date)),
141
- bigquery.ScalarQueryParameter("ed", "DATE", str(end_date)),
142
- bigquery.ScalarQueryParameter("app_id", "STRING", str(app_id_value).strip()),
143
- ]
144
- ))
145
- df = job.result().to_dataframe(create_bqstorage_client=False)
146
- return sorted(df["ev"].dropna().astype(str).tolist()), f"{len(df)} eventos encontrados."
147
-
148
- def bq_query_to_temp_fixed(event_col, time_col, app_id_col, app_id_value, start_date, end_date, selected_events):
149
- client = _need_bq_client()
150
- cols, t_guess, e_guess, _, a_guess = bq_get_columns_fixed()
151
- event_col = event_col or e_guess
152
- time_col = time_col or t_guess
153
- app_id_col = app_id_col or a_guess
154
- if not (event_col and time_col and app_id_col and app_id_value and start_date and end_date):
155
- raise RuntimeError("Completá App ID, fechas y columnas (evento/fecha/App ID).")
156
- params = [
157
- bigquery.ScalarQueryParameter("sd", "DATE", str(start_date)),
158
- bigquery.ScalarQueryParameter("ed", "DATE", str(end_date)),
159
- bigquery.ScalarQueryParameter("app_id", "STRING", str(app_id_value).strip()),
160
- ]
161
- ev_filter = ""
162
- if selected_events:
163
- params.append(bigquery.ArrayQueryParameter("events", "STRING", [str(v) for v in selected_events]))
164
- ev_filter = f"AND CAST({event_col} AS STRING) IN UNNEST(@events)"
165
- sql = f"""
166
- SELECT *
167
- FROM `{BQ_TABLE_FQN}`
168
- WHERE DATE({time_col}) BETWEEN @sd AND @ed
169
- AND CAST({app_id_col} AS STRING) = @app_id
170
- {ev_filter}
171
- """
172
- job = client.query(sql, job_config=bigquery.QueryJobConfig(query_parameters=params))
173
- df = job.result().to_dataframe(create_bqstorage_client=False)
174
- tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
175
- df.to_csv(tmp.name, index=False)
176
- return tmp.name, df.head(20).to_dict(orient="records")
177
-
178
- # -------------------------- File helpers --------------------------
179
- def file_mmp_schema(file):
180
- try:
181
- df = _safe_read(file)
182
- except Exception as e:
183
- return (gr.update(), gr.update(), gr.update(), gr.update(), f"Error al leer MMP: {e}")
184
- cols = list(df.columns)
185
- time_guess = _guess(cols, ["event_time","event_date","event_time_millis","timestamp","date","Date","Event Time"])
186
- event_guess = _guess(cols, ["event_name","Event Name","evento","EVENTO","Event"])
187
- id_guess = _guess(cols, ["appsflyer_id","Advertising ID","advertising_id","adid","idfa","ID","Id"])
188
- appid_guess = _guess(cols, ["app_id","bundle_id","app","appId","App ID"])
189
- return (gr.update(choices=cols, value=time_guess),
190
- gr.update(choices=cols, value=event_guess),
191
- gr.update(choices=cols, value=id_guess),
192
- gr.update(choices=cols, value=appid_guess),
193
- "Columnas detectadas (archivo MMP).")
194
-
195
- def file_mmp_list_events_simple(file, event_col):
196
  try:
197
- df = _safe_read(file)
198
- except Exception as e:
199
- return gr.update(choices=[], value=[]), f"Error al leer MMP: {e}"
200
- if not event_col or event_col not in df.columns:
201
- return gr.update(choices=[], value=[]), "Elegí la columna de evento (archivo MMP)."
202
- vals = sorted(pd.Series(df[event_col].astype(str).unique()).dropna().tolist())
203
- return gr.update(choices=vals, value=vals), f"{len(vals)} eventos detectados (archivo MMP)."
204
-
205
- def file_prepare(src_file, ev_col, selected_events):
206
- try:
207
- df = _safe_read(src_file)
208
- if selected_events:
209
- df = df[df[ev_col].astype(str).isin([str(v) for v in selected_events])]
210
- tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
211
- df.to_csv(tmp.name, index=False)
212
- return tmp.name, df.head(20)
213
- except Exception as e:
214
- raise RuntimeError(f"Error al preparar MMP (archivo): {e}")
215
-
216
- # -------------------------- CLIENTE helpers --------------------------
217
- def cliente_map_columns(cliente_file):
218
- try:
219
- df = _safe_read(cliente_file)
220
  except Exception as e:
221
- return (gr.update(), gr.update(), gr.update(), gr.update(), "Error al leer CLIENTE: "+str(e))
222
- cols = list(df.columns)
223
- id_guess = _guess(cols, ["appsflyer_id","Advertising ID","advertising_id","user_id","User Id","transaction_id","Transaction Id","ID","Id"])
224
- valid_guess = None # opcional
225
- metric_guess = _guess(cols, ["revenue","amount","value","ticket","Event Revenue","importe","monto"])
226
- event_guess = _guess(cols, ["event_name","Event Name","evento","EVENTO","Event"])
227
- return (gr.update(choices=cols, value=id_guess),
228
- gr.update(choices=cols, value=valid_guess),
229
- gr.update(choices=cols, value=metric_guess),
230
- gr.update(choices=cols, value=event_guess),
231
- "Columnas de CLIENTE listas.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
 
233
  def load_validation_values(cliente_file, validation_col):
234
  try:
235
  df_c = _safe_read(cliente_file) if cliente_file else None
236
  except Exception as e:
237
  return gr.update(choices=[], value=[]), f"Error al leer CLIENTE: {e}"
 
238
  if df_c is None or not validation_col or validation_col not in df_c.columns:
239
- return gr.update(choices=[], value=[]), "Omitido: sin columna de validación (se usará cruce de IDs)."
240
- vals = sorted(pd.Series(df_c[validation_col].astype(str).unique()).dropna().tolist())
241
- return gr.update(choices=vals, value=[]), f"{len(vals)} valores posibles de validación."
242
-
243
- # -------------------------- Compute --------------------------
244
- def compute(cliente_file, mmp_final_path,
245
- id_cliente_col, id_mmp_col,
246
- validation_col_client, validation_values,
247
- metric_col_client,
248
- client_event_col,
249
- mmp_event_col,
250
- selected_events_mmp):
251
- if not mmp_final_path:
252
- return None, None, "Primero completá la fuente MMP."
253
- if not cliente_file:
254
- return None, None, "Subí CLIENTE y mapeá las columnas."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  try:
256
  df_c = _safe_read(cliente_file)
257
- df_m = _safe_read(mmp_final_path)
258
  except Exception as e:
259
- return None, None, f"Error al leer fuentes: {e}"
 
 
260
  for name, col, df in [
261
  ("ID CLIENTE", id_cliente_col, df_c),
262
- ("ID MMP", id_mmp_col, df_m),
263
- ("EVENTO (CLIENTE)", client_event_col, df_c),
264
- ("EVENTO (MMP)", mmp_event_col, df_m),
265
  ]:
266
  if not col or col not in df.columns:
267
  return None, None, f"Columna inválida: {name} = {col}"
 
 
 
 
 
268
  try:
269
- merged = df_c.merge(df_m, left_on=id_cliente_col, right_on=id_mmp_col, how="left",
270
- suffixes=("_CLIENTE", "_MMP"))
 
 
271
  except Exception as e:
272
  return None, None, f"Error durante el merge por IDs: {e}"
273
 
274
- # --- Resolver nombres de columnas tras el merge (por posibles sufijos) ---
275
- def _resolve_merged(col, prefer_suffix):
276
- if col in merged.columns:
277
- return col
278
- cand1 = f"{col}{prefer_suffix}"
279
- cand2 = f"{col}_x"
280
- cand3 = f"{col}_y"
281
- for c in (cand1, cand2, cand3):
282
- if c in merged.columns:
283
- return c
284
- # último intento: coincidencia case-insensitive
285
- lower_map = {c.lower(): c for c in merged.columns}
286
- return lower_map.get(col.lower(), col)
287
-
288
- client_event_in_merged = _resolve_merged(client_event_col, "_CLIENTE")
289
- mmp_event_in_merged = _resolve_merged(mmp_event_col, "_MMP")
290
- validation_in_merged = _resolve_merged(validation_col_client, "_CLIENTE") if validation_col_client else None
291
- metric_in_merged = _resolve_merged(metric_col_client, "_CLIENTE") if metric_col_client else None
292
-
293
- # Si no se seleccionaron eventos MMP, usar todos los presentes en df_m (ya filtrado por tu consulta)
294
- if not selected_events_mmp:
295
- selected_events_mmp = sorted(df_m[mmp_event_col].astype(str).dropna().unique().tolist())
296
-
297
- # Denominador: conteo directo desde MMP por evento
298
- mmp_counts_map = df_m[mmp_event_col].astype(str).value_counts(dropna=False).to_dict()
299
-
300
- # Validación opcional (sobre columnas del lado CLIENTE en el merged)
301
- if validation_in_merged and validation_in_merged in merged.columns and validation_values:
302
- valid_mask_all = merged[validation_in_merged].astype(str).isin([str(v) for v in validation_values])
303
- else:
304
- valid_mask_all = pd.Series(True, index=merged.index)
305
-
306
- # Construcción de tablas por evento (los eventos vienen de MMP)
307
- tables_by_event = {}
308
- for ev in selected_events_mmp:
309
- ev_str = str(ev)
310
- # Numerador: filas de CLIENTE cuyo EVENTO_CLIENTE == evento MMP y cumplan validación
311
- if client_event_in_merged not in merged.columns:
312
- return None, None, f"No encuentro columna de evento de CLIENTE tras el merge: {client_event_in_merged}"
313
- sub = merged[merged[client_event_in_merged].astype(str) == ev_str]
314
- if sub.empty:
315
- cliente_count = 0
316
- metric_sum = 0.0
317
- else:
318
- valid_mask = valid_mask_all.loc[sub.index]
319
- cliente_count = int(valid_mask.sum())
320
- if metric_in_merged and metric_in_merged in sub.columns:
321
- vals = pd.to_numeric(sub.loc[valid_mask, metric_in_merged], errors="coerce")
322
- metric_sum = float(vals.sum()) if cliente_count else 0.0
323
- else:
324
- metric_sum = 0.0
325
-
326
- mmp_total = int(mmp_counts_map.get(ev_str, 0))
327
- pct = round((cliente_count / mmp_total * 100), 1) if mmp_total else 0.0
328
-
329
- row = {"Cliente": cliente_count, "MMP": mmp_total, "%": pct}
330
- if metric_in_merged and metric_in_merged in merged.columns:
331
- row[f"CLIENTE_{metric_col_client}_suma_validado"] = metric_sum
332
- tables_by_event[ev] = pd.DataFrame([row])
333
-
334
- # Excel
335
  xls_bytes = BytesIO()
336
  with pd.ExcelWriter(xls_bytes, engine="xlsxwriter") as writer:
337
- sheet_name = "tablas_por_EVENTO"
338
  start_row = 0
339
- for ev, table_df in tables_by_event.items():
340
- pd.DataFrame([[ev]]).to_excel(writer, sheet_name=sheet_name, startrow=start_row, index=False, header=False)
341
  start_row += 1
342
  table_df.to_excel(writer, sheet_name=sheet_name, startrow=start_row, index=False)
343
  start_row += len(table_df) + 2
344
 
345
- # Hoja 2: raw_merge (columnas clave primero; usar nombres resueltos)
346
  cols_keep = []
347
- for col in [id_cliente_col, id_mmp_col, client_event_in_merged, mmp_event_in_merged]:
348
- if col and col in merged.columns and col not in cols_keep:
349
  cols_keep.append(col)
350
- if validation_in_merged and validation_in_merged in merged.columns and validation_in_merged not in cols_keep:
351
- cols_keep.append(validation_in_merged)
352
- if metric_in_merged and metric_in_merged in merged.columns and metric_in_merged not in cols_keep:
353
- cols_keep.append(metric_in_merged)
354
  cols_rest = [c for c in merged.columns if c not in cols_keep]
355
  merged[cols_keep + cols_rest].to_excel(writer, sheet_name="raw_merge", index=False)
356
  xls_bytes.seek(0)
357
 
358
- import tempfile
359
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
360
- tmp.write(xls_bytes.getvalue()); tmp.flush(); tmp.close()
 
361
  download_path = tmp.name
362
 
363
- # Preview: primera tabla
364
  preview = None
365
- if tables_by_event:
366
- first_ev = list(tables_by_event.keys())[0]
367
- preview = tables_by_event[first_ev]
368
 
369
  return preview, download_path, "Listo ✅"
370
 
371
- # -------------------------- UI --------------------------
372
  with gr.Blocks(title=APP_TITLE) as demo:
373
  gr.Markdown(f"# {APP_TITLE}\n\n{APP_DESC}")
374
 
375
- # ===== MMP: Selección de fuente =====
376
- gr.Markdown("## Fuente 1: MMP")
377
- mmp_source = gr.Radio(choices=["Subir archivo", "BigQuery"], value="Subir archivo", label="Fuente de MMP")
378
-
379
- # --- BigQuery Panel (tabla fija) ---
380
- with gr.Column(visible=False) as bq_panel:
381
- gr.Markdown("**Paso MMP-BQ 1**: App ID y Fechas")
382
- with gr.Row():
383
- bq_app_id_value = gr.Textbox(label="App ID (valor exacto)", placeholder="com.tu.app")
384
- bq_start = gr.Textbox(label="Fecha desde (YYYY-MM-DD)", placeholder="YYYY-MM-DD")
385
- bq_end = gr.Textbox(label="Fecha hasta (YYYY-MM-DD)", placeholder="YYYY-MM-DD")
386
-
387
- gr.Markdown("**Paso MMP-BQ 2**: Obtener columnas (schema)")
388
- with gr.Row():
389
- bq_time_col = gr.Dropdown(choices=[], label="Columna temporal (ej: event_time)")
390
- mmp_event_col_bq = gr.Dropdown(choices=[], label="Columna de EVENTO en MMP (ej: event_name)")
391
- id_mmp_col_bq = gr.Dropdown(choices=[], label="ID en MMP (para cruce) (ej: appsflyer_id)")
392
- bq_app_id_col = gr.Dropdown(choices=[], label="Columna App ID (ej: app_id)")
393
- bq_schema_btn = gr.Button("Obtener columnas (schema)")
394
- bq_schema_msg = gr.Markdown()
395
-
396
- gr.Markdown("**Paso MMP-BQ 3**: Listar eventos por rango")
397
- mmp_events_bq = gr.CheckboxGroup(choices=[], label="Eventos detectados (BigQuery)")
398
- bq_events_btn = gr.Button("Listar eventos por rango (BigQuery)")
399
- bq_events_msg = gr.Markdown()
400
-
401
- gr.Markdown("**Paso MMP-BQ 4**: Consultar y cargar MMP")
402
- mmp_preview_bq = gr.Dataframe(label="Preview MMP (BQ)", interactive=False)
403
- mmp_bq_download = gr.File(label="Descargar MMP (resultado de BigQuery)", interactive=False)
404
- mmp_final_path_bq = gr.Textbox(label="Ruta MMP final (temporal BQ)", visible=False)
405
- bq_query_btn = gr.Button("Consultar y cargar MMP (BigQuery)")
406
- bq_query_msg = gr.Markdown()
407
-
408
- # --- File Panel (simplificado) ---
409
- with gr.Column(visible=True) as file_panel:
410
- gr.Markdown("**Paso MMP-Archivo 1**: Subir y detectar columnas")
411
- mmp_file = gr.File(label="Subí MMP.xlsx/csv", file_types=[".xlsx", ".csv"])
412
- with gr.Row():
413
- file_time_col = gr.Dropdown(choices=[], label="Columna temporal (archivo)")
414
- mmp_event_col_file = gr.Dropdown(choices=[], label="Columna de EVENTO (archivo)")
415
- id_mmp_col_file = gr.Dropdown(choices=[], label="ID en MMP (archivo)")
416
- file_app_id_col = gr.Dropdown(choices=[], label="Columna App ID (archivo)")
417
- file_schema_btn = gr.Button("Obtener columnas (archivo)")
418
- file_schema_msg = gr.Markdown()
419
-
420
- gr.Markdown("**Paso MMP-Archivo 2**: (opcional) Listar eventos del archivo y filtrar")
421
- mmp_events_file = gr.CheckboxGroup(choices=[], label="Eventos detectados (archivo)")
422
- file_events_btn = gr.Button("Listar eventos (archivo)")
423
- file_events_msg = gr.Markdown()
424
-
425
- gr.Markdown("**Paso MMP-Archivo 3**: Cargar & previsualizar")
426
- mmp_preview_file = gr.Dataframe(label="Preview MMP (archivo)", interactive=False)
427
- mmp_file_download = gr.File(label="Descargar MMP (archivo filtrado)", interactive=False)
428
- mmp_final_path_file = gr.Textbox(label="Ruta MMP final (temporal archivo)", visible=False)
429
- file_query_btn = gr.Button("Cargar MMP (archivo)")
430
- file_query_msg = gr.Markdown()
431
-
432
- # Toggle panels
433
- def _toggle_source(src):
434
- return (gr.update(visible=(src=="Subir archivo")), gr.update(visible=(src=="BigQuery")))
435
- mmp_source.change(_toggle_source, inputs=[mmp_source], outputs=[file_panel, bq_panel])
436
-
437
- # BQ: schema
438
- def _bq_schema_fixed():
439
- try:
440
- cols, t_guess, e_guess, id_guess, appid_guess = bq_get_columns_fixed()
441
- return (gr.update(choices=cols, value=t_guess),
442
- gr.update(choices=cols, value=e_guess),
443
- gr.update(choices=cols, value=id_guess),
444
- gr.update(choices=cols, value=appid_guess),
445
- "Schema cargado (tabla fija BQ).")
446
- except Exception as e:
447
- return (gr.update(choices=[], value=None),
448
- gr.update(choices=[], value=None),
449
- gr.update(choices=[], value=None),
450
- gr.update(choices=[], value=None),
451
- f"Error schema: {e}")
452
- bq_schema_btn.click(_bq_schema_fixed, inputs=[], outputs=[bq_time_col, mmp_event_col_bq, id_mmp_col_bq, bq_app_id_col, bq_schema_msg])
453
-
454
- # BQ: listar eventos
455
- def _bq_list_events_fixed(ev_col, t_col, app_col, app_val, ds, de):
456
- try:
457
- vals, msg = bq_list_events_fixed(ev_col, t_col, app_col, app_val, ds, de)
458
- return gr.update(choices=vals, value=vals), msg
459
- except Exception as e:
460
- return gr.update(choices=[], value=[]), f"Error al listar eventos: {e}"
461
- bq_events_btn.click(_bq_list_events_fixed, inputs=[mmp_event_col_bq, bq_time_col, bq_app_id_col, bq_app_id_value, bq_start, bq_end], outputs=[mmp_events_bq, bq_events_msg])
462
-
463
- # BQ: query final
464
- def _bq_query_fixed(ev_col, t_col, app_col, app_val, ds, de, evs):
465
- try:
466
- path, preview_rows = bq_query_to_temp_fixed(ev_col, t_col, app_col, app_val, ds, de, evs or [])
467
- preview_df = pd.DataFrame(preview_rows)
468
- file_path = _safe_file_output(path)
469
- return preview_df, file_path, path, "OK: MMP desde BigQuery cargado."
470
- except Exception as e:
471
- return gr.update(), None, "", f"Error consulta BQ: {e}"
472
- bq_query_btn.click(_bq_query_fixed, inputs=[mmp_event_col_bq, bq_time_col, bq_app_id_col, bq_app_id_value, bq_start, bq_end, mmp_events_bq], outputs=[mmp_preview_bq, mmp_bq_download, mmp_final_path_bq, bq_query_msg])
473
-
474
- # File: schema & events
475
- file_schema_btn.click(file_mmp_schema, inputs=[mmp_file], outputs=[file_time_col, mmp_event_col_file, id_mmp_col_file, file_app_id_col, file_schema_msg])
476
- file_events_btn.click(file_mmp_list_events_simple, inputs=[mmp_file, mmp_event_col_file], outputs=[mmp_events_file, file_events_msg])
477
-
478
- # File: final
479
- def _file_query(src_file, ev_col, evs):
480
- try:
481
- path, preview = file_prepare(src_file, ev_col, evs or [])
482
- file_path = _safe_file_output(path)
483
- return preview, file_path, path, "OK: MMP desde archivo cargado."
484
- except Exception as e:
485
- return gr.update(), None, "", f"Error archivo MMP: {e}"
486
- file_query_btn.click(_file_query, inputs=[mmp_file, mmp_event_col_file, mmp_events_file], outputs=[mmp_preview_file, mmp_file_download, mmp_final_path_file, file_query_msg])
487
-
488
- # ===== CLIENTE =====
489
- gr.Markdown("## Fuente 2: CLIENTE")
490
  with gr.Row():
491
- cliente_file = gr.File(label="CLIENTE.xlsx/csv", file_types=[".xlsx", ".csv"])
492
- map_cliente_btn = gr.Button("Obtener mapeo de columnas (CLIENTE)")
 
493
  with gr.Row():
494
  id_cliente_col = gr.Dropdown(choices=[], label="ID en CLIENTE (para cruce)")
495
- validation_col_client = gr.Dropdown(choices=[], value=None, label="Columna de validación (CLIENTE) — opcional")
 
496
  with gr.Row():
497
- metric_col_client = gr.Dropdown(choices=[], value=None, label="Columna de métrica (CLIENTE) opcional")
498
- client_event_col = gr.Dropdown(choices=[], label="Columna de EVENTO (CLIENTE)")
499
- cliente_msg = gr.Markdown()
500
- map_cliente_btn.click(cliente_map_columns, inputs=[cliente_file], outputs=[id_cliente_col, validation_col_client, metric_col_client, client_event_col, cliente_msg])
501
-
502
- gr.Markdown("### Opcional: valores de validación")
503
- valid_vals = gr.CheckboxGroup(choices=[], label="Valores que significan VALIDADO (CLIENTE)")
504
- load_valid_btn = gr.Button("Cargar valores de validación (CLIENTE)")
505
- valid_msg = gr.Markdown()
506
- load_valid_btn.click(load_validation_values, inputs=[cliente_file, validation_col_client], outputs=[valid_vals, valid_msg])
507
-
508
- # ===== Generar =====
509
- gr.Markdown("## Generar tablas y Excel")
510
- run_btn = gr.Button("Generar tablas")
511
- preview_out = gr.Dataframe(label="Preview: primera tabla por EVENTO", interactive=False)
512
- xls_file = gr.File(label="Descargar Excel (tablas_por_EVENTO + raw_merge)", interactive=False)
513
- gen_msg = gr.Markdown()
514
-
515
- def _compute_router(cliente,
516
- source,
517
- mmp_final_file_panel, mmp_final_bq_panel,
518
- id_cli, id_mmp_file, id_mmp_bq,
519
- val_col, val_vals,
520
- metric_cli, cli_evt,
521
- mmp_evt_file, mmp_evt_bq,
522
- events_file, events_bq):
523
- if source == "Subir archivo":
524
- mmp_path = mmp_final_file_panel
525
- id_mmp = id_mmp_file
526
- mmp_evt_col = mmp_evt_file
527
- selected_events = events_file
528
- else:
529
- mmp_path = mmp_final_bq_panel
530
- id_mmp = id_mmp_bq
531
- mmp_evt_col = mmp_evt_bq
532
- selected_events = events_bq
533
-
534
- return compute(cliente, mmp_path,
535
- id_cli, id_mmp,
536
- val_col, val_vals,
537
- metric_cli,
538
- cli_evt,
539
- mmp_evt_col,
540
- selected_events)
541
 
542
  run_btn.click(
543
- _compute_router,
544
- inputs=[cliente_file,
545
- mmp_source,
546
- mmp_final_path_file, mmp_final_path_bq,
547
- id_cliente_col, id_mmp_col_file, id_mmp_col_bq,
548
- validation_col_client, valid_vals,
549
- metric_col_client, client_event_col,
550
- mmp_event_col_file, mmp_event_col_bq,
551
- mmp_events_file, mmp_events_bq],
552
- outputs=[preview_out, xls_file, gen_msg]
553
  )
554
 
555
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  import pandas as pd
3
  from io import BytesIO
4
+ import os
5
  import tempfile
6
+
7
+ from pandas.core.indexes.base import F
8
+
9
+ APP_TITLE = "Cruce CLIENTE × LINKTRUST por MODELO (CLIENTE) y AFFILIATE (LINKTRUST)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  APP_DESC = """
11
+ 1) Subí **CLIENTE** y **LINKTRUST** (xlsx/csv).
12
+ 2) Elegí columnas de **ID** para cruce.
13
+ 3) Elegí **AFFILIATE en LINKTRUST** (filas) y **MODELO en CLIENTE** (una tabla por MODELO).
14
+ 4) Elegí **columna de validación (CLIENTE)** y cargá los **valores que significan VALIDADO**.
15
+ 5) Generá tablas: por cada **MODELO** se crea una tabla con filas **AFFILIATE** y columnas **Cliente, LT, %**, donde **% = (Cliente / LT) × 100** (1 decimal).
16
+ La **Hoja 1** del Excel contiene todas las tablas apiladas por MODELO. La **Hoja 2** contiene la raw data del cruce (pre-filtro).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  """
18
 
 
19
  def _read_excel(pathlike):
20
  return pd.read_excel(pathlike, engine="openpyxl")
21
 
 
25
  except Exception:
26
  return pd.read_csv(pathlike, sep=None, engine="python", on_bad_lines="skip", encoding="latin-1")
27
 
28
+ def _safe_read(fileobj):
29
+ if fileobj is None:
30
  return None
31
+ path = fileobj.name if hasattr(fileobj, "name") else fileobj
32
  ext = os.path.splitext(str(path))[-1].lower()
33
  try:
34
  if ext in [".xlsx", ".xlsm", ".xltx", ".xltm"]:
 
46
  except Exception as e:
47
  raise RuntimeError(f"No se pudo leer '{os.path.basename(str(path))}': {e}")
48
 
49
+ def _resolve_merged_col(merged, base_name, prefer_suffix=None):
50
+ """Devuelve el nombre real de la columna en merged (maneja _CLIENTE/_LINKTRUST y case-insensitive)."""
51
+ if base_name in merged.columns:
52
+ return base_name
53
+ if prefer_suffix and f"{base_name}{prefer_suffix}" in merged.columns:
54
+ return f"{base_name}{prefer_suffix}"
55
+ for suf in ["_CLIENTE", "_LINKTRUST"]:
56
+ cand = f"{base_name}{suf}"
57
+ if cand in merged.columns:
58
+ return cand
59
+ lower_map = {c.lower(): c for c in merged.columns}
60
+ return lower_map.get(base_name.lower(), None)
61
+
62
+ def load_columns(cliente_file, linktrust_file):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  try:
64
+ df_c = _safe_read(cliente_file) if cliente_file else None
65
+ df_l = _safe_read(linktrust_file) if linktrust_file else None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  except Exception as e:
67
+ return (gr.update(), gr.update(), gr.update(), gr.update(),
68
+ gr.update(), gr.update(), "Error al leer archivos: " + str(e))
69
+
70
+ cliente_cols = list(df_c.columns) if df_c is not None else []
71
+ linktrust_cols = list(df_l.columns) if df_l is not None else []
72
+
73
+ def _guess(cols, candidates):
74
+ # case-insensitive first match
75
+ lower_map = {c.lower(): c for c in cols}
76
+ for cand in candidates:
77
+ if cand.lower() in lower_map:
78
+ return lower_map[cand.lower()]
79
+ return cols[0] if cols else None
80
+
81
+ guess_id_c = _guess(cliente_cols, ["Transaction Id", "ID", "Id"])
82
+ guess_id_l = _guess(linktrust_cols, ["Transaction Id", "ID", "Id"])
83
+ guess_aff = _guess(linktrust_cols, ["AffiliateId", "Affiliate Id", "AFFILIATEID", "affiliate_id"])
84
+ guess_modelo = _guess(cliente_cols, ["MODELO", "Modelo", "Model", "model"])
85
+
86
+ return (
87
+ gr.update(choices=cliente_cols, value=guess_id_c), # id_cliente_col
88
+ gr.update(choices=linktrust_cols, value=guess_id_l), # id_linktrust_col
89
+ gr.update(choices=linktrust_cols, value=guess_aff), # affiliate_col (LINKTRUST)
90
+ gr.update(choices=cliente_cols, value=guess_modelo), # modelo_col (CLIENTE)
91
+ gr.update(choices=cliente_cols, value=None), # validation_col (CLIENTE)
92
+ gr.update(choices=[], value=[]), # validation values
93
+ "Listo: columnas cargadas."
94
+ )
95
 
96
  def load_validation_values(cliente_file, validation_col):
97
  try:
98
  df_c = _safe_read(cliente_file) if cliente_file else None
99
  except Exception as e:
100
  return gr.update(choices=[], value=[]), f"Error al leer CLIENTE: {e}"
101
+
102
  if df_c is None or not validation_col or validation_col not in df_c.columns:
103
+ return gr.update(choices=[], value=[]), "Subí CLIENTE y elegí la columna de validación (CLIENTE)."
104
+ vals = sorted(pd.Series(df_c[validation_col].unique(), dtype="object").astype(str).fillna(""))
105
+ return gr.update(choices=vals, value=[]), f"{len(vals)} valores posibles encontrados."
106
+
107
+ def _prepare_tables(df_merged, affiliate_col_merged, modelo_col_merged,
108
+ affiliate_display_name, modelo_display_name,
109
+ validation_col_in_merged, valid_values):
110
+ # LT = todos los matches (pre-filtro) por (AFFILIATE, MODELO)
111
+ lt_counts = (
112
+ df_merged
113
+ .groupby([affiliate_col_merged, modelo_col_merged], dropna=False)
114
+ .size()
115
+ .rename("LT")
116
+ .reset_index()
117
+ )
118
+
119
+ # Cliente = sólo filas validadas según CLIENTE
120
+ if valid_values:
121
+ mask = df_merged[validation_col_in_merged].astype(str).isin([str(v) for v in valid_values])
122
+ df_valid = df_merged[mask].copy()
123
+ else:
124
+ df_valid = df_merged.iloc[0:0].copy()
125
+
126
+ cliente_counts = (
127
+ df_valid
128
+ .groupby([affiliate_col_merged, modelo_col_merged], dropna=False)
129
+ .size()
130
+ .rename("Cliente")
131
+ .reset_index()
132
+ )
133
+
134
+ combined = lt_counts.merge(cliente_counts, on=[affiliate_col_merged, modelo_col_merged], how="left").fillna({"Cliente": 0})
135
+ combined["Cliente"] = combined["Cliente"].astype(int)
136
+ combined["LT"] = combined["LT"].astype(int)
137
+ # % = Cliente / LT * 100
138
+ combined["%"] = (combined["Cliente"] / combined["LT"] * 100).round(1)
139
+
140
+ tables_by_modelo = {}
141
+ for modelo_val, sub in combined.groupby(modelo_col_merged, dropna=False):
142
+ sub = sub.rename(columns={affiliate_col_merged: affiliate_display_name, modelo_col_merged: modelo_display_name})
143
+ sub = sub.sort_values(by=[affiliate_display_name]).reset_index(drop=True)
144
+ sub = sub[[affiliate_display_name, "Cliente", "LT", "%"]]
145
+ tot_cliente = int(sub["Cliente"].sum())
146
+ tot_lt = int(sub["LT"].sum())
147
+ tot_pct = round((tot_cliente / tot_lt * 100), 1) if tot_lt else 0.0
148
+ sub = pd.concat([sub, pd.DataFrame([{affiliate_display_name: "Suma total", "Cliente": tot_cliente, "LT": tot_lt, "%": tot_pct}])], ignore_index=True)
149
+ tables_by_modelo[modelo_val] = sub
150
+
151
+ return tables_by_modelo, combined
152
+
153
def compute(cliente_file, linktrust_file,
            id_cliente_col, id_linktrust_col,
            affiliate_col, modelo_col,
            validation_col_client, validation_values):
    """Cross CLIENTE × LINKTRUST by ID and build per-MODELO summary tables.

    Returns ``(preview_df, excel_path, status_message)``; on any failure the
    first two elements are ``None`` and the message describes the error.
    """
    if not cliente_file or not linktrust_file:
        return None, None, "Faltan archivos."

    try:
        df_c = _safe_read(cliente_file)
        df_l = _safe_read(linktrust_file)
    except Exception as e:
        return None, None, f"Error al leer archivos: {e}"

    # Validate that every selected column exists in its source frame.
    for name, col, df in [
        ("ID CLIENTE", id_cliente_col, df_c),
        ("ID LINKTRUST", id_linktrust_col, df_l),
        ("AFFILIATE (LINKTRUST)", affiliate_col, df_l),
        ("MODELO (CLIENTE)", modelo_col, df_c),
    ]:
        if not col or col not in df.columns:
            return None, None, f"Columna inválida: {name} = {col}"

    if not validation_col_client or validation_col_client not in df_c.columns:
        return None, None, f"Elegí la columna de validación en CLIENTE."

    # Inner join on the chosen IDs.
    try:
        merged = df_c.merge(
            df_l, left_on=id_cliente_col, right_on=id_linktrust_col, how="inner",
            suffixes=("_CLIENTE", "_LINKTRUST")
        )
    except Exception as e:
        return None, None, f"Error durante el merge por IDs: {e}"

    if merged.empty:
        return None, None, "El cruce por IDs no arrojó filas."

    # The merge may have suffixed colliding names; resolve the real ones.
    validation_col_in_merged = validation_col_client if validation_col_client in merged.columns else f"{validation_col_client}_CLIENTE"
    if validation_col_in_merged not in merged.columns:
        return None, None, f"No se encuentra '{validation_col_client}' en merged."

    affiliate_in_merged = _resolve_merged_col(merged, affiliate_col, prefer_suffix="_LINKTRUST")
    modelo_in_merged = _resolve_merged_col(merged, modelo_col, prefer_suffix="_CLIENTE")
    if affiliate_in_merged is None:
        return None, None, f"No se encuentra la columna AFFILIATE '{affiliate_col}' en merged."
    if modelo_in_merged is None:
        return None, None, f"No se encuentra la columna MODELO '{modelo_col}' en merged."

    # Stable internal aliases so the grouping columns never collide with
    # user-provided column names.
    merged["__AFFILIATE__"] = merged[affiliate_in_merged]
    # Fixed: previous version referenced a typo'd name behind a
    # `'...' in locals()` guard that was always False; the else branch was
    # the only reachable path, so this is the same behavior without the trap.
    merged["__MODELO__"] = merged[modelo_in_merged]

    # One table per MODELO value.
    try:
        tables_by_modelo, combined_counts = _prepare_tables(
            df_merged=merged,
            affiliate_col_merged="__AFFILIATE__",
            modelo_col_merged="__MODELO__",
            affiliate_display_name=affiliate_col,
            modelo_display_name=modelo_col,
            validation_col_in_merged=validation_col_in_merged,
            valid_values=validation_values or []
        )
    except Exception as e:
        return None, None, f"Error construyendo tablas: {e}"

    # Excel: one sheet with the stacked per-MODELO tables, one raw-merge sheet.
    xls_bytes = BytesIO()
    with pd.ExcelWriter(xls_bytes, engine="xlsxwriter") as writer:
        sheet_name = "tablas_por_MODELO"
        start_row = 0
        for modelo_val, table_df in tables_by_modelo.items():
            # Title row with the MODELO value, then its table, then a gap.
            pd.DataFrame([modelo_val]).to_excel(writer, sheet_name=sheet_name, startrow=start_row, index=False, header=False)
            start_row += 1
            table_df.to_excel(writer, sheet_name=sheet_name, startrow=start_row, index=False)
            start_row += len(table_df) + 2

        # raw merge with the key columns first, then everything else.
        cols_keep = []
        for col in [affiliate_in_merged, modelo_in_merged, id_cliente_col, id_linktrust_col, validation_col_in_merged]:
            if col in merged.columns and col not in cols_keep:
                cols_keep.append(col)
        cols_rest = [c for c in merged.columns if c not in cols_keep]
        merged[cols_keep + cols_rest].to_excel(writer, sheet_name="raw_merge", index=False)
    xls_bytes.seek(0)

    # Persist to a temp file so Gradio can serve the download.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
    tmp.write(xls_bytes.getvalue())
    tmp.flush(); tmp.close()
    download_path = tmp.name

    # Preview: the first per-MODELO table (None if there are none).
    preview = next(iter(tables_by_modelo.values()), None)

    return preview, download_path, "Listo ✅"
253
 
 
254
with gr.Blocks(title=APP_TITLE) as demo:
    gr.Markdown(f"# {APP_TITLE}\n\n{APP_DESC}")

    # --- File inputs --------------------------------------------------
    with gr.Row():
        cliente_file = gr.File(label="CLIENTE.xlsx (o .csv)", file_types=[".xlsx", ".csv"])
        linktrust_file = gr.File(label="LINKTRUST.xlsx (o .csv)", file_types=[".xlsx", ".csv"])

    # --- Column selectors (populated by "Cargar columnas") ------------
    with gr.Row():
        id_cliente_col = gr.Dropdown(choices=[], label="ID en CLIENTE (para cruce)")
        id_linktrust_col = gr.Dropdown(choices=[], label="ID en LINKTRUST (para cruce)")

    with gr.Row():
        affiliate_col = gr.Dropdown(choices=[], label="AFFILIATE en LINKTRUST (filas de cada tabla)")
        modelo_col = gr.Dropdown(choices=[], label="MODELO en CLIENTE (una tabla por MODELO)")

    with gr.Row():
        validation_col_client = gr.Dropdown(choices=[], label="Columna de validación (CLIENTE)")
        validation_vals = gr.CheckboxGroup(choices=[], label="Valores que significan VALIDADO (CLIENTE)")

    status = gr.Markdown("Cargá archivos y presioná **Cargar columnas**.")

    # Step 1: read both files and fill every column dropdown.
    load_btn = gr.Button("Cargar columnas desde archivos")
    load_btn.click(
        load_columns,
        inputs=[cliente_file, linktrust_file],
        outputs=[id_cliente_col, id_linktrust_col, affiliate_col, modelo_col, validation_col_client, validation_vals, status]
    )

    # Step 2: list the distinct values of the chosen validation column.
    load_vals_btn = gr.Button("Cargar valores de validación (desde CLIENTE)")
    load_vals_btn.click(
        load_validation_values,
        inputs=[cliente_file, validation_col_client],
        outputs=[validation_vals, status]
    )

    # Step 3: run the cross and produce the preview + Excel download.
    run_btn = gr.Button("Generar tablas y Excel")

    preview_out = gr.Dataframe(label="Preview: primera tabla por MODELO", interactive=False)
    xls_file = gr.File(label="Descargar Excel (tablas_por_MODELO + raw_merge)", interactive=False)

    run_btn.click(
        compute,
        inputs=[cliente_file, linktrust_file,
                id_cliente_col, id_linktrust_col,
                affiliate_col, modelo_col,
                validation_col_client, validation_vals],
        outputs=[preview_out, xls_file, status]
    )
302
 
303
  if __name__ == "__main__":