Spaces:
Sleeping
Sleeping
| # app.py | |
| import os | |
| import json | |
| from io import BytesIO | |
| import tempfile | |
| import pandas as pd | |
| import gradio as gr | |
| # ================== BigQuery deps (opcionales) ================== | |
| try: | |
| from google.cloud import bigquery | |
| _HAS_BQ = True | |
| except Exception: | |
| _HAS_BQ = False | |
| try: | |
| import db_dtypes # noqa: F401 | |
| _HAS_DB_DTYPES = True | |
| except Exception: | |
| _HAS_DB_DTYPES = False | |
| APP_TITLE = "Cruce CLIENTE × MMP por EVENTO (archivo o BigQuery)" | |
| APP_DESC = """ | |
| ### Fuente 1: MMP | |
| **BigQuery (tabla única)**: `leadgenios-tech.afiliacion_datalake.daily_afiliate_datalake` | |
| Pasos BQ: | |
| 1) **Listar App IDs (BigQuery)** y seleccionar uno. | |
| 2) Ingresá **rango de fechas** (YYYY-MM-DD). | |
| 3) **Obtener columnas (schema)** → sugiere **columna temporal (event_time)**, **evento (event_name)**, **ID en MMP (appsflyer_id/customer_user_id/advertising_id)** y **App ID** (app_id). | |
| 4) **Listar eventos por rango** (usa App ID + fechas + columna de evento). | |
| 5) **Consultar y cargar MMP** → genera CSV temporal, preview y descarga. | |
| **Archivo**: subir archivo, detectar columnas y (opcional) **listar eventos** para filtrar. No hace falta App ID ni fechas. | |
| ### Fuente 2: CLIENTE | |
| 1) Subir **CLIENTE** → **Obtener mapeo de columnas**. | |
| 2) Elegir **ID en CLIENTE**. | |
| 3) **Columna de validación (opcional)** y **valores** (opcional). | |
| 4) **Columna de métrica (CLIENTE) (opcional)**. | |
| 5) **Columna de EVENTO (CLIENTE) (opcional)**. | |
| ### Final | |
| - Por cada **evento** (de MMP), **Cliente, MMP, %** con `% = Cliente / MMP × 100` (1 decimal). | |
| - Excel: **Hoja 1** tablas por evento; **Hoja 2** `raw_merge`. | |
| """ | |
| # ================== Helpers de lectura ================== | |
| def _read_excel(pathlike): | |
| return pd.read_excel(pathlike, engine="openpyxl") | |
| def _read_csv_with_fallbacks(pathlike): | |
| try: | |
| return pd.read_csv(pathlike, sep=None, engine="python", on_bad_lines="skip", encoding="utf-8") | |
| except Exception: | |
| return pd.read_csv(pathlike, sep=None, engine="python", on_bad_lines="skip", encoding="latin-1") | |
| def _safe_read(fileobj_or_path): | |
| if fileobj_or_path is None or (isinstance(fileobj_or_path, str) and not fileobj_or_path.strip()): | |
| return None | |
| path = fileobj_or_path.name if hasattr(fileobj_or_path, "name") else fileobj_or_path | |
| ext = os.path.splitext(str(path))[-1].lower() | |
| try: | |
| if ext in [".xlsx", ".xlsm", ".xltx", ".xltm"]: | |
| return _read_excel(path) | |
| elif ext == ".csv" or ext == "": | |
| try: | |
| return _read_excel(path) | |
| except Exception: | |
| return _read_csv_with_fallbacks(path) | |
| else: | |
| try: | |
| return _read_excel(path) | |
| except Exception: | |
| return _read_csv_with_fallbacks(path) | |
| except Exception as e: | |
| raise RuntimeError(f"No se pudo leer '{os.path.basename(str(path))}': {e}") | |
| def _guess(cols, candidates): | |
| lower_map = {c.lower(): c for c in cols} | |
| for cand in candidates: | |
| if cand.lower() in lower_map: | |
| return lower_map[cand.lower()] | |
| return cols[0] if cols else None | |
| def _guess_optional(cols, candidates): | |
| lower_map = {c.lower(): c for c in cols} | |
| for cand in candidates: | |
| if cand.lower() in lower_map: | |
| return lower_map[cand.lower()] | |
| return None | |
| def _safe_file_output(path): | |
| if path and isinstance(path, str) and os.path.isfile(path): | |
| return path | |
| return None | |
| # ================== Normalización de IDs ================== | |
| def normalize_id_series(s: pd.Series) -> pd.Series: | |
| """ | |
| Normaliza IDs para merges: | |
| - Convierte a string, quita espacios. | |
| - Si es float 'entero' (123.0) lo transforma a '123'. | |
| - Deja NaN como NaN. | |
| """ | |
| def _norm(v): | |
| if pd.isna(v): | |
| return pd.NA | |
| # floats que representan enteros → sin .0 | |
| if isinstance(v, float): | |
| if v.is_integer(): | |
| return str(int(v)) | |
| else: | |
| # si es float no entero, lo pasamos a string tal cual | |
| return str(v) | |
| # todo lo demás a str | |
| vs = str(v).strip() | |
| # si quedó como "nan" literal, considerar NA | |
| if vs.lower() in ("nan", "none", ""): | |
| return pd.NA | |
| return vs | |
| out = s.map(_norm) | |
| # asegura dtype string que permite NA | |
| return out.astype("string") | |
| # ================== BigQuery helpers ================== | |
| BQ_PROJECT = "leadgenios-tech" | |
| BQ_TABLE_FQN = "leadgenios-tech.afiliacion_datalake.daily_afiliate_datalake" | |
| def _need_bq_client(): | |
| """ | |
| Producción (Hugging Face): usa el secret GCP_SA_JSON (contenido del JSON de la service account). | |
| Local: si no hay GCP_SA_JSON, usa GOOGLE_APPLICATION_CREDENTIALS como fallback. | |
| """ | |
| if not _HAS_BQ: | |
| raise RuntimeError("Falta dependencia 'google-cloud-bigquery'.") | |
| sa_json = os.getenv("GCP_SA_JSON") | |
| if sa_json: | |
| try: | |
| from google.oauth2 import service_account | |
| except Exception as e: | |
| raise RuntimeError(f"No se pudo importar google.oauth2.service_account: {e}") | |
| try: | |
| info = json.loads(sa_json) | |
| creds = service_account.Credentials.from_service_account_info(info) | |
| project = info.get("project_id") or BQ_PROJECT | |
| return bigquery.Client(project=project, credentials=creds) | |
| except Exception as e: | |
| raise RuntimeError(f"GCP_SA_JSON inválido o no utilizable: {e}") | |
| if os.getenv("GOOGLE_APPLICATION_CREDENTIALS"): | |
| try: | |
| return bigquery.Client(project=BQ_PROJECT) | |
| except Exception as e: | |
| raise RuntimeError(f"Error creando cliente BQ con GOOGLE_APPLICATION_CREDENTIALS: {e}") | |
| raise RuntimeError("No hay credenciales: seteá GCP_SA_JSON (prod) o GOOGLE_APPLICATION_CREDENTIALS (local).") | |
| def bq_get_columns_fixed(): | |
| client = _need_bq_client() | |
| table = client.get_table(BQ_TABLE_FQN) | |
| cols = [sch.name for sch in table.schema] | |
| time_guess = _guess(cols, ["event_time", "install_time", "attributed_touch_time"]) | |
| event_guess = _guess(cols, ["event_name"]) | |
| # IDs típicos | |
| id_guess = _guess(cols, ["appsflyer_id", "customer_user_id", "advertising_id"]) | |
| appid_guess = _guess(cols, ["app_id"]) | |
| return cols, time_guess, event_guess, id_guess, appid_guess | |
| def bq_list_app_ids(limit=500): | |
| """Lista App IDs de la tabla BQ para el dropdown.""" | |
| client = _need_bq_client() | |
| sql = f""" | |
| SELECT DISTINCT CAST(app_id AS STRING) AS app_id | |
| FROM `{BQ_TABLE_FQN}` | |
| WHERE app_id IS NOT NULL AND app_id <> '' | |
| ORDER BY app_id | |
| LIMIT {int(limit)} | |
| """ | |
| df = client.query(sql).result().to_dataframe(create_bqstorage_client=False) | |
| vals = sorted(df["app_id"].dropna().astype(str).tolist()) | |
| return vals, f"{len(vals)} App IDs encontrados." | |
| def bq_list_events_fixed(event_col, time_col, app_id_col, app_id_value, start_date, end_date, limit=500): | |
| client = _need_bq_client() | |
| cols, t_guess, e_guess, _, a_guess = bq_get_columns_fixed() | |
| event_col = event_col or e_guess | |
| time_col = time_col or t_guess | |
| app_id_col = app_id_col or a_guess | |
| if not (event_col and time_col and app_id_col and app_id_value and start_date and end_date): | |
| return [], "Completá App ID, fechas y columnas (evento/fecha/App ID)." | |
| sql = f""" | |
| SELECT DISTINCT CAST({event_col} AS STRING) AS ev | |
| FROM `{BQ_TABLE_FQN}` | |
| WHERE DATE({time_col}) BETWEEN @sd AND @ed | |
| AND CAST({app_id_col} AS STRING) = @app_id | |
| ORDER BY ev | |
| LIMIT {int(limit)} | |
| """ | |
| job = client.query(sql, job_config=bigquery.QueryJobConfig( | |
| query_parameters=[ | |
| bigquery.ScalarQueryParameter("sd", "DATE", str(start_date)), | |
| bigquery.ScalarQueryParameter("ed", "DATE", str(end_date)), | |
| bigquery.ScalarQueryParameter("app_id", "STRING", str(app_id_value).strip()), | |
| ] | |
| )) | |
| df = job.result().to_dataframe(create_bqstorage_client=False) | |
| return sorted(df["ev"].dropna().astype(str).tolist()), f"{len(df)} eventos encontrados." | |
| def bq_query_to_temp_fixed(event_col, time_col, app_id_col, app_id_value, start_date, end_date, selected_events): | |
| client = _need_bq_client() | |
| cols, t_guess, e_guess, _, a_guess = bq_get_columns_fixed() | |
| event_col = event_col or e_guess | |
| time_col = time_col or t_guess | |
| app_id_col = app_id_col or a_guess | |
| if not (event_col and time_col and app_id_col and app_id_value and start_date and end_date): | |
| raise RuntimeError("Completá App ID, fechas y columnas (evento/fecha/App ID).") | |
| params = [ | |
| bigquery.ScalarQueryParameter("sd", "DATE", str(start_date)), | |
| bigquery.ScalarQueryParameter("ed", "DATE", str(end_date)), | |
| bigquery.ScalarQueryParameter("app_id", "STRING", str(app_id_value).strip()), | |
| ] | |
| ev_filter = "" | |
| if selected_events: | |
| params.append(bigquery.ArrayQueryParameter("events", "STRING", [str(v) for v in selected_events])) | |
| ev_filter = f"AND CAST({event_col} AS STRING) IN UNNEST(@events)" | |
| sql = f""" | |
| SELECT * | |
| FROM `{BQ_TABLE_FQN}` | |
| WHERE DATE({time_col}) BETWEEN @sd AND @ed | |
| AND CAST({app_id_col} AS STRING) = @app_id | |
| {ev_filter} | |
| """ | |
| job = client.query(sql, job_config=bigquery.QueryJobConfig(query_parameters=params)) | |
| df = job.result().to_dataframe(create_bqstorage_client=False) | |
| tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv") | |
| df.to_csv(tmp.name, index=False) | |
| return tmp.name, df.head(20).to_dict(orient="records") | |
| # ================== MMP por archivo ================== | |
| def file_mmp_schema(file): | |
| try: | |
| df = _safe_read(file) | |
| except Exception as e: | |
| return (gr.update(), gr.update(), gr.update(), gr.update(), f"Error al leer MMP: {e}") | |
| cols = list(df.columns) | |
| event_guess = _guess(cols, ["event_name", "Event Name", "evento", "EVENTO", "Event"]) | |
| id_guess = _guess(cols, ["appsflyer_id", "customer_user_id", "advertising_id", | |
| "Advertising ID", "adid", "idfa", "ID", "Id"]) | |
| time_guess = _guess_optional(cols, ["event_time", "install_time", "attributed_touch_time", | |
| "event_date", "timestamp", "date", "Date", "Event Time"]) | |
| appid_guess = _guess_optional(cols, ["app_id", "bundle_id", "app", "appId", "App ID"]) | |
| return (gr.update(choices=cols, value=time_guess), | |
| gr.update(choices=cols, value=event_guess), | |
| gr.update(choices=cols, value=id_guess), | |
| gr.update(choices=cols, value=appid_guess), | |
| "Columnas detectadas (archivo MMP).") | |
| def file_mmp_list_events_simple(file, event_col): | |
| try: | |
| df = _safe_read(file) | |
| except Exception as e: | |
| return gr.update(choices=[], value=[]), f"Error al leer MMP: {e}" | |
| if not event_col or event_col not in df.columns: | |
| return gr.update(choices=[], value=[]), "Elegí la columna de evento (archivo MMP)." | |
| vals = sorted(pd.Series(df[event_col].astype(str).unique()).dropna().tolist()) | |
| return gr.update(choices=vals, value=vals), f"{len(vals)} eventos detectados (archivo MMP)." | |
| def file_prepare(src_file, ev_col, selected_events): | |
| try: | |
| df = _safe_read(src_file) | |
| if selected_events: | |
| df = df[df[ev_col].astype(str).isin([str(v) for v in selected_events])] | |
| tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv") | |
| df.to_csv(tmp.name, index=False) | |
| return tmp.name, df.head(20) | |
| except Exception as e: | |
| raise RuntimeError(f"Error al preparar MMP (archivo): {e}") | |
| # ================== CLIENTE helpers ================== | |
| def cliente_map_columns(cliente_file): | |
| try: | |
| df = _safe_read(cliente_file) | |
| except Exception as e: | |
| return (gr.update(), gr.update(), gr.update(), gr.update(), "Error al leer CLIENTE: "+str(e)) | |
| cols = list(df.columns) | |
| id_guess = _guess(cols, [ | |
| "appsflyer_id","customer_user_id","advertising_id", | |
| "Advertising ID","user_id","User Id", | |
| "transaction_id","Transaction Id","ID","Id","rut" | |
| ]) | |
| valid_guess = None | |
| metric_guess = _guess_optional(cols, ["revenue","amount","value","ticket","Event Revenue","importe","monto"]) | |
| event_guess = _guess_optional(cols, ["event_name","Event Name","evento","EVENTO","Event"]) | |
| return (gr.update(choices=cols, value=id_guess), | |
| gr.update(choices=cols, value=valid_guess), | |
| gr.update(choices=cols, value=metric_guess), | |
| gr.update(choices=cols, value=event_guess), | |
| "Columnas de CLIENTE listas.") | |
| def load_validation_values(cliente_file, validation_col): | |
| try: | |
| df_c = _safe_read(cliente_file) if cliente_file else None | |
| except Exception as e: | |
| return gr.update(choices=[], value=[]), f"Error al leer CLIENTE: {e}" | |
| if df_c is None or not validation_col or validation_col not in df_c.columns: | |
| return gr.update(choices=[], value=[]), "Omitido: sin columna de validación (se usará cruce de IDs)." | |
| vals = sorted(pd.Series(df_c[validation_col].astype(str).unique()).dropna().tolist()) | |
| return gr.update(choices=vals, value=[]), f"{len(vals)} valores posibles de validación." | |
| # ================== Compute ================== | |
| from io import BytesIO | |
| import tempfile | |
| import re | |
| import pandas as pd | |
| # --- helpers --------------------------------------------------------- | |
| def normalize_id_series(s: pd.Series) -> pd.Series: | |
| """ | |
| Normalize IDs for robust equality: | |
| - cast to string | |
| - strip whitespace | |
| - lowercase | |
| - convert 'nan'/'none' to '' | |
| """ | |
| x = s.astype(str).str.strip().str.lower() | |
| x = x.replace({"nan": "", "none": ""}) | |
| return x.fillna("") | |
| def _autodetect_validation_col(cols): | |
| """Try to find a likely validation column if user didn't pick one.""" | |
| candidates = [ | |
| "valid", "valido", "válido", "is_valid", "usable", "status", | |
| "approved", "aprobado", "ok", "flag", "validated", "validation" | |
| ] | |
| lower = {c.lower(): c for c in cols} | |
| for cand in candidates: | |
| if cand in lower: | |
| return lower[cand] | |
| return None | |
| def _default_truthy_set(): | |
| # NOTE: all lowercased string checks | |
| return { | |
| "true", "1", "yes", "y", "ok", "si", "sí", | |
| "valid", "valido", "válido", "usable", "approved", "aprobado", | |
| "x", "t" | |
| } | |
| # --- main ------------------------------------------------------------ | |
| def compute(cliente_file, mmp_final_path, | |
| id_cliente_col, id_mmp_col, | |
| validation_col_client, validation_values, # optional | |
| metric_col_client, # ignored in this logic | |
| client_event_col, # ignored (denominator is MMP) | |
| mmp_event_col, # required | |
| selected_events_mmp): | |
| if not mmp_final_path: | |
| return None, None, "Primero completá la fuente MMP." | |
| if not cliente_file: | |
| return None, None, "Subí CLIENTE y mapeá las columnas." | |
| # Read sources | |
| try: | |
| df_c = _safe_read(cliente_file) | |
| df_m = _safe_read(mmp_final_path) | |
| except Exception as e: | |
| return None, None, f"Error al leer fuentes: {e}" | |
| # Required columns present? | |
| for name, col, df in [ | |
| ("ID en CLIENTE", id_cliente_col, df_c), | |
| ("ID en MMP", id_mmp_col, df_m), | |
| ("EVENTO en MMP", mmp_event_col, df_m), | |
| ]: | |
| if not col or col not in df.columns: | |
| return None, None, f"Columna inválida: {name} = {col}" | |
| # Normalize IDs | |
| try: | |
| ids_cli_norm = normalize_id_series(df_c[id_cliente_col]) | |
| ids_mmp_norm = normalize_id_series(df_m[id_mmp_col]) | |
| except Exception as e: | |
| return None, None, f"Error normalizando IDs: {e}" | |
| # If user didn't select a validation col, try to autodetect one | |
| if not validation_col_client or validation_col_client not in df_c.columns: | |
| auto_val_col = _autodetect_validation_col(df_c.columns) | |
| validation_col_client = auto_val_col if auto_val_col else None | |
| # If a validation column exists but user didn't pick values, use default “truthy” set | |
| truthy = _default_truthy_set() | |
| use_validation = validation_col_client is not None | |
| if use_validation: | |
| cand_vals = validation_values or [] | |
| if cand_vals: | |
| truthy = {str(v).strip().lower() for v in cand_vals} | |
| # Build set of CLIENTE IDs that are considered valid | |
| try: | |
| if use_validation: | |
| val_series = df_c[validation_col_client].astype(str).str.strip().str.lower() | |
| mask_valid = val_series.isin(truthy) | |
| valid_client_ids = set(ids_cli_norm[mask_valid][ids_cli_norm[mask_valid] != ""]) | |
| else: | |
| # No validation column → any presence in CLIENTE counts as valid | |
| valid_client_ids = set(ids_cli_norm[ids_cli_norm != ""]) | |
| except Exception as e: | |
| return None, None, f"Error aplicando validación en CLIENTE: {e}" | |
| # Create VALIDO flag in MMP: True if MMP id ∈ valid_client_ids | |
| df_m = df_m.copy() | |
| df_m["VALIDO"] = ids_mmp_norm.isin(valid_client_ids) | |
| # Events to process (if none selected, use all present in MMP) | |
| if not selected_events_mmp: | |
| try: | |
| selected_events_mmp = ( | |
| df_m[mmp_event_col].astype(str).dropna().unique().tolist() | |
| ) | |
| selected_events_mmp = sorted(map(str, selected_events_mmp)) | |
| except Exception as e: | |
| return None, None, f"Error obteniendo lista de eventos MMP: {e}" | |
| # B: total rows in MMP per event | |
| mmp_counts = df_m.groupby(df_m[mmp_event_col].astype(str), dropna=False).size() | |
| # A: total rows in MMP per event with VALIDO=True | |
| cliente_counts = ( | |
| df_m[df_m["VALIDO"]] | |
| .groupby(df_m.loc[df_m["VALIDO"], mmp_event_col].astype(str), dropna=False) | |
| .size() | |
| ) | |
| # Build event tables | |
| tables_by_event = {} | |
| for ev in selected_events_mmp: | |
| ev_str = str(ev) | |
| B = int(mmp_counts.get(ev_str, 0)) | |
| A = int(cliente_counts.get(ev_str, 0)) | |
| pct = round((A / B * 100), 1) if B else 0.0 | |
| tables_by_event[ev] = pd.DataFrame([{"Cliente": A, "MMP": B, "%": pct}]) | |
| # ===== Excel output ===== | |
| xls_bytes = BytesIO() | |
| with pd.ExcelWriter(xls_bytes, engine="xlsxwriter") as writer: | |
| # Sheet 1: tables by EVENT | |
| sheet_name = "tablas_por_EVENTO" | |
| start_row = 0 | |
| for ev, table_df in tables_by_event.items(): | |
| pd.DataFrame([[ev]]).to_excel( | |
| writer, sheet_name=sheet_name, startrow=start_row, | |
| index=False, header=False | |
| ) | |
| start_row += 1 | |
| table_df.to_excel( | |
| writer, sheet_name=sheet_name, startrow=start_row, index=False | |
| ) | |
| start_row += len(table_df) + 2 | |
| # Sheet 2: raw MMP + only VALIDO (explicitly drop the ID columns) | |
| cols_front = ["VALIDO"] # first column | |
| # Keep event column visible & useful | |
| if mmp_event_col in df_m.columns: | |
| cols_front.insert(0, mmp_event_col) | |
| # Exclude ID & any helper columns from raw output | |
| drop_cols = {id_mmp_col, "_id_norm_mmp"} # (we never created _id_norm_mmp here) | |
| cols_rest = [c for c in df_m.columns if c not in set(cols_front) | drop_cols] | |
| df_m[cols_front + cols_rest].to_excel(writer, sheet_name="raw_mmp", index=False) | |
| xls_bytes.seek(0) | |
| tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") | |
| tmp.write(xls_bytes.getvalue()); tmp.flush(); tmp.close() | |
| download_path = tmp.name | |
| # Preview: first event table | |
| preview = None | |
| if tables_by_event: | |
| first_ev = list(tables_by_event.keys())[0] | |
| preview = tables_by_event[first_ev] | |
| return preview, download_path, "Listo ✅" | |
| # ================== UI ================== | |
| with gr.Blocks(title=APP_TITLE) as demo: | |
| gr.Markdown(f"# {APP_TITLE}\n\n{APP_DESC}") | |
| # ===== MMP: Selección de fuente ===== | |
| gr.Markdown("## Fuente 1: MMP") | |
| mmp_source = gr.Radio(choices=["Subir archivo", "BigQuery"], value="Subir archivo", label="Fuente de MMP") | |
| # --- BigQuery Panel --- | |
| with gr.Column(visible=False) as bq_panel: | |
| gr.Markdown("**Paso MMP-BQ 1**: App ID y Fechas") | |
| with gr.Row(): | |
| bq_app_id_value = gr.Dropdown(choices=[], value=None, label="App ID (BigQuery)") | |
| list_app_ids_btn = gr.Button("Listar App IDs (BigQuery)") | |
| list_app_ids_msg = gr.Markdown() | |
| with gr.Row(): | |
| bq_start = gr.Textbox(label="Fecha desde (YYYY-MM-DD)", placeholder="YYYY-MM-DD") | |
| bq_end = gr.Textbox(label="Fecha hasta (YYYY-MM-DD)", placeholder="YYYY-MM-DD") | |
| gr.Markdown("**Paso MMP-BQ 2**: Obtener columnas (schema)") | |
| with gr.Row(): | |
| bq_time_col = gr.Dropdown(choices=[], value=None, label="Columna temporal (ej: event_time)") | |
| mmp_event_col_bq = gr.Dropdown(choices=[], value=None, label="Columna de EVENTO en MMP (ej: event_name)") | |
| id_mmp_col_bq = gr.Dropdown(choices=[], value=None, label="ID en MMP (para cruce)") | |
| bq_app_id_col = gr.Dropdown(choices=[], value=None, label="Columna App ID (ej: app_id)") | |
| bq_schema_btn = gr.Button("Obtener columnas (schema)") | |
| bq_schema_msg = gr.Markdown() | |
| gr.Markdown("**Paso MMP-BQ 3**: Listar eventos por rango") | |
| mmp_events_bq = gr.CheckboxGroup(choices=[], value=[], label="Eventos detectados (BigQuery)") | |
| bq_events_btn = gr.Button("Listar eventos por rango (BigQuery)") | |
| bq_events_msg = gr.Markdown() | |
| gr.Markdown("**Paso MMP-BQ 4**: Consultar y cargar MMP") | |
| mmp_preview_bq = gr.Dataframe(label="Preview MMP (BQ)", interactive=False) | |
| mmp_bq_download = gr.File(label="Descargar MMP (resultado de BigQuery)", interactive=False) | |
| mmp_final_path_bq = gr.Textbox(label="Ruta MMP final (temporal BQ)", visible=False) | |
| bq_query_btn = gr.Button("Consultar y cargar MMP (BigQuery)") | |
| bq_query_msg = gr.Markdown() | |
| # --- File Panel --- | |
| with gr.Column(visible=True) as file_panel: | |
| gr.Markdown("**Paso MMP-Archivo 1**: Subir y detectar columnas") | |
| mmp_file = gr.File(label="Subí MMP.xlsx/csv", file_types=[".xlsx", ".csv"]) | |
| with gr.Row(): | |
| file_time_col = gr.Dropdown(choices=[], value=None, label="Columna temporal (archivo)") | |
| mmp_event_col_file = gr.Dropdown(choices=[], value=None, label="Columna de EVENTO (archivo)") | |
| id_mmp_col_file = gr.Dropdown(choices=[], value=None, label="ID en MMP (archivo)") | |
| file_app_id_col = gr.Dropdown(choices=[], value=None, label="Columna App ID (archivo)") | |
| file_schema_btn = gr.Button("Obtener columnas (archivo)") | |
| file_schema_msg = gr.Markdown() | |
| gr.Markdown("**Paso MMP-Archivo 2**: (opcional) Listar eventos del archivo y filtrar") | |
| mmp_events_file = gr.CheckboxGroup(choices=[], value=[], label="Eventos detectados (archivo)") | |
| file_events_btn = gr.Button("Listar eventos (archivo)") | |
| file_events_msg = gr.Markdown() | |
| gr.Markdown("**Paso MMP-Archivo 3**: Cargar & previsualizar") | |
| mmp_preview_file = gr.Dataframe(label="Preview MMP (archivo)", interactive=False) | |
| mmp_file_download = gr.File(label="Descargar MMP (archivo filtrado)", interactive=False) | |
| mmp_final_path_file = gr.Textbox(label="Ruta MMP final (temporal archivo)", visible=False) | |
| file_query_btn = gr.Button("Cargar MMP (archivo)") | |
| file_query_msg = gr.Markdown() | |
| # Toggle panels | |
| def _toggle_source(src): | |
| return (gr.update(visible=(src=="Subir archivo")), gr.update(visible=(src=="BigQuery"))) | |
| mmp_source.change(_toggle_source, inputs=[mmp_source], outputs=[file_panel, bq_panel]) | |
| # BQ: listar App IDs | |
| def _bq_list_app_ids(): | |
| try: | |
| vals, msg = bq_list_app_ids() | |
| return gr.update(choices=vals, value=(vals[0] if vals else None)), msg | |
| except Exception as e: | |
| return gr.update(choices=[], value=None), f"Error listando App IDs: {e}" | |
| list_app_ids_btn.click(_bq_list_app_ids, inputs=[], outputs=[bq_app_id_value, list_app_ids_msg]) | |
| # BQ: schema | |
| def _bq_schema_fixed(): | |
| try: | |
| cols, t_guess, e_guess, id_guess, appid_guess = bq_get_columns_fixed() | |
| return (gr.update(choices=cols, value=t_guess), | |
| gr.update(choices=cols, value=e_guess), | |
| gr.update(choices=cols, value=id_guess), | |
| gr.update(choices=cols, value=appid_guess), | |
| "Schema cargado (tabla fija BQ).") | |
| except Exception as e: | |
| return (gr.update(choices=[], value=None), | |
| gr.update(choices=[], value=None), | |
| gr.update(choices=[], value=None), | |
| gr.update(choices=[], value=None), | |
| f"Error schema: {e}") | |
| bq_schema_btn.click(_bq_schema_fixed, inputs=[], outputs=[bq_time_col, mmp_event_col_bq, id_mmp_col_bq, bq_app_id_col, bq_schema_msg]) | |
| # BQ: listar eventos | |
| def _bq_list_events_fixed(ev_col, t_col, app_col, app_val, ds, de): | |
| try: | |
| vals, msg = bq_list_events_fixed(ev_col, t_col, app_col, app_val, ds, de) | |
| return gr.update(choices=vals, value=vals), msg | |
| except Exception as e: | |
| return gr.update(choices=[], value=[]), f"Error al listar eventos: {e}" | |
| bq_events_btn.click(_bq_list_events_fixed, | |
| inputs=[mmp_event_col_bq, bq_time_col, bq_app_id_col, bq_app_id_value, bq_start, bq_end], | |
| outputs=[mmp_events_bq, bq_events_msg]) | |
| # BQ: query final | |
| def _bq_query_fixed(ev_col, t_col, app_col, app_val, ds, de, evs): | |
| try: | |
| path, preview_rows = bq_query_to_temp_fixed(ev_col, t_col, app_col, app_val, ds, de, evs or []) | |
| preview_df = pd.DataFrame(preview_rows) | |
| file_path = _safe_file_output(path) | |
| return preview_df, file_path, path, "OK: MMP desde BigQuery cargado." | |
| except Exception as e: | |
| return gr.update(), None, "", f"Error consulta BQ: {e}" | |
| bq_query_btn.click(_bq_query_fixed, | |
| inputs=[mmp_event_col_bq, bq_time_col, bq_app_id_col, bq_app_id_value, bq_start, bq_end, mmp_events_bq], | |
| outputs=[mmp_preview_bq, mmp_bq_download, mmp_final_path_bq, bq_query_msg]) | |
| # File: schema & events | |
| file_schema_btn.click(file_mmp_schema, | |
| inputs=[mmp_file], | |
| outputs=[file_time_col, mmp_event_col_file, id_mmp_col_file, file_app_id_col, file_schema_msg]) | |
| file_events_btn.click(file_mmp_list_events_simple, | |
| inputs=[mmp_file, mmp_event_col_file], | |
| outputs=[mmp_events_file, file_events_msg]) | |
| # File: final | |
| def _file_query(src_file, ev_col, evs): | |
| try: | |
| path, preview = file_prepare(src_file, ev_col, evs or []) | |
| file_path = _safe_file_output(path) | |
| return preview, file_path, path, "OK: MMP desde archivo cargado." | |
| except Exception as e: | |
| return gr.update(), None, "", f"Error archivo MMP: {e}" | |
| file_query_btn.click(_file_query, | |
| inputs=[mmp_file, mmp_event_col_file, mmp_events_file], | |
| outputs=[mmp_preview_file, mmp_file_download, mmp_final_path_file, file_query_msg]) | |
| # ===== CLIENTE ===== | |
| gr.Markdown("## Fuente 2: CLIENTE") | |
| with gr.Row(): | |
| cliente_file = gr.File(label="CLIENTE.xlsx/csv", file_types=[".xlsx", ".csv"]) | |
| map_cliente_btn = gr.Button("Obtener mapeo de columnas (CLIENTE)") | |
| with gr.Row(): | |
| id_cliente_col = gr.Dropdown(choices=[], value=None, label="ID en CLIENTE (para cruce)") | |
| validation_col_client = gr.Dropdown(choices=[], value=None, label="Columna de validación (CLIENTE) — opcional") | |
| with gr.Row(): | |
| metric_col_client = gr.Dropdown(choices=[], value=None, label="Columna de métrica (CLIENTE) — opcional") | |
| client_event_col = gr.Dropdown(choices=[], value=None, label="Columna de EVENTO (CLIENTE) — opcional") | |
| cliente_msg = gr.Markdown() | |
| map_cliente_btn.click(cliente_map_columns, | |
| inputs=[cliente_file], | |
| outputs=[id_cliente_col, validation_col_client, metric_col_client, client_event_col, cliente_msg]) | |
| gr.Markdown("### Opcional: valores de validación") | |
| valid_vals = gr.CheckboxGroup(choices=[], value=[], label="Valores que significan VALIDADO (CLIENTE)") | |
| load_valid_btn = gr.Button("Cargar valores de validación (CLIENTE)") | |
| valid_msg = gr.Markdown() | |
| load_valid_btn.click(load_validation_values, | |
| inputs=[cliente_file, validation_col_client], | |
| outputs=[valid_vals, valid_msg]) | |
| # ===== Generar ===== | |
| gr.Markdown("## Generar tablas y Excel") | |
| run_btn = gr.Button("Generar tablas") | |
| preview_out = gr.Dataframe(label="Preview: primera tabla por EVENTO", interactive=False) | |
| xls_file = gr.File(label="Descargar Excel (tablas_por_EVENTO + raw_merge)", interactive=False) | |
| gen_msg = gr.Markdown() | |
| def _compute_router(cliente, | |
| source, | |
| mmp_final_file_panel, mmp_final_bq_panel, | |
| id_cli, id_mmp_file, id_mmp_bq, | |
| val_col, val_vals, | |
| metric_cli, cli_evt, | |
| mmp_evt_file, mmp_evt_bq, | |
| events_file, events_bq): | |
| if source == "Subir archivo": | |
| mmp_path = mmp_final_file_panel | |
| id_mmp = id_mmp_file | |
| mmp_evt_col = mmp_evt_file | |
| selected_events = events_file | |
| else: | |
| mmp_path = mmp_final_bq_panel | |
| id_mmp = id_mmp_bq | |
| mmp_evt_col = mmp_evt_bq | |
| selected_events = events_bq | |
| if not id_cli: | |
| return None, None, "Elegí el ID en CLIENTE." | |
| if not id_mmp or not mmp_evt_col: | |
| return None, None, "Elegí ID y EVENTO en MMP." | |
| return compute(cliente, mmp_path, | |
| id_cli, id_mmp, | |
| val_col, val_vals, | |
| metric_cli, | |
| cli_evt, # puede ser None | |
| mmp_evt_col, # requerido | |
| selected_events) | |
| run_btn.click( | |
| _compute_router, | |
| inputs=[cliente_file, | |
| mmp_source, | |
| mmp_final_path_file, mmp_final_path_bq, | |
| id_cliente_col, id_mmp_col_file, id_mmp_col_bq, | |
| validation_col_client, valid_vals, | |
| metric_col_client, client_event_col, | |
| mmp_event_col_file, mmp_event_col_bq, | |
| mmp_events_file, mmp_events_bq], | |
| outputs=[preview_out, xls_file, gen_msg] | |
| ) | |
| if __name__ == "__main__": | |
| gr.close_all() | |
| demo.launch() |