validation-mobile

Sleeping

File size: 31,102 Bytes

# app.py
import os
import json
from io import BytesIO
import tempfile
import pandas as pd
import gradio as gr

# ================== BigQuery deps (opcionales) ==================
try:
    from google.cloud import bigquery
    _HAS_BQ = True
except Exception:
    _HAS_BQ = False

try:
    import db_dtypes  # noqa: F401
    _HAS_DB_DTYPES = True
except Exception:
    _HAS_DB_DTYPES = False

APP_TITLE = "Cruce CLIENTE × MMP por EVENTO (archivo o BigQuery)"
APP_DESC = """
### Fuente 1: MMP
**BigQuery (tabla única)**: `leadgenios-tech.afiliacion_datalake.daily_afiliate_datalake`

Pasos BQ:
1) **Listar App IDs (BigQuery)** y seleccionar uno.  
2) Ingresá **rango de fechas** (YYYY-MM-DD).  
3) **Obtener columnas (schema)** → sugiere **columna temporal (event_time)**, **evento (event_name)**, **ID en MMP (appsflyer_id/customer_user_id/advertising_id)** y **App ID** (app_id).  
4) **Listar eventos por rango** (usa App ID + fechas + columna de evento).  
5) **Consultar y cargar MMP** → genera CSV temporal, preview y descarga.

**Archivo**: subir archivo, detectar columnas y (opcional) **listar eventos** para filtrar. No hace falta App ID ni fechas.

### Fuente 2: CLIENTE
1) Subir **CLIENTE** → **Obtener mapeo de columnas**.  
2) Elegir **ID en CLIENTE**.  
3) **Columna de validación (opcional)** y **valores** (opcional).  
4) **Columna de métrica (CLIENTE) (opcional)**.  
5) **Columna de EVENTO (CLIENTE) (opcional)**.

### Final
- Por cada **evento** (de MMP), **Cliente, MMP, %** con `% = Cliente / MMP × 100` (1 decimal).  
- Excel: **Hoja 1** tablas por evento; **Hoja 2** `raw_merge`.
"""

# ================== Helpers de lectura ==================
def _read_excel(pathlike):
    return pd.read_excel(pathlike, engine="openpyxl")

def _read_csv_with_fallbacks(pathlike):
    try:
        return pd.read_csv(pathlike, sep=None, engine="python", on_bad_lines="skip", encoding="utf-8")
    except Exception:
        return pd.read_csv(pathlike, sep=None, engine="python", on_bad_lines="skip", encoding="latin-1")

def _safe_read(fileobj_or_path):
    if fileobj_or_path is None or (isinstance(fileobj_or_path, str) and not fileobj_or_path.strip()):
        return None
    path = fileobj_or_path.name if hasattr(fileobj_or_path, "name") else fileobj_or_path
    ext = os.path.splitext(str(path))[-1].lower()
    try:
        if ext in [".xlsx", ".xlsm", ".xltx", ".xltm"]:
            return _read_excel(path)
        elif ext == ".csv" or ext == "":
            try:
                return _read_excel(path)
            except Exception:
                return _read_csv_with_fallbacks(path)
        else:
            try:
                return _read_excel(path)
            except Exception:
                return _read_csv_with_fallbacks(path)
    except Exception as e:
        raise RuntimeError(f"No se pudo leer '{os.path.basename(str(path))}': {e}")

def _guess(cols, candidates):
    lower_map = {c.lower(): c for c in cols}
    for cand in candidates:
        if cand.lower() in lower_map:
            return lower_map[cand.lower()]
    return cols[0] if cols else None

def _guess_optional(cols, candidates):
    lower_map = {c.lower(): c for c in cols}
    for cand in candidates:
        if cand.lower() in lower_map:
            return lower_map[cand.lower()]
    return None

def _safe_file_output(path):
    if path and isinstance(path, str) and os.path.isfile(path):
        return path
    return None

# ================== Normalización de IDs ==================
def normalize_id_series(s: pd.Series) -> pd.Series:
    """
    Normaliza IDs para merges:
    - Convierte a string, quita espacios.
    - Si es float 'entero' (123.0) lo transforma a '123'.
    - Deja NaN como NaN.
    """
    def _norm(v):
        if pd.isna(v):
            return pd.NA
        # floats que representan enteros → sin .0
        if isinstance(v, float):
            if v.is_integer():
                return str(int(v))
            else:
                # si es float no entero, lo pasamos a string tal cual
                return str(v)
        # todo lo demás a str
        vs = str(v).strip()
        # si quedó como "nan" literal, considerar NA
        if vs.lower() in ("nan", "none", ""):
            return pd.NA
        return vs
    out = s.map(_norm)
    # asegura dtype string que permite NA
    return out.astype("string")

# ================== BigQuery helpers ==================
BQ_PROJECT = "leadgenios-tech"
BQ_TABLE_FQN = "leadgenios-tech.afiliacion_datalake.daily_afiliate_datalake"

def _need_bq_client():
    """
    Producción (Hugging Face): usa el secret GCP_SA_JSON (contenido del JSON de la service account).
    Local: si no hay GCP_SA_JSON, usa GOOGLE_APPLICATION_CREDENTIALS como fallback.
    """
    if not _HAS_BQ:
        raise RuntimeError("Falta dependencia 'google-cloud-bigquery'.")

    sa_json = os.getenv("GCP_SA_JSON")
    if sa_json:
        try:
            from google.oauth2 import service_account
        except Exception as e:
            raise RuntimeError(f"No se pudo importar google.oauth2.service_account: {e}")
        try:
            info = json.loads(sa_json)
            creds = service_account.Credentials.from_service_account_info(info)
            project = info.get("project_id") or BQ_PROJECT
            return bigquery.Client(project=project, credentials=creds)
        except Exception as e:
            raise RuntimeError(f"GCP_SA_JSON inválido o no utilizable: {e}")

    if os.getenv("GOOGLE_APPLICATION_CREDENTIALS"):
        try:
            return bigquery.Client(project=BQ_PROJECT)
        except Exception as e:
            raise RuntimeError(f"Error creando cliente BQ con GOOGLE_APPLICATION_CREDENTIALS: {e}")

    raise RuntimeError("No hay credenciales: seteá GCP_SA_JSON (prod) o GOOGLE_APPLICATION_CREDENTIALS (local).")

def bq_get_columns_fixed():
    client = _need_bq_client()
    table = client.get_table(BQ_TABLE_FQN)
    cols = [sch.name for sch in table.schema]

    time_guess  = _guess(cols, ["event_time", "install_time", "attributed_touch_time"])
    event_guess = _guess(cols, ["event_name"])
    # IDs típicos
    id_guess    = _guess(cols, ["appsflyer_id", "customer_user_id", "advertising_id"])
    appid_guess = _guess(cols, ["app_id"])
    return cols, time_guess, event_guess, id_guess, appid_guess

def bq_list_app_ids(limit=500):
    """Lista App IDs de la tabla BQ para el dropdown."""
    client = _need_bq_client()
    sql = f"""
    SELECT DISTINCT CAST(app_id AS STRING) AS app_id
    FROM `{BQ_TABLE_FQN}`
    WHERE app_id IS NOT NULL AND app_id <> ''
    ORDER BY app_id
    LIMIT {int(limit)}
    """
    df = client.query(sql).result().to_dataframe(create_bqstorage_client=False)
    vals = sorted(df["app_id"].dropna().astype(str).tolist())
    return vals, f"{len(vals)} App IDs encontrados."

def bq_list_events_fixed(event_col, time_col, app_id_col, app_id_value, start_date, end_date, limit=500):
    client = _need_bq_client()
    cols, t_guess, e_guess, _, a_guess = bq_get_columns_fixed()
    event_col  = event_col  or e_guess
    time_col   = time_col   or t_guess
    app_id_col = app_id_col or a_guess
    if not (event_col and time_col and app_id_col and app_id_value and start_date and end_date):
        return [], "Completá App ID, fechas y columnas (evento/fecha/App ID)."
    sql = f"""
    SELECT DISTINCT CAST({event_col} AS STRING) AS ev
    FROM `{BQ_TABLE_FQN}`
    WHERE DATE({time_col}) BETWEEN @sd AND @ed
      AND CAST({app_id_col} AS STRING) = @app_id
    ORDER BY ev
    LIMIT {int(limit)}
    """
    job = client.query(sql, job_config=bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter("sd", "DATE", str(start_date)),
            bigquery.ScalarQueryParameter("ed", "DATE", str(end_date)),
            bigquery.ScalarQueryParameter("app_id", "STRING", str(app_id_value).strip()),
        ]
    ))
    df = job.result().to_dataframe(create_bqstorage_client=False)
    return sorted(df["ev"].dropna().astype(str).tolist()), f"{len(df)} eventos encontrados."

def bq_query_to_temp_fixed(event_col, time_col, app_id_col, app_id_value, start_date, end_date, selected_events):
    client = _need_bq_client()
    cols, t_guess, e_guess, _, a_guess = bq_get_columns_fixed()
    event_col  = event_col  or e_guess
    time_col   = time_col   or t_guess
    app_id_col = app_id_col or a_guess
    if not (event_col and time_col and app_id_col and app_id_value and start_date and end_date):
        raise RuntimeError("Completá App ID, fechas y columnas (evento/fecha/App ID).")
    params = [
        bigquery.ScalarQueryParameter("sd", "DATE", str(start_date)),
        bigquery.ScalarQueryParameter("ed", "DATE", str(end_date)),
        bigquery.ScalarQueryParameter("app_id", "STRING", str(app_id_value).strip()),
    ]
    ev_filter = ""
    if selected_events:
        params.append(bigquery.ArrayQueryParameter("events", "STRING", [str(v) for v in selected_events]))
        ev_filter = f"AND CAST({event_col} AS STRING) IN UNNEST(@events)"
    sql = f"""
    SELECT *
    FROM `{BQ_TABLE_FQN}`
    WHERE DATE({time_col}) BETWEEN @sd AND @ed
      AND CAST({app_id_col} AS STRING) = @app_id
      {ev_filter}
    """
    job = client.query(sql, job_config=bigquery.QueryJobConfig(query_parameters=params))
    df = job.result().to_dataframe(create_bqstorage_client=False)
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
    df.to_csv(tmp.name, index=False)
    return tmp.name, df.head(20).to_dict(orient="records")

# ================== MMP por archivo ==================
def file_mmp_schema(file):
    try:
        df = _safe_read(file)
    except Exception as e:
        return (gr.update(), gr.update(), gr.update(), gr.update(), f"Error al leer MMP: {e}")
    cols = list(df.columns)
    event_guess = _guess(cols, ["event_name", "Event Name", "evento", "EVENTO", "Event"])
    id_guess    = _guess(cols, ["appsflyer_id", "customer_user_id", "advertising_id",
                                "Advertising ID", "adid", "idfa", "ID", "Id"])
    time_guess  = _guess_optional(cols, ["event_time", "install_time", "attributed_touch_time",
                                         "event_date", "timestamp", "date", "Date", "Event Time"])
    appid_guess = _guess_optional(cols, ["app_id", "bundle_id", "app", "appId", "App ID"])

    return (gr.update(choices=cols, value=time_guess),
            gr.update(choices=cols, value=event_guess),
            gr.update(choices=cols, value=id_guess),
            gr.update(choices=cols, value=appid_guess),
            "Columnas detectadas (archivo MMP).")

def file_mmp_list_events_simple(file, event_col):
    try:
        df = _safe_read(file)
    except Exception as e:
        return gr.update(choices=[], value=[]), f"Error al leer MMP: {e}"
    if not event_col or event_col not in df.columns:
        return gr.update(choices=[], value=[]), "Elegí la columna de evento (archivo MMP)."
    vals = sorted(pd.Series(df[event_col].astype(str).unique()).dropna().tolist())
    return gr.update(choices=vals, value=vals), f"{len(vals)} eventos detectados (archivo MMP)."

def file_prepare(src_file, ev_col, selected_events):
    try:
        df = _safe_read(src_file)
        if selected_events:
            df = df[df[ev_col].astype(str).isin([str(v) for v in selected_events])]
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
        df.to_csv(tmp.name, index=False)
        return tmp.name, df.head(20)
    except Exception as e:
        raise RuntimeError(f"Error al preparar MMP (archivo): {e}")

# ================== CLIENTE helpers ==================
def cliente_map_columns(cliente_file):
    try:
        df = _safe_read(cliente_file)
    except Exception as e:
        return (gr.update(), gr.update(), gr.update(), gr.update(), "Error al leer CLIENTE: "+str(e))
    cols = list(df.columns)
    id_guess = _guess(cols, [
        "appsflyer_id","customer_user_id","advertising_id",
        "Advertising ID","user_id","User Id",
        "transaction_id","Transaction Id","ID","Id","rut"
    ])
    valid_guess  = None
    metric_guess = _guess_optional(cols, ["revenue","amount","value","ticket","Event Revenue","importe","monto"])
    event_guess  = _guess_optional(cols, ["event_name","Event Name","evento","EVENTO","Event"])

    return (gr.update(choices=cols, value=id_guess),
            gr.update(choices=cols, value=valid_guess),
            gr.update(choices=cols, value=metric_guess),
            gr.update(choices=cols, value=event_guess),
            "Columnas de CLIENTE listas.")

def load_validation_values(cliente_file, validation_col):
    try:
        df_c = _safe_read(cliente_file) if cliente_file else None
    except Exception as e:
        return gr.update(choices=[], value=[]), f"Error al leer CLIENTE: {e}"
    if df_c is None or not validation_col or validation_col not in df_c.columns:
        return gr.update(choices=[], value=[]), "Omitido: sin columna de validación (se usará cruce de IDs)."
    vals = sorted(pd.Series(df_c[validation_col].astype(str).unique()).dropna().tolist())
    return gr.update(choices=vals, value=[]), f"{len(vals)} valores posibles de validación."


# ================== Compute ==================
from io import BytesIO
import tempfile
import re
import pandas as pd

# --- helpers ---------------------------------------------------------

def normalize_id_series(s: pd.Series) -> pd.Series:
    """
    Normalize IDs for robust equality:
    - cast to string
    - strip whitespace
    - lowercase
    - convert 'nan'/'none' to ''
    """
    x = s.astype(str).str.strip().str.lower()
    x = x.replace({"nan": "", "none": ""})
    return x.fillna("")

def _autodetect_validation_col(cols):
    """Try to find a likely validation column if user didn't pick one."""
    candidates = [
        "valid", "valido", "válido", "is_valid", "usable", "status",
        "approved", "aprobado", "ok", "flag", "validated", "validation"
    ]
    lower = {c.lower(): c for c in cols}
    for cand in candidates:
        if cand in lower:
            return lower[cand]
    return None

def _default_truthy_set():
    # NOTE: all lowercased string checks
    return {
        "true", "1", "yes", "y", "ok", "si", "sí",
        "valid", "valido", "válido", "usable", "approved", "aprobado",
        "x", "t"
    }

# --- main ------------------------------------------------------------

def compute(cliente_file, mmp_final_path,
            id_cliente_col, id_mmp_col,
            validation_col_client, validation_values,   # optional
            metric_col_client,                          # ignored in this logic
            client_event_col,                           # ignored (denominator is MMP)
            mmp_event_col,                              # required
            selected_events_mmp):

    if not mmp_final_path:
        return None, None, "Primero completá la fuente MMP."
    if not cliente_file:
        return None, None, "Subí CLIENTE y mapeá las columnas."

    # Read sources
    try:
        df_c = _safe_read(cliente_file)
        df_m = _safe_read(mmp_final_path)
    except Exception as e:
        return None, None, f"Error al leer fuentes: {e}"

    # Required columns present?
    for name, col, df in [
        ("ID en CLIENTE", id_cliente_col, df_c),
        ("ID en MMP",     id_mmp_col,     df_m),
        ("EVENTO en MMP", mmp_event_col,  df_m),
    ]:
        if not col or col not in df.columns:
            return None, None, f"Columna inválida: {name} = {col}"

    # Normalize IDs
    try:
        ids_cli_norm = normalize_id_series(df_c[id_cliente_col])
        ids_mmp_norm = normalize_id_series(df_m[id_mmp_col])
    except Exception as e:
        return None, None, f"Error normalizando IDs: {e}"

    # If user didn't select a validation col, try to autodetect one
    if not validation_col_client or validation_col_client not in df_c.columns:
        auto_val_col = _autodetect_validation_col(df_c.columns)
        validation_col_client = auto_val_col if auto_val_col else None

    # If a validation column exists but user didn't pick values, use default “truthy” set
    truthy = _default_truthy_set()
    use_validation = validation_col_client is not None
    if use_validation:
        cand_vals = validation_values or []
        if cand_vals:
            truthy = {str(v).strip().lower() for v in cand_vals}

    # Build set of CLIENTE IDs that are considered valid
    try:
        if use_validation:
            val_series = df_c[validation_col_client].astype(str).str.strip().str.lower()
            mask_valid = val_series.isin(truthy)
            valid_client_ids = set(ids_cli_norm[mask_valid][ids_cli_norm[mask_valid] != ""])
        else:
            # No validation column → any presence in CLIENTE counts as valid
            valid_client_ids = set(ids_cli_norm[ids_cli_norm != ""])
    except Exception as e:
        return None, None, f"Error aplicando validación en CLIENTE: {e}"

    # Create VALIDO flag in MMP: True if MMP id ∈ valid_client_ids
    df_m = df_m.copy()
    df_m["VALIDO"] = ids_mmp_norm.isin(valid_client_ids)

    # Events to process (if none selected, use all present in MMP)
    if not selected_events_mmp:
        try:
            selected_events_mmp = (
                df_m[mmp_event_col].astype(str).dropna().unique().tolist()
            )
            selected_events_mmp = sorted(map(str, selected_events_mmp))
        except Exception as e:
            return None, None, f"Error obteniendo lista de eventos MMP: {e}"

    # B: total rows in MMP per event
    mmp_counts = df_m.groupby(df_m[mmp_event_col].astype(str), dropna=False).size()

    # A: total rows in MMP per event with VALIDO=True
    cliente_counts = (
        df_m[df_m["VALIDO"]]
        .groupby(df_m.loc[df_m["VALIDO"], mmp_event_col].astype(str), dropna=False)
        .size()
    )

    # Build event tables
    tables_by_event = {}
    for ev in selected_events_mmp:
        ev_str = str(ev)
        B = int(mmp_counts.get(ev_str, 0))
        A = int(cliente_counts.get(ev_str, 0))
        pct = round((A / B * 100), 1) if B else 0.0
        tables_by_event[ev] = pd.DataFrame([{"Cliente": A, "MMP": B, "%": pct}])

    # ===== Excel output =====
    xls_bytes = BytesIO()
    with pd.ExcelWriter(xls_bytes, engine="xlsxwriter") as writer:
        # Sheet 1: tables by EVENT
        sheet_name = "tablas_por_EVENTO"
        start_row = 0
        for ev, table_df in tables_by_event.items():
            pd.DataFrame([[ev]]).to_excel(
                writer, sheet_name=sheet_name, startrow=start_row,
                index=False, header=False
            )
            start_row += 1
            table_df.to_excel(
                writer, sheet_name=sheet_name, startrow=start_row, index=False
            )
            start_row += len(table_df) + 2

        # Sheet 2: raw MMP + only VALIDO (explicitly drop the ID columns)
        cols_front = ["VALIDO"]  # first column
        # Keep event column visible & useful
        if mmp_event_col in df_m.columns:
            cols_front.insert(0, mmp_event_col)

        # Exclude ID & any helper columns from raw output
        drop_cols = {id_mmp_col, "_id_norm_mmp"}  # (we never created _id_norm_mmp here)
        cols_rest = [c for c in df_m.columns if c not in set(cols_front) | drop_cols]
        df_m[cols_front + cols_rest].to_excel(writer, sheet_name="raw_mmp", index=False)

    xls_bytes.seek(0)
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
    tmp.write(xls_bytes.getvalue()); tmp.flush(); tmp.close()
    download_path = tmp.name

    # Preview: first event table
    preview = None
    if tables_by_event:
        first_ev = list(tables_by_event.keys())[0]
        preview = tables_by_event[first_ev]

    return preview, download_path, "Listo ✅"

# ================== UI ==================
with gr.Blocks(title=APP_TITLE) as demo:
    gr.Markdown(f"# {APP_TITLE}\n\n{APP_DESC}")

    # ===== MMP: Selección de fuente =====
    gr.Markdown("## Fuente 1: MMP")
    mmp_source = gr.Radio(choices=["Subir archivo", "BigQuery"], value="Subir archivo", label="Fuente de MMP")

    # --- BigQuery Panel ---
    with gr.Column(visible=False) as bq_panel:
        gr.Markdown("**Paso MMP-BQ 1**: App ID y Fechas")
        with gr.Row():
            bq_app_id_value = gr.Dropdown(choices=[], value=None, label="App ID (BigQuery)")
            list_app_ids_btn = gr.Button("Listar App IDs (BigQuery)")
        list_app_ids_msg = gr.Markdown()

        with gr.Row():
            bq_start = gr.Textbox(label="Fecha desde (YYYY-MM-DD)", placeholder="YYYY-MM-DD")
            bq_end = gr.Textbox(label="Fecha hasta (YYYY-MM-DD)", placeholder="YYYY-MM-DD")

        gr.Markdown("**Paso MMP-BQ 2**: Obtener columnas (schema)")
        with gr.Row():
            bq_time_col = gr.Dropdown(choices=[], value=None, label="Columna temporal (ej: event_time)")
            mmp_event_col_bq = gr.Dropdown(choices=[], value=None, label="Columna de EVENTO en MMP (ej: event_name)")
            id_mmp_col_bq = gr.Dropdown(choices=[], value=None, label="ID en MMP (para cruce)")
            bq_app_id_col = gr.Dropdown(choices=[], value=None, label="Columna App ID (ej: app_id)")
        bq_schema_btn = gr.Button("Obtener columnas (schema)")
        bq_schema_msg = gr.Markdown()

        gr.Markdown("**Paso MMP-BQ 3**: Listar eventos por rango")
        mmp_events_bq = gr.CheckboxGroup(choices=[], value=[], label="Eventos detectados (BigQuery)")
        bq_events_btn = gr.Button("Listar eventos por rango (BigQuery)")
        bq_events_msg = gr.Markdown()

        gr.Markdown("**Paso MMP-BQ 4**: Consultar y cargar MMP")
        mmp_preview_bq = gr.Dataframe(label="Preview MMP (BQ)", interactive=False)
        mmp_bq_download = gr.File(label="Descargar MMP (resultado de BigQuery)", interactive=False)
        mmp_final_path_bq = gr.Textbox(label="Ruta MMP final (temporal BQ)", visible=False)
        bq_query_btn = gr.Button("Consultar y cargar MMP (BigQuery)")
        bq_query_msg = gr.Markdown()

    # --- File Panel ---
    with gr.Column(visible=True) as file_panel:
        gr.Markdown("**Paso MMP-Archivo 1**: Subir y detectar columnas")
        mmp_file = gr.File(label="Subí MMP.xlsx/csv", file_types=[".xlsx", ".csv"])
        with gr.Row():
            file_time_col = gr.Dropdown(choices=[], value=None, label="Columna temporal (archivo)")
            mmp_event_col_file = gr.Dropdown(choices=[], value=None, label="Columna de EVENTO (archivo)")
            id_mmp_col_file = gr.Dropdown(choices=[], value=None, label="ID en MMP (archivo)")
            file_app_id_col = gr.Dropdown(choices=[], value=None, label="Columna App ID (archivo)")
        file_schema_btn = gr.Button("Obtener columnas (archivo)")
        file_schema_msg = gr.Markdown()

        gr.Markdown("**Paso MMP-Archivo 2**: (opcional) Listar eventos del archivo y filtrar")
        mmp_events_file = gr.CheckboxGroup(choices=[], value=[], label="Eventos detectados (archivo)")
        file_events_btn = gr.Button("Listar eventos (archivo)")
        file_events_msg = gr.Markdown()

        gr.Markdown("**Paso MMP-Archivo 3**: Cargar & previsualizar")
        mmp_preview_file = gr.Dataframe(label="Preview MMP (archivo)", interactive=False)
        mmp_file_download = gr.File(label="Descargar MMP (archivo filtrado)", interactive=False)
        mmp_final_path_file = gr.Textbox(label="Ruta MMP final (temporal archivo)", visible=False)
        file_query_btn = gr.Button("Cargar MMP (archivo)")
        file_query_msg = gr.Markdown()

    # Toggle panels
    def _toggle_source(src):
        return (gr.update(visible=(src=="Subir archivo")), gr.update(visible=(src=="BigQuery")))
    mmp_source.change(_toggle_source, inputs=[mmp_source], outputs=[file_panel, bq_panel])

    # BQ: listar App IDs
    def _bq_list_app_ids():
        try:
            vals, msg = bq_list_app_ids()
            return gr.update(choices=vals, value=(vals[0] if vals else None)), msg
        except Exception as e:
            return gr.update(choices=[], value=None), f"Error listando App IDs: {e}"
    list_app_ids_btn.click(_bq_list_app_ids, inputs=[], outputs=[bq_app_id_value, list_app_ids_msg])

    # BQ: schema
    def _bq_schema_fixed():
        try:
            cols, t_guess, e_guess, id_guess, appid_guess = bq_get_columns_fixed()
            return (gr.update(choices=cols, value=t_guess),
                    gr.update(choices=cols, value=e_guess),
                    gr.update(choices=cols, value=id_guess),
                    gr.update(choices=cols, value=appid_guess),
                    "Schema cargado (tabla fija BQ).")
        except Exception as e:
            return (gr.update(choices=[], value=None),
                    gr.update(choices=[], value=None),
                    gr.update(choices=[], value=None),
                    gr.update(choices=[], value=None),
                    f"Error schema: {e}")
    bq_schema_btn.click(_bq_schema_fixed, inputs=[], outputs=[bq_time_col, mmp_event_col_bq, id_mmp_col_bq, bq_app_id_col, bq_schema_msg])

    # BQ: listar eventos
    def _bq_list_events_fixed(ev_col, t_col, app_col, app_val, ds, de):
        try:
            vals, msg = bq_list_events_fixed(ev_col, t_col, app_col, app_val, ds, de)
            return gr.update(choices=vals, value=vals), msg
        except Exception as e:
            return gr.update(choices=[], value=[]), f"Error al listar eventos: {e}"
    bq_events_btn.click(_bq_list_events_fixed,
                        inputs=[mmp_event_col_bq, bq_time_col, bq_app_id_col, bq_app_id_value, bq_start, bq_end],
                        outputs=[mmp_events_bq, bq_events_msg])

    # BQ: query final
    def _bq_query_fixed(ev_col, t_col, app_col, app_val, ds, de, evs):
        try:
            path, preview_rows = bq_query_to_temp_fixed(ev_col, t_col, app_col, app_val, ds, de, evs or [])
            preview_df = pd.DataFrame(preview_rows)
            file_path = _safe_file_output(path)
            return preview_df, file_path, path, "OK: MMP desde BigQuery cargado."
        except Exception as e:
            return gr.update(), None, "", f"Error consulta BQ: {e}"
    bq_query_btn.click(_bq_query_fixed,
                       inputs=[mmp_event_col_bq, bq_time_col, bq_app_id_col, bq_app_id_value, bq_start, bq_end, mmp_events_bq],
                       outputs=[mmp_preview_bq, mmp_bq_download, mmp_final_path_bq, bq_query_msg])

    # File: schema & events
    file_schema_btn.click(file_mmp_schema,
                          inputs=[mmp_file],
                          outputs=[file_time_col, mmp_event_col_file, id_mmp_col_file, file_app_id_col, file_schema_msg])
    file_events_btn.click(file_mmp_list_events_simple,
                          inputs=[mmp_file, mmp_event_col_file],
                          outputs=[mmp_events_file, file_events_msg])

    # File: final
    def _file_query(src_file, ev_col, evs):
        try:
            path, preview = file_prepare(src_file, ev_col, evs or [])
            file_path = _safe_file_output(path)
            return preview, file_path, path, "OK: MMP desde archivo cargado."
        except Exception as e:
            return gr.update(), None, "", f"Error archivo MMP: {e}"
    file_query_btn.click(_file_query,
                         inputs=[mmp_file, mmp_event_col_file, mmp_events_file],
                         outputs=[mmp_preview_file, mmp_file_download, mmp_final_path_file, file_query_msg])

    # ===== CLIENTE =====
    gr.Markdown("## Fuente 2: CLIENTE")
    with gr.Row():
        cliente_file = gr.File(label="CLIENTE.xlsx/csv", file_types=[".xlsx", ".csv"])
        map_cliente_btn = gr.Button("Obtener mapeo de columnas (CLIENTE)")
    with gr.Row():
        id_cliente_col = gr.Dropdown(choices=[], value=None, label="ID en CLIENTE (para cruce)")
        validation_col_client = gr.Dropdown(choices=[], value=None, label="Columna de validación (CLIENTE) — opcional")
    with gr.Row():
        metric_col_client = gr.Dropdown(choices=[], value=None, label="Columna de métrica (CLIENTE) — opcional")
        client_event_col = gr.Dropdown(choices=[], value=None, label="Columna de EVENTO (CLIENTE) — opcional")
    cliente_msg = gr.Markdown()
    map_cliente_btn.click(cliente_map_columns,
                          inputs=[cliente_file],
                          outputs=[id_cliente_col, validation_col_client, metric_col_client, client_event_col, cliente_msg])

    gr.Markdown("### Opcional: valores de validación")
    valid_vals = gr.CheckboxGroup(choices=[], value=[], label="Valores que significan VALIDADO (CLIENTE)")
    load_valid_btn = gr.Button("Cargar valores de validación (CLIENTE)")
    valid_msg = gr.Markdown()
    load_valid_btn.click(load_validation_values,
                         inputs=[cliente_file, validation_col_client],
                         outputs=[valid_vals, valid_msg])

    # ===== Generar =====
    gr.Markdown("## Generar tablas y Excel")
    run_btn = gr.Button("Generar tablas")
    preview_out = gr.Dataframe(label="Preview: primera tabla por EVENTO", interactive=False)
    xls_file = gr.File(label="Descargar Excel (tablas_por_EVENTO + raw_merge)", interactive=False)
    gen_msg = gr.Markdown()

    def _compute_router(cliente,
                        source,
                        mmp_final_file_panel, mmp_final_bq_panel,
                        id_cli, id_mmp_file, id_mmp_bq,
                        val_col, val_vals,
                        metric_cli, cli_evt,
                        mmp_evt_file, mmp_evt_bq,
                        events_file, events_bq):

        if source == "Subir archivo":
            mmp_path = mmp_final_file_panel
            id_mmp = id_mmp_file
            mmp_evt_col = mmp_evt_file
            selected_events = events_file
        else:
            mmp_path = mmp_final_bq_panel
            id_mmp = id_mmp_bq
            mmp_evt_col = mmp_evt_bq
            selected_events = events_bq

        if not id_cli:
            return None, None, "Elegí el ID en CLIENTE."
        if not id_mmp or not mmp_evt_col:
            return None, None, "Elegí ID y EVENTO en MMP."

        return compute(cliente, mmp_path,
                       id_cli, id_mmp,
                       val_col, val_vals,
                       metric_cli,
                       cli_evt,           # puede ser None
                       mmp_evt_col,       # requerido
                       selected_events)

    run_btn.click(
        _compute_router,
        inputs=[cliente_file,
                mmp_source,
                mmp_final_path_file, mmp_final_path_bq,
                id_cliente_col, id_mmp_col_file, id_mmp_col_bq,
                validation_col_client, valid_vals,
                metric_col_client, client_event_col,
                mmp_event_col_file, mmp_event_col_bq,
                mmp_events_file, mmp_events_bq],
        outputs=[preview_out, xls_file, gen_msg]
    )

if __name__ == "__main__":
    gr.close_all()
    demo.launch()