farmentano12 committed on
Commit
8d81e17
·
verified ·
1 Parent(s): 3c237a7

Back to the good one

Browse files
Files changed (1) hide show
  1. app.py +216 -468
app.py CHANGED
@@ -1,50 +1,21 @@
1
  import gradio as gr
2
  import pandas as pd
3
  from io import BytesIO
 
4
  import tempfile
5
- import os, json
6
- from google.cloud import bigquery
7
- from google.oauth2 import service_account
8
-
9
- # BigQuery (optional)
10
- try:
11
- from google.cloud import bigquery
12
- _HAS_BQ = True
13
- except Exception:
14
- _HAS_BQ = False
15
-
16
- # Optional: ensure db_dtypes is available for BigQuery -> pandas
17
- try:
18
- import db_dtypes # noqa: F401
19
- _HAS_DB_DTYPES = True
20
- except Exception:
21
- _HAS_DB_DTYPES = False
22
-
23
- APP_TITLE = "Cruce CLIENTE × MMP por EVENTO (archivo o BigQuery)"
24
  APP_DESC = """
25
- ### Fuente 1: MMP
26
- **BigQuery (tabla única)**: `plasma-bison-438415-t8.connector_appsflyer_raw_data.appsflyer_raw_data_daily_report`
27
- Pasos BQ:
28
- 1) Ingresá **App ID** y **rango de fechas** (YYYY-MM-DD).
29
- 2) **Obtener columnas (schema)** sugiere **columna temporal (event_time)**, **evento (event_name)**, **ID en MMP (appsflyer_id)** y **App ID columna** (app_id).
30
- 3) **Listar eventos por rango** (usa App ID + fechas + columna de evento).
31
- 4) **Consultar y cargar MMP** → genera CSV temporal, preview y descarga.
32
-
33
- **Archivo**: subir archivo, detectar columnas y (opcional) **listar eventos** para filtrar. No hace falta App ID ni fechas.
34
-
35
- ### Fuente 2: CLIENTE
36
- 1) Subir **CLIENTE** → **Obtener mapeo de columnas**.
37
- 2) Elegir **ID en CLIENTE**.
38
- 3) **Columna de validación (opcional)** y **valores** (opcional).
39
- 4) **Columna de métrica (CLIENTE) (opcional)**.
40
- 5) **Columna de EVENTO (CLIENTE)**.
41
-
42
- ### Final
43
- - Por cada **evento** (de MMP), **Cliente, MMP, %** con `% = Cliente / MMP × 100` (1 decimal).
44
- - Excel: **Hoja 1** tablas por evento; **Hoja 2** `raw_merge`.
45
  """
46
 
47
- # -------------------------- Helpers --------------------------
48
  def _read_excel(pathlike):
49
  return pd.read_excel(pathlike, engine="openpyxl")
50
 
@@ -54,10 +25,10 @@ def _read_csv_with_fallbacks(pathlike):
54
  except Exception:
55
  return pd.read_csv(pathlike, sep=None, engine="python", on_bad_lines="skip", encoding="latin-1")
56
 
57
- def _safe_read(fileobj_or_path):
58
- if fileobj_or_path is None or (isinstance(fileobj_or_path, str) and not fileobj_or_path.strip()):
59
  return None
60
- path = fileobj_or_path.name if hasattr(fileobj_or_path, "name") else fileobj_or_path
61
  ext = os.path.splitext(str(path))[-1].lower()
62
  try:
63
  if ext in [".xlsx", ".xlsm", ".xltx", ".xltm"]:
@@ -75,481 +46,258 @@ def _safe_read(fileobj_or_path):
75
  except Exception as e:
76
  raise RuntimeError(f"No se pudo leer '{os.path.basename(str(path))}': {e}")
77
 
78
- def _guess(cols, candidates):
79
- lower_map = {c.lower(): c for c in cols}
80
- for cand in candidates:
81
- if cand.lower() in lower_map:
82
- return lower_map[cand.lower()]
83
- return cols[0] if cols else None
84
-
85
- def _safe_file_output(path):
86
- if path and isinstance(path, str) and os.path.isfile(path):
87
- return path
88
- return None
89
-
90
- # -------------------------- BQ helpers (tabla fija) --------------------------
91
- BQ_PROJECT = "plasma-bison-438415-t8"
92
- BQ_TABLE_FQN = "plasma-bison-438415-t8.connector_appsflyer_raw_data.appsflyer_raw_data_daily_report"
93
-
94
- #def _need_bq_client():
95
- # if not _HAS_BQ:
96
- # raise RuntimeError("Falta dependencia 'google-cloud-bigquery'.")
97
- # if not os.getenv("GOOGLE_APPLICATION_CREDENTIALS"):
98
- # raise RuntimeError("GOOGLE_APPLICATION_CREDENTIALS no seteado.")
99
- # return bigquery.Client(project=BQ_PROJECT)
100
-
101
- def _need_bq_client():
102
- sa_json = os.getenv("GCP_SA_JSON")
103
- if sa_json:
104
- info = json.loads(sa_json)
105
- creds = service_account.Credentials.from_service_account_info(info)
106
- project = info.get("project_id") or PROJECT_DEFAULT
107
- return bigquery.Client(project=project, credentials=creds)
108
-
109
- # Fallbacks: local file via GOOGLE_APPLICATION_CREDENTIALS or metadata if running on GCP
110
- return bigquery.Client(project=PROJECT_DEFAULT)
111
-
112
- def bq_get_columns_fixed():
113
- client = _need_bq_client()
114
- table = client.get_table(BQ_TABLE_FQN)
115
- cols = [sch.name for sch in table.schema]
116
- time_guess = _guess(cols, ["event_time","event_date","event_datetime","timestamp","date"])
117
- event_guess = _guess(cols, ["event_name","Event Name","evento","event"])
118
- id_guess = _guess(cols, ["appsflyer_id","advertising_id","adid","idfa","ID","Id"])
119
- appid_guess = _guess(cols, ["app_id","bundle_id","app","appId"])
120
- return cols, time_guess, event_guess, id_guess, appid_guess
121
-
122
- def bq_list_events_fixed(event_col, time_col, app_id_col, app_id_value, start_date, end_date, limit=500):
123
- client = _need_bq_client()
124
- cols, t_guess, e_guess, _, a_guess = bq_get_columns_fixed()
125
- event_col = event_col or e_guess
126
- time_col = time_col or t_guess
127
- app_id_col = app_id_col or a_guess
128
- if not (event_col and time_col and app_id_col and app_id_value and start_date and end_date):
129
- return [], "Completá App ID, fechas y columnas (evento/fecha/App ID)."
130
- sql = f"""
131
- SELECT DISTINCT CAST({event_col} AS STRING) AS ev
132
- FROM `{BQ_TABLE_FQN}`
133
- WHERE DATE({time_col}) BETWEEN @sd AND @ed
134
- AND CAST({app_id_col} AS STRING) = @app_id
135
- ORDER BY ev
136
- LIMIT {int(limit)}
137
- """
138
- job = client.query(sql, job_config=bigquery.QueryJobConfig(
139
- query_parameters=[
140
- bigquery.ScalarQueryParameter("sd", "DATE", str(start_date)),
141
- bigquery.ScalarQueryParameter("ed", "DATE", str(end_date)),
142
- bigquery.ScalarQueryParameter("app_id", "STRING", str(app_id_value).strip()),
143
- ]
144
- ))
145
- df = job.result().to_dataframe(create_bqstorage_client=False)
146
- return sorted(df["ev"].dropna().astype(str).tolist()), f"{len(df)} eventos encontrados."
147
-
148
- def bq_query_to_temp_fixed(event_col, time_col, app_id_col, app_id_value, start_date, end_date, selected_events):
149
- client = _need_bq_client()
150
- cols, t_guess, e_guess, _, a_guess = bq_get_columns_fixed()
151
- event_col = event_col or e_guess
152
- time_col = time_col or t_guess
153
- app_id_col = app_id_col or a_guess
154
- if not (event_col and time_col and app_id_col and app_id_value and start_date and end_date):
155
- raise RuntimeError("Completá App ID, fechas y columnas (evento/fecha/App ID).")
156
- params = [
157
- bigquery.ScalarQueryParameter("sd", "DATE", str(start_date)),
158
- bigquery.ScalarQueryParameter("ed", "DATE", str(end_date)),
159
- bigquery.ScalarQueryParameter("app_id", "STRING", str(app_id_value).strip()),
160
- ]
161
- ev_filter = ""
162
- if selected_events:
163
- params.append(bigquery.ArrayQueryParameter("events", "STRING", [str(v) for v in selected_events]))
164
- ev_filter = f"AND CAST({event_col} AS STRING) IN UNNEST(@events)"
165
- sql = f"""
166
- SELECT *
167
- FROM `{BQ_TABLE_FQN}`
168
- WHERE DATE({time_col}) BETWEEN @sd AND @ed
169
- AND CAST({app_id_col} AS STRING) = @app_id
170
- {ev_filter}
171
- """
172
- job = client.query(sql, job_config=bigquery.QueryJobConfig(query_parameters=params))
173
- df = job.result().to_dataframe(create_bqstorage_client=False)
174
- tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
175
- df.to_csv(tmp.name, index=False)
176
- return tmp.name, df.head(20).to_dict(orient="records")
177
-
178
- # -------------------------- File helpers --------------------------
179
- def file_mmp_schema(file):
180
- try:
181
- df = _safe_read(file)
182
- except Exception as e:
183
- return (gr.update(), gr.update(), gr.update(), gr.update(), f"Error al leer MMP: {e}")
184
- cols = list(df.columns)
185
- time_guess = _guess(cols, ["event_time","event_date","event_time_millis","timestamp","date","Date","Event Time"])
186
- event_guess = _guess(cols, ["event_name","Event Name","evento","EVENTO","Event"])
187
- id_guess = _guess(cols, ["appsflyer_id","Advertising ID","advertising_id","adid","idfa","ID","Id"])
188
- appid_guess = _guess(cols, ["app_id","bundle_id","app","appId","App ID"])
189
- return (gr.update(choices=cols, value=time_guess),
190
- gr.update(choices=cols, value=event_guess),
191
- gr.update(choices=cols, value=id_guess),
192
- gr.update(choices=cols, value=appid_guess),
193
- "Columnas detectadas (archivo MMP).")
194
-
195
- def file_mmp_list_events_simple(file, event_col):
196
  try:
197
- df = _safe_read(file)
198
- except Exception as e:
199
- return gr.update(choices=[], value=[]), f"Error al leer MMP: {e}"
200
- if not event_col or event_col not in df.columns:
201
- return gr.update(choices=[], value=[]), "Elegí la columna de evento (archivo MMP)."
202
- vals = sorted(pd.Series(df[event_col].astype(str).unique()).dropna().tolist())
203
- return gr.update(choices=vals, value=vals), f"{len(vals)} eventos detectados (archivo MMP)."
204
-
205
- def file_prepare(src_file, ev_col, selected_events):
206
- try:
207
- df = _safe_read(src_file)
208
- if selected_events:
209
- df = df[df[ev_col].astype(str).isin([str(v) for v in selected_events])]
210
- tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
211
- df.to_csv(tmp.name, index=False)
212
- return tmp.name, df.head(20)
213
- except Exception as e:
214
- raise RuntimeError(f"Error al preparar MMP (archivo): {e}")
215
-
216
- # -------------------------- CLIENTE helpers --------------------------
217
- def cliente_map_columns(cliente_file):
218
- try:
219
- df = _safe_read(cliente_file)
220
  except Exception as e:
221
- return (gr.update(), gr.update(), gr.update(), gr.update(), "Error al leer CLIENTE: "+str(e))
222
- cols = list(df.columns)
223
- id_guess = _guess(cols, ["appsflyer_id","Advertising ID","advertising_id","user_id","User Id","transaction_id","Transaction Id","ID","Id"])
224
- valid_guess = None # opcional
225
- metric_guess = _guess(cols, ["revenue","amount","value","ticket","Event Revenue","importe","monto"])
226
- event_guess = _guess(cols, ["event_name","Event Name","evento","EVENTO","Event"])
227
- return (gr.update(choices=cols, value=id_guess),
228
- gr.update(choices=cols, value=valid_guess),
229
- gr.update(choices=cols, value=metric_guess),
230
- gr.update(choices=cols, value=event_guess),
231
- "Columnas de CLIENTE listas.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
 
233
  def load_validation_values(cliente_file, validation_col):
234
  try:
235
  df_c = _safe_read(cliente_file) if cliente_file else None
236
  except Exception as e:
237
  return gr.update(choices=[], value=[]), f"Error al leer CLIENTE: {e}"
 
238
  if df_c is None or not validation_col or validation_col not in df_c.columns:
239
- return gr.update(choices=[], value=[]), "Omitido: sin columna de validación (se usará cruce de IDs)."
240
- vals = sorted(pd.Series(df_c[validation_col].astype(str).unique()).dropna().tolist())
241
- return gr.update(choices=vals, value=[]), f"{len(vals)} valores posibles de validación."
242
-
243
- # -------------------------- Compute --------------------------
244
- def compute(cliente_file, mmp_final_path,
245
- id_cliente_col, id_mmp_col,
246
- validation_col_client, validation_values,
247
- metric_col_client,
248
- client_event_col,
249
- mmp_event_col,
250
- selected_events_mmp):
251
- if not mmp_final_path:
252
- return None, None, "Primero completá la fuente MMP."
253
- if not cliente_file:
254
- return None, None, "Subí CLIENTE y mapeá las columnas."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  try:
256
  df_c = _safe_read(cliente_file)
257
- df_m = _safe_read(mmp_final_path)
258
  except Exception as e:
259
- return None, None, f"Error al leer fuentes: {e}"
 
 
260
  for name, col, df in [
261
  ("ID CLIENTE", id_cliente_col, df_c),
262
- ("ID MMP", id_mmp_col, df_m),
263
- ("EVENTO (CLIENTE)", client_event_col, df_c),
264
- ("EVENTO (MMP)", mmp_event_col, df_m),
265
  ]:
266
  if not col or col not in df.columns:
267
  return None, None, f"Columna inválida: {name} = {col}"
 
 
 
 
 
268
  try:
269
- merged = df_c.merge(df_m, left_on=id_cliente_col, right_on=id_mmp_col, how="left",
270
- suffixes=("_CLIENTE", "_MMP"))
 
 
271
  except Exception as e:
272
  return None, None, f"Error durante el merge por IDs: {e}"
273
 
274
- # --- Resolver nombres de columnas tras el merge (por posibles sufijos) ---
275
- def _resolve_merged(col, prefer_suffix):
276
- if col in merged.columns:
277
- return col
278
- cand1 = f"{col}{prefer_suffix}"
279
- cand2 = f"{col}_x"
280
- cand3 = f"{col}_y"
281
- for c in (cand1, cand2, cand3):
282
- if c in merged.columns:
283
- return c
284
- # último intento: coincidencia case-insensitive
285
- lower_map = {c.lower(): c for c in merged.columns}
286
- return lower_map.get(col.lower(), col)
287
-
288
- client_event_in_merged = _resolve_merged(client_event_col, "_CLIENTE")
289
- mmp_event_in_merged = _resolve_merged(mmp_event_col, "_MMP")
290
- validation_in_merged = _resolve_merged(validation_col_client, "_CLIENTE") if validation_col_client else None
291
- metric_in_merged = _resolve_merged(metric_col_client, "_CLIENTE") if metric_col_client else None
292
-
293
- # Si no se seleccionaron eventos MMP, usar todos los presentes en df_m (ya filtrado por tu consulta)
294
- if not selected_events_mmp:
295
- selected_events_mmp = sorted(df_m[mmp_event_col].astype(str).dropna().unique().tolist())
296
-
297
- # Denominador: conteo directo desde MMP por evento
298
- mmp_counts_map = df_m[mmp_event_col].astype(str).value_counts(dropna=False).to_dict()
299
-
300
- # Validación opcional (sobre columnas del lado CLIENTE en el merged)
301
- if validation_in_merged and validation_in_merged in merged.columns and validation_values:
302
- valid_mask_all = merged[validation_in_merged].astype(str).isin([str(v) for v in validation_values])
303
- else:
304
- valid_mask_all = pd.Series(True, index=merged.index)
305
-
306
- # Construcción de tablas por evento (los eventos vienen de MMP)
307
- tables_by_event = {}
308
- for ev in selected_events_mmp:
309
- ev_str = str(ev)
310
- # Numerador: filas de CLIENTE cuyo EVENTO_CLIENTE == evento MMP y cumplan validación
311
- if client_event_in_merged not in merged.columns:
312
- return None, None, f"No encuentro columna de evento de CLIENTE tras el merge: {client_event_in_merged}"
313
- sub = merged[merged[client_event_in_merged].astype(str) == ev_str]
314
- if sub.empty:
315
- cliente_count = 0
316
- metric_sum = 0.0
317
- else:
318
- valid_mask = valid_mask_all.loc[sub.index]
319
- cliente_count = int(valid_mask.sum())
320
- if metric_in_merged and metric_in_merged in sub.columns:
321
- vals = pd.to_numeric(sub.loc[valid_mask, metric_in_merged], errors="coerce")
322
- metric_sum = float(vals.sum()) if cliente_count else 0.0
323
- else:
324
- metric_sum = 0.0
325
-
326
- mmp_total = int(mmp_counts_map.get(ev_str, 0))
327
- pct = round((cliente_count / mmp_total * 100), 1) if mmp_total else 0.0
328
-
329
- row = {"Cliente": cliente_count, "MMP": mmp_total, "%": pct}
330
- if metric_in_merged and metric_in_merged in merged.columns:
331
- row[f"CLIENTE_{metric_col_client}_suma_validado"] = metric_sum
332
- tables_by_event[ev] = pd.DataFrame([row])
333
-
334
- # Excel
335
  xls_bytes = BytesIO()
336
  with pd.ExcelWriter(xls_bytes, engine="xlsxwriter") as writer:
337
- sheet_name = "tablas_por_EVENTO"
338
  start_row = 0
339
- for ev, table_df in tables_by_event.items():
340
- pd.DataFrame([[ev]]).to_excel(writer, sheet_name=sheet_name, startrow=start_row, index=False, header=False)
341
  start_row += 1
342
  table_df.to_excel(writer, sheet_name=sheet_name, startrow=start_row, index=False)
343
  start_row += len(table_df) + 2
344
 
345
- # Hoja 2: raw_merge (columnas clave primero; usar nombres resueltos)
346
  cols_keep = []
347
- for col in [id_cliente_col, id_mmp_col, client_event_in_merged, mmp_event_in_merged]:
348
- if col and col in merged.columns and col not in cols_keep:
349
  cols_keep.append(col)
350
- if validation_in_merged and validation_in_merged in merged.columns and validation_in_merged not in cols_keep:
351
- cols_keep.append(validation_in_merged)
352
- if metric_in_merged and metric_in_merged in merged.columns and metric_in_merged not in cols_keep:
353
- cols_keep.append(metric_in_merged)
354
  cols_rest = [c for c in merged.columns if c not in cols_keep]
355
  merged[cols_keep + cols_rest].to_excel(writer, sheet_name="raw_merge", index=False)
356
  xls_bytes.seek(0)
357
 
358
- import tempfile
359
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
360
- tmp.write(xls_bytes.getvalue()); tmp.flush(); tmp.close()
 
361
  download_path = tmp.name
362
 
363
- # Preview: primera tabla
364
  preview = None
365
- if tables_by_event:
366
- first_ev = list(tables_by_event.keys())[0]
367
- preview = tables_by_event[first_ev]
368
 
369
  return preview, download_path, "Listo ✅"
370
 
371
- # -------------------------- UI --------------------------
372
  with gr.Blocks(title=APP_TITLE) as demo:
373
  gr.Markdown(f"# {APP_TITLE}\n\n{APP_DESC}")
374
 
375
- # ===== MMP: Selección de fuente =====
376
- gr.Markdown("## Fuente 1: MMP")
377
- mmp_source = gr.Radio(choices=["Subir archivo", "BigQuery"], value="Subir archivo", label="Fuente de MMP")
378
-
379
- # --- BigQuery Panel (tabla fija) ---
380
- with gr.Column(visible=False) as bq_panel:
381
- gr.Markdown("**Paso MMP-BQ 1**: App ID y Fechas")
382
- with gr.Row():
383
- bq_app_id_value = gr.Textbox(label="App ID (valor exacto)", placeholder="com.tu.app")
384
- bq_start = gr.Textbox(label="Fecha desde (YYYY-MM-DD)", placeholder="YYYY-MM-DD")
385
- bq_end = gr.Textbox(label="Fecha hasta (YYYY-MM-DD)", placeholder="YYYY-MM-DD")
386
-
387
- gr.Markdown("**Paso MMP-BQ 2**: Obtener columnas (schema)")
388
- with gr.Row():
389
- bq_time_col = gr.Dropdown(choices=[], label="Columna temporal (ej: event_time)")
390
- mmp_event_col_bq = gr.Dropdown(choices=[], label="Columna de EVENTO en MMP (ej: event_name)")
391
- id_mmp_col_bq = gr.Dropdown(choices=[], label="ID en MMP (para cruce) (ej: appsflyer_id)")
392
- bq_app_id_col = gr.Dropdown(choices=[], label="Columna App ID (ej: app_id)")
393
- bq_schema_btn = gr.Button("Obtener columnas (schema)")
394
- bq_schema_msg = gr.Markdown()
395
-
396
- gr.Markdown("**Paso MMP-BQ 3**: Listar eventos por rango")
397
- mmp_events_bq = gr.CheckboxGroup(choices=[], label="Eventos detectados (BigQuery)")
398
- bq_events_btn = gr.Button("Listar eventos por rango (BigQuery)")
399
- bq_events_msg = gr.Markdown()
400
-
401
- gr.Markdown("**Paso MMP-BQ 4**: Consultar y cargar MMP")
402
- mmp_preview_bq = gr.Dataframe(label="Preview MMP (BQ)", interactive=False)
403
- mmp_bq_download = gr.File(label="Descargar MMP (resultado de BigQuery)", interactive=False)
404
- mmp_final_path_bq = gr.Textbox(label="Ruta MMP final (temporal BQ)", visible=False)
405
- bq_query_btn = gr.Button("Consultar y cargar MMP (BigQuery)")
406
- bq_query_msg = gr.Markdown()
407
-
408
- # --- File Panel (simplificado) ---
409
- with gr.Column(visible=True) as file_panel:
410
- gr.Markdown("**Paso MMP-Archivo 1**: Subir y detectar columnas")
411
- mmp_file = gr.File(label="Subí MMP.xlsx/csv", file_types=[".xlsx", ".csv"])
412
- with gr.Row():
413
- file_time_col = gr.Dropdown(choices=[], label="Columna temporal (archivo)")
414
- mmp_event_col_file = gr.Dropdown(choices=[], label="Columna de EVENTO (archivo)")
415
- id_mmp_col_file = gr.Dropdown(choices=[], label="ID en MMP (archivo)")
416
- file_app_id_col = gr.Dropdown(choices=[], label="Columna App ID (archivo)")
417
- file_schema_btn = gr.Button("Obtener columnas (archivo)")
418
- file_schema_msg = gr.Markdown()
419
-
420
- gr.Markdown("**Paso MMP-Archivo 2**: (opcional) Listar eventos del archivo y filtrar")
421
- mmp_events_file = gr.CheckboxGroup(choices=[], label="Eventos detectados (archivo)")
422
- file_events_btn = gr.Button("Listar eventos (archivo)")
423
- file_events_msg = gr.Markdown()
424
-
425
- gr.Markdown("**Paso MMP-Archivo 3**: Cargar & previsualizar")
426
- mmp_preview_file = gr.Dataframe(label="Preview MMP (archivo)", interactive=False)
427
- mmp_file_download = gr.File(label="Descargar MMP (archivo filtrado)", interactive=False)
428
- mmp_final_path_file = gr.Textbox(label="Ruta MMP final (temporal archivo)", visible=False)
429
- file_query_btn = gr.Button("Cargar MMP (archivo)")
430
- file_query_msg = gr.Markdown()
431
-
432
- # Toggle panels
433
- def _toggle_source(src):
434
- return (gr.update(visible=(src=="Subir archivo")), gr.update(visible=(src=="BigQuery")))
435
- mmp_source.change(_toggle_source, inputs=[mmp_source], outputs=[file_panel, bq_panel])
436
-
437
- # BQ: schema
438
- def _bq_schema_fixed():
439
- try:
440
- cols, t_guess, e_guess, id_guess, appid_guess = bq_get_columns_fixed()
441
- return (gr.update(choices=cols, value=t_guess),
442
- gr.update(choices=cols, value=e_guess),
443
- gr.update(choices=cols, value=id_guess),
444
- gr.update(choices=cols, value=appid_guess),
445
- "Schema cargado (tabla fija BQ).")
446
- except Exception as e:
447
- return (gr.update(choices=[], value=None),
448
- gr.update(choices=[], value=None),
449
- gr.update(choices=[], value=None),
450
- gr.update(choices=[], value=None),
451
- f"Error schema: {e}")
452
- bq_schema_btn.click(_bq_schema_fixed, inputs=[], outputs=[bq_time_col, mmp_event_col_bq, id_mmp_col_bq, bq_app_id_col, bq_schema_msg])
453
-
454
- # BQ: listar eventos
455
- def _bq_list_events_fixed(ev_col, t_col, app_col, app_val, ds, de):
456
- try:
457
- vals, msg = bq_list_events_fixed(ev_col, t_col, app_col, app_val, ds, de)
458
- return gr.update(choices=vals, value=vals), msg
459
- except Exception as e:
460
- return gr.update(choices=[], value=[]), f"Error al listar eventos: {e}"
461
- bq_events_btn.click(_bq_list_events_fixed, inputs=[mmp_event_col_bq, bq_time_col, bq_app_id_col, bq_app_id_value, bq_start, bq_end], outputs=[mmp_events_bq, bq_events_msg])
462
-
463
- # BQ: query final
464
- def _bq_query_fixed(ev_col, t_col, app_col, app_val, ds, de, evs):
465
- try:
466
- path, preview_rows = bq_query_to_temp_fixed(ev_col, t_col, app_col, app_val, ds, de, evs or [])
467
- preview_df = pd.DataFrame(preview_rows)
468
- file_path = _safe_file_output(path)
469
- return preview_df, file_path, path, "OK: MMP desde BigQuery cargado."
470
- except Exception as e:
471
- return gr.update(), None, "", f"Error consulta BQ: {e}"
472
- bq_query_btn.click(_bq_query_fixed, inputs=[mmp_event_col_bq, bq_time_col, bq_app_id_col, bq_app_id_value, bq_start, bq_end, mmp_events_bq], outputs=[mmp_preview_bq, mmp_bq_download, mmp_final_path_bq, bq_query_msg])
473
-
474
- # File: schema & events
475
- file_schema_btn.click(file_mmp_schema, inputs=[mmp_file], outputs=[file_time_col, mmp_event_col_file, id_mmp_col_file, file_app_id_col, file_schema_msg])
476
- file_events_btn.click(file_mmp_list_events_simple, inputs=[mmp_file, mmp_event_col_file], outputs=[mmp_events_file, file_events_msg])
477
-
478
- # File: final
479
- def _file_query(src_file, ev_col, evs):
480
- try:
481
- path, preview = file_prepare(src_file, ev_col, evs or [])
482
- file_path = _safe_file_output(path)
483
- return preview, file_path, path, "OK: MMP desde archivo cargado."
484
- except Exception as e:
485
- return gr.update(), None, "", f"Error archivo MMP: {e}"
486
- file_query_btn.click(_file_query, inputs=[mmp_file, mmp_event_col_file, mmp_events_file], outputs=[mmp_preview_file, mmp_file_download, mmp_final_path_file, file_query_msg])
487
-
488
- # ===== CLIENTE =====
489
- gr.Markdown("## Fuente 2: CLIENTE")
490
  with gr.Row():
491
- cliente_file = gr.File(label="CLIENTE.xlsx/csv", file_types=[".xlsx", ".csv"])
492
- map_cliente_btn = gr.Button("Obtener mapeo de columnas (CLIENTE)")
 
493
  with gr.Row():
494
  id_cliente_col = gr.Dropdown(choices=[], label="ID en CLIENTE (para cruce)")
495
- validation_col_client = gr.Dropdown(choices=[], value=None, label="Columna de validación (CLIENTE) — opcional")
 
496
  with gr.Row():
497
- metric_col_client = gr.Dropdown(choices=[], value=None, label="Columna de métrica (CLIENTE) opcional")
498
- client_event_col = gr.Dropdown(choices=[], label="Columna de EVENTO (CLIENTE)")
499
- cliente_msg = gr.Markdown()
500
- map_cliente_btn.click(cliente_map_columns, inputs=[cliente_file], outputs=[id_cliente_col, validation_col_client, metric_col_client, client_event_col, cliente_msg])
501
-
502
- gr.Markdown("### Opcional: valores de validación")
503
- valid_vals = gr.CheckboxGroup(choices=[], label="Valores que significan VALIDADO (CLIENTE)")
504
- load_valid_btn = gr.Button("Cargar valores de validación (CLIENTE)")
505
- valid_msg = gr.Markdown()
506
- load_valid_btn.click(load_validation_values, inputs=[cliente_file, validation_col_client], outputs=[valid_vals, valid_msg])
507
-
508
- # ===== Generar =====
509
- gr.Markdown("## Generar tablas y Excel")
510
- run_btn = gr.Button("Generar tablas")
511
- preview_out = gr.Dataframe(label="Preview: primera tabla por EVENTO", interactive=False)
512
- xls_file = gr.File(label="Descargar Excel (tablas_por_EVENTO + raw_merge)", interactive=False)
513
- gen_msg = gr.Markdown()
514
-
515
- def _compute_router(cliente,
516
- source,
517
- mmp_final_file_panel, mmp_final_bq_panel,
518
- id_cli, id_mmp_file, id_mmp_bq,
519
- val_col, val_vals,
520
- metric_cli, cli_evt,
521
- mmp_evt_file, mmp_evt_bq,
522
- events_file, events_bq):
523
- if source == "Subir archivo":
524
- mmp_path = mmp_final_file_panel
525
- id_mmp = id_mmp_file
526
- mmp_evt_col = mmp_evt_file
527
- selected_events = events_file
528
- else:
529
- mmp_path = mmp_final_bq_panel
530
- id_mmp = id_mmp_bq
531
- mmp_evt_col = mmp_evt_bq
532
- selected_events = events_bq
533
-
534
- return compute(cliente, mmp_path,
535
- id_cli, id_mmp,
536
- val_col, val_vals,
537
- metric_cli,
538
- cli_evt,
539
- mmp_evt_col,
540
- selected_events)
541
 
542
  run_btn.click(
543
- _compute_router,
544
- inputs=[cliente_file,
545
- mmp_source,
546
- mmp_final_path_file, mmp_final_path_bq,
547
- id_cliente_col, id_mmp_col_file, id_mmp_col_bq,
548
- validation_col_client, valid_vals,
549
- metric_col_client, client_event_col,
550
- mmp_event_col_file, mmp_event_col_bq,
551
- mmp_events_file, mmp_events_bq],
552
- outputs=[preview_out, xls_file, gen_msg]
553
  )
554
 
555
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  import pandas as pd
3
  from io import BytesIO
4
+ import os
5
  import tempfile
6
+
7
+ from pandas.core.indexes.base import F
8
+
9
+ APP_TITLE = "Cruce CLIENTE × LINKTRUST por MODELO (CLIENTE) y AFFILIATE (LINKTRUST)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  APP_DESC = """
11
+ 1) Subí **CLIENTE** y **LINKTRUST** (xlsx/csv).
12
+ 2) Elegí columnas de **ID** para cruce.
13
+ 3) Elegí **AFFILIATE en LINKTRUST** (filas) y **MODELO en CLIENTE** (una tabla por MODELO).
14
+ 4) Elegí **columna de validación (CLIENTE)** y cargá los **valores que significan VALIDADO**.
15
+ 5) Generá tablas: por cada **MODELO** se crea una tabla con filas **AFFILIATE** y columnas **Cliente, LT, %**, donde **% = (Cliente / LT) × 100** (1 decimal).
16
+ La **Hoja 1** del Excel contiene todas las tablas apiladas por MODELO. La **Hoja 2** contiene la raw data del cruce (pre-filtro).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  """
18
 
 
19
  def _read_excel(pathlike):
20
  return pd.read_excel(pathlike, engine="openpyxl")
21
 
 
25
  except Exception:
26
  return pd.read_csv(pathlike, sep=None, engine="python", on_bad_lines="skip", encoding="latin-1")
27
 
28
+ def _safe_read(fileobj):
29
+ if fileobj is None:
30
  return None
31
+ path = fileobj.name if hasattr(fileobj, "name") else fileobj
32
  ext = os.path.splitext(str(path))[-1].lower()
33
  try:
34
  if ext in [".xlsx", ".xlsm", ".xltx", ".xltm"]:
 
46
  except Exception as e:
47
  raise RuntimeError(f"No se pudo leer '{os.path.basename(str(path))}': {e}")
48
 
49
+ def _resolve_merged_col(merged, base_name, prefer_suffix=None):
50
+ """Devuelve el nombre real de la columna en merged (maneja _CLIENTE/_LINKTRUST y case-insensitive)."""
51
+ if base_name in merged.columns:
52
+ return base_name
53
+ if prefer_suffix and f"{base_name}{prefer_suffix}" in merged.columns:
54
+ return f"{base_name}{prefer_suffix}"
55
+ for suf in ["_CLIENTE", "_LINKTRUST"]:
56
+ cand = f"{base_name}{suf}"
57
+ if cand in merged.columns:
58
+ return cand
59
+ lower_map = {c.lower(): c for c in merged.columns}
60
+ return lower_map.get(base_name.lower(), None)
61
+
62
+ def load_columns(cliente_file, linktrust_file):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  try:
64
+ df_c = _safe_read(cliente_file) if cliente_file else None
65
+ df_l = _safe_read(linktrust_file) if linktrust_file else None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  except Exception as e:
67
+ return (gr.update(), gr.update(), gr.update(), gr.update(),
68
+ gr.update(), gr.update(), "Error al leer archivos: " + str(e))
69
+
70
+ cliente_cols = list(df_c.columns) if df_c is not None else []
71
+ linktrust_cols = list(df_l.columns) if df_l is not None else []
72
+
73
+ def _guess(cols, candidates):
74
+ # case-insensitive first match
75
+ lower_map = {c.lower(): c for c in cols}
76
+ for cand in candidates:
77
+ if cand.lower() in lower_map:
78
+ return lower_map[cand.lower()]
79
+ return cols[0] if cols else None
80
+
81
+ guess_id_c = _guess(cliente_cols, ["Transaction Id", "ID", "Id"])
82
+ guess_id_l = _guess(linktrust_cols, ["Transaction Id", "ID", "Id"])
83
+ guess_aff = _guess(linktrust_cols, ["AffiliateId", "Affiliate Id", "AFFILIATEID", "affiliate_id"])
84
+ guess_modelo = _guess(cliente_cols, ["MODELO", "Modelo", "Model", "model"])
85
+
86
+ return (
87
+ gr.update(choices=cliente_cols, value=guess_id_c), # id_cliente_col
88
+ gr.update(choices=linktrust_cols, value=guess_id_l), # id_linktrust_col
89
+ gr.update(choices=linktrust_cols, value=guess_aff), # affiliate_col (LINKTRUST)
90
+ gr.update(choices=cliente_cols, value=guess_modelo), # modelo_col (CLIENTE)
91
+ gr.update(choices=cliente_cols, value=None), # validation_col (CLIENTE)
92
+ gr.update(choices=[], value=[]), # validation values
93
+ "Listo: columnas cargadas."
94
+ )
95
 
96
  def load_validation_values(cliente_file, validation_col):
97
  try:
98
  df_c = _safe_read(cliente_file) if cliente_file else None
99
  except Exception as e:
100
  return gr.update(choices=[], value=[]), f"Error al leer CLIENTE: {e}"
101
+
102
  if df_c is None or not validation_col or validation_col not in df_c.columns:
103
+ return gr.update(choices=[], value=[]), "Subí CLIENTE y elegí la columna de validación (CLIENTE)."
104
+ vals = sorted(pd.Series(df_c[validation_col].unique(), dtype="object").astype(str).fillna(""))
105
+ return gr.update(choices=vals, value=[]), f"{len(vals)} valores posibles encontrados."
106
+
107
+ def _prepare_tables(df_merged, affiliate_col_merged, modelo_col_merged,
108
+ affiliate_display_name, modelo_display_name,
109
+ validation_col_in_merged, valid_values):
110
+ # LT = todos los matches (pre-filtro) por (AFFILIATE, MODELO)
111
+ lt_counts = (
112
+ df_merged
113
+ .groupby([affiliate_col_merged, modelo_col_merged], dropna=False)
114
+ .size()
115
+ .rename("LT")
116
+ .reset_index()
117
+ )
118
+
119
+ # Cliente = sólo filas validadas según CLIENTE
120
+ if valid_values:
121
+ mask = df_merged[validation_col_in_merged].astype(str).isin([str(v) for v in valid_values])
122
+ df_valid = df_merged[mask].copy()
123
+ else:
124
+ df_valid = df_merged.iloc[0:0].copy()
125
+
126
+ cliente_counts = (
127
+ df_valid
128
+ .groupby([affiliate_col_merged, modelo_col_merged], dropna=False)
129
+ .size()
130
+ .rename("Cliente")
131
+ .reset_index()
132
+ )
133
+
134
+ combined = lt_counts.merge(cliente_counts, on=[affiliate_col_merged, modelo_col_merged], how="left").fillna({"Cliente": 0})
135
+ combined["Cliente"] = combined["Cliente"].astype(int)
136
+ combined["LT"] = combined["LT"].astype(int)
137
+ # % = Cliente / LT * 100
138
+ combined["%"] = (combined["Cliente"] / combined["LT"] * 100).round(1)
139
+
140
+ tables_by_modelo = {}
141
+ for modelo_val, sub in combined.groupby(modelo_col_merged, dropna=False):
142
+ sub = sub.rename(columns={affiliate_col_merged: affiliate_display_name, modelo_col_merged: modelo_display_name})
143
+ sub = sub.sort_values(by=[affiliate_display_name]).reset_index(drop=True)
144
+ sub = sub[[affiliate_display_name, "Cliente", "LT", "%"]]
145
+ tot_cliente = int(sub["Cliente"].sum())
146
+ tot_lt = int(sub["LT"].sum())
147
+ tot_pct = round((tot_cliente / tot_lt * 100), 1) if tot_lt else 0.0
148
+ sub = pd.concat([sub, pd.DataFrame([{affiliate_display_name: "Suma total", "Cliente": tot_cliente, "LT": tot_lt, "%": tot_pct}])], ignore_index=True)
149
+ tables_by_modelo[modelo_val] = sub
150
+
151
+ return tables_by_modelo, combined
152
+
153
def compute(cliente_file, linktrust_file,
            id_cliente_col, id_linktrust_col,
            affiliate_col, modelo_col,
            validation_col_client, validation_values):
    """Cross CLIENTE × LINKTRUST by ID and build per-MODELO summary tables.

    Returns ``(preview_df, excel_path, status_message)``; on any failure the
    first two elements are ``None`` and the message describes the error.
    """
    if not cliente_file or not linktrust_file:
        return None, None, "Faltan archivos."

    try:
        df_c = _safe_read(cliente_file)
        df_l = _safe_read(linktrust_file)
    except Exception as e:
        return None, None, f"Error al leer archivos: {e}"

    # Validate that every selected column exists in its source frame.
    for name, col, df in [
        ("ID CLIENTE", id_cliente_col, df_c),
        ("ID LINKTRUST", id_linktrust_col, df_l),
        ("AFFILIATE (LINKTRUST)", affiliate_col, df_l),
        ("MODELO (CLIENTE)", modelo_col, df_c),
    ]:
        if not col or col not in df.columns:
            return None, None, f"Columna inválida: {name} = {col}"

    if not validation_col_client or validation_col_client not in df_c.columns:
        return None, None, f"Elegí la columna de validación en CLIENTE."

    # Inner join on the chosen IDs.
    try:
        merged = df_c.merge(
            df_l, left_on=id_cliente_col, right_on=id_linktrust_col, how="inner",
            suffixes=("_CLIENTE", "_LINKTRUST")
        )
    except Exception as e:
        return None, None, f"Error durante el merge por IDs: {e}"

    if merged.empty:
        return None, None, "El cruce por IDs no arrojó filas."

    # The merge may have suffixed colliding names; resolve the real ones.
    validation_col_in_merged = validation_col_client if validation_col_client in merged.columns else f"{validation_col_client}_CLIENTE"
    if validation_col_in_merged not in merged.columns:
        return None, None, f"No se encuentra '{validation_col_client}' en merged."

    affiliate_in_merged = _resolve_merged_col(merged, affiliate_col, prefer_suffix="_LINKTRUST")
    modelo_in_merged = _resolve_merged_col(merged, modelo_col, prefer_suffix="_CLIENTE")
    if affiliate_in_merged is None:
        return None, None, f"No se encuentra la columna AFFILIATE '{affiliate_col}' en merged."
    if modelo_in_merged is None:
        return None, None, f"No se encuentra la columna MODELO '{modelo_col}' en merged."

    # Stable internal aliases so the grouping columns never collide with
    # user-provided column names.
    merged["__AFFILIATE__"] = merged[affiliate_in_merged]
    # Fixed: previous version referenced a typo'd name behind a
    # `'...' in locals()` guard that was always False; the else branch was
    # the only reachable path, so this is the same behavior without the trap.
    merged["__MODELO__"] = merged[modelo_in_merged]

    # One table per MODELO value.
    try:
        tables_by_modelo, combined_counts = _prepare_tables(
            df_merged=merged,
            affiliate_col_merged="__AFFILIATE__",
            modelo_col_merged="__MODELO__",
            affiliate_display_name=affiliate_col,
            modelo_display_name=modelo_col,
            validation_col_in_merged=validation_col_in_merged,
            valid_values=validation_values or []
        )
    except Exception as e:
        return None, None, f"Error construyendo tablas: {e}"

    # Excel: one sheet with the stacked per-MODELO tables, one raw-merge sheet.
    xls_bytes = BytesIO()
    with pd.ExcelWriter(xls_bytes, engine="xlsxwriter") as writer:
        sheet_name = "tablas_por_MODELO"
        start_row = 0
        for modelo_val, table_df in tables_by_modelo.items():
            # Title row with the MODELO value, then its table, then a gap.
            pd.DataFrame([modelo_val]).to_excel(writer, sheet_name=sheet_name, startrow=start_row, index=False, header=False)
            start_row += 1
            table_df.to_excel(writer, sheet_name=sheet_name, startrow=start_row, index=False)
            start_row += len(table_df) + 2

        # raw merge with the key columns first, then everything else.
        cols_keep = []
        for col in [affiliate_in_merged, modelo_in_merged, id_cliente_col, id_linktrust_col, validation_col_in_merged]:
            if col in merged.columns and col not in cols_keep:
                cols_keep.append(col)
        cols_rest = [c for c in merged.columns if c not in cols_keep]
        merged[cols_keep + cols_rest].to_excel(writer, sheet_name="raw_merge", index=False)
    xls_bytes.seek(0)

    # Persist to a temp file so Gradio can serve the download.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
    tmp.write(xls_bytes.getvalue())
    tmp.flush(); tmp.close()
    download_path = tmp.name

    # Preview: the first per-MODELO table (None if there are none).
    preview = next(iter(tables_by_modelo.values()), None)

    return preview, download_path, "Listo ✅"
253
 
 
254
with gr.Blocks(title=APP_TITLE) as demo:
    gr.Markdown(f"# {APP_TITLE}\n\n{APP_DESC}")

    # --- File inputs --------------------------------------------------
    with gr.Row():
        cliente_file = gr.File(label="CLIENTE.xlsx (o .csv)", file_types=[".xlsx", ".csv"])
        linktrust_file = gr.File(label="LINKTRUST.xlsx (o .csv)", file_types=[".xlsx", ".csv"])

    # --- Column selectors (populated by "Cargar columnas") ------------
    with gr.Row():
        id_cliente_col = gr.Dropdown(choices=[], label="ID en CLIENTE (para cruce)")
        id_linktrust_col = gr.Dropdown(choices=[], label="ID en LINKTRUST (para cruce)")

    with gr.Row():
        affiliate_col = gr.Dropdown(choices=[], label="AFFILIATE en LINKTRUST (filas de cada tabla)")
        modelo_col = gr.Dropdown(choices=[], label="MODELO en CLIENTE (una tabla por MODELO)")

    with gr.Row():
        validation_col_client = gr.Dropdown(choices=[], label="Columna de validación (CLIENTE)")
        validation_vals = gr.CheckboxGroup(choices=[], label="Valores que significan VALIDADO (CLIENTE)")

    status = gr.Markdown("Cargá archivos y presioná **Cargar columnas**.")

    # Step 1: read both files and fill every column dropdown.
    load_btn = gr.Button("Cargar columnas desde archivos")
    load_btn.click(
        load_columns,
        inputs=[cliente_file, linktrust_file],
        outputs=[id_cliente_col, id_linktrust_col, affiliate_col, modelo_col, validation_col_client, validation_vals, status]
    )

    # Step 2: list the distinct values of the chosen validation column.
    load_vals_btn = gr.Button("Cargar valores de validación (desde CLIENTE)")
    load_vals_btn.click(
        load_validation_values,
        inputs=[cliente_file, validation_col_client],
        outputs=[validation_vals, status]
    )

    # Step 3: run the cross and produce the preview + Excel download.
    run_btn = gr.Button("Generar tablas y Excel")

    preview_out = gr.Dataframe(label="Preview: primera tabla por MODELO", interactive=False)
    xls_file = gr.File(label="Descargar Excel (tablas_por_MODELO + raw_merge)", interactive=False)

    run_btn.click(
        compute,
        inputs=[cliente_file, linktrust_file,
                id_cliente_col, id_linktrust_col,
                affiliate_col, modelo_col,
                validation_col_client, validation_vals],
        outputs=[preview_out, xls_file, status]
    )
302
 
303
  if __name__ == "__main__":