farmentano12 commited on
Commit
55c55d0
·
verified ·
1 Parent(s): 3d0c598

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +270 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ import pandas as pd
4
+ from io import BytesIO
5
+ import os
6
+ import tempfile
7
+
8
# Page title: used for the browser tab and for the top-level heading.
APP_TITLE = "Cruce CLIENTE × MMP por EVENTO"

# Markdown walkthrough rendered right under the title. This is user-facing
# runtime text (Spanish) and must stay exactly as written.
APP_DESC = """
**Pasos**
**1)** Subí **CLIENTE** (validación) y **MMP** (xlsx/csv), luego presioná **Cargar columnas**.
**2)** Elegí **ID CLIENTE**, **ID MMP**, **columna de validación (CLIENTE)** *(sugerimos Advertising ID/Status)* y **métrica del MMP** (opcional).
**3)** Elegí la **columna de EVENTO (MMP)** y mapeá los **eventos por los que el cliente paga**.
**4)** Cargá los **valores de validación** (CLIENTE) y marcá cuáles significan **VALIDADO**.
**5)** Generá tablas. Por cada **EVENTO** se crea una tabla con **Cliente, MMP, %** y, si definiste **métrica**, se suma **sólo en filas validadas**.
**% = (Cliente / MMP) × 100** (1 decimal).
**Hoja 1:** tablas apiladas por EVENTO. **Hoja 2:** `raw_merge` con todas las filas de CLIENTE (left join).
"""
19
+
20
def _read_excel(pathlike):
    """Read an Excel workbook into a DataFrame using the openpyxl engine.

    Args:
        pathlike: Anything ``pandas.read_excel`` accepts as its first
            argument (this file always passes a filesystem path).

    Returns:
        pandas.DataFrame: Contents of the sheet pandas reads by default.

    Raises:
        Exception: Whatever pandas/openpyxl raise for a missing or
            non-Excel file; callers rely on that to fall back to CSV.
    """
    # The engine is fixed explicitly so the reader never depends on pandas'
    # engine auto-detection.
    return pd.read_excel(pathlike, engine="openpyxl")
22
+
23
def _read_csv_with_fallbacks(pathlike):
    """Read a delimited text file, retrying as latin-1 when UTF-8 fails.

    The delimiter is sniffed (``sep=None`` with the python engine) and
    malformed rows are skipped rather than aborting the whole read.

    Args:
        pathlike: Path of the text file to read.

    Returns:
        pandas.DataFrame: Parsed contents of the file.

    Raises:
        Exception: Any non-encoding error from ``pandas.read_csv`` (missing
            file, empty file, ...) is propagated unchanged.
    """
    reader_opts = {"sep": None, "engine": "python", "on_bad_lines": "skip"}
    try:
        return pd.read_csv(pathlike, encoding="utf-8", **reader_opts)
    except UnicodeError:
        # Only a decoding failure justifies a second pass: latin-1 maps every
        # byte to a character, so this retry cannot fail on encoding again.
        return pd.read_csv(pathlike, encoding="latin-1", **reader_opts)
28
+
29
def _safe_read(fileobj):
    """Load an uploaded spreadsheet (Excel or delimited text) as a DataFrame.

    Args:
        fileobj: ``None``, a path, or an object exposing the path through a
            ``name`` attribute.

    Returns:
        pandas.DataFrame | None: The parsed table, or ``None`` when
        ``fileobj`` is ``None``.

    Raises:
        RuntimeError: If no reader could parse the file. The message names
            the file and the underlying exception is chained as the cause.
    """
    if fileobj is None:
        return None

    path = fileobj.name if hasattr(fileobj, "name") else fileobj
    ext = os.path.splitext(str(path))[-1].lower()
    try:
        if ext in (".xlsx", ".xlsm", ".xltx", ".xltm"):
            return _read_excel(path)
        # Any other extension (".csv", none, unknown): the name is not
        # trusted. Excel is attempted first and text parsing is the fallback.
        try:
            return _read_excel(path)
        except Exception:
            return _read_csv_with_fallbacks(path)
    except Exception as e:
        raise RuntimeError(f"No se pudo leer '{os.path.basename(str(path))}': {e}") from e
49
+
50
def _guess(cols, candidates):
    """Pick the column that best matches a list of preferred names.

    Matching is case-insensitive and candidates are tried in order, so the
    earliest candidate present in ``cols`` wins. Column labels are compared
    through ``str()`` so non-string labels (ints, timestamps) cannot crash
    the lookup.

    Args:
        cols: Iterable of column labels.
        candidates: Preferred names, most preferred first.

    Returns:
        The matching label exactly as it appears in ``cols``; otherwise the
        first label of ``cols``; ``None`` when ``cols`` is empty.
    """
    cols = list(cols)
    # When two labels differ only by case, the later one wins.
    by_lower = {str(label).lower(): label for label in cols}
    for cand in candidates:
        key = str(cand).lower()
        if key in by_lower:
            return by_lower[key]
    return cols[0] if cols else None
56
+
57
def load_columns(cliente_file, mmp_file):
    """Read both uploads and populate the column dropdowns with guesses.

    Args:
        cliente_file: Uploaded CLIENTE file, or ``None``.
        mmp_file: Uploaded MMP file, or ``None``.

    Returns:
        tuple: Five ``gr.update`` objects (CLIENTE id, MMP id, CLIENTE
        validation, MMP metric, MMP event dropdowns, in that order) followed
        by a status message. On a read error the five updates are no-ops and
        the message carries the error.
    """
    try:
        df_c = _safe_read(cliente_file) if cliente_file else None
        df_m = _safe_read(mmp_file) if mmp_file else None
    except Exception as e:
        return (gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), "Error al leer archivos: " + str(e))

    cliente_cols = list(df_c.columns) if df_c is not None else []
    mmp_cols = list(df_m.columns) if df_m is not None else []

    id_candidates = ["Advertising ID", "advertising id", "advertising_id", "User Id", "Transaction Id", "ID", "Id"]
    validation_candidates = ["Advertising ID", "advertising id", "advertising_id", "Validado", "Validation", "Status", "Estado"]
    metric_candidates = ["Event Revenue", "Revenue", "Amount", "Value"]
    event_candidates = ["Event Name", "event_name", "Evento", "EVENTO", "Event"]

    id_c_guess = _guess(cliente_cols, id_candidates)
    id_m_guess = _guess(mmp_cols, id_candidates)
    validation_guess = _guess(cliente_cols, validation_candidates)
    event_guess = _guess(mmp_cols, event_candidates)

    # The metric is optional. _guess falls back to the first column when no
    # candidate matches, which would silently pre-select an unrelated column,
    # so the guess is kept only when it is a real metric-name match.
    metric_guess = _guess(mmp_cols, metric_candidates)
    if str(metric_guess).lower() not in {name.lower() for name in metric_candidates}:
        metric_guess = None

    return (
        gr.update(choices=cliente_cols, value=id_c_guess),        # id_cliente_col
        gr.update(choices=mmp_cols, value=id_m_guess),            # id_mmp_col
        gr.update(choices=cliente_cols, value=validation_guess),  # validation_col_client
        gr.update(choices=mmp_cols, value=metric_guess),          # metric_col_mmp
        gr.update(choices=mmp_cols, value=event_guess),           # mmp_event_col
        "Columnas cargadas. Completá el Paso 2 y luego mapeá eventos."
    )
81
+
82
def load_event_values(mmp_file, event_col):
    """List the distinct event names found in the MMP file.

    Args:
        mmp_file: Uploaded MMP file, or ``None``.
        event_col: Name of the MMP column holding the event name.

    Returns:
        tuple: A ``gr.update`` for the events checkbox group (every event
        offered and pre-selected) and a status message. On any problem the
        group is emptied and the message explains why.
    """
    try:
        df_m = _safe_read(mmp_file) if mmp_file else None
    except Exception as e:
        return gr.update(choices=[], value=[]), f"Error al leer MMP: {e}"

    if df_m is None or not event_col or event_col not in df_m.columns:
        return gr.update(choices=[], value=[]), "Subí MMP y elegí la columna de EVENTO."

    # Missing events are dropped before stringifying; otherwise they would
    # appear as a pre-selected "nan" event. Uniqueness is taken on the
    # string form because that is what compute() compares against.
    vals = sorted(df_m[event_col].dropna().astype(str).unique().tolist())
    return gr.update(choices=vals, value=vals), f"{len(vals)} eventos encontrados (pre-seleccionados)."
92
+
93
def load_validation_values(cliente_file, validation_col):
    """List the distinct values of the CLIENTE validation column.

    Args:
        cliente_file: Uploaded CLIENTE file, or ``None``.
        validation_col: Name of the CLIENTE column holding the validation
            status.

    Returns:
        tuple: A ``gr.update`` for the validation checkbox group (all values
        offered, none selected) and a status message. On any problem the
        group is emptied and the message explains why.
    """
    try:
        df_c = _safe_read(cliente_file) if cliente_file else None
    except Exception as e:
        return gr.update(choices=[], value=[]), f"Error al leer CLIENTE: {e}"

    if df_c is None or not validation_col or validation_col not in df_c.columns:
        return gr.update(choices=[], value=[]), "Subí CLIENTE y elegí la columna de validación (CLIENTE)."

    # Missing cells are kept on purpose and show up under their string form
    # (e.g. "nan"), so the user can still see them as a distinct status.
    # Uniqueness is taken after stringifying so equal labels are not repeated.
    vals = sorted(df_c[validation_col].astype(str).unique().tolist())
    return gr.update(choices=vals, value=[]), f"{len(vals)} valores posibles de validación."
103
+
104
def _column_in_merged(merged, col, suffix):
    """Return the name ``col`` carries in ``merged`` (plain or suffixed), else None."""
    if not col:
        return None
    if col in merged.columns:
        return col
    suffixed = f"{col}{suffix}"
    return suffixed if suffixed in merged.columns else None


def _event_row(sub, validation_col, validated, metric_col):
    """Build the one-row summary (Cliente, MMP, %, optional metric sum) for one event."""
    metric_key = f"MMP_{metric_col}_suma_validado" if metric_col else None
    mmp_count = len(sub)
    if mmp_count == 0:
        row = {"Cliente": 0, "MMP": 0, "%": 0.0}
        if metric_key:
            row[metric_key] = 0.0
        return row

    valid_mask = sub[validation_col].astype(str).isin(validated)
    cliente_count = int(valid_mask.sum())
    row = {
        "Cliente": cliente_count,
        "MMP": mmp_count,
        "%": round(cliente_count / mmp_count * 100, 1),
    }
    if metric_key:
        # Non-numeric metric cells are coerced to NaN and ignored by sum().
        vals = pd.to_numeric(sub.loc[valid_mask, metric_col], errors="coerce")
        row[metric_key] = float(vals.sum()) if cliente_count else 0.0
    return row


def _write_workbook(tables_by_event, merged, lead_cols):
    """Write the stacked per-event tables and the raw merge to a temp .xlsx; return its path."""
    buffer = BytesIO()
    with pd.ExcelWriter(buffer, engine="xlsxwriter") as writer:
        sheet_name = "tablas_por_EVENTO"
        start_row = 0
        for ev, table_df in tables_by_event.items():
            # One title row with the event name, then the table, then a gap.
            pd.DataFrame([[ev]]).to_excel(writer, sheet_name=sheet_name, startrow=start_row, index=False, header=False)
            start_row += 1
            table_df.to_excel(writer, sheet_name=sheet_name, startrow=start_row, index=False)
            start_row += len(table_df) + 2

        other_cols = [c for c in merged.columns if c not in lead_cols]
        merged[lead_cols + other_cols].to_excel(writer, sheet_name="raw_merge", index=False)

    # delete=False: the file must outlive this function so it can be served
    # as a download from the returned path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as tmp:
        tmp.write(buffer.getvalue())
        return tmp.name


def compute(cliente_file, mmp_file,
            id_cliente_col, id_mmp_col,
            validation_col_client, metric_col_mmp,
            event_col, selected_events, validation_values):
    """Cross CLIENTE against MMP by ID and summarise validation per event.

    CLIENTE is left-joined to MMP on the chosen ID columns. For every
    selected event the merged rows carrying that event are counted ("MMP"),
    the ones whose validation value is among ``validation_values`` are
    counted ("Cliente"), and ``% = Cliente / MMP * 100`` is rounded to one
    decimal. When a metric column is available it is summed over the
    validated rows only.

    Args:
        cliente_file: Uploaded CLIENTE file.
        mmp_file: Uploaded MMP file.
        id_cliente_col: Join key column in CLIENTE.
        id_mmp_col: Join key column in MMP.
        validation_col_client: CLIENTE column holding the validation status.
        metric_col_mmp: Optional MMP column to sum; ignored when empty or
            not present after the merge.
        event_col: MMP column holding the event name.
        selected_events: Events to report; when empty, every non-missing
            event present in the merge is used.
        validation_values: Validation values that count as validated.

    Returns:
        tuple: ``(preview, download_path, message)``. ``preview`` is the
        table of the first event (or ``None``), ``download_path`` is the path
        of the generated ``.xlsx`` (or ``None``), and ``message`` is a status
        or error text. Failures are reported through ``message`` with the
        first two items set to ``None``.
    """
    if not cliente_file or not mmp_file:
        return None, None, "Faltan archivos."

    try:
        df_c = _safe_read(cliente_file)
        df_m = _safe_read(mmp_file)
    except Exception as e:
        return None, None, f"Error al leer archivos: {e}"

    for name, col, df in [
        ("ID CLIENTE", id_cliente_col, df_c),
        ("ID MMP", id_mmp_col, df_m),
        ("Validación (CLIENTE)", validation_col_client, df_c),
    ]:
        if not col or col not in df.columns:
            return None, None, f"Columna inválida: {name} = {col}"

    try:
        merged = df_c.merge(
            df_m, left_on=id_cliente_col, right_on=id_mmp_col, how="left",
            suffixes=("_CLIENTE", "_MMP")
        )
    except Exception as e:
        return None, None, f"Error durante el merge por IDs: {e}"

    if merged.empty:
        return None, None, "El cruce no arrojó filas."

    # Columns present in both inputs get suffixed by the merge, so every
    # user-chosen name is resolved to the name it carries in `merged`.
    event_in_merged = _column_in_merged(merged, event_col, "_MMP")
    if event_in_merged is None:
        return None, None, "Elegí la columna de EVENTO en el Paso 3."

    validation_in_merged = _column_in_merged(merged, validation_col_client, "_CLIENTE")
    if validation_in_merged is None:
        return None, None, f"No se encuentra '{validation_col_client}' en merged."

    metric_in_merged = _column_in_merged(merged, metric_col_mmp, "_MMP")

    if not selected_events:
        selected_events = sorted(pd.Series(merged[event_in_merged].dropna().unique(), dtype="object").astype(str))

    # Both are loop-invariant: compare everything through its string form.
    validated = [str(v) for v in (validation_values or [])]
    event_as_str = merged[event_in_merged].astype(str)

    tables_by_event = {}
    for ev in selected_events:
        sub = merged[event_as_str == str(ev)]
        tables_by_event[ev] = pd.DataFrame(
            [_event_row(sub, validation_in_merged, validated, metric_in_merged)]
        )

    # Key columns go first in the raw sheet, the remaining ones follow.
    id_mmp_in_merged = id_mmp_col if id_mmp_col in merged.columns else f"{id_mmp_col}_MMP"
    lead_cols = []
    for col in [id_cliente_col, id_mmp_in_merged, event_in_merged, validation_in_merged, metric_in_merged]:
        if col and col in merged.columns and col not in lead_cols:
            lead_cols.append(col)

    try:
        download_path = _write_workbook(tables_by_event, merged, lead_cols)
    except Exception as e:
        # Report export failures through the status message, like every
        # other stage, instead of letting the exception escape.
        return None, None, f"Error al generar el Excel: {e}"

    preview = next(iter(tables_by_event.values()), None)
    return preview, download_path, "Listo ✅"
206
+
207
# UI definition. Components are rendered in creation order, so each status
# Markdown is created immediately before the click handler that writes to it.
with gr.Blocks(title=APP_TITLE) as demo:
    gr.Markdown(f"# {APP_TITLE}\n\n{APP_DESC}")

    # Step 1: upload both files.
    gr.Markdown("## Paso 1: Subir archivos")
    with gr.Row():
        cliente_file = gr.File(label="CLIENTE.xlsx (o .csv)", file_types=[".xlsx", ".csv"])
        mmp_file = gr.File(label="MMP.xlsx (o .csv)", file_types=[".xlsx", ".csv"])
    # NOTE(review): assumed to sit below the upload row, not inside it — confirm layout.
    step1_btn = gr.Button("Paso 1: Cargar columnas")

    # Step 2: choose ID, validation and (optional) metric columns.
    gr.Markdown("## Paso 2: Elegir columnas de ID, validación (CLIENTE) y métrica MMP (opcional)")
    with gr.Row():
        id_cliente_col = gr.Dropdown(choices=[], label="ID en CLIENTE (para cruce)")
        id_mmp_col = gr.Dropdown(choices=[], label="ID en MMP (para cruce)")
    with gr.Row():
        validation_col_client = gr.Dropdown(choices=[], label="Columna de validación (CLIENTE) — sugerimos 'Advertising ID' o 'Status'")
        metric_col_mmp = gr.Dropdown(choices=[], label="Columna de métrica en MMP (opcional)")
    # NOTE(review): assumed to be a full-width dropdown outside the row above — confirm layout.
    mmp_event_col = gr.Dropdown(choices=[], label="(Se usará en el Paso 3) Columna de EVENTO en MMP")
    columns_status = gr.Markdown()
    step1_btn.click(
        load_columns,
        inputs=[cliente_file, mmp_file],
        outputs=[id_cliente_col, id_mmp_col, validation_col_client, metric_col_mmp, mmp_event_col, columns_status]
    )

    # Event-mapping button (placed above the Step 3 heading).
    map_events_btn = gr.Button("Mapear eventos desde columna de eventos de MMP")

    # Step 3: pick the events the client pays for.
    gr.Markdown("## Paso 3: Seleccionar EVENTOS")
    event_vals = gr.CheckboxGroup(choices=[], label="Eventos por los que el cliente paga")
    events_status = gr.Markdown()
    map_events_btn.click(
        load_event_values,
        inputs=[mmp_file, mmp_event_col],
        outputs=[event_vals, events_status]
    )

    # Step 4: the button is deliberately placed above the heading.
    load_valid_btn = gr.Button("Paso 4: Cargar valores de validación (CLIENTE)")
    gr.Markdown("## Paso 4: Cargar valores de validación (CLIENTE) y elegirlos")
    valid_vals = gr.CheckboxGroup(choices=[], label="Valores que significan VALIDADO (CLIENTE)")
    validation_status = gr.Markdown()
    load_valid_btn.click(
        load_validation_values,
        inputs=[cliente_file, validation_col_client],
        outputs=[valid_vals, validation_status]
    )

    # Step 5: build the per-event tables and the Excel download.
    gr.Markdown("## Paso 5: Generar tablas y Excel")
    run_btn = gr.Button("Generar tablas")
    preview_out = gr.Dataframe(label="Preview: primera tabla por EVENTO", interactive=False)
    xls_file = gr.File(label="Descargar Excel (tablas_por_EVENTO + raw_merge)", interactive=False)
    run_status = gr.Markdown()
    run_btn.click(
        compute,
        inputs=[cliente_file, mmp_file,
                id_cliente_col, id_mmp_col,
                validation_col_client, metric_col_mmp,
                mmp_event_col, event_vals, valid_vals],
        outputs=[preview_out, xls_file, run_status]
    )

if __name__ == "__main__":
    # Close any Gradio app still running in this process before launching.
    gr.close_all()
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ pandas==2.2.2
2
+ gradio==4.44.0
3
+ openpyxl==3.1.5
4
+ xlsxwriter==3.2.0