Spaces:

ricardoadriano
/

atividade6

Sleeping

App Files Files Community

ricardoadriano committed on Oct 28, 2025

Commit

6bbdfd0

verified ·

1 Parent(s): 8726ddd

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +115 -105

src/streamlit_app.py CHANGED Viewed

@@ -4,7 +4,7 @@
 # Dataset: Dados/marketing_campaign.csv
 # Objetivo: permitir escolher ALVO e PREDITORES e produzir INFERÊNCIA (item e),
 #           usando Logit (alvo binário) ou OLS (alvo contínuo).
-#           Inclui engenharia de atributos inspirada no seu notebook.
 # -------------------------------------------------------------------
 import os
 import numpy as np
@@ -13,8 +13,6 @@ import streamlit as st
 import altair as alt
 from typing import List, Tuple
-# Pré-processamento e métricas (apoio)
 from sklearn.model_selection import train_test_split
 from sklearn.compose import ColumnTransformer
 from sklearn.preprocessing import OneHotEncoder, StandardScaler
@@ -24,37 +22,27 @@ from sklearn.metrics import (
     roc_auc_score, accuracy_score, confusion_matrix, roc_curve,
     r2_score, mean_squared_error
 )
-# Modelinhos rápidos para métricas (não usados na inferência)
 from sklearn.linear_model import LogisticRegression, LinearRegression
-# Inferência estatística
 import statsmodels.api as sm
 st.set_page_config(page_title="Inferência Estatística — Marketing Campaign", layout="wide")
 st.title("📊 Inferência Estatística — Customer Personality Analysis")
-st.caption("Escolha o **alvo** e as **variáveis explicativas** na barra lateral (esquerda) e obtenha inferência (item **e**).")
 DATA_PATH = "Dados/marketing_campaign.csv"
-# -----------------------------
-# Utilidades
-# -----------------------------
 @st.cache_data(show_spinner=False)
 def load_csv_try(path: str) -> pd.DataFrame:
-    """
-    Lê CSV tentando separadores: vírgula, ponto-e-vírgula e tab.
-    """
     for sep in [",", ";", "\t"]:
         try:
             df = pd.read_csv(path, sep=sep, encoding="utf-8")
-            # Heurística: se vier 1 coluna gigantesca, tenta o próximo separador
             if sep != "\t" and df.shape[1] == 1:
                 continue
             return df
         except Exception:
             continue
-    # última tentativa bruta
     return pd.read_csv(path, sep=None, engine="python")
 def split_num_cat(df: pd.DataFrame, exclude: List[str]) -> Tuple[List[str], List[str]]:
@@ -64,26 +52,32 @@ def split_num_cat(df: pd.DataFrame, exclude: List[str]) -> Tuple[List[str], List
 def is_binary_series(s: pd.Series) -> bool:
     vals = pd.unique(s.dropna())
-    if len(vals) == 2:
-        return True
-    if s.dtype == bool:
-        return True
-    return False
 def engineer_features(df: pd.DataFrame) -> pd.DataFrame:
-    """Engenharia mínima inspirada no seu notebook: tenure, totais, shares, ticket, diversidade."""
     out = df.copy()
-    # Datas → Tenure
     if "Dt_Customer" in out.columns:
         out["Dt_Customer"] = pd.to_datetime(out["Dt_Customer"], errors="coerce", dayfirst=True)
         out["TenureDays"] = (pd.Timestamp("today").normalize() - out["Dt_Customer"]).dt.days
-    # Mnt* somados
     mnt_cols = [c for c in out.columns if c.startswith("Mnt")]
     if mnt_cols:
         out["TotalMnt"] = out[mnt_cols].sum(axis=1)
-    # Compras totais e shares
     buy_cols = [c for c in ["NumWebPurchases", "NumCatalogPurchases", "NumStorePurchases"] if c in out.columns]
     if buy_cols:
         out["TotalPurchases"] = out[buy_cols].sum(axis=1)
@@ -96,16 +90,14 @@ def engineer_features(df: pd.DataFrame) -> pd.DataFrame:
     if "TotalMnt" in out.columns and "TotalPurchases" in out.columns:
         out["AvgTicket"] = out["TotalMnt"] / out["TotalPurchases"].replace(0, np.nan)
-    # Diversidade de cesta (quantos Mnt*>0)
     if mnt_cols:
         out["BasketDiversity"] = (out[mnt_cols] > 0).sum(axis=1)
     return out
 def build_preprocessor(num_cols: List[str], cat_cols: List[str]) -> ColumnTransformer:
-    """
-    Imputação + padronização (num) e OHE drop='first' (cat) para evitar colinearidade e viabilizar inferência.
-    """
     num_pipe = Pipeline([
         ("imp", SimpleImputer(strategy="median")),
         ("scaler", StandardScaler())
@@ -114,11 +106,10 @@ def build_preprocessor(num_cols: List[str], cat_cols: List[str]) -> ColumnTransf
         ("imp", SimpleImputer(strategy="most_frequent")),
         ("ohe", OneHotEncoder(handle_unknown="ignore", drop="first", sparse_output=False))
     ])
-    pre = ColumnTransformer([
         ("num", num_pipe, num_cols),
         ("cat", cat_pipe, cat_cols)
     ])
-    return pre
 def get_feature_names(pre: ColumnTransformer, num_cols: List[str], cat_cols: List[str]) -> List[str]:
     names = list(num_cols)
@@ -128,24 +119,18 @@ def get_feature_names(pre: ColumnTransformer, num_cols: List[str], cat_cols: Lis
     return names
 def fit_inference(model_type: str, X_design: pd.DataFrame, y: pd.Series):
-    """
-    Ajusta modelo de inferência (statsmodels):
-      - 'logit' → sm.Logit
-      - 'ols'   → sm.OLS
-    Retorna (resultado_statsmodels, tabela).
-    """
     X_sm = sm.add_constant(X_design, has_constant="add")
     if model_type == "logit":
         res = sm.Logit(y.values, X_sm).fit(disp=False)
-        summ = res.summary2().tables[1].copy()  # coef, std err, z, P>|z|, [0.025, 0.975]
         or_vals = np.exp(res.params)
         or_ci = np.exp(res.conf_int())
         tbl = pd.DataFrame({
-            "feature": summ.index,
             "coef": res.params.values,
             "std_err": res.bse.values,
-            "z/t": summ["z"].values,
-            "p_value": summ["P>|z|"].values,
             "ci_low": res.conf_int()[0].values,
             "ci_high": res.conf_int()[1].values,
             "odds_ratio": or_vals.values,
@@ -167,7 +152,7 @@ def fit_inference(model_type: str, X_design: pd.DataFrame, y: pd.Series):
     return res, tbl
 def recs_from_inference(tbl: pd.DataFrame, model_type: str, k: int = 5):
-    """Gera recomendações do item (e) a partir dos efeitos significativos (p<0.05), ignorando o intercepto."""
     df = tbl[tbl["feature"] != "const"].copy()
     df = df.sort_values(["p_value", "z/t"], ascending=[True, False])
     core = df[df["p_value"] < 0.05].head(k)
@@ -178,47 +163,44 @@ def recs_from_inference(tbl: pd.DataFrame, model_type: str, k: int = 5):
         if model_type == "logit":
             or_txt = f"(OR≈{r['odds_ratio']:.2f}, IC95% {r['or_ci_low']:.2f}–{r['or_ci_high']:.2f}, p={r['p_value']:.3g})"
             if sign > 0:
-                out.append(f"🔧 **Reduzir exposição associada a `{feat}`** {or_txt}, pois aumento nessa variável eleva a probabilidade do evento alvo.")
             else:
-                out.append(f"✅ **Fortalecer fatores ligados a `{feat}`** {or_txt}, pois valores maiores reduzem a probabilidade do evento alvo.")
         else:
             eff = f"(β≈{r['coef']:.3g}, IC95% {r['ci_low']:.2g}–{r['ci_high']:.2g}, p={r['p_value']:.3g})"
             if sign > 0:
                 out.append(f"🔧 **Mitigar o crescimento de `{feat}`** {eff}, pois contribui positivamente para o aumento do alvo.")
             else:
                 out.append(f"✅ **Aumentar `{feat}`** {eff}, pois está associado à redução do alvo.")
-    # Trilhas transversais
-    out.append("🧪 **Testes A/B** nas variáveis mais significativas para validar impacto causal antes de escalar.")
-    out.append("📞 **Melhorar FCR** e fluxos críticos detectados pelas variáveis top-k (treinamento, scripts, UX).")
-    out.append("🔁 **Feedback para Produto/Qualidade** baseado nos fatores com evidência estatística robusta.")
     return out[:k+3]
-# -----------------------------
-# Sidebar (lado esquerdo)
-# -----------------------------
 with st.sidebar:
     st.header("⚙️ Configuração")
     if not os.path.exists(DATA_PATH):
         st.error(f"Arquivo não encontrado: `{DATA_PATH}`. Suba o CSV em `Dados/`.")
         st.stop()
-# Carrega & engenharia
 df_raw = load_csv_try(DATA_PATH)
 df_eng = engineer_features(df_raw)
 with st.sidebar:
-    st.markdown("**Escolha o alvo (variável dependente):**")
     all_cols = df_eng.columns.tolist()
-    # Preferências de alvo conforme seu caderno
-    preferred_targets = ["Response", "Complain", "HasComplained", "Exited", "Churn", "Complaint"]
-    default_target = next((c for c in preferred_targets if c in all_cols), None)
     if default_target is None:
-        # tenta primeira binária; senão primeira numérica
         for c in all_cols:
-            if is_binary_series(df_eng[c]): default_target = c; break
-        if default_target is None:
-            for c in all_cols:
-                if np.issubdtype(df_eng[c].dtype, np.number): default_target = c; break
     target_col = st.selectbox("Alvo (y)", options=all_cols, index=all_cols.index(default_target) if default_target in all_cols else 0)
 # Variáveis explicativas
@@ -227,7 +209,6 @@ num_cols_all, cat_cols_all = split_num_cat(df_eng, exclude=exclude)
 with st.sidebar:
     st.markdown("**Variáveis explicativas (X):**")
-    # sugere alguns engenheirados primeiro
     engineered_first = [c for c in ["TenureDays","TotalMnt","TotalPurchases","OnlineShare","PromoShare","AvgTicket","BasketDiversity"] if c in num_cols_all]
     base_defaults = engineered_first + [c for c in num_cols_all if c not in engineered_first][:5] + cat_cols_all[:3]
     selected_feats = st.multiselect("Selecione X", options=(num_cols_all + cat_cols_all), default=base_defaults)
@@ -239,56 +220,93 @@ if len(selected_feats) == 0:
     st.warning("Selecione pelo menos uma variável explicativa.")
     st.stop()
-# -----------------------------
-# Amostra de dados
-# -----------------------------
-st.markdown("### 🔎 Amostra")
 st.dataframe(df_eng[[target_col] + selected_feats].head(12), use_container_width=True)
-# -----------------------------
-# Preparação / Treino
-# -----------------------------
-df = df_eng.dropna(subset=[target_col]).copy()
-y_raw = df[target_col]
 is_bin = is_binary_series(y_raw)
-model_type = "logit" if is_bin else "ols"
-# Mapear alvo binário não numérico → {0,1}
-if is_bin and not np.issubdtype(y_raw.dtype, np.number):
-    uniq = sorted(pd.unique(y_raw.dropna()).tolist(), key=lambda x: str(x))
-    y = y_raw.replace({uniq[0]: 0, uniq[1]: 1}).astype(int)
 else:
-    y = y_raw.astype(float if not is_bin else int)
-X = df[selected_feats].copy()
 sel_num = [c for c in selected_feats if np.issubdtype(X[c].dtype, np.number)]
 sel_cat = [c for c in selected_feats if (X[c].dtype == "object" or X[c].dtype.name == "category")]
 pre = build_preprocessor(sel_num, sel_cat)
-# Pipeline só para métricas/predição
-quick_est = LogisticRegression(max_iter=200) if is_bin else LinearRegression()
 pipe = Pipeline([("pre", pre), ("est", quick_est)])
 X_train, X_test, y_train, y_test = train_test_split(
-    X, y, test_size=test_size, random_state=random_state, stratify=y if is_bin else None
 )
 with st.spinner("Treinando e construindo matriz de design..."):
     pipe.fit(X_train, y_train)
     pre_fit = pipe.named_steps["pre"].fit(X_train, y_train)
     X_train_design = pre_fit.transform(X_train)
-    feat_names = get_feature_names(pre_fit, sel_num, sel_cat)
     X_train_df = pd.DataFrame(X_train_design, columns=feat_names)
-# -----------------------------
-# Inferência estatística (item e)
-# -----------------------------
 st.markdown("## 📚 Inferência estatística (Item e)")
 with st.spinner("Ajustando modelo de inferência (statsmodels)..."):
     res, infer_tbl = fit_inference(model_type, X_train_df, y_train)
-if is_bin:
-    st.caption("Modelo: **Logit** (alvo binário). Coeficientes em log-odds; também exibimos **odds ratios**.")
     cols_show = ["feature","coef","std_err","z/t","p_value","ci_low","ci_high","odds_ratio","or_ci_low","or_ci_high"]
 else:
     st.caption("Modelo: **OLS** (alvo contínuo). Coeficientes, erros-padrão, estatística t e IC 95%.")
@@ -296,11 +314,9 @@ else:
 st.dataframe(infer_tbl[cols_show].round(4), use_container_width=True)
-# -----------------------------
-# Métricas de desempenho
-# -----------------------------
 st.markdown("### 📈 Desempenho do modelo")
-if is_bin:
     y_proba = pipe.predict_proba(X_test)[:, 1] if hasattr(pipe.named_steps["est"], "predict_proba") else pipe.predict(X_test)
     y_pred = (y_proba >= 0.5).astype(int)
     auc = roc_auc_score(y_test, y_proba)
@@ -326,9 +342,7 @@ else:
     with c1: st.metric("R² (teste)", f"{r2:.3f}")
     with c2: st.metric("RMSE (teste)", f"{rmse:.3f}")
-# -----------------------------
-# Efeito visual (força por |t/z|)
-# -----------------------------
 st.markdown("### 🌟 Força dos efeitos (|t/z|)")
 eff_df = infer_tbl[infer_tbl["feature"] != "const"].copy()
 eff_df["effect_strength"] = eff_df["z/t"].abs()
@@ -338,9 +352,7 @@ eff_chart = alt.Chart(eff_df.sort_values("effect_strength", ascending=False).hea
 ).properties(height=420)
 st.altair_chart(eff_chart, use_container_width=True)
-# -----------------------------
-# Predição interativa
-# -----------------------------
 st.markdown("## 🔮 Predição interativa")
 st.caption("Ajuste valores para X e veja a probabilidade (Logit) ou valor previsto (OLS).")
@@ -349,7 +361,7 @@ with st.form("pred_form"):
     user_inputs = {}
     for i, col in enumerate(selected_feats):
         with cols[i % 3]:
-            if col in sel_num:
                 q1, q5, q95, q99 = X_train[col].quantile([0.01,0.05,0.95,0.99])
                 default_val = float(np.nan_to_num(X_train[col].median(), nan=0.0))
                 user_inputs[col] = st.number_input(
@@ -367,17 +379,15 @@ if submitted:
     x_new_df = pd.DataFrame(x_new_proc, columns=feat_names)
     X_sm_new = sm.add_constant(x_new_df, has_constant="add")
     y_hat = float(res.predict(X_sm_new)[0])
-    if is_bin:
-        st.success(f"Probabilidade prevista do evento alvo: **{y_hat:.2%}**")
     else:
         st.success(f"Valor previsto do alvo: **{y_hat:.4g}**")
-# -----------------------------
-# Recomendações (Item e)
-# -----------------------------
 st.markdown("## 🧭 Recomendações estratégicas (Item e)")
-for r in recs_from_inference(infer_tbl, model_type="logit" if is_bin else "ols", k=5):
     st.markdown("- " + r)
 st.markdown("---")
-st.caption("App Streamlit — Inferência Estatística • Dados/marketing_campaign.csv • Controles na barra lateral (esquerda).")

 # Dataset: Dados/marketing_campaign.csv
 # Objetivo: permitir escolher ALVO e PREDITORES e produzir INFERÊNCIA (item e),
 #           usando Logit (alvo binário) ou OLS (alvo contínuo).
+#           Se o alvo for categórico com >2 classes, permite one-vs-rest.
 # -------------------------------------------------------------------
 import os
 import numpy as np
 import altair as alt
 from typing import List, Tuple
 from sklearn.model_selection import train_test_split
 from sklearn.compose import ColumnTransformer
 from sklearn.preprocessing import OneHotEncoder, StandardScaler
     roc_auc_score, accuracy_score, confusion_matrix, roc_curve,
     r2_score, mean_squared_error
 )
 from sklearn.linear_model import LogisticRegression, LinearRegression
 import statsmodels.api as sm
 st.set_page_config(page_title="Inferência Estatística — Marketing Campaign", layout="wide")
 st.title("📊 Inferência Estatística — Customer Personality Analysis")
+st.caption("Escolha o **alvo** e as **variáveis explicativas** na barra lateral (esquerda) e obtenha a inferência (item **e**).")
 DATA_PATH = "Dados/marketing_campaign.csv"
+# ---------- Utilidades ----------
 @st.cache_data(show_spinner=False)
 def load_csv_try(path: str) -> pd.DataFrame:
+    """Lê CSV tentando separadores: vírgula, ponto-e-vírgula e tab."""
     for sep in [",", ";", "\t"]:
         try:
             df = pd.read_csv(path, sep=sep, encoding="utf-8")
             if sep != "\t" and df.shape[1] == 1:
                 continue
             return df
         except Exception:
             continue
     return pd.read_csv(path, sep=None, engine="python")
 def split_num_cat(df: pd.DataFrame, exclude: List[str]) -> Tuple[List[str], List[str]]:
 def is_binary_series(s: pd.Series) -> bool:
     vals = pd.unique(s.dropna())
+    return len(vals) == 2 or s.dtype == bool
def coerce_numeric_series(s: pd.Series) -> pd.Series:
    """Convert a series to float, accepting dot- or comma-decimal text.

    Numeric (non-boolean) input is cast to float directly. Any other input is
    converted to text and parsed in two passes:

    1. as-is, after trimming surrounding whitespace, so standard values such
       as ``"1.5"`` keep their decimal point;
    2. only for values the first pass rejected, as comma-decimal text: dots
       and whitespace are dropped as grouping separators and the comma becomes
       the decimal point (``"1.234,56"`` -> ``1234.56``).

    Values that neither pass can parse become NaN. Text that mixes a comma
    grouping separator with a dot decimal (``"1,234.56"``) is not handled
    specially.

    Args:
        s: Series to convert.

    Returns:
        A float64 Series with the same index and name as ``s``.
    """
    # pd.api.types copes with pandas extension dtypes (category, nullable
    # ints), where np.issubdtype raises TypeError. Booleans stay on the text
    # path, as they did with the np.number check.
    is_plain_number = pd.api.types.is_numeric_dtype(s) and not pd.api.types.is_bool_dtype(s)
    if is_plain_number:
        return s.astype(float)

    text = s.astype(str).str.strip()

    # Pass 1: parse untouched text so an existing decimal point is preserved.
    direct = pd.to_numeric(text, errors="coerce").astype(float)

    # Pass 2: comma-decimal interpretation, used only where pass 1 failed.
    comma_decimal = text.str.replace(r"[.\s]", "", regex=True).str.replace(",", ".", regex=False)
    fallback = pd.to_numeric(comma_decimal, errors="coerce").astype(float)

    # Merge positionally so duplicate index labels cannot trigger realignment.
    merged = np.where(direct.notna().to_numpy(), direct.to_numpy(), fallback.to_numpy())
    return pd.Series(merged, index=s.index, name=s.name, dtype=float)
 def engineer_features(df: pd.DataFrame) -> pd.DataFrame:
+    """Engenharia minimalista (compatível com o CSV padrão do Kaggle)."""
     out = df.copy()
+    # Tenure (dias desde Dt_Customer)
     if "Dt_Customer" in out.columns:
         out["Dt_Customer"] = pd.to_datetime(out["Dt_Customer"], errors="coerce", dayfirst=True)
         out["TenureDays"] = (pd.Timestamp("today").normalize() - out["Dt_Customer"]).dt.days
+    # Total gasto (Mnt*)
     mnt_cols = [c for c in out.columns if c.startswith("Mnt")]
     if mnt_cols:
         out["TotalMnt"] = out[mnt_cols].sum(axis=1)
+    # Compras totais e participações
     buy_cols = [c for c in ["NumWebPurchases", "NumCatalogPurchases", "NumStorePurchases"] if c in out.columns]
     if buy_cols:
         out["TotalPurchases"] = out[buy_cols].sum(axis=1)
     if "TotalMnt" in out.columns and "TotalPurchases" in out.columns:
         out["AvgTicket"] = out["TotalMnt"] / out["TotalPurchases"].replace(0, np.nan)
+    # Diversidade de cesta (quantos tipos Mnt*>0)
     if mnt_cols:
         out["BasketDiversity"] = (out[mnt_cols] > 0).sum(axis=1)
     return out
 def build_preprocessor(num_cols: List[str], cat_cols: List[str]) -> ColumnTransformer:
+    """Imputação + padronização (num) e OHE drop='first' (cat) para evitar colinearidade."""
     num_pipe = Pipeline([
         ("imp", SimpleImputer(strategy="median")),
         ("scaler", StandardScaler())
         ("imp", SimpleImputer(strategy="most_frequent")),
         ("ohe", OneHotEncoder(handle_unknown="ignore", drop="first", sparse_output=False))
     ])
+    return ColumnTransformer([
         ("num", num_pipe, num_cols),
         ("cat", cat_pipe, cat_cols)
     ])
 def get_feature_names(pre: ColumnTransformer, num_cols: List[str], cat_cols: List[str]) -> List[str]:
     names = list(num_cols)
     return names
 def fit_inference(model_type: str, X_design: pd.DataFrame, y: pd.Series):
+    """Ajusta a inferência (statsmodels): Logit p/ binário; OLS p/ contínuo."""
     X_sm = sm.add_constant(X_design, has_constant="add")
     if model_type == "logit":
         res = sm.Logit(y.values, X_sm).fit(disp=False)
         or_vals = np.exp(res.params)
         or_ci = np.exp(res.conf_int())
         tbl = pd.DataFrame({
+            "feature": res.params.index,
             "coef": res.params.values,
             "std_err": res.bse.values,
+            "z/t": res.tvalues.values if hasattr(res, "tvalues") else res.tvalues,
+            "p_value": res.pvalues.values,
             "ci_low": res.conf_int()[0].values,
             "ci_high": res.conf_int()[1].values,
             "odds_ratio": or_vals.values,
     return res, tbl
 def recs_from_inference(tbl: pd.DataFrame, model_type: str, k: int = 5):
+    """Gera recomendações (item e) a partir dos efeitos significativos (p<0.05), ignorando 'const'."""
     df = tbl[tbl["feature"] != "const"].copy()
     df = df.sort_values(["p_value", "z/t"], ascending=[True, False])
     core = df[df["p_value"] < 0.05].head(k)
         if model_type == "logit":
             or_txt = f"(OR≈{r['odds_ratio']:.2f}, IC95% {r['or_ci_low']:.2f}–{r['or_ci_high']:.2f}, p={r['p_value']:.3g})"
             if sign > 0:
+                out.append(f"🔧 **Reduzir exposição associada a `{feat}`** {or_txt}, pois aumento nessa variável eleva a probabilidade do alvo.")
             else:
+                out.append(f"✅ **Fortalecer fatores ligados a `{feat}`** {or_txt}, pois valores maiores reduzem a probabilidade do alvo.")
         else:
             eff = f"(β≈{r['coef']:.3g}, IC95% {r['ci_low']:.2g}–{r['ci_high']:.2g}, p={r['p_value']:.3g})"
             if sign > 0:
                 out.append(f"🔧 **Mitigar o crescimento de `{feat}`** {eff}, pois contribui positivamente para o aumento do alvo.")
             else:
                 out.append(f"✅ **Aumentar `{feat}`** {eff}, pois está associado à redução do alvo.")
+    # trilhas transversais
+    out.append("🧪 **Testes A/B** nas variáveis mais significativas para validar impacto causal.")
+    out.append("📞 **Melhorar FCR/primeiro contato** nas causas evidenciadas pelos top fatores.")
+    out.append("🔁 **Feedback a Produto/Qualidade** guiado pelos efeitos com evidência estatística robusta.")
     return out[:k+3]
+# ---------- Sidebar (lado esquerdo) ----------
 with st.sidebar:
     st.header("⚙️ Configuração")
     if not os.path.exists(DATA_PATH):
         st.error(f"Arquivo não encontrado: `{DATA_PATH}`. Suba o CSV em `Dados/`.")
         st.stop()
 df_raw = load_csv_try(DATA_PATH)
 df_eng = engineer_features(df_raw)
 with st.sidebar:
+    st.markdown("**Alvo (variável dependente):**")
     all_cols = df_eng.columns.tolist()
+    # Preferência: 'Response' se existir; senão 1ª binária; senão 1ª numérica
+    default_target = "Response" if "Response" in all_cols else None
+    if default_target is None:
+        for c in all_cols:
+            if is_binary_series(df_eng[c]):
+                default_target = c; break
     if default_target is None:
         for c in all_cols:
+            if np.issubdtype(df_eng[c].dtype, np.number):
+                default_target = c; break
     target_col = st.selectbox("Alvo (y)", options=all_cols, index=all_cols.index(default_target) if default_target in all_cols else 0)
 # Variáveis explicativas
 with st.sidebar:
     st.markdown("**Variáveis explicativas (X):**")
     engineered_first = [c for c in ["TenureDays","TotalMnt","TotalPurchases","OnlineShare","PromoShare","AvgTicket","BasketDiversity"] if c in num_cols_all]
     base_defaults = engineered_first + [c for c in num_cols_all if c not in engineered_first][:5] + cat_cols_all[:3]
     selected_feats = st.multiselect("Selecione X", options=(num_cols_all + cat_cols_all), default=base_defaults)
     st.warning("Selecione pelo menos uma variável explicativa.")
     st.stop()
+# ---------- Amostra ----------
+st.markdown("### 🔎 Amostra dos dados")
 st.dataframe(df_eng[[target_col] + selected_feats].head(12), use_container_width=True)
+# ---------- Preparação do alvo ----------
+y_raw = df_eng[target_col]
+# 1) tenta identificar binário diretamente
 is_bin = is_binary_series(y_raw)
+# 2) se não binário, tenta numérico (coerção segura)
+y_numeric_try = coerce_numeric_series(y_raw) if not is_bin else None
+is_numeric_ok = False
+if not is_bin:
+    if y_numeric_try is not None:
+        # considera "ok" se pelo menos 80% foram convertidos
+        conv_rate = y_numeric_try.notna().mean()
+        is_numeric_ok = conv_rate >= 0.8
+# 3) se não binário e não numérico, vira categórico multi-classe → one-vs-rest
+with st.sidebar:
+    positive_class = None
+    if not is_bin and not is_numeric_ok:
+        uniq_vals = sorted(pd.unique(y_raw.dropna()).tolist(), key=lambda x: str(x))
+        st.markdown("**Alvo categórico com múltiplas classes**")
+        positive_class = st.selectbox("Classe 'positiva' (one-vs-rest)", options=uniq_vals, index=0)
+        st.caption("O modelo fará Logit para a classe escolhida vs. as demais.")
+# ---------- Montagem de y conforme os casos ----------
+if is_bin:
+    # Se binário não-numérico → mapear para {0,1} em ordem alfabética
+    if not np.issubdtype(y_raw.dtype, np.number):
+        uniq = sorted(pd.unique(y_raw.dropna()).tolist(), key=lambda x: str(x))
+        y = y_raw.replace({uniq[0]: 0, uniq[1]: 1}).astype(int)
+    else:
+        y = y_raw.astype(int)
+    model_type = "logit"
+elif is_numeric_ok:
+    y = y_numeric_try.astype(float)
+    model_type = "ols"
 else:
+    # one-vs-rest
+    y = (y_raw == positive_class).astype(int)
+    model_type = "logit"
+# Alinha df aos y válidos
+mask_valid = y.notna()
+df_model = df_eng.loc[mask_valid].copy()
+y = y.loc[mask_valid]
+X = df_model[selected_feats].copy()
+# ---------- Pré-processamento e treino ----------
 sel_num = [c for c in selected_feats if np.issubdtype(X[c].dtype, np.number)]
 sel_cat = [c for c in selected_feats if (X[c].dtype == "object" or X[c].dtype.name == "category")]
 pre = build_preprocessor(sel_num, sel_cat)
+quick_est = LogisticRegression(max_iter=200) if model_type == "logit" else LinearRegression()
 pipe = Pipeline([("pre", pre), ("est", quick_est)])
 X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=test_size, random_state=random_state,
+    stratify=y if model_type == "logit" else None
 )
 with st.spinner("Treinando e construindo matriz de design..."):
     pipe.fit(X_train, y_train)
     pre_fit = pipe.named_steps["pre"].fit(X_train, y_train)
     X_train_design = pre_fit.transform(X_train)
+    # nomes das features após OHE
+    ohe_names = []
+    if sel_cat:
+        ohe_names = list(pre_fit.named_transformers_["cat"].named_steps["ohe"].get_feature_names_out(sel_cat))
+    feat_names = sel_num + ohe_names
     X_train_df = pd.DataFrame(X_train_design, columns=feat_names)
+# ---------- Inferência (item e) ----------
 st.markdown("## 📚 Inferência estatística (Item e)")
 with st.spinner("Ajustando modelo de inferência (statsmodels)..."):
     res, infer_tbl = fit_inference(model_type, X_train_df, y_train)
+if model_type == "logit":
+    if positive_class is not None:
+        st.caption(f"Modelo: **Logit** (one-vs-rest). Classe positiva: **{positive_class}**.")
+    else:
+        st.caption("Modelo: **Logit** (alvo binário). Coeficientes em log-odds; exibimos **odds ratios** e IC 95%.")
     cols_show = ["feature","coef","std_err","z/t","p_value","ci_low","ci_high","odds_ratio","or_ci_low","or_ci_high"]
 else:
     st.caption("Modelo: **OLS** (alvo contínuo). Coeficientes, erros-padrão, estatística t e IC 95%.")
 st.dataframe(infer_tbl[cols_show].round(4), use_container_width=True)
+# ---------- Métricas ----------
 st.markdown("### 📈 Desempenho do modelo")
+if model_type == "logit":
     y_proba = pipe.predict_proba(X_test)[:, 1] if hasattr(pipe.named_steps["est"], "predict_proba") else pipe.predict(X_test)
     y_pred = (y_proba >= 0.5).astype(int)
     auc = roc_auc_score(y_test, y_proba)
     with c1: st.metric("R² (teste)", f"{r2:.3f}")
     with c2: st.metric("RMSE (teste)", f"{rmse:.3f}")
+# ---------- Força dos efeitos ----------
 st.markdown("### 🌟 Força dos efeitos (|t/z|)")
 eff_df = infer_tbl[infer_tbl["feature"] != "const"].copy()
 eff_df["effect_strength"] = eff_df["z/t"].abs()
 ).properties(height=420)
 st.altair_chart(eff_chart, use_container_width=True)
+# ---------- Predição interativa ----------
 st.markdown("## 🔮 Predição interativa")
 st.caption("Ajuste valores para X e veja a probabilidade (Logit) ou valor previsto (OLS).")
     user_inputs = {}
     for i, col in enumerate(selected_feats):
         with cols[i % 3]:
+            if col in [c for c in selected_feats if np.issubdtype(X[c].dtype, np.number)]:
                 q1, q5, q95, q99 = X_train[col].quantile([0.01,0.05,0.95,0.99])
                 default_val = float(np.nan_to_num(X_train[col].median(), nan=0.0))
                 user_inputs[col] = st.number_input(
     x_new_df = pd.DataFrame(x_new_proc, columns=feat_names)
     X_sm_new = sm.add_constant(x_new_df, has_constant="add")
     y_hat = float(res.predict(X_sm_new)[0])
+    if model_type == "logit":
+        st.success(f"Probabilidade prevista do alvo: **{y_hat:.2%}**")
     else:
         st.success(f"Valor previsto do alvo: **{y_hat:.4g}**")
+# ---------- Recomendações (item e) ----------
 st.markdown("## 🧭 Recomendações estratégicas (Item e)")
+for r in recs_from_inference(infer_tbl, model_type=model_type, k=5):
     st.markdown("- " + r)
 st.markdown("---")
+st.caption("Controles na barra lateral (esquerda) • Dados: `Dados/marketing_campaign.csv` • Inferência conforme item (e).")