# NOTE: the text originally at the top of this file was web-viewer residue
# (Space status, file size, per-line commit hashes and the line-number gutter),
# not part of the program.
import re
import numpy as np
import pandas as pd
import altair as alt
import streamlit as st
from pathlib import Path
# Configure the page title and the wide layout.
st.set_page_config(
    page_title="Simulação Monte Carlo (Dirichlet–Multinomial)",
    layout="wide",
)

# ===================== Sidebar: parameters =====================
st.sidebar.title("Parâmetros da Simulação")

# Number of Monte Carlo draws per class.
N_SIM = st.sidebar.number_input(
    "Número de simulações",
    min_value=1000,
    max_value=200_000,
    value=10_000,
    step=1000,
)

# Approval-rate target (lower bound) and dropout-rate limit (upper bound).
META_APROV = st.sidebar.slider(
    "Meta de aprovação (≥)",
    min_value=0.50,
    max_value=0.95,
    value=0.80,
    step=0.01,
)
MAX_EVASAO = st.sidebar.slider(
    "Limite de evasão (≤)",
    min_value=0.00,
    max_value=0.40,
    value=0.15,
    step=0.01,
)

# Additive smoothing constant added to each observed count.
ADD_K = st.sidebar.select_slider(
    "Suavização add-k",
    options=[0.5, 1.0, 2.0],
    value=1.0,
)

# Multiplier applied to the enrolled count to explore class-size scenarios.
N_MULT = st.sidebar.select_slider(
    "Cenário do tamanho da turma (n ×)",
    options=[0.9, 1.0, 1.1],
    value=1.0,
)

# Seed for the random number generator.
SEED = st.sidebar.number_input(
    "Semente aleatória",
    min_value=0,
    value=42,
    step=1,
)
# ===================== Helpers =====================
def _norm_cols(cols):
    """Return normalized column labels.

    Each label is converted to ``str``, runs of whitespace are collapsed to a
    single space, leading/trailing whitespace is removed, and every ``%`` is
    replaced by the text ``pct``.
    """
    cleaned = []
    for label in cols:
        text = re.sub(r"\s+", " ", str(label))
        cleaned.append(text.strip().replace("%", "pct"))
    return cleaned
def _pick(col, pats):
    """Return True when any regex in ``pats`` matches ``col`` (case-insensitive)."""
    for pattern in pats:
        if re.search(pattern, col, re.IGNORECASE):
            return True
    return False
def _to_num(s):
    """Convert a Series of number-like text to numeric values.

    Values are stringified, ``%`` signs are dropped, decimal commas become
    dots, and surrounding whitespace is stripped. Anything that still cannot
    be parsed becomes NaN.
    """
    text = s.astype(str)
    text = text.str.replace("%", "", regex=False)
    text = text.str.replace(",", ".", regex=False)
    text = text.str.strip()
    return pd.to_numeric(text, errors="coerce")
def _try_read_csv(path: Path):
    """Read a CSV file, trying common encodings and separators.

    Encodings are tried in the order utf-8-sig, utf-8, latin1; for each one
    the separators tried are auto-detection, ``,``, ``;`` and tab. The first
    read that does not raise is returned. When auto-detection yields a single
    column, the file is re-read with ``;`` before being accepted.

    Args:
        path: Location of the CSV file.

    Returns:
        ``(df, meta)`` on success, where ``meta`` is a dict with the keys
        ``source``, ``sep`` (the separator actually used, or ``"auto"``) and
        ``encoding``. ``(None, message)`` when the file does not exist or no
        combination could be read; ``message`` is a user-facing string.
    """
    if not path.exists():
        return None, f"Arquivo esperado não encontrado: {path}"

    last_err = None
    for enc in ("utf-8-sig", "utf-8", "latin1"):
        for sep in (None, ",", ";", "\t"):  # None = let pandas sniff the delimiter
            used_sep = "auto" if sep is None else sep
            try:
                df = pd.read_csv(path, sep=sep, engine="python", encoding=enc)
                if sep is None and df.shape[1] == 1:
                    # Sniffing produced a single column: retry with ';' and
                    # report that separator instead of "auto".
                    df = pd.read_csv(path, sep=";", engine="python", encoding=enc)
                    used_sep = ";"
            except Exception as e:
                # Deliberately broad: any failure just moves on to the next
                # encoding/separator combination; the last error is reported.
                last_err = e
                continue
            return df, {"source": str(path), "sep": used_sep, "encoding": enc}

    return None, f"Falha ao ler {path}: {last_err}"
@st.cache_data(show_spinner=False)
def load_dataframe_from_dados():
    """Load and clean the class survey from ``Dados/levantamentoTurmas.csv``.

    Steps: read the CSV, normalize headers, rename recognised columns to the
    canonical names (Turma, Matriculados, Aprovados, Reprovados, Desistentes
    and their ``pct_*`` variants), convert them to numbers, rebuild missing
    counts from percentages, keep only rows with at least one enrolled
    student, and recompute ``Aprovados`` where the three outcome counts do
    not add up to ``Matriculados``.

    Returns:
        ``(base, None)`` on success, where ``base`` has the columns Turma,
        Matriculados, Aprovados, Reprovados and Desistentes; or
        ``(None, message)`` with a user-facing error message on failure.
    """
    csv_path = Path("Dados/levantamentoTurmas.csv")
    df, meta = _try_read_csv(csv_path)
    if df is None:
        return None, meta  # on failure, meta is the error message

    # Header normalization.
    df.columns = _norm_cols(df.columns)

    # Map source headers to canonical names. The first column matching a
    # given target wins, so the rename never produces duplicate labels.
    ren = {}
    assigned = set()
    for c in df.columns:
        is_pct = "pct" in c.lower()
        if _pick(c, [r"^turma"]):
            target = "Turma"
        elif _pick(c, [r"matriculado"]):
            target = "Matriculados"
        elif _pick(c, [r"\baprov"]):
            target = "pct_Aprov" if is_pct else "Aprovados"
        elif _pick(c, [r"reprov"]):
            target = "pct_Reprov" if is_pct else "Reprovados"
        elif _pick(c, [r"desistent|evas"]):
            target = "pct_Desist" if is_pct else "Desistentes"
        else:
            continue
        if target in assigned:
            continue
        assigned.add(target)
        ren[c] = target
    df = df.rename(columns=ren)

    # Convert counts and percentages to numbers.
    for c in ["Matriculados", "Aprovados", "Reprovados", "Desistentes",
              "pct_Aprov", "pct_Reprov", "pct_Desist"]:
        if c in df.columns:
            df[c] = _to_num(df[c])

    # Rebuild counts that were provided only as percentages. This needs the
    # enrolled count; without it the missing-column check below reports the
    # problem instead of raising KeyError here.
    if "Matriculados" in df.columns:
        for count_col, pct_col in (
            ("Aprovados", "pct_Aprov"),
            ("Reprovados", "pct_Reprov"),
            ("Desistentes", "pct_Desist"),
        ):
            if count_col not in df.columns and pct_col in df.columns:
                df[count_col] = (df[pct_col] / 100 * df["Matriculados"]).round()

    need = ["Turma", "Matriculados", "Aprovados", "Reprovados", "Desistentes"]
    miss = [c for c in need if c not in df.columns]
    if miss:
        return None, f"Colunas ausentes no CSV ({csv_path}): {miss}"

    base = df[need].copy()
    for c in need[1:]:
        # Unparseable or missing values count as zero.
        base[c] = pd.to_numeric(base[c], errors="coerce").fillna(0).astype(int)
    base = base[base["Matriculados"] > 0].copy()
    base["Turma"] = base["Turma"].astype(str).str.strip()

    # Sum adjustment: where the three outcomes do not add up to the enrolled
    # count, approvals are recomputed as the remainder (never below zero).
    soma = base[["Aprovados", "Reprovados", "Desistentes"]].sum(axis=1)
    diff = soma != base["Matriculados"]
    base.loc[diff, "Aprovados"] = (
        base.loc[diff, "Matriculados"]
        - base.loc[diff, ["Reprovados", "Desistentes"]].sum(axis=1)
    ).clip(lower=0)

    if base.empty:
        return None, "Após limpeza, não restaram turmas válidas."
    return base.reset_index(drop=True), None
@st.cache_data(show_spinner=False)
def simulate_dirichlet_multinomial(base: pd.DataFrame, n_sim: int, meta_aprov: float, max_evasao: float, add_k: float, n_mult: float, seed: int):
    """Run the Dirichlet–Multinomial Monte Carlo simulation for every class.

    For each row of ``base`` the observed counts (Aprovados, Reprovados,
    Desistentes) plus ``add_k`` form the Dirichlet parameters. ``n_sim``
    probability vectors are drawn, and for each one a multinomial outcome is
    drawn for a class of ``max(1, round(Matriculados * n_mult))`` students.

    Args:
        base: Frame with the columns Turma, Matriculados, Aprovados,
            Reprovados and Desistentes.
        n_sim: Number of simulated outcomes per class.
        meta_aprov: Minimum approval rate that counts as meeting the target.
        max_evasao: Maximum dropout rate that counts as meeting the target.
        add_k: Constant added to each observed count.
        n_mult: Multiplier applied to the enrolled count.
        seed: Seed for the random generator shared by all classes.

    Returns:
        One row per class with the simulated class size, the mean and the
        5th/50th/95th percentiles of the approval and dropout rates, and
        ``Prob_Meta`` (share of simulations meeting both thresholds), sorted
        by ``Prob_Meta`` in descending order. An empty ``base`` yields an
        empty frame with the same columns.
    """
    columns = [
        "Turma", "Matriculados",
        "Média_Aprov", "P5_Aprov", "P50_Aprov", "P95_Aprov",
        "Média_Desist", "P5_Desist", "P50_Desist", "P95_Desist",
        "Prob_Meta",
    ]
    rng = np.random.default_rng(seed)
    rows = []
    for _, r in base.iterrows():
        n0 = int(r["Matriculados"])
        n = max(1, int(round(n0 * n_mult)))
        a, rp, dz = int(r["Aprovados"]), int(r["Reprovados"]), int(r["Desistentes"])
        alpha = np.array([a + add_k, rp + add_k, dz + add_k], dtype=float)

        P = rng.dirichlet(alpha, size=n_sim)
        # One broadcast call: each row of P is the probability vector of one
        # draw, so the result has shape (n_sim, 3) with no Python-level loop.
        counts = rng.multinomial(n, P)

        t_ap = counts[:, 0] / n
        t_dz = counts[:, 2] / n
        p5_ap, p50_ap, p95_ap = np.percentile(t_ap, [5, 50, 95])
        p5_dz, p50_dz, p95_dz = np.percentile(t_dz, [5, 50, 95])
        rows.append({
            "Turma": r["Turma"],
            "Matriculados": n,
            "Média_Aprov": t_ap.mean(),
            "P5_Aprov": p5_ap,
            "P50_Aprov": p50_ap,
            "P95_Aprov": p95_ap,
            "Média_Desist": t_dz.mean(),
            "P5_Desist": p5_dz,
            "P50_Desist": p50_dz,
            "P95_Desist": p95_dz,
            "Prob_Meta": ((t_ap >= meta_aprov) & (t_dz <= max_evasao)).mean(),
        })

    # Passing the column list keeps sort_values valid even when rows is empty.
    result = pd.DataFrame(rows, columns=columns)
    return result.sort_values("Prob_Meta", ascending=False).reset_index(drop=True)
@st.cache_data(show_spinner=False)
def sample_turma(base: pd.DataFrame, turma_label: str, n_sim: int, add_k: float, n_mult: float, seed: int):
    """Draw simulated approval and dropout rates for a single class.

    The class is looked up by exact match on the stripped label; if that
    fails, the first row whose ``Turma`` contains the label
    (case-insensitive, literal text) is used.

    Args:
        base: Frame with the columns Turma, Matriculados, Aprovados,
            Reprovados and Desistentes.
        turma_label: Class label to look up.
        n_sim: Number of simulated outcomes.
        add_k: Constant added to each observed count.
        n_mult: Multiplier applied to the enrolled count.
        seed: Seed for a fresh random generator created for this call.

    Returns:
        ``(approval_rates, dropout_rates)`` as two arrays of length
        ``n_sim``, or ``(None, None)`` when no class matches the label.
    """
    turma_label = str(turma_label).strip()

    exact = base["Turma"] == turma_label
    if exact.any():
        idx = base.index[exact][0]
    else:
        # Fall back to a literal, case-insensitive substring match.
        partial = base["Turma"].str.contains(re.escape(turma_label), case=False, na=False)
        if not partial.any():
            return None, None
        idx = base.index[partial][0]

    r = base.loc[idx]
    n0 = int(r["Matriculados"])
    n = max(1, int(round(n0 * n_mult)))
    a, rp, dz = int(r["Aprovados"]), int(r["Reprovados"]), int(r["Desistentes"])
    alpha = np.array([a + add_k, rp + add_k, dz + add_k], dtype=float)

    rng = np.random.default_rng(seed)
    P = rng.dirichlet(alpha, size=n_sim)
    # One broadcast call over all probability vectors; result is (n_sim, 3).
    C = rng.multinomial(n, P)
    return C[:, 0] / n, C[:, 2] / n
# ===================== App =====================
st.title("Simulação de Monte Carlo — Dirichlet–Multinomial")
st.caption("O app lê **Dados/levantamentoTurmas.csv**. Ajuste os parâmetros na lateral e simule.")

# Load the cleaned data; on failure show the message and halt this run.
base, err = load_dataframe_from_dados()
if err:
    st.error(err)
    st.stop()

with st.expander("Ver dados utilizados (base limpa)", expanded=False):
    st.dataframe(base)

sim_df = simulate_dirichlet_multinomial(
    base=base,
    n_sim=int(N_SIM),
    meta_aprov=float(META_APROV),
    max_evasao=float(MAX_EVASAO),
    add_k=float(ADD_K),
    n_mult=float(N_MULT),
    seed=int(SEED),
)

# --- Per-class results table and CSV download ---
st.subheader("Resultados por turma")
st.dataframe(sim_df.style.format({
    "Média_Aprov": "{:.3f}", "P5_Aprov": "{:.3f}", "P50_Aprov": "{:.3f}", "P95_Aprov": "{:.3f}",
    "Média_Desist": "{:.3f}", "P5_Desist": "{:.3f}", "P50_Desist": "{:.3f}", "P95_Desist": "{:.3f}",
    "Prob_Meta": "{:.3f}",
}))
st.download_button(
    label="Baixar resultados (CSV)",
    data=sim_df.to_csv(index=False).encode("utf-8"),
    file_name="resultados_simulacao.csv",
    mime="text/csv",
)

# --- Bar chart: probability of meeting both thresholds, per class ---
st.subheader("Probabilidade de bater a meta (ordenado)")
chart_prob = (
    alt.Chart(sim_df.sort_values("Prob_Meta", ascending=True))
    .mark_bar()
    .encode(
        x=alt.X("Prob_Meta:Q", title=f"Prob. (aprovação ≥ {META_APROV:.0%} & evasão ≤ {MAX_EVASAO:.0%})"),
        y=alt.Y("Turma:N", sort="-x", title="Turma"),
        tooltip=[
            alt.Tooltip("Turma:N"),
            alt.Tooltip("Prob_Meta:Q", format=".3f"),
            alt.Tooltip("Média_Aprov:Q", format=".3f"),
            alt.Tooltip("Média_Desist:Q", format=".3f"),
        ],
    )
    .properties(height=400)
)
st.altair_chart(chart_prob, use_container_width=True)

# --- Detail view: simulated distributions for one selected class ---
st.subheader("Distribuições simuladas (detalhe por turma)")
col1, col2 = st.columns(2)
with col1:
    turma_sel = st.selectbox("Escolha uma turma", options=sim_df["Turma"].tolist(), index=0)
with col2:
    st.write(f"Meta de aprovação ≥ **{META_APROV:.0%}** | Evasão ≤ **{MAX_EVASAO:.0%}**")
    st.write(f"add-k = **{ADD_K}** · n × = **{N_MULT}** · simulações = **{N_SIM}**")

t_ap, t_dz = sample_turma(base, turma_sel, int(N_SIM), float(ADD_K), float(N_MULT), int(SEED))


def _threshold_histogram(values, field, title, threshold):
    """Build a 30-bin histogram of ``values`` overlaid with a dashed rule at ``threshold``."""
    hist = (
        alt.Chart(pd.DataFrame({field: values}))
        .mark_bar()
        .encode(
            x=alt.X(f"{field}:Q", bin=alt.Bin(maxbins=30), title=title),
            y=alt.Y("count()", title="Frequência"),
        )
        .properties(height=300)
    )
    rule = (
        alt.Chart(pd.DataFrame({"x": [threshold]}))
        .mark_rule(strokeDash=[6, 4])
        .encode(x="x:Q")
    )
    return hist + rule


if t_ap is None:
    st.warning("Turma não encontrada após normalização.")
else:
    st.altair_chart(
        _threshold_histogram(t_ap, "taxa_aprov", "Taxa de aprovação", META_APROV),
        use_container_width=True,
    )
    st.altair_chart(
        _threshold_histogram(t_dz, "taxa_evasao", "Taxa de evasão", MAX_EVASAO),
        use_container_width=True,
    )