Spaces:

ricardoadriano
/

Atividade3

Sleeping

App Files Files Community

ricardoadriano committed on Sep 24, 2025

Commit

c4616a2

1 Parent(s): c59760d

pasta dados

Browse files

Files changed (4) hide show

Dados/AmesHousing.csv +0 -0
dados +0 -0
requirements.txt +10 -3
src/streamlit_app.py +165 -38

Dados/AmesHousing.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

dados DELETED Viewed

File without changes

requirements.txt CHANGED Viewed

@@ -1,3 +1,10 @@
-altair
-pandas
-streamlit

+pandas==1.5.3
+geopandas==0.10.2
+matplotlib==3.6.3
+seaborn==0.11.2
+plotly==5.3.0
+Pillow==9.2.0
+scipy==1.9.3
+statsmodels==0.13.5
+wordcloud==1.9.2
+numpy==1.24.0

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,167 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

+#!/usr/bin/env python
+# coding: utf-8
+# =====================================================
+# Dashboard - Testes de Hipóteses com AmesHousing
+# =====================================================
 import streamlit as st
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+import plotly.express as px
+import numpy as np
+from scipy import stats
+from scipy.stats import shapiro, levene, kruskal
+from statsmodels.formula.api import ols
+import statsmodels.api as sm
+# -----------------------------------------------------
+# Configuração da Página
+# -----------------------------------------------------
+st.set_page_config(
+    page_title="Dashboard - Testes de Hipóteses com AmesHousing",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+st.markdown("<h1 style='text-align:center;color:#003366;'>Simulador de Testes de Hipótese</h1>", unsafe_allow_html=True)
+st.markdown("<h3 style='text-align:center;color:#003366;'>Análise do Dataset AmesHousing</h3>", unsafe_allow_html=True)
+st.markdown("---")
+# -----------------------------------------------------
+# Abas do Dashboard
+# -----------------------------------------------------
+tabs = st.tabs(["Simulações Teóricas", "Análise AmesHousing"])
+# -----------------------------------------------------
+# Aba 1: Simulações Teóricas (mantida)
+# -----------------------------------------------------
+with tabs[0]:
+    st.subheader("Teste de Hipótese para Proporção de Testes Positivos de COVID-19")
+    st.sidebar.markdown("### Parâmetros do Teste (Proporção)")
+    p_pop      = st.sidebar.slider("Proporção populacional (H0)", 0.0, 1.0, 0.1, 0.01, key="p_pop")
+    p_sample   = st.sidebar.slider("Proporção amostral", 0.0, 1.0, 0.12, 0.01, key="p_sample")
+    n          = st.sidebar.slider("Tamanho da amostra", 100, 10000, 1000, 10, key="n_sample")
+    alpha_prop = st.sidebar.slider("Nível de significância (α)", 0.01, 0.10, 0.05, 0.01, key="alpha_prop")
+    se      = np.sqrt(p_pop*(1-p_pop)/n)
+    z       = (p_sample - p_pop)/se
+    p_value = 2*(1 - stats.norm.cdf(abs(z)))
+    st.write(f"**Z** = {z:.4f}")
+    st.write(f"**p-valor** = {p_value:.4f}")
+    if p_value < alpha_prop:
+        st.write("**Rejeitamos H0**: diferença significativa.")
+    else:
+        st.write("**Não rejeitamos H0**: sem diferença significativa.")
+# -----------------------------------------------------
+# Aba 2: Análise AmesHousing
+# -----------------------------------------------------
+with tabs[1]:
+    st.subheader("Análise de Variância - AmesHousing Dataset")
+    st.markdown("---")
+    # Upload ou leitura local
+    uploaded_file = st.file_uploader("Carregue o arquivo AmesHousing.csv", type=["csv"])
+    if uploaded_file is not None:
+        casa_data = pd.read_csv(uploaded_file)
+    else:
+        try:
+            casa_data = pd.read_csv("Dados/AmesHousing.csv")
+        except:
+            st.error("⚠️ Carregue o arquivo AmesHousing.csv para continuar.")
+            st.stop()
+    # Renomear colunas para evitar problemas com espaços
+    casa_data.columns = casa_data.columns.str.strip().str.replace(" ", "_")
+    # -------------------------------------------------
+    # Análise Exploratória
+    # -------------------------------------------------
+    st.markdown("### Distribuição do Preço de Venda")
+    fig, ax = plt.subplots(figsize=(8,5))
+    sns.histplot(casa_data['SalePrice'], kde=True, ax=ax)
+    ax.set_title("Distribuição do Preço de Venda")
+    st.pyplot(fig)
+    # Boxplots
+    st.markdown("### Boxplots das Variáveis Selecionadas")
+    variavel = st.selectbox("Escolha a variável categórica para comparar preços:",
+                            ["Neighborhood","Garage_Type","Fireplaces"])
+    fig2, ax2 = plt.subplots(figsize=(12,6))
+    sns.boxplot(x=variavel, y="SalePrice", data=casa_data, ax=ax2)
+    plt.xticks(rotation=90)
+    ax2.set_title(f"Preço de Venda por {variavel}")
+    st.pyplot(fig2)
+    # Scatter interativo (média de preço por bairro)
+    st.markdown("### Preço Médio de Venda por Bairro")
+    bairro_grouped = casa_data.groupby('Neighborhood').agg(
+        count=('SalePrice','size'),
+        mean_price=('SalePrice','mean')
+    ).reset_index()
+    bairro_filtered = bairro_grouped[bairro_grouped['count'] >= 5]
+    fig3 = px.scatter(
+        bairro_filtered,
+        x='mean_price',
+        y='Neighborhood',
+        size='count',
+        color='Neighborhood',
+        title='Preço Médio de Venda vs Bairro (Ames, Iowa)',
+        labels={'mean_price': 'Preço Médio de Venda', 'Neighborhood':'Bairro'},
+        opacity=0.8
+    )
+    st.plotly_chart(fig3, use_container_width=True)
+    # -------------------------------------------------
+    # ANOVA
+    # -------------------------------------------------
+    st.markdown("### ANOVA para Neighborhood, Garage_Type e Fireplaces")
+    alpha = st.sidebar.slider("Nível de significância (α) - ANOVA AmesHousing",
+                              0.01,0.10,0.05,0.01,key="alpha_ames")
+    modelos = {
+        "Neighborhood": ols('SalePrice ~ C(Neighborhood)', data=casa_data).fit(),
+        "Garage_Type": ols('SalePrice ~ C(Garage_Type)', data=casa_data).fit(),
+        "Fireplaces": ols('SalePrice ~ C(Fireplaces)', data=casa_data).fit()
+    }
+    for nome, modelo in modelos.items():
+        st.markdown(f"#### ANOVA - {nome}")
+        anova = sm.stats.anova_lm(modelo, typ=2)
+        st.dataframe(anova)
+    # -------------------------------------------------
+    # Validação dos Pressupostos
+    # -------------------------------------------------
+    st.markdown("### Validação dos Pressupostos da ANOVA")
+    st.markdown("#### Teste de Normalidade (Shapiro-Wilk)")
+    for nome, modelo in modelos.items():
+        residuos = modelo.resid
+        stat, p = shapiro(residuos.dropna())
+        st.write(f"{nome}: estatística={stat:.3f}, p={p:.3f} ➡️ "
+                 + ("resíduos normais" if p >= alpha else "violação de normalidade"))
+    st.markdown("#### Teste de Homocedasticidade (Levene)")
+    for nome in ["Neighborhood","Garage_Type","Fireplaces"]:
+        grupos = [grupo["SalePrice"].dropna() for _, grupo in casa_data.groupby(nome)]
+        stat, p = levene(*grupos)
+        st.write(f"{nome}: estatística={stat:.3f}, p={p:.3f} ➡️ "
+                 + ("variâncias iguais" if p >= alpha else "variâncias diferentes"))
+    # -------------------------------------------------
+    # Kruskal-Wallis
+    # -------------------------------------------------
+    st.markdown("### Teste não-paramétrico (Kruskal-Wallis)")
+    for nome in ["Neighborhood","Garage_Type","Fireplaces"]:
+        grupos = [grupo["SalePrice"].dropna() for _, grupo in casa_data.groupby(nome)]
+        stat, p = kruskal(*grupos)
+        st.write(f"{nome}: estatística={stat:.3f}, p={p:.3f} ➡️ "
+                 + ("diferenças significativas" if p < alpha else "sem diferença significativa"))