Atividade4

Sleeping

App Files Files Community

ricardoadriano committed on Oct 2, 2025

Commit

68a9e56

verified ·

1 Parent(s): 63b6361

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +35 -67

src/streamlit_app.py CHANGED Viewed

@@ -2,7 +2,7 @@
 # coding: utf-8
 # =====================================================
-# Dashboard - AmesHousing (Tarefa 4) - versão otimizada
 # =====================================================
 import streamlit as st
@@ -28,8 +28,6 @@ st.set_page_config(
     layout="wide",
     initial_sidebar_state="expanded"
 )
-# Nova API para manter posição estável
 st.query_params.clear()
 st.markdown("<h1 style='text-align:center;color:#003366;'>Análise do Dataset AmesHousing</h1>", unsafe_allow_html=True)
@@ -45,15 +43,13 @@ def carregar_dados():
         "/mnt/data/AmesHousing.csv",
         "../Dados/AmesHousing.csv",
     ]
-    last_err = None
     for p in paths_tentativa:
         try:
             df = pd.read_csv(p)
             return df
-        except Exception as e:
-            last_err = e
             continue
-    raise RuntimeError(f"Não foi possível carregar o AmesHousing.csv. Último erro: {last_err}")
 casa_data = carregar_dados()
 casa_data.columns = casa_data.columns.str.strip().str.replace(" ", "_")
@@ -106,12 +102,9 @@ alpha_reg = st.sidebar.slider("Nível de significância (α) — Regressão", 0.
 # Distribuição de Preço de Venda
 st.subheader("Distribuição do Preço de Venda")
 if not dados_filtrados.empty:
-    fig, ax = plt.subplots(figsize=(6,4))
     sns.histplot(dados_filtrados['SalePrice'], kde=True, ax=ax)
-    ax.set_title("Distribuição do Preço de Venda")
     st.pyplot(fig, clear_figure=True, use_container_width=False)
-else:
-    st.warning("Nenhum dado disponível com os filtros aplicados.")
 # Boxplots
 st.subheader("Boxplots das Variáveis Selecionadas")
@@ -119,53 +112,35 @@ variavel = st.selectbox(
     "Escolha a variável categórica para comparar preços:",
     ["Neighborhood","Garage_Type","Fireplaces"]
 )
-if not dados_filtrados.empty:
-    if len(dados_filtrados[variavel].dropna().unique()) > 1:
-        fig2, ax2 = plt.subplots(figsize=(8,5))
-        sns.boxplot(x=variavel, y="SalePrice", data=dados_filtrados, ax=ax2)
-        plt.xticks(rotation=90)
-        ax2.set_title(f"Preço de Venda por {variavel}")
-        st.pyplot(fig2, clear_figure=True, use_container_width=False)
-    else:
-        st.warning(f"Não é possível gerar boxplot: apenas uma categoria em {variavel} após os filtros.")
 # Scatter interativo
 st.subheader("Preço Médio de Venda por Bairro")
-@st.cache_data
-def agrupar_bairros(df):
-    return df.groupby('Neighborhood').agg(
         count=('SalePrice','size'),
         mean_price=('SalePrice','mean')
     ).reset_index()
-if not dados_filtrados.empty:
-    bairro_grouped = agrupar_bairros(dados_filtrados)
     bairro_filtered = bairro_grouped[bairro_grouped['count'] >= 5]
     if not bairro_filtered.empty:
         fig3 = px.scatter(
             bairro_filtered,
-            x='mean_price',
-            y='Neighborhood',
-            size='count',
-            color='Neighborhood',
-            title='Preço Médio de Venda vs Bairro (Ames, Iowa)',
-            labels={'mean_price': 'Preço Médio de Venda', 'Neighborhood':'Bairro'},
-            opacity=0.8
         )
-        fig3.update_layout(height=400, width=700)
         st.plotly_chart(fig3, use_container_width=False)
-    else:
-        st.warning("Não há bairros suficientes após filtros para gerar o gráfico.")
 # =================================================
 # Regressão Linear — Tarefa 4
 # =================================================
 def construir_formula(y, feats_num, feats_cat, inter_1=None, inter_2=None):
-    termos = []
-    termos += feats_num
-    termos += [f"C({c})" for c in feats_cat]
     if inter_1 and inter_2:
         a = f"C({inter_1})" if inter_1 in feats_cat else inter_1
         b = f"C({inter_2})" if inter_2 in feats_cat else inter_2
@@ -178,48 +153,41 @@ if st.button("Ajustar modelo"):
     if interagir and inter_1 and inter_2:
         cols_necessarias += [inter_1, inter_2]
     df_modelo = dados_filtrados[cols_necessarias].dropna().copy()
-    if df_modelo.empty:
-        st.error("Sem dados suficientes após remoção de NAs nas variáveis selecionadas.")
-    else:
         y_col = 'SalePrice'
         if usar_logy:
-            df_modelo['SalePrice'] = np.log(df_modelo['SalePrice'].astype(float))
-            y_col = 'SalePrice'
         formula = construir_formula(y_col, feats_num, feats_cat, inter_1 if interagir else None, inter_2 if interagir else None)
         df_treino, df_teste = train_test_split(df_modelo, test_size=teste_size, random_state=42)
         model = ols(formula, data=df_treino).fit()
-        st.markdown("#### Especificação do Modelo")
         st.code(formula)
-        st.markdown("#### Coeficientes e Inferência")
-        st.dataframe(model.summary2().tables[1])
-        # Métricas
-        y_true = df_teste['SalePrice']
-        y_pred = model.predict(df_teste)
         if usar_logy:
-            y_true = np.exp(y_true)
-            y_pred = np.exp(y_pred)
-        R2 = r2_score(y_true, y_pred)
-        RMSE = mean_squared_error(y_true, y_pred, squared=False)
-        MAE = mean_absolute_error(y_true, y_pred)
-        st.dataframe(pd.DataFrame({'Métrica':['R²','RMSE','MAE'],'Valor':[R2,RMSE,MAE]}))
-        # Gráficos diagnósticos fixos
-        residuos = model.resid
-        fitted = model.fittedvalues
         cols = st.columns(3)
         with cols[0]:
-            fig_r, ax_r = plt.subplots(figsize=(3.5,3))
             ax_r.scatter(fitted, residuos, alpha=0.5)
             ax_r.axhline(0, color='red', linestyle='--')
             st.pyplot(fig_r, clear_figure=True, use_container_width=False)
         with cols[1]:
-            fig_q = sm.qqplot(residuos, line='45', fit=True)
-            fig_q.set_size_inches(3.5,3)
             st.pyplot(fig_q, clear_figure=True, use_container_width=False)
         with cols[2]:
-            fig_h, ax_h = plt.subplots(figsize=(3.5,3))
             sns.histplot(residuos, kde=True, ax=ax_h)
             st.pyplot(fig_h, clear_figure=True, use_container_width=False)

 # coding: utf-8
 # =====================================================
+# Dashboard - AmesHousing (Tarefa 4) - versão compacta
 # =====================================================
 import streamlit as st
     layout="wide",
     initial_sidebar_state="expanded"
 )
 st.query_params.clear()
 st.markdown("<h1 style='text-align:center;color:#003366;'>Análise do Dataset AmesHousing</h1>", unsafe_allow_html=True)
         "/mnt/data/AmesHousing.csv",
         "../Dados/AmesHousing.csv",
     ]
     for p in paths_tentativa:
         try:
             df = pd.read_csv(p)
             return df
+        except:
             continue
+    return pd.DataFrame()
 casa_data = carregar_dados()
 casa_data.columns = casa_data.columns.str.strip().str.replace(" ", "_")
 # Distribuição de Preço de Venda
 st.subheader("Distribuição do Preço de Venda")
 if not dados_filtrados.empty:
+    fig, ax = plt.subplots(figsize=(5,3.5))
     sns.histplot(dados_filtrados['SalePrice'], kde=True, ax=ax)
     st.pyplot(fig, clear_figure=True, use_container_width=False)
 # Boxplots
 st.subheader("Boxplots das Variáveis Selecionadas")
     "Escolha a variável categórica para comparar preços:",
     ["Neighborhood","Garage_Type","Fireplaces"]
 )
+if not dados_filtrados.empty and len(dados_filtrados[variavel].dropna().unique()) > 1:
+    fig2, ax2 = plt.subplots(figsize=(6,4))
+    sns.boxplot(x=variavel, y="SalePrice", data=dados_filtrados, ax=ax2)
+    plt.xticks(rotation=90)
+    st.pyplot(fig2, clear_figure=True, use_container_width=False)
 # Scatter interativo
 st.subheader("Preço Médio de Venda por Bairro")
+if not dados_filtrados.empty:
+    bairro_grouped = dados_filtrados.groupby('Neighborhood').agg(
         count=('SalePrice','size'),
         mean_price=('SalePrice','mean')
     ).reset_index()
     bairro_filtered = bairro_grouped[bairro_grouped['count'] >= 5]
     if not bairro_filtered.empty:
         fig3 = px.scatter(
             bairro_filtered,
+            x='mean_price', y='Neighborhood',
+            size='count', color='Neighborhood',
+            labels={'mean_price': 'Preço Médio de Venda', 'Neighborhood':'Bairro'}
         )
+        fig3.update_layout(width=600, height=350)
         st.plotly_chart(fig3, use_container_width=False)
 # =================================================
 # Regressão Linear — Tarefa 4
 # =================================================
 def construir_formula(y, feats_num, feats_cat, inter_1=None, inter_2=None):
+    termos = feats_num + [f"C({c})" for c in feats_cat]
     if inter_1 and inter_2:
         a = f"C({inter_1})" if inter_1 in feats_cat else inter_1
         b = f"C({inter_2})" if inter_2 in feats_cat else inter_2
     if interagir and inter_1 and inter_2:
         cols_necessarias += [inter_1, inter_2]
     df_modelo = dados_filtrados[cols_necessarias].dropna().copy()
+    if not df_modelo.empty:
         y_col = 'SalePrice'
         if usar_logy:
+            df_modelo['SalePrice'] = np.log(df_modelo['SalePrice'])
         formula = construir_formula(y_col, feats_num, feats_cat, inter_1 if interagir else None, inter_2 if interagir else None)
         df_treino, df_teste = train_test_split(df_modelo, test_size=teste_size, random_state=42)
         model = ols(formula, data=df_treino).fit()
         st.code(formula)
+        st.dataframe(model.summary2().tables[1], use_container_width=False, height=200)
+        y_true, y_pred = df_teste['SalePrice'], model.predict(df_teste)
         if usar_logy:
+            y_true, y_pred = np.exp(y_true), np.exp(y_pred)
+        metrics = pd.DataFrame({
+            'Métrica': ['R²','RMSE','MAE'],
+            'Valor': [r2_score(y_true, y_pred),
+                      mean_squared_error(y_true, y_pred, squared=False),
+                      mean_absolute_error(y_true, y_pred)]
+        })
+        st.dataframe(metrics, use_container_width=False, height=120)
+        # Gráficos diagnósticos
+        residuos, fitted = model.resid, model.fittedvalues
         cols = st.columns(3)
         with cols[0]:
+            fig_r, ax_r = plt.subplots(figsize=(3,3))
             ax_r.scatter(fitted, residuos, alpha=0.5)
             ax_r.axhline(0, color='red', linestyle='--')
             st.pyplot(fig_r, clear_figure=True, use_container_width=False)
         with cols[1]:
+            fig_q, ax_q = plt.subplots(figsize=(3,3))
+            sm.qqplot(residuos, line='45', fit=True, ax=ax_q)
             st.pyplot(fig_q, clear_figure=True, use_container_width=False)
         with cols[2]:
+            fig_h, ax_h = plt.subplots(figsize=(3,3))
             sns.histplot(residuos, kde=True, ax=ax_h)
             st.pyplot(fig_h, clear_figure=True, use_container_width=False)