ML_Final_Hourly

Sleeping

App Files Files Community

Gumball2k5 committed on Nov 15, 2025

Commit

5d60768

verified ·

1 Parent(s): 1465ff4

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -15

app.py CHANGED Viewed

@@ -62,8 +62,7 @@ def load_champion_models():
                  "Hãy đảm bảo 5 file .pkl nằm trong thư mục 'models/'.")
         return []
-@st.cache_data
-def load_performance_data(file_path="data/final_5_day_results_df.csv"):
     """Tải dữ liệu hiệu suất đã tính toán trước cho Tab 3."""
     try:
         df = pd.read_csv(file_path)
@@ -80,10 +79,7 @@ models = load_champion_models()
 perf_df = load_performance_data()
 # --- TÙY CHỈNH QUAN TRỌNG ---
-# Giả định tên các cột target (thực tế) trong file CSV của bạn
-# Checklist không nói rõ, nên tôi giả định tên là 't+1', 't+2', v.v.
 TARGET_COLS = ['temp_next_1_day', 'temp_next_2_day', 'temp_next_3_day', 'temp_next_4_day', 'temp_next_5_day']
-# Giả định tên cột nhiệt độ của ngày HIỆN TẠI (dùng để vẽ lịch sử)
 CURRENT_TEMP_COL = 'temp'
 # Tách test set (dựa trên ngày trong checklist)
@@ -97,10 +93,11 @@ if not all_data_df.empty:
         test_df = all_data_df.loc[TEST_START_DATE:TEST_END_DATE].copy()
         # Giả định: 157 features là TẤT CẢ các cột KHÔNG PHẢI là target
-        feature_cols = [col for col in test_df.columns if col not in TARGET_COLS]
         # Tách X_test (features) và y_test (thực tế)
-        X_test = test_df[feature_cols]
         y_test = test_df[TARGET_COLS]
         # Đổi tên cột y_test cho dễ hiểu (dùng trong Tab 3)
@@ -114,7 +111,6 @@ else:
     st.error("Không thể tải dữ liệu chính, ứng dụng không thể tiếp tục.")
     st.stop()
 # --- 5. GIAO DIỆN SIDEBAR (THANH ĐIỀU HƯỚNG) ---
 st.sidebar.title("Navigation")
@@ -189,8 +185,14 @@ elif app_section == "Live 5-Day Forecast":
         # 1. Lấy Input Features
         selected_date_ts = pd.Timestamp(selected_date)
-        input_features = X_test.loc[[selected_date_ts]]
         if input_features.empty:
             st.error("Không tìm thấy dữ liệu cho ngày đã chọn.")
         else:
@@ -210,13 +212,44 @@ elif app_section == "Live 5-Day Forecast":
             for i in range(5):
                 with cols[i]:
                     st.metric(
                         label=f"Forecast for {forecast_dates[i].strftime('%b %d')}",
                         value=f"{predictions[i]:.1f}°C",
-                        delta=f"Actual: {actual_values[i]:.1f}°C",
                         delta_color="off" # Màu xám trung tính
                     )
             # 4. Biểu đồ (Optimal Suggestion)
             st.subheader("Historical Context & Forecast")
@@ -264,20 +297,22 @@ elif app_section == "Model Performance & Diagnostics":
     if not perf_df.empty and not y_test.empty:
         st.subheader("Performance Degradation over 5 Days")
         st.markdown("Hiệu suất mô hình thay đổi như thế nào khi dự báo xa hơn.")
         MODEL_NAME = 'Champion (Stacking)'
         champion_perf_df = perf_df[perf_df['Model'] == MODEL_NAME].copy()
         # 1. Biểu đồ suy giảm hiệu suất (RMSE & R2)
         # --- TÙY CHỈNH ---
         # Đảm bảo 'RMSE' và 'R2' là tên cột chính xác trong file 'final_5_day_results_df.csv'
         RMSE_COL_NAME = 'RMSE (Absolute Error)'
-        R2_COL_NAME = 'R-squared'
         col1, col2 = st.columns(2)
         with col1:
             fig_rmse = diag.plot_performance_degradation(
-                champion_perf_df,
                 metric_column=RMSE_COL_NAME,
                 metric_name='RMSE (Temperature °C)',
                 color='blue'
@@ -285,7 +320,7 @@ elif app_section == "Model Performance & Diagnostics":
             st.plotly_chart(fig_rmse, use_container_width=True)
         with col2:
             fig_r2 = diag.plot_performance_degradation(
-                champion_perf_df,
                 metric_column=R2_COL_NAME,
                 metric_name='R-squared (R²)',
                 color='green'

                  "Hãy đảm bảo 5 file .pkl nằm trong thư mục 'models/'.")
         return []
+@st.cache_data
+def load_performance_data(file_path="data/final_5_day_results_df.csv"):
     """Tải dữ liệu hiệu suất đã tính toán trước cho Tab 3."""
     try:
         df = pd.read_csv(file_path)
 perf_df = load_performance_data()
 # --- TÙY CHỈNH QUAN TRỌNG ---
 TARGET_COLS = ['temp_next_1_day', 'temp_next_2_day', 'temp_next_3_day', 'temp_next_4_day', 'temp_next_5_day']
 CURRENT_TEMP_COL = 'temp'
 # Tách test set (dựa trên ngày trong checklist)
         test_df = all_data_df.loc[TEST_START_DATE:TEST_END_DATE].copy()
         # Giả định: 157 features là TẤT CẢ các cột KHÔNG PHẢI là target
+        feature_cols = [col for col in all_data_df.columns if col not in TARGET_COLS]
         # Tách X_test (features) và y_test (thực tế)
+        # Sửa lỗi logic: X_test phải được lấy từ test_df
+        X_test = test_df[feature_cols]
         y_test = test_df[TARGET_COLS]
         # Đổi tên cột y_test cho dễ hiểu (dùng trong Tab 3)
     st.error("Không thể tải dữ liệu chính, ứng dụng không thể tiếp tục.")
     st.stop()
 # --- 5. GIAO DIỆN SIDEBAR (THANH ĐIỀU HƯỚNG) ---
 st.sidebar.title("Navigation")
         # 1. Lấy Input Features
         selected_date_ts = pd.Timestamp(selected_date)
+        # Sửa lỗi logic: input_features phải được lấy từ X_test
+        if selected_date_ts in X_test.index:
+            input_features = X_test.loc[[selected_date_ts]]
+        else:
+            st.error("Không tìm thấy dữ liệu cho ngày đã chọn trong X_test.")
+            input_features = pd.DataFrame() # Tạo dataframe rỗng để tránh lỗi sau
         if input_features.empty:
             st.error("Không tìm thấy dữ liệu cho ngày đã chọn.")
         else:
             for i in range(5):
                 with cols[i]:
+                    # --- SỬA LỖI 1: KIỂM TRA NaN CHO ACTUAL VALUE ---
+                    actual_val = actual_values[i]
+                    if pd.isna(actual_val):
+                        delta_text = "Actual: --"
+                    else:
+                        delta_text = f"Actual: {actual_val:.1f}°C"
+                    # --- KẾT THÚC SỬA LỖI 1 ---
                     st.metric(
                         label=f"Forecast for {forecast_dates[i].strftime('%b %d')}",
                         value=f"{predictions[i]:.1f}°C",
+                        delta=delta_text, # Sử dụng delta_text đã kiểm tra
                         delta_color="off" # Màu xám trung tính
                     )
+            # --- THÊM MỚI 2: BIỂU ĐỒ DỮ LIỆU TRAINING (THEO YÊU CẦU) ---
+            st.subheader("Training Set Overview")
+            with st.expander("Hiển thị biểu đồ toàn bộ dữ liệu training (trước 2024-02-18)"):
+                # Xác định phạm vi training data
+                train_end_date = pd.Timestamp(TEST_START_DATE) - pd.Timedelta(days=1)
+                train_df = all_data_df.loc[:train_end_date][CURRENT_TEMP_COL]
+                fig_train = go.Figure()
+                fig_train.add_trace(go.Scatter(
+                    x=train_df.index, y=train_df,
+                    mode='lines', name='Training Data (Actual)',
+                    line=dict(color='#005aa7', width=1) # Màu xanh
+                ))
+                fig_train.update_layout(
+                    title="Actual Temperature - Full Training Set",
+                    xaxis_title="Date", yaxis_title="Temperature (°C)",
+                    template="plotly_white"
+                )
+                st.plotly_chart(fig_train, use_container_width=True)
+            # --- KẾT THÚC THÊM MỚI 2 ---
             # 4. Biểu đồ (Optimal Suggestion)
             st.subheader("Historical Context & Forecast")
     if not perf_df.empty and not y_test.empty:
         st.subheader("Performance Degradation over 5 Days")
         st.markdown("Hiệu suất mô hình thay đổi như thế nào khi dự báo xa hơn.")
+        # Lọc chỉ model Champion
         MODEL_NAME = 'Champion (Stacking)'
         champion_perf_df = perf_df[perf_df['Model'] == MODEL_NAME].copy()
         # 1. Biểu đồ suy giảm hiệu suất (RMSE & R2)
         # --- TÙY CHỈNH ---
         # Đảm bảo 'RMSE' và 'R2' là tên cột chính xác trong file 'final_5_day_results_df.csv'
         RMSE_COL_NAME = 'RMSE (Absolute Error)'
+        R2_COL_NAME = 'R-squared'
         col1, col2 = st.columns(2)
         with col1:
             fig_rmse = diag.plot_performance_degradation(
+                champion_perf_df, # Dùng df đã lọc
                 metric_column=RMSE_COL_NAME,
                 metric_name='RMSE (Temperature °C)',
                 color='blue'
             st.plotly_chart(fig_rmse, use_container_width=True)
         with col2:
             fig_r2 = diag.plot_performance_degradation(
+                champion_perf_df, # Dùng df đã lọc
                 metric_column=R2_COL_NAME,
                 metric_name='R-squared (R²)',
                 color='green'