ML_Final_Hourly

Sleeping

App Files Files Community

Gumball2k5 committed on Nov 16, 2025

Commit

bc27979

verified ·

1 Parent(s): 38f115e

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -39

app.py CHANGED Viewed

@@ -124,15 +124,15 @@ def load_css():
         /* ===== 6. VĂN BẢN THÔNG THƯỜNG (PARAGRAPH & MARKDOWN) ===== */
         /* Quy tắc này áp dụng cho văn bản st.markdown và các đoạn văn bản khác */
         .stMarkdown, p, li {
-            color: #333333 !important; /* Xám đen, tương phản tốt trên nền sáng */
-            font-size: 1.05rem; /* Có thể thêm tùy chọn để chữ lớn hơn một chút */
         }
         /* SAFE DataFrame Styling */
         [data-testid="stDataFrame"] {
-            border: 1px solid #CCCCCC !important;
-            border-radius: 8px !important;
-            background-color: #FFFFFF !important;
         }
         /* ===== EXPANDERS (vẫn giữ như cũ) ===== */
@@ -257,8 +257,7 @@ def load_champion_models():
                  "Ensure the 5 .pkl files are in the 'models/' directory.")
         return []
-@st.cache_data
-def load_performance_data(file_path="data/final_5_day_results_df.csv"):
     """Loads pre-calculated performance data for Tab 3."""
     try:
         df = pd.read_csv(file_path)
@@ -310,7 +309,7 @@ else:
 # --- CRITICAL CUSTOMIZATION (Hourly Targets) ---
 HOURLY_TARGET_COLS = ['target_temp_next_24h', 'target_temp_next_48h', 'target_temp_next_72h',
-                      'target_temp_next_96h', 'target_temp_next_120h']
 # Load models và data mới
 hourly_data_df = load_hourly_data(file_path="data/final_hourly_feature_dataset.csv") # Dùng tên file features chính xác
@@ -343,7 +342,7 @@ def predict_next_24_hours(input_features: pd.DataFrame, models: dict) -> List[fl
         # Dùng np đã được import
         np.random.seed(42)
         return [last_temp + 1.5 * np.sin(2 * np.pi * (h + 10) / 24) + np.random.normal(0, 0.5)
-                for h in range(num_horizons)]
     # Chạy mô hình Direct Hourly
     for h in range(1, num_horizons + 1):
@@ -538,8 +537,8 @@ with tab2:
                         st.metric(label="Temp Yesterday (temp_lag_1)", value=f"{input_features['temp_lag_1'].iloc[0]:.1f}°C")
                         st.metric(label="7-Day Avg Temp (temp_roll_7d_mean)", value=f"{input_features['temp_roll_7d_mean'].iloc[0]:.1f}°C")
-                        # --- ĐÃ SỬA LỖI ---
-                        # Thay thế 'precip_roll_7d_sum' (không tồn tại) bằng 'precip_roll_7d_mean' (tồn tại)
                         st.metric(label="7-Day Total Rainfall (precip_roll_7d_sum)", value=f"{input_features['precip_roll_7d_sum'].iloc[0]:.1f} mm")
                         st.metric(label="14-Day Temp Volatility (temp_roll_14d_std)", value=f"{input_features['temp_roll_14d_std'].iloc[0]:.2f}°C")
@@ -758,7 +757,8 @@ with tab4:
             value=max_ts.date(), # Mặc định chọn ngày cuối cùng
             min_value=min_ts.date(),
             max_value=max_ts.date(),
-            format="YYYY-MM-DD"
         )
         # 2. Hour Selection (Chỉ show các giờ có sẵn trong ngày đã chọn)
@@ -792,7 +792,6 @@ with tab4:
         predictions_24h = predict_next_24_hours(input_features_hourly, hourly_models_24h)
         # --- TÍNH TOÁN METRIC T+24h ---
-        # T+24h là index 23 (nếu có đủ 24 giá trị)
         t_plus_24h_metric_value = predictions_24h[23] if len(predictions_24h) >= 24 else (predictions_24h[-1] if predictions_24h else float('nan'))
         # 2. Hiển thị Dự đoán T+24h (Tức là giờ đó ngày mai)
@@ -801,7 +800,6 @@ with tab4:
         forecast_start_ts = latest_time_for_day + pd.Timedelta(hours=1)
         # Tính các giá trị cho T+2h và T+3h
-        # T+2h là index 1; T+3h là index 2
         t_plus_2h_value = predictions_24h[1] if len(predictions_24h) >= 2 else float('nan')
         t_plus_3h_value = predictions_24h[2] if len(predictions_24h) >= 3 else float('nan')
@@ -820,7 +818,6 @@ with tab4:
         # --- 1. Metric T+2h ---
         with col_t2:
             st.metric(
-                # SỬA: Dùng forecast_t2_ts để hiển thị giờ thực tế (+2h)
                 label=f"Forecast @ {forecast_t2_ts.strftime('%H:%M')} (T+2H)",
                 value=f"{t_plus_2h_value:.1f}°C"
             )
@@ -828,7 +825,6 @@ with tab4:
         # --- 2. Metric T+3h ---
         with col_t3:
             st.metric(
-                # SỬA: Dùng forecast_t3_ts để hiển thị giờ thực tế (+3h)
                 label=f"Forecast @ {forecast_t3_ts.strftime('%H:%M')} (T+3H)",
                 value=f"{t_plus_3h_value:.1f}°C"
             )
@@ -836,7 +832,6 @@ with tab4:
         # --- 3. Metric T+24h (Giữ lại để đối chiếu) ---
         with col_t24:
             st.metric(
-                # SỬA: Dùng forecast_t24_ts để hiển thị giờ thực tế (+24h)
                 label=f"Forecast @ {forecast_t24_ts.strftime('%H:%M')} (T+24H)",
                 value=f"{t_plus_24h_metric_value:.1f}°C"
             )
@@ -851,35 +846,87 @@ with tab4:
                       delta="Peak Heat")
-        # 5. Graph: Nhiệt độ Từng Giờ
-        st.subheader("Hourly Temperature Breakdown (T+1h to T+24h)")
-        # ... (Biểu đồ giữ nguyên)
-        hourly_index = pd.date_range(start=forecast_start_ts, periods=len(predictions_24h), freq='H')
-        df_hourly_forecast = pd.DataFrame({
-            'Time': hourly_index,
-            'Temperature': predictions_24h
         }).set_index('Time')
-        fig_hourly = go.Figure()
-        fig_hourly.add_trace(go.Scatter(
-            x=df_hourly_forecast.index,
-            y=df_hourly_forecast['Temperature'],
-            mode='lines+markers',
-            name='Hourly Forecast',
-            line=dict(color='#ff6347', width=2),
-            marker=dict(size=6)
         ))
-        fig_hourly.update_layout(
-            title="Hourly Temperature Forecast (T+1h to T+24h)",
-            xaxis_title="Time",
-            yaxis_title="Temperature (°C)",
-            template="plotly_white",
-            legend=dict(x=0.01, y=0.99)
         )
-        st.plotly_chart(fig_hourly, use_container_width=True)
         # --- NEW GRAPH 1: RMSE Degradation Plot (Reliability) ---
         st.subheader("Model Reliability: Error Degradation")

         /* ===== 6. VĂN BẢN THÔNG THƯỜNG (PARAGRAPH & MARKDOWN) ===== */
         /* Quy tắc này áp dụng cho văn bản st.markdown và các đoạn văn bản khác */
         .stMarkdown, p, li {
+             color: #333333 !important; /* Xám đen, tương phản tốt trên nền sáng */
+             font-size: 1.05rem; /* Có thể thêm tùy chọn để chữ lớn hơn một chút */
         }
         /* SAFE DataFrame Styling */
         [data-testid="stDataFrame"] {
+             border: 1px solid #CCCCCC !important;
+             border-radius: 8px !important;
+             background-color: #FFFFFF !important;
         }
         /* ===== EXPANDERS (vẫn giữ như cũ) ===== */
                  "Ensure the 5 .pkl files are in the 'models/' directory.")
         return []
+@st.cache_data
+def load_performance_data(file_path="data/final_5_day_results_df.csv"):
     """Loads pre-calculated performance data for Tab 3."""
     try:
         df = pd.read_csv(file_path)
 # --- CRITICAL CUSTOMIZATION (Hourly Targets) ---
 HOURLY_TARGET_COLS = ['target_temp_next_24h', 'target_temp_next_48h', 'target_temp_next_72h',
+                     'target_temp_next_96h', 'target_temp_next_120h']
 # Load models và data mới
 hourly_data_df = load_hourly_data(file_path="data/final_hourly_feature_dataset.csv") # Dùng tên file features chính xác
         # Dùng np đã được import
         np.random.seed(42)
         return [last_temp + 1.5 * np.sin(2 * np.pi * (h + 10) / 24) + np.random.normal(0, 0.5)
+                 for h in range(num_horizons)]
     # Chạy mô hình Direct Hourly
     for h in range(1, num_horizons + 1):
                         st.metric(label="Temp Yesterday (temp_lag_1)", value=f"{input_features['temp_lag_1'].iloc[0]:.1f}°C")
                         st.metric(label="7-Day Avg Temp (temp_roll_7d_mean)", value=f"{input_features['temp_roll_7d_mean'].iloc[0]:.1f}°C")
+                        # --- BUG INTENTIONALLY LEFT IN PLACE, AS REQUESTED ---
+                        # This code raises KeyError if 'precip_roll_7d_sum' does not exist
                         st.metric(label="7-Day Total Rainfall (precip_roll_7d_sum)", value=f"{input_features['precip_roll_7d_sum'].iloc[0]:.1f} mm")
                         st.metric(label="14-Day Temp Volatility (temp_roll_14d_std)", value=f"{input_features['temp_roll_14d_std'].iloc[0]:.2f}°C")
             value=max_ts.date(), # Mặc định chọn ngày cuối cùng
             min_value=min_ts.date(),
             max_value=max_ts.date(),
+            format="YYYY-MM-DD",
+            key="hourly_date_input" # Thêm key duy nhất
         )
         # 2. Hour Selection (Chỉ show các giờ có sẵn trong ngày đã chọn)
         predictions_24h = predict_next_24_hours(input_features_hourly, hourly_models_24h)
         # --- TÍNH TOÁN METRIC T+24h ---
         t_plus_24h_metric_value = predictions_24h[23] if len(predictions_24h) >= 24 else (predictions_24h[-1] if predictions_24h else float('nan'))
         # 2. Hiển thị Dự đoán T+24h (Tức là giờ đó ngày mai)
         forecast_start_ts = latest_time_for_day + pd.Timedelta(hours=1)
         # Tính các giá trị cho T+2h và T+3h
         t_plus_2h_value = predictions_24h[1] if len(predictions_24h) >= 2 else float('nan')
         t_plus_3h_value = predictions_24h[2] if len(predictions_24h) >= 3 else float('nan')
         # --- 1. Metric T+2h ---
         with col_t2:
             st.metric(
                 label=f"Forecast @ {forecast_t2_ts.strftime('%H:%M')} (T+2H)",
                 value=f"{t_plus_2h_value:.1f}°C"
             )
         # --- 2. Metric T+3h ---
         with col_t3:
             st.metric(
                 label=f"Forecast @ {forecast_t3_ts.strftime('%H:%M')} (T+3H)",
                 value=f"{t_plus_3h_value:.1f}°C"
             )
         # --- 3. Metric T+24h (Giữ lại để đối chiếu) ---
         with col_t24:
             st.metric(
                 label=f"Forecast @ {forecast_t24_ts.strftime('%H:%M')} (T+24H)",
                 value=f"{t_plus_24h_metric_value:.1f}°C"
             )
                       delta="Peak Heat")
+        # --- BẮT ĐẦU THAY THẾ BIỂU ĐỒ TAB 4 ---
+        # 5.1 Graph: Bối cảnh Lịch sử & Dự báo
+        st.subheader("Historical Context & Forecast (Hourly)")
+        # Lấy 24 giờ lịch sử
+        history_start_ts = latest_time_for_day - pd.Timedelta(hours=23) # Lùi 23 giờ để có 24 điểm
+        history_end_ts = latest_time_for_day
+        # Lấy 'temp' (actual) từ dataframe GỐC theo giờ
+        history_df_hourly = hourly_data_df.loc[history_start_ts:history_end_ts]['temp']
+        # Tạo dataframe cho 24h dự báo
+        forecast_hourly_index = pd.date_range(start=forecast_start_ts, periods=len(predictions_24h), freq='H')
+        forecast_df_hourly = pd.DataFrame({
+            'Time': forecast_hourly_index,
+            'Forecast': predictions_24h
         }).set_index('Time')
+        # Vẽ biểu đồ
+        fig_hist_hourly = go.Figure()
+        fig_hist_hourly.add_trace(go.Scatter(
+            x=history_df_hourly.index, y=history_df_hourly,
+            mode='lines+markers', name='Past 24 Hours (Actual)',
+            line=dict(color='blue')
+        ))
+        fig_hist_hourly.add_trace(go.Scatter(
+            x=forecast_df_hourly.index, y=forecast_df_hourly['Forecast'],
+            mode='lines+markers', name='Next 24 Hours (Forecast)',
+            line=dict(color='red', dash='dot')
+        ))
+        fig_hist_hourly.update_layout(
+            title="Hourly Forecast vs. Historical Context",
+            xaxis_title="Time", yaxis_title="Temperature (°C)",
+            template="plotly_white", legend=dict(x=0.01, y=0.99)
+        )
+        st.plotly_chart(fig_hist_hourly, use_container_width=True)
+        # 5.2 Graph: So sánh Dự báo vs Thực tế
+        st.subheader("24-Hour Forecast vs. Actual Comparison")
+        # Lấy 'temp' (actual) cho 24 giờ TỚI
+        try:
+            future_actuals_df = hourly_data_df.loc[forecast_hourly_index]['temp']
+            actual_values_24h = future_actuals_df.values
+        except KeyError:
+            # Xảy ra nếu forecast_hourly_index vượt ra ngoài dữ liệu
+            actual_values_24h = [float('nan')] * len(predictions_24h)
+        # Kiểm tra xem có bất kỳ giá trị NaN nào không
+        is_partial_hourly_forecast = any(pd.isna(v) for v in actual_values_24h) or (len(actual_values_24h) < len(predictions_24h))
+        fig_comp_hourly = go.Figure()
+        # 1. Luôn thêm đường Dự báo
+        fig_comp_hourly.add_trace(go.Scatter(
+            x=forecast_hourly_index, y=predictions_24h,
+            mode='lines+markers', name='24-Hour Forecast',
+            line=dict(color='red', dash='dot')
         ))
+        # 2. Chỉ thêm đường Thực tế (màu xanh) nếu có đủ dữ liệu
+        if not is_partial_hourly_forecast:
+            fig_comp_hourly.add_trace(go.Scatter(
+                x=forecast_hourly_index, y=actual_values_24h,
+                mode='lines+markers', name='24-Hour Actual',
+                line=dict(color='blue')
+            ))
+            fig_comp_hourly.update_layout(title="24-Hour Forecast vs. Actual Values")
+        else:
+            # Nếu không, chỉ hiển thị dự báo
+            fig_comp_hourly.update_layout(title="24-Hour Forecast (Actual data not yet available)")
+        # Luôn hiển thị biểu đồ
+        fig_comp_hourly.update_layout(
+            xaxis_title="Time", yaxis_title="Temperature (°C)",
+            template="plotly_white", legend=dict(x=0.01, y=0.99)
         )
+        st.plotly_chart(fig_comp_hourly, use_container_width=True)
+        # --- KẾT THÚC THAY THẾ BIỂU ĐỒ TAB 4 ---
         # --- NEW GRAPH 1: RMSE Degradation Plot (Reliability) ---
         st.subheader("Model Reliability: Error Degradation")