Gumball2k5 committed on
Commit
4d5afb9
·
verified ·
1 Parent(s): 5a2d802

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -174
app.py CHANGED
@@ -221,38 +221,38 @@ else:
221
  st.error("Could not load main data, application cannot continue.")
222
  st.stop()
223
 
224
- # --- 5. SIDEBAR NAVIGATION ---
225
- st.sidebar.title("Navigation")
226
 
227
- # --- THAY ĐỔI: Thêm icons vào radio ---
228
- app_section = st.sidebar.radio(
229
- "Choose a section:",
230
- ("📑 Project Overview & Methodology", "🌦️ Live 5-Day Forecast", "📊 Model Performance & Diagnostics")
231
- )
232
- # --- KẾT THÚC THAY ĐỔI ---
233
-
234
- # Date input only shows on the "Live Forecast" tab
235
  selected_date = None
236
- if app_section == "🌦️ Live 5-Day Forecast": # Cập nhật tên cho khớp
237
- st.sidebar.header("Forecast Input")
 
 
 
238
 
239
- if not X_test.empty:
240
- min_date = X_test.index.min()
241
- max_date = X_test.index.max()
242
-
243
- selected_date = st.sidebar.date_input(
244
- "Select a date from the test set:",
245
- value=min_date,
246
- min_value=min_date,
247
- max_value=max_date,
248
- format="YYYY-MM-DD"
249
- )
250
- else:
251
- st.sidebar.error("Test data could not be loaded.")
 
 
 
 
 
252
 
253
- # --- 6. MAIN PANEL DISPLAY ---
254
- if app_section == "📑 Project Overview & Methodology": # Cập nhật tên cho khớp
255
- # --- CHECKLIST ITEM 3 ---
256
  st.title("Saigon Temperature Forecasting Application 🌦️")
257
 
258
  st.subheader("Project Summary")
@@ -275,153 +275,112 @@ if app_section == "📑 Project Overview & Methodology": # Cập nhật tên cho
275
  st.subheader("Final Model Leaderboard")
276
  st.markdown("Model leaderboard ranked by average RMSE score (lower is better).")
277
 
278
- # Call function from benchmark_utils.py
279
  leaderboard_df = benchmark_utils.load_leaderboard()
280
 
281
  if not leaderboard_df.empty:
 
282
  top_10_df = leaderboard_df.head(10).reset_index(drop=True)
 
 
283
  top_10_df.index = range(1, len(top_10_df) + 1)
 
 
284
  st.dataframe(top_10_df, use_container_width=True)
285
  else:
286
  st.warning("Could not load leaderboard data.")
287
 
288
  # --------------------------------------------------------------------
289
- elif app_section == "🌦️ Live 5-Day Forecast": # Cập nhật tên cho khớp
290
- # --- CHECKLIST ITEM 4 ---
 
 
291
  st.title("Live 5-Day Forecast")
292
 
 
293
  if selected_date and not X_test.empty and models:
294
  st.header(f"5-Day Forecast from: {selected_date.strftime('%Y-%m-%d')}")
295
 
296
- # 1. Get Input Features
297
  selected_date_ts = pd.Timestamp(selected_date)
298
 
299
- # Logic fix: input_features must be from X_test
300
  if selected_date_ts in X_test.index:
301
  input_features = X_test.loc[[selected_date_ts]]
302
  else:
303
  st.error("Data not found for the selected date in X_test.")
304
- input_features = pd.DataFrame() # Create empty dataframe to avoid errors later
305
 
306
  if input_features.empty:
307
  st.error("Data not found for the selected date.")
308
  else:
309
- # 2. Generate Predictions
310
  predictions = []
311
  for i in range(5):
312
- model = models[i] # Get T+i model
313
  pred = model.predict(input_features)[0]
314
  predictions.append(pred)
315
 
316
- # 3. Display Predictions (using st.metric)
317
  forecast_dates = pd.date_range(start=selected_date, periods=6, freq='D')[1:]
318
  cols = st.columns(5)
319
 
320
- # Get actual values for comparison
321
- # --- LOGIC FIX: Get 'actual_values' from all_data_df ---
322
- # We need to get the target columns (e.g., 'temp_next_1_day')
323
- # from the ORIGINAL DATAFRAME at the selected date.
324
-
325
  actual_values = []
326
  if selected_date_ts in all_data_df.index:
327
- # Get the row from the original dataframe
328
  actual_row = all_data_df.loc[selected_date_ts]
329
-
330
- # Get values from the target columns (temp_next_1_day, etc.)
331
  for col_name in TARGET_COLS:
332
  actual_values.append(actual_row[col_name])
333
  else:
334
- # Fallback case if date not found (rare)
335
- actual_values = [float('nan')] * 5 # Create 5 NaN values
336
 
337
- # --- APPLYING LOGIC (1) FROM REFERENCE CODE ---
338
- # Check if any 'Actual' values are missing
339
  is_partial_forecast = any(pd.isna(v) for v in actual_values)
340
- # ----------------------------------------------
341
 
342
  for i in range(5):
343
  with cols[i]:
344
-
345
- # --- FIX 1 (REFINED): Use pd.notna logic from reference code ---
346
  actual_val = actual_values[i]
347
  delta_text = f"Actual: {actual_val:.1f}°C" if pd.notna(actual_val) else "Actual: --"
348
- # --- END FIX 1 ---
349
-
350
  st.metric(
351
  label=f"Forecast for {forecast_dates[i].strftime('%b %d')}",
352
  value=f"{predictions[i]:.1f}°C",
353
- delta=delta_text, # Use the checked delta_text
354
- delta_color="off" # Neutral gray color
355
  )
356
 
357
- # --- NEW ADDITION 2: TRAINING DATA PLOT (PER REQUEST) ---
358
  st.subheader("Training Set Overview")
359
  with st.expander("Show plot of all training data (before 2024-02-18)"):
360
-
361
- # Define training data range
362
  train_end_date = pd.Timestamp(TEST_START_DATE) - pd.Timedelta(days=1)
363
  train_df = all_data_df.loc[:train_end_date][CURRENT_TEMP_COL]
364
 
365
- # --- START OF MODIFICATION (USER REQUEST) ---
366
-
367
- # Calculate Y-axis range to keep it fixed
368
- y_min = train_df.min()
369
- y_max = train_df.max()
370
- padding = (y_max - y_min) * 0.05 # Add 5% padding
371
-
372
  fig_train = go.Figure()
373
  fig_train.add_trace(go.Scatter(
374
  x=train_df.index, y=train_df,
375
  mode='lines', name='Training Data (Actual)',
376
- line=dict(color='#005aa7', width=1) # Blue
377
  ))
378
-
379
  fig_train.update_layout(
380
  title="Actual Temperature - Full Training Set",
381
- xaxis_title="Date",
382
- yaxis_title="Temperature (°C)",
383
- template="plotly_white",
384
- paper_bgcolor='rgba(0,0,0,0)', # Làm nền trong suốt
385
- plot_bgcolor='rgba(255,255,255,0.6)', # Nền trắng mờ
386
-
387
- # 1. Fix the Y-axis range (Requirement 2 & 3)
388
- yaxis=dict(
389
- range=[y_min - padding, y_max + padding]
390
- ),
391
-
392
- # 2. Add the X-axis range slider (Requirement 1)
393
- xaxis=dict(
394
- rangeslider=dict(
395
- visible=True
396
- ),
397
- type="date" # Ensure it's treated as a date axis
398
- )
399
  )
400
-
401
  st.plotly_chart(fig_train, use_container_width=True)
402
-
403
- # --- END OF MODIFICATION ---
404
-
405
- # --- END NEW ADDITION 2 ---
406
 
407
- # 4. Plot (Optimal Suggestion)
408
  st.subheader("Historical Context & Forecast")
409
 
410
- # Get last 14 days of history
411
  history_start = selected_date_ts - pd.Timedelta(days=14)
412
  history_end = selected_date_ts
413
-
414
- # Get 'temp' data from the original dataframe
415
  history_df = all_data_df.loc[history_start:history_end][CURRENT_TEMP_COL]
416
 
417
- # Create dataframe for forecast
418
  forecast_df = pd.DataFrame({
419
  'Date': forecast_dates,
420
  'Forecast': predictions
421
  }).set_index('Date')
422
 
423
  fig = go.Figure()
424
-
425
  fig.add_trace(go.Scatter(
426
  x=history_df.index, y=history_df,
427
  mode='lines+markers', name='Past 14 Days (Actual)',
@@ -432,135 +391,79 @@ elif app_section == "🌦️ Live 5-Day Forecast": # Cập nhật tên cho khớ
432
  mode='lines+markers', name='5-Day Forecast',
433
  line=dict(color='red', dash='dot')
434
  ))
435
-
436
  fig.update_layout(
437
  title="Forecast vs. Historical Context",
438
  xaxis_title="Date", yaxis_title="Temperature (°C)",
439
- template="plotly_white", legend=dict(x=0.01, y=0.99),
440
- paper_bgcolor='rgba(0,0,0,0)',
441
- plot_bgcolor='rgba(255,255,255,0.6)'
442
  )
443
  st.plotly_chart(fig, use_container_width=True)
444
 
445
- # --- APPLYING LOGIC (2) FROM REFERENCE CODE ---
446
  st.subheader("5-Day Forecast vs. Actual Comparison")
447
  if is_partial_forecast:
448
  st.info("Cannot draw the Actual vs. Forecast comparison chart because "
449
  "the selected date is too close to the end of the test set (missing 'actual' data).")
450
  else:
451
  fig_comp = go.Figure()
452
-
453
- # Add Forecast trace
454
  fig_comp.add_trace(go.Scatter(
455
  x=forecast_dates, y=predictions,
456
  mode='lines+markers', name='5-Day Forecast',
457
  line=dict(color='red', dash='dot')
458
  ))
459
-
460
- # Add Actual trace
461
  fig_comp.add_trace(go.Scatter(
462
  x=forecast_dates, y=actual_values,
463
  mode='lines+markers', name='5-Day Actual',
464
  line=dict(color='blue')
465
  ))
466
-
467
  fig_comp.update_layout(
468
  title="5-Day Forecast vs. Actual Values",
469
  xaxis_title="Date", yaxis_title="Temperature (°C)",
470
- template="plotly_white", legend=dict(x=0.01, y=0.99),
471
- paper_bgcolor='rgba(0,0,0,0)',
472
- plot_bgcolor='rgba(255,255,255,0.6)'
473
  )
474
  st.plotly_chart(fig_comp, use_container_width=True)
475
- # --- END APPLYING LOGIC (2) ---
476
 
477
  else:
478
  st.warning("Please wait... Loading data or models.")
479
 
480
  # --------------------------------------------------------------------
481
- elif app_section == "📊 Model Performance & Diagnostics": # Cập nhật tên cho khớp
482
- # --- CHECKLIST ITEM 5 ---
 
 
483
  st.title("Model Performance & Diagnostics")
484
 
485
  if not perf_df.empty and not y_test.empty:
486
  st.subheader("Performance Degradation over 5 Days")
487
  st.markdown("How model performance changes as the forecast horizon increases.")
488
 
489
- # Filter for Champion model only
490
  MODEL_NAME = 'Champion (Stacking)'
491
  champion_perf_df = perf_df[perf_df['Model'] == MODEL_NAME].copy()
492
 
493
- # 1. Performance Degradation Plots (RMSE & R2)
494
-
495
- # --- CUSTOMIZATION ---
496
- # Ensure 'RMSE' and 'R2' column names are correct for 'final_5_day_results_df.csv'
497
  RMSE_COL_NAME = 'RMSE (Absolute Error)'
498
- R2_COL_NAME = 'R-squared'
499
 
500
- # --- BẮT ĐẦU SỬA LỖI CĂN LỀ VÀ CẮT CHỮ ---
501
- common_plot_layout_updates = dict(
502
- font=dict(color="#333333"), # Màu chữ chung cho biểu đồ
503
- title_font_color="#004080", # Màu chữ cho tiêu đề biểu đồ
504
- xaxis=dict(
505
- title_font_color="#005aa7",
506
- tickfont=dict(color="#333333") # Màu chữ cho nhãn trục
507
- ),
508
- yaxis=dict(
509
- title_font_color="#005aa7",
510
- tickfont=dict(color="#333333")
511
- ),
512
- legend=dict(font=dict(color="#333333")),
513
- paper_bgcolor='rgba(255,255,255,0.9)', # Nền giấy sáng hơn, hơi trong suốt
514
- plot_bgcolor='rgba(255,255,255,0.7)', # Nền plot sáng hơn, hơi trong suốt
515
-
516
- # THAY ĐỔI 1: Căn lề trái tất cả tiêu đề
517
- title_x=0, # Căn lề trái (0 = trái, 0.5 = giữa, 1 = phải)
518
- title_xanchor='left', # Đặt anchor lề trái
519
-
520
- # THAY ĐỔI 2: Thêm lề để tiêu đề không bị dính
521
- margin=dict(l=20, t=80) # l=left (20px), t=top (80px để có chỗ cho 2 dòng)
522
- )
523
- # --- KẾT THÚC SỬA LỖI CĂN LỀ VÀ CẮT CHỮ ---
524
-
525
  col1, col2 = st.columns(2)
526
  with col1:
527
  fig_rmse = diag.plot_performance_degradation(
528
- champion_perf_df, # Use filtered df
529
  metric_column=RMSE_COL_NAME,
530
  metric_name='RMSE (Temperature °C)',
531
  color='blue'
532
  )
533
-
534
- # THAY ĐỔI 3: Ghi đè tiêu đề để ngắt dòng (sửa lỗi bị cắt)
535
- fig_rmse.update_layout(
536
- title="<b>RMSE (Temperature °C) <br> vs. Forecast Horizon</b>"
537
- )
538
-
539
- # Áp dụng các thay đổi màu chữ VÀ căn lề
540
- fig_rmse.update_layout(**common_plot_layout_updates)
541
  st.plotly_chart(fig_rmse, use_container_width=True)
542
  with col2:
543
  fig_r2 = diag.plot_performance_degradation(
544
- champion_perf_df, # Use filtered df
545
  metric_column=R2_COL_NAME,
546
  metric_name='R-squared (R²)',
547
  color='green'
548
  )
549
-
550
- # THAY ĐỔI 3: Ghi đè tiêu đề để ngắt dòng
551
- fig_r2.update_layout(
552
- title="<b>R-squared (R²) <br> vs. Forecast Horizon</b>"
553
- )
554
-
555
- # Áp dụng các thay đổi màu chữ VÀ căn lề
556
- fig_r2.update_layout(**common_plot_layout_updates)
557
  st.plotly_chart(fig_r2, use_container_width=True)
558
 
559
- # 2. Forecast vs. Actual Plots
560
  st.subheader("Forecast vs. Actual Comparison (on entire test set)")
561
 
562
- # This function runs predictions on the *entire* X_test (thousands of rows)
563
- # It will be slow without caching
564
  @st.cache_data
565
  def get_full_test_predictions(_models, _X_test):
566
  """Run predictions on the entire test set and cache the results."""
@@ -581,8 +484,6 @@ elif app_section == "📊 Model Performance & Diagnostics": # Cập nhật tên
581
  y_pred=y_pred_test['Day 1'],
582
  day_ahead_title="Day 1 Forecast"
583
  )
584
- # Áp dụng các thay đổi màu chữ VÀ căn lề
585
- fig_d1.update_layout(**common_plot_layout_updates)
586
  st.plotly_chart(fig_d1, use_container_width=True)
587
  with col2:
588
  fig_d5 = diag.plot_forecast_vs_actual(
@@ -590,11 +491,9 @@ elif app_section == "📊 Model Performance & Diagnostics": # Cập nhật tên
590
  y_pred=y_pred_test['Day 5'],
591
  day_ahead_title="Day 5 Forecast"
592
  )
593
- # Áp dụng các thay đổi màu chữ VÀ căn lề
594
- fig_d5.update_layout(**common_plot_layout_updates)
595
  st.plotly_chart(fig_d5, use_container_width=True)
596
 
597
- # 3. Optional: Deep Dive Expander
598
  with st.expander("Champion Model Diagnostics (Deep Dive)"):
599
  st.markdown("Detailed analysis of residuals (error = actual - predicted) for the Day 1 forecast.")
600
 
@@ -605,15 +504,11 @@ elif app_section == "📊 Model Performance & Diagnostics": # Cập nhật tên
605
  fig_res_time = diag.plot_residuals_vs_time(
606
  y_true_d1, y_pred_d1, dates_d1, "Day 1"
607
  )
608
- # Áp dụng các thay đổi màu chữ VÀ căn lề
609
- fig_res_time.update_layout(**common_plot_layout_updates)
610
  st.plotly_chart(fig_res_time, use_container_width=True)
611
 
612
  fig_res_dist = diag.plot_residuals_distribution(
613
  y_true_d1, y_pred_d1, "Day 1"
614
  )
615
- # Áp dụng các thay đổi màu chữ VÀ căn lề
616
- fig_res_dist.update_layout(**common_plot_layout_updates)
617
  st.plotly_chart(fig_res_dist, use_container_width=True)
618
  st.markdown("A good model will have residuals (errors) normally distributed (bell curve) "
619
  "around 0 and show no pattern over time.")
 
221
  st.error("Could not load main data, application cannot continue.")
222
  st.stop()
223
 
224
+ # --- 5. GIAO DIỆN SIDEBAR (THANH ĐIỀU HƯỚNG) ---
 
225
 
226
+ st.sidebar.title("Forecast Input") # Đổi tiêu đề sidebar
 
 
 
 
 
 
 
227
  selected_date = None
228
+
229
+ # Hiển thị date input cố định trên sidebar
230
+ if not X_test.empty:
231
+ min_date = X_test.index.min()
232
+ max_date = X_test.index.max()
233
 
234
+ selected_date = st.sidebar.date_input(
235
+ "Select a date from the test set:",
236
+ value=min_date,
237
+ min_value=min_date,
238
+ max_value=max_date,
239
+ format="YYYY-MM-DD"
240
+ )
241
+ else:
242
+ st.sidebar.error("Test data could not be loaded.")
243
+
244
+ # --- 6. GIAO DIỆN CHÍNH (MAIN PANEL) ---
245
+
246
+ # Tạo các tab ngang thay vì radio button
247
+ tab1, tab2, tab3 = st.tabs([
248
+ "📑 Project Overview & Methodology",
249
+ "🌦️ Live 5-Day Forecast",
250
+ "📊 Model Performance & Diagnostics"
251
+ ])
252
 
253
+ # --- TAB 1: Project Overview ---
254
+ with tab1:
255
+ # --- MỤC 3 TRONG CHECKLIST ---
256
  st.title("Saigon Temperature Forecasting Application 🌦️")
257
 
258
  st.subheader("Project Summary")
 
275
  st.subheader("Final Model Leaderboard")
276
  st.markdown("Model leaderboard ranked by average RMSE score (lower is better).")
277
 
278
+ # Gọi hàm từ benchmark_utils.py
279
  leaderboard_df = benchmark_utils.load_leaderboard()
280
 
281
  if not leaderboard_df.empty:
282
+ # Lấy 10 mô hình hàng đầu và reset index (bỏ index cũ)
283
  top_10_df = leaderboard_df.head(10).reset_index(drop=True)
284
+
285
+ # Đặt index mới bắt đầu từ 1
286
  top_10_df.index = range(1, len(top_10_df) + 1)
287
+
288
+ # Hiển thị DataFrame đã sửa
289
  st.dataframe(top_10_df, use_container_width=True)
290
  else:
291
  st.warning("Could not load leaderboard data.")
292
 
293
  # --------------------------------------------------------------------
294
+
295
+ # --- TAB 2: Live Forecast ---
296
+ with tab2:
297
+ # --- MỤC 4 TRONG CHECKLIST ---
298
  st.title("Live 5-Day Forecast")
299
 
300
+ # Biến 'selected_date' được lấy từ sidebar
301
  if selected_date and not X_test.empty and models:
302
  st.header(f"5-Day Forecast from: {selected_date.strftime('%Y-%m-%d')}")
303
 
304
+ # 1. Lấy Input Features
305
  selected_date_ts = pd.Timestamp(selected_date)
306
 
307
+ # Sửa lỗi logic: input_features phải được lấy từ X_test
308
  if selected_date_ts in X_test.index:
309
  input_features = X_test.loc[[selected_date_ts]]
310
  else:
311
  st.error("Data not found for the selected date in X_test.")
312
+ input_features = pd.DataFrame() # Tạo dataframe rỗng để tránh lỗi sau
313
 
314
  if input_features.empty:
315
  st.error("Data not found for the selected date.")
316
  else:
317
+ # 2. Tạo dự đoán
318
  predictions = []
319
  for i in range(5):
320
+ model = models[i] # Lấy mô hình T+i
321
  pred = model.predict(input_features)[0]
322
  predictions.append(pred)
323
 
324
+ # 3. Hiển thị dự đoán (dùng st.metric)
325
  forecast_dates = pd.date_range(start=selected_date, periods=6, freq='D')[1:]
326
  cols = st.columns(5)
327
 
328
+ # Lấy giá trị thực tế để so sánh
329
+ # --- SỬA LỖI LOGIC: Lấy 'actual_values' từ all_data_df ---
 
 
 
330
  actual_values = []
331
  if selected_date_ts in all_data_df.index:
 
332
  actual_row = all_data_df.loc[selected_date_ts]
 
 
333
  for col_name in TARGET_COLS:
334
  actual_values.append(actual_row[col_name])
335
  else:
336
+ actual_values = [float('nan')] * 5
 
337
 
 
 
338
  is_partial_forecast = any(pd.isna(v) for v in actual_values)
 
339
 
340
  for i in range(5):
341
  with cols[i]:
 
 
342
  actual_val = actual_values[i]
343
  delta_text = f"Actual: {actual_val:.1f}°C" if pd.notna(actual_val) else "Actual: --"
344
+
 
345
  st.metric(
346
  label=f"Forecast for {forecast_dates[i].strftime('%b %d')}",
347
  value=f"{predictions[i]:.1f}°C",
348
+ delta=delta_text,
349
+ delta_color="off"
350
  )
351
 
352
+ # --- BIỂU ĐỒ DỮ LIỆU TRAINING ---
353
  st.subheader("Training Set Overview")
354
  with st.expander("Show plot of all training data (before 2024-02-18)"):
 
 
355
  train_end_date = pd.Timestamp(TEST_START_DATE) - pd.Timedelta(days=1)
356
  train_df = all_data_df.loc[:train_end_date][CURRENT_TEMP_COL]
357
 
 
 
 
 
 
 
 
358
  fig_train = go.Figure()
359
  fig_train.add_trace(go.Scatter(
360
  x=train_df.index, y=train_df,
361
  mode='lines', name='Training Data (Actual)',
362
+ line=dict(color='#005aa7', width=1)
363
  ))
 
364
  fig_train.update_layout(
365
  title="Actual Temperature - Full Training Set",
366
+ xaxis_title="Date", yaxis_title="Temperature (°C)",
367
+ template="plotly_white"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
  )
 
369
  st.plotly_chart(fig_train, use_container_width=True)
 
 
 
 
370
 
371
+ # 4. Biểu đồ Context
372
  st.subheader("Historical Context & Forecast")
373
 
 
374
  history_start = selected_date_ts - pd.Timedelta(days=14)
375
  history_end = selected_date_ts
 
 
376
  history_df = all_data_df.loc[history_start:history_end][CURRENT_TEMP_COL]
377
 
 
378
  forecast_df = pd.DataFrame({
379
  'Date': forecast_dates,
380
  'Forecast': predictions
381
  }).set_index('Date')
382
 
383
  fig = go.Figure()
 
384
  fig.add_trace(go.Scatter(
385
  x=history_df.index, y=history_df,
386
  mode='lines+markers', name='Past 14 Days (Actual)',
 
391
  mode='lines+markers', name='5-Day Forecast',
392
  line=dict(color='red', dash='dot')
393
  ))
 
394
  fig.update_layout(
395
  title="Forecast vs. Historical Context",
396
  xaxis_title="Date", yaxis_title="Temperature (°C)",
397
+ template="plotly_white", legend=dict(x=0.01, y=0.99)
 
 
398
  )
399
  st.plotly_chart(fig, use_container_width=True)
400
 
401
+ # --- Biểu đồ so sánh Actual vs Forecast ---
402
  st.subheader("5-Day Forecast vs. Actual Comparison")
403
  if is_partial_forecast:
404
  st.info("Cannot draw the Actual vs. Forecast comparison chart because "
405
  "the selected date is too close to the end of the test set (missing 'actual' data).")
406
  else:
407
  fig_comp = go.Figure()
 
 
408
  fig_comp.add_trace(go.Scatter(
409
  x=forecast_dates, y=predictions,
410
  mode='lines+markers', name='5-Day Forecast',
411
  line=dict(color='red', dash='dot')
412
  ))
 
 
413
  fig_comp.add_trace(go.Scatter(
414
  x=forecast_dates, y=actual_values,
415
  mode='lines+markers', name='5-Day Actual',
416
  line=dict(color='blue')
417
  ))
 
418
  fig_comp.update_layout(
419
  title="5-Day Forecast vs. Actual Values",
420
  xaxis_title="Date", yaxis_title="Temperature (°C)",
421
+ template="plotly_white", legend=dict(x=0.01, y=0.99)
 
 
422
  )
423
  st.plotly_chart(fig_comp, use_container_width=True)
 
424
 
425
  else:
426
  st.warning("Please wait... Loading data or models.")
427
 
428
  # --------------------------------------------------------------------
429
+
430
+ # --- TAB 3: Model Performance ---
431
+ with tab3:
432
+ # --- MỤC 5 TRONG CHECKLIST ---
433
  st.title("Model Performance & Diagnostics")
434
 
435
  if not perf_df.empty and not y_test.empty:
436
  st.subheader("Performance Degradation over 5 Days")
437
  st.markdown("How model performance changes as the forecast horizon increases.")
438
 
 
439
  MODEL_NAME = 'Champion (Stacking)'
440
  champion_perf_df = perf_df[perf_df['Model'] == MODEL_NAME].copy()
441
 
442
+ # 1. Biểu đồ suy giảm hiệu suất (RMSE & R2)
 
 
 
443
  RMSE_COL_NAME = 'RMSE (Absolute Error)'
444
+ R2_COL_NAME = 'R-squared'
445
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
446
  col1, col2 = st.columns(2)
447
  with col1:
448
  fig_rmse = diag.plot_performance_degradation(
449
+ champion_perf_df,
450
  metric_column=RMSE_COL_NAME,
451
  metric_name='RMSE (Temperature °C)',
452
  color='blue'
453
  )
 
 
 
 
 
 
 
 
454
  st.plotly_chart(fig_rmse, use_container_width=True)
455
  with col2:
456
  fig_r2 = diag.plot_performance_degradation(
457
+ champion_perf_df,
458
  metric_column=R2_COL_NAME,
459
  metric_name='R-squared (R²)',
460
  color='green'
461
  )
 
 
 
 
 
 
 
 
462
  st.plotly_chart(fig_r2, use_container_width=True)
463
 
464
+ # 2. Biểu đồ Dự báo vs. Thực tế
465
  st.subheader("Forecast vs. Actual Comparison (on entire test set)")
466
 
 
 
467
  @st.cache_data
468
  def get_full_test_predictions(_models, _X_test):
469
  """Run predictions on the entire test set and cache the results."""
 
484
  y_pred=y_pred_test['Day 1'],
485
  day_ahead_title="Day 1 Forecast"
486
  )
 
 
487
  st.plotly_chart(fig_d1, use_container_width=True)
488
  with col2:
489
  fig_d5 = diag.plot_forecast_vs_actual(
 
491
  y_pred=y_pred_test['Day 5'],
492
  day_ahead_title="Day 5 Forecast"
493
  )
 
 
494
  st.plotly_chart(fig_d5, use_container_width=True)
495
 
496
+ # 3. Mục Tùy chọn: Deep Dive Expander
497
  with st.expander("Champion Model Diagnostics (Deep Dive)"):
498
  st.markdown("Detailed analysis of residuals (error = actual - predicted) for the Day 1 forecast.")
499
 
 
504
  fig_res_time = diag.plot_residuals_vs_time(
505
  y_true_d1, y_pred_d1, dates_d1, "Day 1"
506
  )
 
 
507
  st.plotly_chart(fig_res_time, use_container_width=True)
508
 
509
  fig_res_dist = diag.plot_residuals_distribution(
510
  y_true_d1, y_pred_d1, "Day 1"
511
  )
 
 
512
  st.plotly_chart(fig_res_dist, use_container_width=True)
513
  st.markdown("A good model will have residuals (errors) normally distributed (bell curve) "
514
  "around 0 and show no pattern over time.")