Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -221,38 +221,38 @@ else:
|
|
| 221 |
st.error("Could not load main data, application cannot continue.")
|
| 222 |
st.stop()
|
| 223 |
|
| 224 |
-
# --- 5. SIDEBAR
|
| 225 |
-
st.sidebar.title("Navigation")
|
| 226 |
|
| 227 |
-
|
| 228 |
-
app_section = st.sidebar.radio(
|
| 229 |
-
"Choose a section:",
|
| 230 |
-
("📑 Project Overview & Methodology", "🌦️ Live 5-Day Forecast", "📊 Model Performance & Diagnostics")
|
| 231 |
-
)
|
| 232 |
-
# --- KẾT THÚC THAY ĐỔI ---
|
| 233 |
-
|
| 234 |
-
# Date input only shows on the "Live Forecast" tab
|
| 235 |
selected_date = None
|
| 236 |
-
|
| 237 |
-
|
|
|
|
|
|
|
|
|
|
| 238 |
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
|
| 253 |
-
# ---
|
| 254 |
-
|
| 255 |
-
# ---
|
| 256 |
st.title("Saigon Temperature Forecasting Application 🌦️")
|
| 257 |
|
| 258 |
st.subheader("Project Summary")
|
|
@@ -275,153 +275,112 @@ if app_section == "📑 Project Overview & Methodology": # Cập nhật tên cho
|
|
| 275 |
st.subheader("Final Model Leaderboard")
|
| 276 |
st.markdown("Model leaderboard ranked by average RMSE score (lower is better).")
|
| 277 |
|
| 278 |
-
#
|
| 279 |
leaderboard_df = benchmark_utils.load_leaderboard()
|
| 280 |
|
| 281 |
if not leaderboard_df.empty:
|
|
|
|
| 282 |
top_10_df = leaderboard_df.head(10).reset_index(drop=True)
|
|
|
|
|
|
|
| 283 |
top_10_df.index = range(1, len(top_10_df) + 1)
|
|
|
|
|
|
|
| 284 |
st.dataframe(top_10_df, use_container_width=True)
|
| 285 |
else:
|
| 286 |
st.warning("Could not load leaderboard data.")
|
| 287 |
|
| 288 |
# --------------------------------------------------------------------
|
| 289 |
-
|
| 290 |
-
|
|
|
|
|
|
|
| 291 |
st.title("Live 5-Day Forecast")
|
| 292 |
|
|
|
|
| 293 |
if selected_date and not X_test.empty and models:
|
| 294 |
st.header(f"5-Day Forecast from: {selected_date.strftime('%Y-%m-%d')}")
|
| 295 |
|
| 296 |
-
# 1.
|
| 297 |
selected_date_ts = pd.Timestamp(selected_date)
|
| 298 |
|
| 299 |
-
#
|
| 300 |
if selected_date_ts in X_test.index:
|
| 301 |
input_features = X_test.loc[[selected_date_ts]]
|
| 302 |
else:
|
| 303 |
st.error("Data not found for the selected date in X_test.")
|
| 304 |
-
input_features = pd.DataFrame() #
|
| 305 |
|
| 306 |
if input_features.empty:
|
| 307 |
st.error("Data not found for the selected date.")
|
| 308 |
else:
|
| 309 |
-
# 2.
|
| 310 |
predictions = []
|
| 311 |
for i in range(5):
|
| 312 |
-
model = models[i] #
|
| 313 |
pred = model.predict(input_features)[0]
|
| 314 |
predictions.append(pred)
|
| 315 |
|
| 316 |
-
# 3.
|
| 317 |
forecast_dates = pd.date_range(start=selected_date, periods=6, freq='D')[1:]
|
| 318 |
cols = st.columns(5)
|
| 319 |
|
| 320 |
-
#
|
| 321 |
-
# ---
|
| 322 |
-
# We need to get the target columns (e.g., 'temp_next_1_day')
|
| 323 |
-
# from the ORIGINAL DATAFRAME at the selected date.
|
| 324 |
-
|
| 325 |
actual_values = []
|
| 326 |
if selected_date_ts in all_data_df.index:
|
| 327 |
-
# Get the row from the original dataframe
|
| 328 |
actual_row = all_data_df.loc[selected_date_ts]
|
| 329 |
-
|
| 330 |
-
# Get values from the target columns (temp_next_1_day, etc.)
|
| 331 |
for col_name in TARGET_COLS:
|
| 332 |
actual_values.append(actual_row[col_name])
|
| 333 |
else:
|
| 334 |
-
|
| 335 |
-
actual_values = [float('nan')] * 5 # Create 5 NaN values
|
| 336 |
|
| 337 |
-
# --- APPLYING LOGIC (1) FROM REFERENCE CODE ---
|
| 338 |
-
# Check if any 'Actual' values are missing
|
| 339 |
is_partial_forecast = any(pd.isna(v) for v in actual_values)
|
| 340 |
-
# ----------------------------------------------
|
| 341 |
|
| 342 |
for i in range(5):
|
| 343 |
with cols[i]:
|
| 344 |
-
|
| 345 |
-
# --- FIX 1 (REFINED): Use pd.notna logic from reference code ---
|
| 346 |
actual_val = actual_values[i]
|
| 347 |
delta_text = f"Actual: {actual_val:.1f}°C" if pd.notna(actual_val) else "Actual: --"
|
| 348 |
-
|
| 349 |
-
|
| 350 |
st.metric(
|
| 351 |
label=f"Forecast for {forecast_dates[i].strftime('%b %d')}",
|
| 352 |
value=f"{predictions[i]:.1f}°C",
|
| 353 |
-
delta=delta_text,
|
| 354 |
-
delta_color="off"
|
| 355 |
)
|
| 356 |
|
| 357 |
-
# ---
|
| 358 |
st.subheader("Training Set Overview")
|
| 359 |
with st.expander("Show plot of all training data (before 2024-02-18)"):
|
| 360 |
-
|
| 361 |
-
# Define training data range
|
| 362 |
train_end_date = pd.Timestamp(TEST_START_DATE) - pd.Timedelta(days=1)
|
| 363 |
train_df = all_data_df.loc[:train_end_date][CURRENT_TEMP_COL]
|
| 364 |
|
| 365 |
-
# --- START OF MODIFICATION (USER REQUEST) ---
|
| 366 |
-
|
| 367 |
-
# Calculate Y-axis range to keep it fixed
|
| 368 |
-
y_min = train_df.min()
|
| 369 |
-
y_max = train_df.max()
|
| 370 |
-
padding = (y_max - y_min) * 0.05 # Add 5% padding
|
| 371 |
-
|
| 372 |
fig_train = go.Figure()
|
| 373 |
fig_train.add_trace(go.Scatter(
|
| 374 |
x=train_df.index, y=train_df,
|
| 375 |
mode='lines', name='Training Data (Actual)',
|
| 376 |
-
line=dict(color='#005aa7', width=1)
|
| 377 |
))
|
| 378 |
-
|
| 379 |
fig_train.update_layout(
|
| 380 |
title="Actual Temperature - Full Training Set",
|
| 381 |
-
xaxis_title="Date",
|
| 382 |
-
|
| 383 |
-
template="plotly_white",
|
| 384 |
-
paper_bgcolor='rgba(0,0,0,0)', # Làm nền trong suốt
|
| 385 |
-
plot_bgcolor='rgba(255,255,255,0.6)', # Nền trắng mờ
|
| 386 |
-
|
| 387 |
-
# 1. Fix the Y-axis range (Requirement 2 & 3)
|
| 388 |
-
yaxis=dict(
|
| 389 |
-
range=[y_min - padding, y_max + padding]
|
| 390 |
-
),
|
| 391 |
-
|
| 392 |
-
# 2. Add the X-axis range slider (Requirement 1)
|
| 393 |
-
xaxis=dict(
|
| 394 |
-
rangeslider=dict(
|
| 395 |
-
visible=True
|
| 396 |
-
),
|
| 397 |
-
type="date" # Ensure it's treated as a date axis
|
| 398 |
-
)
|
| 399 |
)
|
| 400 |
-
|
| 401 |
st.plotly_chart(fig_train, use_container_width=True)
|
| 402 |
-
|
| 403 |
-
# --- END OF MODIFICATION ---
|
| 404 |
-
|
| 405 |
-
# --- END NEW ADDITION 2 ---
|
| 406 |
|
| 407 |
-
# 4.
|
| 408 |
st.subheader("Historical Context & Forecast")
|
| 409 |
|
| 410 |
-
# Get last 14 days of history
|
| 411 |
history_start = selected_date_ts - pd.Timedelta(days=14)
|
| 412 |
history_end = selected_date_ts
|
| 413 |
-
|
| 414 |
-
# Get 'temp' data from the original dataframe
|
| 415 |
history_df = all_data_df.loc[history_start:history_end][CURRENT_TEMP_COL]
|
| 416 |
|
| 417 |
-
# Create dataframe for forecast
|
| 418 |
forecast_df = pd.DataFrame({
|
| 419 |
'Date': forecast_dates,
|
| 420 |
'Forecast': predictions
|
| 421 |
}).set_index('Date')
|
| 422 |
|
| 423 |
fig = go.Figure()
|
| 424 |
-
|
| 425 |
fig.add_trace(go.Scatter(
|
| 426 |
x=history_df.index, y=history_df,
|
| 427 |
mode='lines+markers', name='Past 14 Days (Actual)',
|
|
@@ -432,135 +391,79 @@ elif app_section == "🌦️ Live 5-Day Forecast": # Cập nhật tên cho khớ
|
|
| 432 |
mode='lines+markers', name='5-Day Forecast',
|
| 433 |
line=dict(color='red', dash='dot')
|
| 434 |
))
|
| 435 |
-
|
| 436 |
fig.update_layout(
|
| 437 |
title="Forecast vs. Historical Context",
|
| 438 |
xaxis_title="Date", yaxis_title="Temperature (°C)",
|
| 439 |
-
template="plotly_white", legend=dict(x=0.01, y=0.99)
|
| 440 |
-
paper_bgcolor='rgba(0,0,0,0)',
|
| 441 |
-
plot_bgcolor='rgba(255,255,255,0.6)'
|
| 442 |
)
|
| 443 |
st.plotly_chart(fig, use_container_width=True)
|
| 444 |
|
| 445 |
-
# ---
|
| 446 |
st.subheader("5-Day Forecast vs. Actual Comparison")
|
| 447 |
if is_partial_forecast:
|
| 448 |
st.info("Cannot draw the Actual vs. Forecast comparison chart because "
|
| 449 |
"the selected date is too close to the end of the test set (missing 'actual' data).")
|
| 450 |
else:
|
| 451 |
fig_comp = go.Figure()
|
| 452 |
-
|
| 453 |
-
# Add Forecast trace
|
| 454 |
fig_comp.add_trace(go.Scatter(
|
| 455 |
x=forecast_dates, y=predictions,
|
| 456 |
mode='lines+markers', name='5-Day Forecast',
|
| 457 |
line=dict(color='red', dash='dot')
|
| 458 |
))
|
| 459 |
-
|
| 460 |
-
# Add Actual trace
|
| 461 |
fig_comp.add_trace(go.Scatter(
|
| 462 |
x=forecast_dates, y=actual_values,
|
| 463 |
mode='lines+markers', name='5-Day Actual',
|
| 464 |
line=dict(color='blue')
|
| 465 |
))
|
| 466 |
-
|
| 467 |
fig_comp.update_layout(
|
| 468 |
title="5-Day Forecast vs. Actual Values",
|
| 469 |
xaxis_title="Date", yaxis_title="Temperature (°C)",
|
| 470 |
-
template="plotly_white", legend=dict(x=0.01, y=0.99)
|
| 471 |
-
paper_bgcolor='rgba(0,0,0,0)',
|
| 472 |
-
plot_bgcolor='rgba(255,255,255,0.6)'
|
| 473 |
)
|
| 474 |
st.plotly_chart(fig_comp, use_container_width=True)
|
| 475 |
-
# --- END APPLYING LOGIC (2) ---
|
| 476 |
|
| 477 |
else:
|
| 478 |
st.warning("Please wait... Loading data or models.")
|
| 479 |
|
| 480 |
# --------------------------------------------------------------------
|
| 481 |
-
|
| 482 |
-
|
|
|
|
|
|
|
| 483 |
st.title("Model Performance & Diagnostics")
|
| 484 |
|
| 485 |
if not perf_df.empty and not y_test.empty:
|
| 486 |
st.subheader("Performance Degradation over 5 Days")
|
| 487 |
st.markdown("How model performance changes as the forecast horizon increases.")
|
| 488 |
|
| 489 |
-
# Filter for Champion model only
|
| 490 |
MODEL_NAME = 'Champion (Stacking)'
|
| 491 |
champion_perf_df = perf_df[perf_df['Model'] == MODEL_NAME].copy()
|
| 492 |
|
| 493 |
-
# 1.
|
| 494 |
-
|
| 495 |
-
# --- CUSTOMIZATION ---
|
| 496 |
-
# Ensure 'RMSE' and 'R2' column names are correct for 'final_5_day_results_df.csv'
|
| 497 |
RMSE_COL_NAME = 'RMSE (Absolute Error)'
|
| 498 |
-
R2_COL_NAME = 'R-squared'
|
| 499 |
|
| 500 |
-
# --- BẮT ĐẦU SỬA LỖI CĂN LỀ VÀ CẮT CHỮ ---
|
| 501 |
-
common_plot_layout_updates = dict(
|
| 502 |
-
font=dict(color="#333333"), # Màu chữ chung cho biểu đồ
|
| 503 |
-
title_font_color="#004080", # Màu chữ cho tiêu đề biểu đồ
|
| 504 |
-
xaxis=dict(
|
| 505 |
-
title_font_color="#005aa7",
|
| 506 |
-
tickfont=dict(color="#333333") # Màu chữ cho nhãn trục
|
| 507 |
-
),
|
| 508 |
-
yaxis=dict(
|
| 509 |
-
title_font_color="#005aa7",
|
| 510 |
-
tickfont=dict(color="#333333")
|
| 511 |
-
),
|
| 512 |
-
legend=dict(font=dict(color="#333333")),
|
| 513 |
-
paper_bgcolor='rgba(255,255,255,0.9)', # Nền giấy sáng hơn, hơi trong suốt
|
| 514 |
-
plot_bgcolor='rgba(255,255,255,0.7)', # Nền plot sáng hơn, hơi trong suốt
|
| 515 |
-
|
| 516 |
-
# THAY ĐỔI 1: Căn lề trái tất cả tiêu đề
|
| 517 |
-
title_x=0, # Căn lề trái (0 = trái, 0.5 = giữa, 1 = phải)
|
| 518 |
-
title_xanchor='left', # Đặt anchor lề trái
|
| 519 |
-
|
| 520 |
-
# THAY ĐỔI 2: Thêm lề để tiêu đề không bị dính
|
| 521 |
-
margin=dict(l=20, t=80) # l=left (20px), t=top (80px để có chỗ cho 2 dòng)
|
| 522 |
-
)
|
| 523 |
-
# --- KẾT THÚC SỬA LỖI CĂN LỀ VÀ CẮT CHỮ ---
|
| 524 |
-
|
| 525 |
col1, col2 = st.columns(2)
|
| 526 |
with col1:
|
| 527 |
fig_rmse = diag.plot_performance_degradation(
|
| 528 |
-
champion_perf_df,
|
| 529 |
metric_column=RMSE_COL_NAME,
|
| 530 |
metric_name='RMSE (Temperature °C)',
|
| 531 |
color='blue'
|
| 532 |
)
|
| 533 |
-
|
| 534 |
-
# THAY ĐỔI 3: Ghi đè tiêu đề để ngắt dòng (sửa lỗi bị cắt)
|
| 535 |
-
fig_rmse.update_layout(
|
| 536 |
-
title="<b>RMSE (Temperature °C) <br> vs. Forecast Horizon</b>"
|
| 537 |
-
)
|
| 538 |
-
|
| 539 |
-
# Áp dụng các thay đổi màu chữ VÀ căn lề
|
| 540 |
-
fig_rmse.update_layout(**common_plot_layout_updates)
|
| 541 |
st.plotly_chart(fig_rmse, use_container_width=True)
|
| 542 |
with col2:
|
| 543 |
fig_r2 = diag.plot_performance_degradation(
|
| 544 |
-
champion_perf_df,
|
| 545 |
metric_column=R2_COL_NAME,
|
| 546 |
metric_name='R-squared (R²)',
|
| 547 |
color='green'
|
| 548 |
)
|
| 549 |
-
|
| 550 |
-
# THAY ĐỔI 3: Ghi đè tiêu đề để ngắt dòng
|
| 551 |
-
fig_r2.update_layout(
|
| 552 |
-
title="<b>R-squared (R²) <br> vs. Forecast Horizon</b>"
|
| 553 |
-
)
|
| 554 |
-
|
| 555 |
-
# Áp dụng các thay đổi màu chữ VÀ căn lề
|
| 556 |
-
fig_r2.update_layout(**common_plot_layout_updates)
|
| 557 |
st.plotly_chart(fig_r2, use_container_width=True)
|
| 558 |
|
| 559 |
-
# 2.
|
| 560 |
st.subheader("Forecast vs. Actual Comparison (on entire test set)")
|
| 561 |
|
| 562 |
-
# This function runs predictions on the *entire* X_test (thousands of rows)
|
| 563 |
-
# It will be slow without caching
|
| 564 |
@st.cache_data
|
| 565 |
def get_full_test_predictions(_models, _X_test):
|
| 566 |
"""Run predictions on the entire test set and cache the results."""
|
|
@@ -581,8 +484,6 @@ elif app_section == "📊 Model Performance & Diagnostics": # Cập nhật tên
|
|
| 581 |
y_pred=y_pred_test['Day 1'],
|
| 582 |
day_ahead_title="Day 1 Forecast"
|
| 583 |
)
|
| 584 |
-
# Áp dụng các thay đổi màu chữ VÀ căn lề
|
| 585 |
-
fig_d1.update_layout(**common_plot_layout_updates)
|
| 586 |
st.plotly_chart(fig_d1, use_container_width=True)
|
| 587 |
with col2:
|
| 588 |
fig_d5 = diag.plot_forecast_vs_actual(
|
|
@@ -590,11 +491,9 @@ elif app_section == "📊 Model Performance & Diagnostics": # Cập nhật tên
|
|
| 590 |
y_pred=y_pred_test['Day 5'],
|
| 591 |
day_ahead_title="Day 5 Forecast"
|
| 592 |
)
|
| 593 |
-
# Áp dụng các thay đổi màu chữ VÀ căn lề
|
| 594 |
-
fig_d5.update_layout(**common_plot_layout_updates)
|
| 595 |
st.plotly_chart(fig_d5, use_container_width=True)
|
| 596 |
|
| 597 |
-
# 3.
|
| 598 |
with st.expander("Champion Model Diagnostics (Deep Dive)"):
|
| 599 |
st.markdown("Detailed analysis of residuals (error = actual - predicted) for the Day 1 forecast.")
|
| 600 |
|
|
@@ -605,15 +504,11 @@ elif app_section == "📊 Model Performance & Diagnostics": # Cập nhật tên
|
|
| 605 |
fig_res_time = diag.plot_residuals_vs_time(
|
| 606 |
y_true_d1, y_pred_d1, dates_d1, "Day 1"
|
| 607 |
)
|
| 608 |
-
# Áp dụng các thay đổi màu chữ VÀ căn lề
|
| 609 |
-
fig_res_time.update_layout(**common_plot_layout_updates)
|
| 610 |
st.plotly_chart(fig_res_time, use_container_width=True)
|
| 611 |
|
| 612 |
fig_res_dist = diag.plot_residuals_distribution(
|
| 613 |
y_true_d1, y_pred_d1, "Day 1"
|
| 614 |
)
|
| 615 |
-
# Áp dụng các thay đổi màu chữ VÀ căn lề
|
| 616 |
-
fig_res_dist.update_layout(**common_plot_layout_updates)
|
| 617 |
st.plotly_chart(fig_res_dist, use_container_width=True)
|
| 618 |
st.markdown("A good model will have residuals (errors) normally distributed (bell curve) "
|
| 619 |
"around 0 and show no pattern over time.")
|
|
|
|
| 221 |
st.error("Could not load main data, application cannot continue.")
|
| 222 |
st.stop()
|
| 223 |
|
| 224 |
+
# --- 5. GIAO DIỆN SIDEBAR (THANH ĐIỀU HƯỚNG) ---
|
|
|
|
| 225 |
|
| 226 |
+
st.sidebar.title("Forecast Input") # Đổi tiêu đề sidebar
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
selected_date = None
|
| 228 |
+
|
| 229 |
+
# Hiển thị date input cố định trên sidebar
|
| 230 |
+
if not X_test.empty:
|
| 231 |
+
min_date = X_test.index.min()
|
| 232 |
+
max_date = X_test.index.max()
|
| 233 |
|
| 234 |
+
selected_date = st.sidebar.date_input(
|
| 235 |
+
"Select a date from the test set:",
|
| 236 |
+
value=min_date,
|
| 237 |
+
min_value=min_date,
|
| 238 |
+
max_value=max_date,
|
| 239 |
+
format="YYYY-MM-DD"
|
| 240 |
+
)
|
| 241 |
+
else:
|
| 242 |
+
st.sidebar.error("Test data could not be loaded.")
|
| 243 |
+
|
| 244 |
+
# --- 6. GIAO DIỆN CHÍNH (MAIN PANEL) ---
|
| 245 |
+
|
| 246 |
+
# Tạo các tab ngang thay vì radio button
|
| 247 |
+
tab1, tab2, tab3 = st.tabs([
|
| 248 |
+
"📑 Project Overview & Methodology",
|
| 249 |
+
"🌦️ Live 5-Day Forecast",
|
| 250 |
+
"📊 Model Performance & Diagnostics"
|
| 251 |
+
])
|
| 252 |
|
| 253 |
+
# --- TAB 1: Project Overview ---
|
| 254 |
+
with tab1:
|
| 255 |
+
# --- MỤC 3 TRONG CHECKLIST ---
|
| 256 |
st.title("Saigon Temperature Forecasting Application 🌦️")
|
| 257 |
|
| 258 |
st.subheader("Project Summary")
|
|
|
|
| 275 |
st.subheader("Final Model Leaderboard")
|
| 276 |
st.markdown("Model leaderboard ranked by average RMSE score (lower is better).")
|
| 277 |
|
| 278 |
+
# Gọi hàm từ benchmark_utils.py
|
| 279 |
leaderboard_df = benchmark_utils.load_leaderboard()
|
| 280 |
|
| 281 |
if not leaderboard_df.empty:
|
| 282 |
+
# Lấy 10 mô hình hàng đầu và reset index (bỏ index cũ)
|
| 283 |
top_10_df = leaderboard_df.head(10).reset_index(drop=True)
|
| 284 |
+
|
| 285 |
+
# Đặt index mới bắt đầu từ 1
|
| 286 |
top_10_df.index = range(1, len(top_10_df) + 1)
|
| 287 |
+
|
| 288 |
+
# Hiển thị DataFrame đã sửa
|
| 289 |
st.dataframe(top_10_df, use_container_width=True)
|
| 290 |
else:
|
| 291 |
st.warning("Could not load leaderboard data.")
|
| 292 |
|
| 293 |
# --------------------------------------------------------------------
|
| 294 |
+
|
| 295 |
+
# --- TAB 2: Live Forecast ---
|
| 296 |
+
with tab2:
|
| 297 |
+
# --- MỤC 4 TRONG CHECKLIST ---
|
| 298 |
st.title("Live 5-Day Forecast")
|
| 299 |
|
| 300 |
+
# Biến 'selected_date' được lấy từ sidebar
|
| 301 |
if selected_date and not X_test.empty and models:
|
| 302 |
st.header(f"5-Day Forecast from: {selected_date.strftime('%Y-%m-%d')}")
|
| 303 |
|
| 304 |
+
# 1. Lấy Input Features
|
| 305 |
selected_date_ts = pd.Timestamp(selected_date)
|
| 306 |
|
| 307 |
+
# Sửa lỗi logic: input_features phải được lấy từ X_test
|
| 308 |
if selected_date_ts in X_test.index:
|
| 309 |
input_features = X_test.loc[[selected_date_ts]]
|
| 310 |
else:
|
| 311 |
st.error("Data not found for the selected date in X_test.")
|
| 312 |
+
input_features = pd.DataFrame() # Tạo dataframe rỗng để tránh lỗi sau
|
| 313 |
|
| 314 |
if input_features.empty:
|
| 315 |
st.error("Data not found for the selected date.")
|
| 316 |
else:
|
| 317 |
+
# 2. Tạo dự đoán
|
| 318 |
predictions = []
|
| 319 |
for i in range(5):
|
| 320 |
+
model = models[i] # Lấy mô hình T+i
|
| 321 |
pred = model.predict(input_features)[0]
|
| 322 |
predictions.append(pred)
|
| 323 |
|
| 324 |
+
# 3. Hiển thị dự đoán (dùng st.metric)
|
| 325 |
forecast_dates = pd.date_range(start=selected_date, periods=6, freq='D')[1:]
|
| 326 |
cols = st.columns(5)
|
| 327 |
|
| 328 |
+
# Lấy giá trị thực tế để so sánh
|
| 329 |
+
# --- SỬA LỖI LOGIC: Lấy 'actual_values' từ all_data_df ---
|
|
|
|
|
|
|
|
|
|
| 330 |
actual_values = []
|
| 331 |
if selected_date_ts in all_data_df.index:
|
|
|
|
| 332 |
actual_row = all_data_df.loc[selected_date_ts]
|
|
|
|
|
|
|
| 333 |
for col_name in TARGET_COLS:
|
| 334 |
actual_values.append(actual_row[col_name])
|
| 335 |
else:
|
| 336 |
+
actual_values = [float('nan')] * 5
|
|
|
|
| 337 |
|
|
|
|
|
|
|
| 338 |
is_partial_forecast = any(pd.isna(v) for v in actual_values)
|
|
|
|
| 339 |
|
| 340 |
for i in range(5):
|
| 341 |
with cols[i]:
|
|
|
|
|
|
|
| 342 |
actual_val = actual_values[i]
|
| 343 |
delta_text = f"Actual: {actual_val:.1f}°C" if pd.notna(actual_val) else "Actual: --"
|
| 344 |
+
|
|
|
|
| 345 |
st.metric(
|
| 346 |
label=f"Forecast for {forecast_dates[i].strftime('%b %d')}",
|
| 347 |
value=f"{predictions[i]:.1f}°C",
|
| 348 |
+
delta=delta_text,
|
| 349 |
+
delta_color="off"
|
| 350 |
)
|
| 351 |
|
| 352 |
+
# --- BIỂU ĐỒ DỮ LIỆU TRAINING ---
|
| 353 |
st.subheader("Training Set Overview")
|
| 354 |
with st.expander("Show plot of all training data (before 2024-02-18)"):
|
|
|
|
|
|
|
| 355 |
train_end_date = pd.Timestamp(TEST_START_DATE) - pd.Timedelta(days=1)
|
| 356 |
train_df = all_data_df.loc[:train_end_date][CURRENT_TEMP_COL]
|
| 357 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 358 |
fig_train = go.Figure()
|
| 359 |
fig_train.add_trace(go.Scatter(
|
| 360 |
x=train_df.index, y=train_df,
|
| 361 |
mode='lines', name='Training Data (Actual)',
|
| 362 |
+
line=dict(color='#005aa7', width=1)
|
| 363 |
))
|
|
|
|
| 364 |
fig_train.update_layout(
|
| 365 |
title="Actual Temperature - Full Training Set",
|
| 366 |
+
xaxis_title="Date", yaxis_title="Temperature (°C)",
|
| 367 |
+
template="plotly_white"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 368 |
)
|
|
|
|
| 369 |
st.plotly_chart(fig_train, use_container_width=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
|
| 371 |
+
# 4. Biểu đồ Context
|
| 372 |
st.subheader("Historical Context & Forecast")
|
| 373 |
|
|
|
|
| 374 |
history_start = selected_date_ts - pd.Timedelta(days=14)
|
| 375 |
history_end = selected_date_ts
|
|
|
|
|
|
|
| 376 |
history_df = all_data_df.loc[history_start:history_end][CURRENT_TEMP_COL]
|
| 377 |
|
|
|
|
| 378 |
forecast_df = pd.DataFrame({
|
| 379 |
'Date': forecast_dates,
|
| 380 |
'Forecast': predictions
|
| 381 |
}).set_index('Date')
|
| 382 |
|
| 383 |
fig = go.Figure()
|
|
|
|
| 384 |
fig.add_trace(go.Scatter(
|
| 385 |
x=history_df.index, y=history_df,
|
| 386 |
mode='lines+markers', name='Past 14 Days (Actual)',
|
|
|
|
| 391 |
mode='lines+markers', name='5-Day Forecast',
|
| 392 |
line=dict(color='red', dash='dot')
|
| 393 |
))
|
|
|
|
| 394 |
fig.update_layout(
|
| 395 |
title="Forecast vs. Historical Context",
|
| 396 |
xaxis_title="Date", yaxis_title="Temperature (°C)",
|
| 397 |
+
template="plotly_white", legend=dict(x=0.01, y=0.99)
|
|
|
|
|
|
|
| 398 |
)
|
| 399 |
st.plotly_chart(fig, use_container_width=True)
|
| 400 |
|
| 401 |
+
# --- Biểu đồ so sánh Actual vs Forecast ---
|
| 402 |
st.subheader("5-Day Forecast vs. Actual Comparison")
|
| 403 |
if is_partial_forecast:
|
| 404 |
st.info("Cannot draw the Actual vs. Forecast comparison chart because "
|
| 405 |
"the selected date is too close to the end of the test set (missing 'actual' data).")
|
| 406 |
else:
|
| 407 |
fig_comp = go.Figure()
|
|
|
|
|
|
|
| 408 |
fig_comp.add_trace(go.Scatter(
|
| 409 |
x=forecast_dates, y=predictions,
|
| 410 |
mode='lines+markers', name='5-Day Forecast',
|
| 411 |
line=dict(color='red', dash='dot')
|
| 412 |
))
|
|
|
|
|
|
|
| 413 |
fig_comp.add_trace(go.Scatter(
|
| 414 |
x=forecast_dates, y=actual_values,
|
| 415 |
mode='lines+markers', name='5-Day Actual',
|
| 416 |
line=dict(color='blue')
|
| 417 |
))
|
|
|
|
| 418 |
fig_comp.update_layout(
|
| 419 |
title="5-Day Forecast vs. Actual Values",
|
| 420 |
xaxis_title="Date", yaxis_title="Temperature (°C)",
|
| 421 |
+
template="plotly_white", legend=dict(x=0.01, y=0.99)
|
|
|
|
|
|
|
| 422 |
)
|
| 423 |
st.plotly_chart(fig_comp, use_container_width=True)
|
|
|
|
| 424 |
|
| 425 |
else:
|
| 426 |
st.warning("Please wait... Loading data or models.")
|
| 427 |
|
| 428 |
# --------------------------------------------------------------------
|
| 429 |
+
|
| 430 |
+
# --- TAB 3: Model Performance ---
|
| 431 |
+
with tab3:
|
| 432 |
+
# --- MỤC 5 TRONG CHECKLIST ---
|
| 433 |
st.title("Model Performance & Diagnostics")
|
| 434 |
|
| 435 |
if not perf_df.empty and not y_test.empty:
|
| 436 |
st.subheader("Performance Degradation over 5 Days")
|
| 437 |
st.markdown("How model performance changes as the forecast horizon increases.")
|
| 438 |
|
|
|
|
| 439 |
MODEL_NAME = 'Champion (Stacking)'
|
| 440 |
champion_perf_df = perf_df[perf_df['Model'] == MODEL_NAME].copy()
|
| 441 |
|
| 442 |
+
# 1. Biểu đồ suy giảm hiệu suất (RMSE & R2)
|
|
|
|
|
|
|
|
|
|
| 443 |
RMSE_COL_NAME = 'RMSE (Absolute Error)'
|
| 444 |
+
R2_COL_NAME = 'R-squared'
|
| 445 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 446 |
col1, col2 = st.columns(2)
|
| 447 |
with col1:
|
| 448 |
fig_rmse = diag.plot_performance_degradation(
|
| 449 |
+
champion_perf_df,
|
| 450 |
metric_column=RMSE_COL_NAME,
|
| 451 |
metric_name='RMSE (Temperature °C)',
|
| 452 |
color='blue'
|
| 453 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 454 |
st.plotly_chart(fig_rmse, use_container_width=True)
|
| 455 |
with col2:
|
| 456 |
fig_r2 = diag.plot_performance_degradation(
|
| 457 |
+
champion_perf_df,
|
| 458 |
metric_column=R2_COL_NAME,
|
| 459 |
metric_name='R-squared (R²)',
|
| 460 |
color='green'
|
| 461 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 462 |
st.plotly_chart(fig_r2, use_container_width=True)
|
| 463 |
|
| 464 |
+
# 2. Biểu đồ Dự báo vs. Thực tế
|
| 465 |
st.subheader("Forecast vs. Actual Comparison (on entire test set)")
|
| 466 |
|
|
|
|
|
|
|
| 467 |
@st.cache_data
|
| 468 |
def get_full_test_predictions(_models, _X_test):
|
| 469 |
"""Run predictions on the entire test set and cache the results."""
|
|
|
|
| 484 |
y_pred=y_pred_test['Day 1'],
|
| 485 |
day_ahead_title="Day 1 Forecast"
|
| 486 |
)
|
|
|
|
|
|
|
| 487 |
st.plotly_chart(fig_d1, use_container_width=True)
|
| 488 |
with col2:
|
| 489 |
fig_d5 = diag.plot_forecast_vs_actual(
|
|
|
|
| 491 |
y_pred=y_pred_test['Day 5'],
|
| 492 |
day_ahead_title="Day 5 Forecast"
|
| 493 |
)
|
|
|
|
|
|
|
| 494 |
st.plotly_chart(fig_d5, use_container_width=True)
|
| 495 |
|
| 496 |
+
# 3. Mục Tùy chọn: Deep Dive Expander
|
| 497 |
with st.expander("Champion Model Diagnostics (Deep Dive)"):
|
| 498 |
st.markdown("Detailed analysis of residuals (error = actual - predicted) for the Day 1 forecast.")
|
| 499 |
|
|
|
|
| 504 |
fig_res_time = diag.plot_residuals_vs_time(
|
| 505 |
y_true_d1, y_pred_d1, dates_d1, "Day 1"
|
| 506 |
)
|
|
|
|
|
|
|
| 507 |
st.plotly_chart(fig_res_time, use_container_width=True)
|
| 508 |
|
| 509 |
fig_res_dist = diag.plot_residuals_distribution(
|
| 510 |
y_true_d1, y_pred_d1, "Day 1"
|
| 511 |
)
|
|
|
|
|
|
|
| 512 |
st.plotly_chart(fig_res_dist, use_container_width=True)
|
| 513 |
st.markdown("A good model will have residuals (errors) normally distributed (bell curve) "
|
| 514 |
"around 0 and show no pattern over time.")
|