| |
| import streamlit as st |
| import pandas as pd |
| import joblib |
| import plotly.graph_objects as go |
| |
| from typing import List |
| import numpy as np |
| import requests |
| import base64 |
| import plotly.express as px |
| from datetime import datetime |
| import os |
| import feature_engineering_live as fe_live |
| import pytz |
|
|
|
|
| def _align_features_for_model(model, live_df: pd.DataFrame, reference_columns: pd.Index): |
| """Return a DataFrame aligned to `model` expected features. |
| |
| - If model has `feature_names_in_`, use that ordering and add missing cols as NaN. |
| - Else if model has `n_features_in_`, select the first N columns from `reference_columns` |
| that exist in `live_df`, and pad missing ones with NaN. |
| - Otherwise, try to reindex to `reference_columns` and then take available columns. |
| """ |
| df = live_df.copy() |
| |
| for c in df.columns: |
| try: |
| df[c] = pd.to_numeric(df[c], errors='coerce') |
| except Exception: |
| pass |
|
|
| expected = None |
| try: |
| expected = getattr(model, 'feature_names_in_', None) |
| if expected is not None: |
| expected = list(expected) |
| except Exception: |
| expected = None |
|
|
| if expected is not None: |
| |
| for c in expected: |
| if c not in df.columns: |
| df[c] = np.nan |
| return df[expected] |
|
|
| |
| n_in = getattr(model, 'n_features_in_', None) |
| if n_in is not None: |
| |
| available = [c for c in reference_columns if c in df.columns] |
| chosen = available[:int(n_in)] |
| |
| for i in range(len(chosen), int(n_in)): |
| pad_name = f'_pad_col_{i}' |
| df[pad_name] = np.nan |
| chosen.append(pad_name) |
| return df[chosen] |
|
|
| |
| common = [c for c in reference_columns if c in df.columns] |
| if common: |
| return df[common] |
|
|
| |
| return df |
|
|
|
|
| def _format_feature_value(feature: str, value) -> str: |
| """Format a feature value with appropriate units for display in the UI. |
| |
| Returns a string (e.g., '26.0°C', '75%', '5.4 km/h', '--' for missing). |
| """ |
| try: |
| if pd.isna(value): |
| return "--" |
| except Exception: |
| pass |
|
|
| |
| units = { |
| 'temp': ('{:.1f}°C', 1), |
| 'feelslike': ('{:.1f}°C', 1), |
| 'humidity': ('{:.0f}%', 0), |
| 'cloud': ('{:.0f}%', 0), |
| 'cloudcover': ('{:.0f}%', 0), |
| 'precip': ('{:.1f} mm', 1), |
| 'rain': ('{:.1f} mm', 1), |
| 'wind': ('{:.1f} km/h', 1), |
| 'windspeed': ('{:.1f} km/h', 1), |
| 'speed': ('{:.1f}', 1), |
| 'day_of_year': ('{:.0f}', 0), |
| 'hour': ('{:.2f}', 2), |
| 'sin': ('{:.2f}', 2), |
| 'diff': ('{:.2f}°C', 2), |
| 'roll': ('{:.2f}°C', 2), |
| 'temp_lag': ('{:.1f}°C', 1), |
| } |
|
|
| key = feature.lower() |
| for k, (fmt, _) in units.items(): |
| if k in key: |
| try: |
| return fmt.format(float(value)) |
| except Exception: |
| return str(value) |
|
|
| |
| try: |
| v = float(value) |
| |
| if abs(v - int(v)) < 1e-6: |
| return f"{int(v)}" |
| return f"{v:.2f}" |
| except Exception: |
| return str(value) |
| |
|
|
| |
# Project-local analysis helpers; abort the app early with a clear message
# if they are missing, since later tabs depend on them.
try:
    from src import benchmark_utils
    from src import diagnostic_plots as diag
except ImportError:
    st.error("Error: Could not find 'src/benchmark_utils.py' or 'src/diagnostic_plots.py'. "
             "Please ensure they exist in the 'src/' directory.")
    st.stop()
|
|
| |
# Global Streamlit page configuration (must run before any other st.* UI call).
st.set_page_config(
    page_title="Saigon Temperature Forecast",
    page_icon="🌦️",
    layout="wide"
)
|
|
| |
def load_css():
    """Inject the custom 'weather' theme CSS (tuned for HIGH CONTRAST)."""
    # NOTE(review): the CSS targets Streamlit's internal data-testid/baseweb
    # hooks, which can change between Streamlit releases — re-verify after
    # upgrading Streamlit.
    st.markdown("""
    <style>
        /* ===== FONT CHUNG ===== */
        .stApp, .stSidebar {
            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
        }

        /* ===== NỀN CHÍNH (MAIN BACKGROUND) ===== */
        [data-testid="stAppViewContainer"] {
            background-image: linear-gradient(to bottom, #B0E0E6, #F0F8FF);
            background-attachment: fixed;
            background-size: cover;
        }

        /* ===== 1. THANH CHỌN TAB (st.tabs) ===== */
        /* Tab không được chọn */
        button[data-baseweb="tab"][aria-selected="false"] {
            background-color: rgba(255, 255, 255, 0.7) !important; /* Nền mờ */
            color: #0E2A47 !important; /* Chữ đậm */
            border-top-left-radius: 8px;
            border-top-right-radius: 8px;
            padding: 12px 16px !important; /* <<< THÊM PADDING */
        }

        /* Tab ĐANG ĐƯỢC CHỌN */
        button[data-baseweb="tab"][aria-selected="true"] {
            background-color: #FFFFFF !important; /* Nền TRẮNG ĐỤC */
            color: #004080 !important; /* Chữ MÀU XANH ĐẬM */
            font-weight: 700 !important;
            border-top-left-radius: 8px;
            border-top-right-radius: 8px;
            border-bottom: 3px solid #004080 !important; /* Viền xanh đậm */
            padding: 12px 16px !important; /* <<< THÊM PADDING */
        }

        /* ===== 2. THẺ DỰ BÁO (METRIC CARDS) ===== */
        div[data-testid="stMetric"] {
            background-color: rgba(255, 255, 255, 0.95) !important; /* Nền trắng (đục hơn) */
            border: 1px solid #B0C4DE; /* Thêm viền (xanh nhạt) */
            border-radius: 12px;
            padding: 20px;
            box-shadow: 0 4px 15px rgba(0, 0, 0, 0.1) !important; /* Đổ bóng đậm hơn */
            backdrop-filter: blur(5px);
            transition: transform 0.2s ease;
        }
        div[data-testid="stMetric"]:hover {
            transform: translateY(-3px);
            box-shadow: 0 6px 20px rgba(0, 0, 0, 0.15) !important;
        }

        /* Tiêu đề thẻ (Forecast for...) - đã có tương phản tốt */
        div[data-testid="stMetricLabel"] p {
            font-size: 1.1rem !important;
            font-weight: 600 !important;
            color: #333333; /* Xám đậm */
        }
        /* Giá trị nhiệt độ - đã có tương phản tốt */
        div[data-testid="stMetricValue"] {
            font-size: 2.8rem !important;
            font-weight: 700 !important;
            color: #004080; /* Xanh navy đậm */
        }
        /* Giá trị "Actual" (delta) - đã có tương phản tốt */
        div[data-testid="stMetricDelta"] {
            font-size: 1rem !important;
            font-weight: 600 !important;
            color: #555555; /* Xám vừa */
        }

        /* ===== 3. TIÊU ĐỀ (HEADINGS) ===== */
        h1, h2, h3 {
            color: #004080 !important; /* Dùng chung màu XANH ĐẬM NHẤT */
            text-shadow: 1px 1px 4px rgba(0, 0, 0, 0.15) !important; /* Thêm đổ bóng ĐEN (thay vì trắng) */
        }

        /* ===== 4. BẢNG (DATAFRAME) ===== */
        .stDataFrame {
            background-color: #FFFFFF; /* Nền TRẮNG ĐỤC */
            border: 1px solid #CCCCCC !important; /* Viền xám nhạt */
            border-radius: 8px;
            overflow: hidden;
        }
        /* Tiêu đề của bảng */
        [data-testid="stDataGridHeader"] {
            background-color: #F0F8FF; /* Nền header (Alice Blue) */
            color: #004080; /* Chữ xanh đậm */
        }

        /* ===== 5. BIỂU ĐỒ (PLOTLY) ===== */
        .plotly-graph-div {
            background-color: #FFFFFF; /* Nền TRẮNG ĐỤC */
            border: 1px solid #E0E0E0; /* Viền xám rất nhạt */
            border-radius: 8px;
        }

        /* ===== 6. VĂN BẢN THÔNG THƯỜNG (PARAGRAPH & MARKDOWN) ===== */
        /* Quy tắc này áp dụng cho văn bản st.markdown và các đoạn văn bản khác */
        .stMarkdown, p, li {
            color: #333333 !important; /* Xám đen, tương phản tốt trên nền sáng */
            font-size: 1.05rem; /* Có thể thêm tùy chọn để chữ lớn hơn một chút */
        }

        /* SAFE DataFrame Styling */
        [data-testid="stDataFrame"] {
            border: 1px solid #CCCCCC !important;
            border-radius: 8px !important;
            background-color: #FFFFFF !important;
        }

        /* ===== EXPANDERS (vẫn giữ như cũ) ===== */
        div[data-testid="stExpander"] {
            background-color: rgba(255, 255, 255, 0.9) !important;
            border-radius: 10px !important;
            border: 1px solid rgba(0, 0, 0, 0.1) !important;
        }

        /* ===== NEW: GENERIC INFO CARD (for the 4 top boxes) ===== */
        .info-card {
            background-color: #FFFFFF;
            border: 1px solid #E0E0E0;
            border-radius: 12px;
            padding: 20px 25px 25px 25px; /* top, right, bottom, left */
            box-shadow: 0 4px 15px rgba(0, 0, 0, 0.08);
            min-height: 260px; /* enforce consistent card height */
            display: flex;
            flex-direction: column;
            justify-content: flex-start;
        }
        .info-card h4 { margin-bottom: 12px; color: #004080; }
        .info-card ul { padding-left: 20px; }
        .info-card li { margin-bottom: 8px; }

        /* ===== NEW: LEADERBOARD CONTAINER & STYLING ===== */
        .leaderboard-container { background-color: #FFFFFF; border: 1px solid #E0E0E0; border-radius: 12px; padding: 18px; box-shadow: 0 4px 15px rgba(0,0,0,0.08); min-height: 360px; }
        .leaderboard-container .stDataFrame { border: none; }
        .leaderboard-container [data-testid="stDataGridHeader"] { background-color: #F0F8FF; font-weight:700; color: #004080; }
        .leaderboard-container [data-testid="stTable"] td,
        .leaderboard-container [data-testid="stTable"] th {
            border-right: 1px solid #EAEAEA;
            border-bottom: 1px solid #EAEAEA;
            padding: 10px 12px;
            vertical-align: middle;
        }
        .leaderboard-container [data-testid="stTable"] th { border-left: 1px solid #EAEAEA; }
        /* ===== Static leaderboard card (manual rows, scrollable) ===== */
        .leaderboard-card {
            background-color: #FFFFFF;
            border: 1px solid #E0E0E0;
            border-radius: 10px;
            padding: 8px;
            /* Allow the card to expand to fit all rows so the UI shows the full leaderboard */
            max-height: none;
            overflow: visible;
        }
        .static-row { display: flex; align-items: center; }
        .static-cell { flex: 1 1 0; padding: 10px 12px; border-bottom: 1px solid #F2F6F9; color: #222; }
        .static-cell.small { flex: 0 0 56px; text-align: center; }
        .static-header { position: sticky; top: 0; background: #F0F8FF; z-index: 5; border-bottom: 2px solid #E6EEF6; }
        .static-header .static-cell { font-weight: 700; color: #004080; }
        /* Large animated emoji shown at top-right of each tab */
        .tab-header { position: relative; }

        .tab-emoji {
            display: inline-block;
            position: absolute; /* remove from normal flow so we can lift it */
            top: -6.3rem; /* lift up above the header */
            right: -2rem; /* move a bit to the right */
            font-size: 10rem; /* keep large but adjustable */
            line-height: 1;
            will-change: transform;
            transform-origin: center;
            pointer-events: none; /* don't block clicks */
            z-index: 9999; /* keep on top */
        }

        @keyframes sway {
            0% { transform: translateX(0px); }
            25% { transform: translateX(18px); }
            50% { transform: translateX(0px); }
            75% { transform: translateX(-18px); }
            100% { transform: translateX(0px); }
        }

        /* Apply sway animation with a gentle ease-in-out loop */
        .tab-emoji {
            animation: sway 4.5s ease-in-out infinite;
        }

        /* Respect user preference for reduced motion */
        @media (prefers-reduced-motion: reduce) {
            .tab-emoji { animation: none !important; }
        }

    </style>
    """, unsafe_allow_html=True)
|
|
| |
| load_css() |
| |
def local_css(file_name: str):
    """Layer an additional stylesheet from a local file, if present.

    Missing files are not fatal: a warning is shown and styling is skipped.
    """
    try:
        with open(file_name, 'r', encoding='utf-8') as css_file:
            styles = css_file.read()
            st.markdown(f'<style>{styles}</style>', unsafe_allow_html=True)
    except FileNotFoundError:
        st.warning(f"Local CSS file '{file_name}' not found. Skipping local styles.")
|
|
| |
| local_css("style.css") |
| |
| def _time_of_day_emoji(now: datetime = None) -> str: |
| """Return a small emoji representing the current time-of-day. |
| |
| - Dawn (4-6): 🌅 |
| - Morning (6-12): 🌞 |
| - Afternoon (12-17): 🌤️ |
| - Sunset/Evening (17-20): 🌇 |
| - Night (20-4): 🌃 |
| """ |
|
|
| if now is None: |
| |
| saigon_tz = pytz.timezone('Asia/Ho_Chi_Minh') |
| |
| now = datetime.now(saigon_tz) |
| |
| h = now.hour |
|
|
| if 4 <= h < 6: |
| return "🌅" |
| if 6 <= h < 12: |
| return "🌞" |
| if 12 <= h < 17: |
| return "🌤️" |
| if 17 <= h < 20: |
| return "🌇" |
| return "🌃" |
|
|
| |
|
|
| |
| |
@st.cache_data
def load_hourly_performance_data(file_path="data/hourly_120h_evaluation_results.csv"):
    """Loads hourly RMSE/R2 performance data (T+1h to T+120h)."""
    try:
        results = pd.read_csv(file_path)
        # Derive a 1-based horizon column when the CSV does not provide one.
        if 'Horizon' not in results.columns:
            results['Horizon'] = results.index + 1
        return results
    except FileNotFoundError:
        st.warning(f"Warning: Hourly Performance data not found at: {file_path}. Cannot show degradation plot.")
        return pd.DataFrame()
|
|
|
|
@st.cache_data
def load_hourly_data(file_path="data/final_hourly_feature_dataset.csv"):
    """Loads the Hourly Direct dataset and prepares a datetime index.

    Expects a column named 'datetime' containing ISO-like timestamps. Returns an indexed
    DataFrame sorted by the datetime index. On error returns an empty DataFrame and logs a warning.
    """
    DATE_COLUMN = 'datetime'

    # Read the raw CSV; any failure is reported and yields an empty frame.
    try:
        hourly = pd.read_csv(file_path)
    except FileNotFoundError:
        st.error(f"ERROR: Hourly data file not found at: {file_path}. Please check the path and file name.")
        return pd.DataFrame()
    except Exception as e:
        st.error(f"An unexpected error occurred while reading hourly data: {e}")
        return pd.DataFrame()

    if DATE_COLUMN not in hourly.columns:
        st.error(f"Error: Date column '{DATE_COLUMN}' not found in hourly data CSV. Please check the column name.")
        return pd.DataFrame()

    # Index on the timestamp column, sorted ascending for slicing by date.
    try:
        hourly[DATE_COLUMN] = pd.to_datetime(hourly[DATE_COLUMN])
        return hourly.set_index(DATE_COLUMN).sort_index()
    except Exception as e:
        st.error(f"An unexpected error occurred while processing hourly data: {e}")
        return pd.DataFrame()
|
|
@st.cache_resource
def load_24_hourly_models():
    """Load the 24 specialist LGBM models (T+1h through T+24h) for the chart.

    Returns a {horizon: model} dict; horizons whose pickle is missing or
    unreadable are simply omitted (callers fall back to estimates).
    """
    NUM_HORIZONS = 24
    loaded = {}

    for horizon in range(1, NUM_HORIZONS + 1):
        file_path = f"models/lgbm_model_target_temp_next_{horizon}h.pkl"
        try:
            loaded[horizon] = joblib.load(file_path)
        except FileNotFoundError:
            st.warning(f"Hourly model file not found for horizon {horizon}: '{file_path}'. Skipping this horizon.")
        except Exception as e:
            st.warning(f"Error loading hourly model for horizon {horizon} ('{file_path}'): {e}")

    # Surface an informational note when the set is incomplete.
    if len(loaded) < NUM_HORIZONS:
        st.info(f"Loaded {len(loaded)} / {NUM_HORIZONS} hourly models. Missing horizons will use fallback estimates.")

    return loaded
|
|
|
|
@st.cache_data
def load_precomputed_hourly_forecasts(file_path="data/final_hourly_120h_forecast_dataset.csv"):
    """Load precomputed hourly forecasts (up to 120h) as a convenience fallback.

    The CSV is expected to contain a datetime-like column ('datetime' or 'forecast_start' or 'time')
    plus columns for horizons or a wide format where each row contains the forecast for the next 120h.
    We'll try to infer the format.
    """
    # Any read failure (missing file, parse error) yields an empty frame.
    try:
        forecasts = pd.read_csv(file_path)
    except Exception:
        return pd.DataFrame()

    # Find the first recognised timestamp column (case-insensitive).
    candidates = [c for c in forecasts.columns if c.lower() in ("datetime", "forecast_start", "time", "timestamp")]
    if not candidates:
        return pd.DataFrame()

    ts_col = candidates[0]
    try:
        forecasts[ts_col] = pd.to_datetime(forecasts[ts_col])
        return forecasts.set_index(ts_col).sort_index()
    except Exception:
        return pd.DataFrame()
|
|
@st.cache_data
def load_feature_data(file_path="data/final_dataset_tree.csv"):
    """Loads features and targets, converts index to datetime."""
    DATE_COLUMN = 'datetime'

    try:
        features = pd.read_csv(file_path)
    except FileNotFoundError:
        st.error(f"ERROR: Main data file not found at: {file_path}")
        return pd.DataFrame()

    # The timestamp column is mandatory for building the index.
    if DATE_COLUMN not in features.columns:
        st.error(f"Error: Date column '{DATE_COLUMN}' not found in 'final_dataset_tree.csv'. "
                 f"Please update the DATE_COLUMN variable in 'app.py'.")
        return pd.DataFrame()

    features[DATE_COLUMN] = pd.to_datetime(features[DATE_COLUMN])
    return features.set_index(DATE_COLUMN).sort_index()
|
|
@st.cache_resource
def load_champion_models():
    """Loads the 5 specialist models from the checklist.

    All-or-nothing: if any of the 5 pickles is missing, an empty list is
    returned so callers can detect the failure.
    """
    loaded = []
    try:
        for day in range(1, 6):
            loaded.append(joblib.load(f"models/champion_stacking_day{day}.pkl"))
        return loaded
    except FileNotFoundError as e:
        st.error(f"ERROR: Model file not found. Checked: {e.filename}. "
                 "Ensure the 5 .pkl files are in the 'models/' directory.")
        return []
|
|
@st.cache_data
def load_performance_data(file_path="data/final_5_day_results_df.csv"):
    """Loads pre-calculated performance data for Tab 3."""
    try:
        return pd.read_csv(file_path)
    except FileNotFoundError:
        st.error(f"ERROR: Performance file not found at: {file_path}")
        return pd.DataFrame()
|
|
|
|
@st.cache_data
def load_leaderboard_data(small_path: str = "data/leaderboard_for_ui.csv"):
    """Load the small precomputed leaderboard CSV used by Tab 1.

    NOTE(review): there is no fallback to `benchmark_utils.load_leaderboard()`
    here (an earlier description claimed one) — when the small CSV is missing
    or unreadable a warning is shown and an empty DataFrame is returned.
    Cached to avoid re-reading on every rerun and to ensure the data is
    available before UI rendering begins (prevents render-pause-reflow).
    """
    if os.path.exists(small_path):
        try:
            return pd.read_csv(small_path)
        except Exception as e:
            # Unreadable/corrupt file: warn, but keep the app running.
            st.warning(f"Could not read small leaderboard UI file '{small_path}': {e}")
            return pd.DataFrame()
    else:
        st.warning(f"Small leaderboard UI file not found at '{small_path}'. Please commit 'data/leaderboard_for_ui.csv' to the repo for fast startup.")
        return pd.DataFrame()
|
|
| |
|
|
| |
| all_data_df = load_feature_data() |
| |
| models = None |
| perf_df = load_performance_data() |
|
|
| |
| TARGET_COLS = ['temp_next_1_day', 'temp_next_2_day', 'temp_next_3_day', 'temp_next_4_day', 'temp_next_5_day'] |
| CURRENT_TEMP_COL = 'temp' |
|
|
| |
| TEST_START_DATE = "2024-02-18" |
| TEST_END_DATE = "2025-09-26" |
|
|
# Pre-build the daily test split; empty frames keep later tabs from crashing
# when the source data is unavailable.
X_test, y_test, test_df = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

if not all_data_df.empty:
    try:
        test_df = all_data_df.loc[TEST_START_DATE:TEST_END_DATE].copy()

        # Features are every column that is not a forecast target.
        feature_cols = [col for col in all_data_df.columns if col not in TARGET_COLS]

        X_test = test_df[feature_cols]
        y_test = test_df[TARGET_COLS]

        # Friendlier column labels for charts/tables.
        y_test.columns = [f'Day {i}' for i in range(1, 6)]
    except KeyError:
        st.error(f"Error: Target columns (e.g., '{TARGET_COLS[0]}') or "
                 f"'{CURRENT_TEMP_COL}' column not found in CSV. Please update 'app.py'.")
    except Exception as e:
        st.error(f"Error processing test set: {e}")
else:
    # Without the main dataset nothing downstream can render.
    st.error("Could not load main data, application cannot continue.")
    st.stop()
|
|
|
|
| |
# Hourly model targets (24h steps out to 120h).
HOURLY_TARGET_COLS = ['target_temp_next_24h', 'target_temp_next_48h', 'target_temp_next_72h',
                      'target_temp_next_96h', 'target_temp_next_120h']

# Hourly datasets, per-horizon performance table, and the 24 hourly models.
hourly_data_df = load_hourly_data(file_path="data/final_hourly_feature_dataset.csv")
hourly_perf_df = load_hourly_performance_data(file_path="data/hourly_120h_evaluation_results.csv")
hourly_models_24h = load_24_hourly_models()

if not hourly_data_df.empty:
    HOURLY_FEATURE_COLS = [col for col in hourly_data_df.columns if col not in HOURLY_TARGET_COLS]

    X_test_hourly = hourly_data_df.loc[TEST_START_DATE:TEST_END_DATE][HOURLY_FEATURE_COLS].copy()

    # Drop non-numeric object columns the hourly models were not trained on.
    columns_to_drop_objects = ['sunrise', 'sunset']
    X_test_hourly = X_test_hourly.drop(columns=columns_to_drop_objects, errors='ignore')

    HOURLY_FEATURE_COLS = X_test_hourly.columns.tolist()
else:
    X_test_hourly = pd.DataFrame()
|
|
| |
def predict_next_24_hours(input_features: pd.DataFrame, models: dict) -> List[float]:
    """Predict the next 24 hourly temperatures using per-horizon models.

    Falls back to a deterministic sinusoidal estimate (with seeded noise when
    no models are available at all) anchored at the latest observed 'temp'.
    Accepts either a {horizon: model} dict or a 0-indexed list of models.
    """
    HORIZONS = 24

    model_count = len(models) if models else 0

    # No usable inputs or no models at all: synthesize a plausible, seeded
    # 24-hour curve around the latest observed temperature.
    if input_features.empty or model_count == 0:
        if not input_features.empty and 'temp' in input_features.columns:
            base_temp = input_features['temp'].iloc[-1]
        else:
            base_temp = 28.0
        np.random.seed(42)
        return [base_temp + 1.5 * np.sin(2 * np.pi * (h + 10) / 24) + np.random.normal(0, 0.5)
                for h in range(HORIZONS)]

    base_temp = input_features['temp'].iloc[-1] if 'temp' in input_features.columns else 28.0

    results = []
    for horizon in range(1, HORIZONS + 1):
        # Resolve the model for this horizon from a dict or a list.
        if isinstance(models, dict):
            model = models.get(horizon)
        elif isinstance(models, list) and len(models) >= horizon:
            model = models[horizon - 1]
        else:
            model = None

        if model is None:
            # Noise-free sinusoidal fallback for this single horizon.
            results.append(float(base_temp + 1.5 * np.sin(2 * np.pi * (horizon + 10) / 24)))
            continue

        try:
            raw = model.predict(input_features)
            # Predictions may be scalar or array-like; normalize to float.
            results.append(float(raw[0]) if hasattr(raw, '__getitem__') else float(raw))
        except Exception:
            # A failing model also falls back to the sinusoidal estimate.
            results.append(float(base_temp + 1.5 * np.sin(2 * np.pi * (horizon + 10) / 24)))

    return results
|
|
|
|
| |
@st.cache_data(ttl=600)
def get_live_weather(api_key: str):
    """Return current weather JSON from OpenWeatherMap for Ho Chi Minh City.

    Returns {} on any network/HTTP/JSON failure (with a UI warning).
    """
    LAT, LON = 10.7769, 106.7009
    URL = f"https://api.openweathermap.org/data/2.5/weather?lat={LAT}&lon={LON}&appid={api_key}&units=metric"
    try:
        response = requests.get(URL, timeout=10)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        st.warning(f"Could not fetch live weather: {e}")
        return {}
|
|
|
|
@st.cache_data(ttl=3600)
def get_5_day_forecast(api_key: str):
    """Return 5-day / 3-hour forecast JSON from OpenWeatherMap for Ho Chi Minh City.

    Returns {} on any network/HTTP/JSON failure (with a UI warning).
    """
    LAT, LON = 10.7769, 106.7009
    URL = f"https://api.openweathermap.org/data/2.5/forecast?lat={LAT}&lon={LON}&appid={api_key}&units=metric"
    try:
        response = requests.get(URL, timeout=10)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        st.warning(f"Could not fetch 5-day forecast: {e}")
        return {}
|
|
| |
| |
|
|
| |
| |
# Leaderboard is loaded up-front so Tab 1 can render without a reflow.
leaderboard_df = load_leaderboard_data()

# Top-level navigation tabs.
tab1, tab2, tab3, tab4, tab5 = st.tabs([
    "📄 Project Overview",
    "🔮 Live 5-Day Forecast",
    "📈 Model Performance",
    "🕒 Hourly Prediction",
    "📡 Live & Future Forecast"
])
|
|
| |
# ===================== TAB 1: PROJECT OVERVIEW =====================
with tab1:
    # Header row with the animated time-of-day emoji on the right.
    emoji = _time_of_day_emoji()
    st.markdown(f"""
    <div class="tab-header" style="display:flex;justify-content:space-between;align-items:center;position:relative;">
        <h1 style="margin:0">Saigon Temperature Forecasting Application 🌦️</h1>
        <div class="tab-emoji">{emoji}</div>
    </div>
    """, unsafe_allow_html=True)

    # Prefer a locally-bundled banner image (embedded as a base64 data URI).
    # The remote default below is a web page, not an image, so when no local
    # file exists the plain-colour placeholder branch renders instead.
    local_candidates = [
        "assets/banner.jpg",
        "static/banner.jpg",
        "data/banner.jpg",
    ]
    banner_url = "https://bambooairways.com/jp/en/explore/destinations/southeast-asia/vietnam/ho-chi-minh"
    for p in local_candidates:
        if os.path.exists(p):
            try:
                with open(p, 'rb') as _f:
                    _b = _f.read()
                _b64 = base64.b64encode(_b).decode('ascii')
                banner_url = f"data:image/jpg;base64,{_b64}"
            except Exception:
                # Unreadable local file: keep the remote fallback URL.
                pass
            break

    # Only render an <img> when the source is actually an image.
    is_image_src = False
    try:
        is_image_src = str(banner_url).startswith('data:image') or str(banner_url).lower().endswith(('.jpg', '.jpeg', '.png', '.webp'))
    except Exception:
        is_image_src = False

    if is_image_src:
        banner_html = f"""
        <div style="width:100%;border-radius:10px;overflow:hidden;box-shadow:0 6px 18px rgba(3,37,76,0.06);">
            <img src="{banner_url}" style="width:100%;height:360px;object-fit:cover;display:block;" alt="Ho Chi Minh City" />
        </div>
        <div style="width:100%;text-align:center;margin-top:8px;color:#666;font-size:0.95rem;font-weight:600;">Ho Chi Minh City — Credit: Bamboo Airways</div>
        """
    else:
        # Plain-colour placeholder when no image source is available.
        banner_html = """
        <div style="min-height:360px;width:100%;border-radius:10px;overflow:hidden;position:relative;background-color:#cfeef3;box-shadow:0 6px 18px rgba(3,37,76,0.06);">
        </div>
        <div style="width:100%;text-align:center;margin-top:8px;color:#666;font-size:0.95rem;font-weight:600;">Ho Chi Minh City — Credit: Bamboo Airways</div>
        """
    st.markdown(banner_html, unsafe_allow_html=True)
|
|
| |
    # ---- Row 1 of project info cards ----
    st.markdown('<div class="card-row">', unsafe_allow_html=True)
    row1_col1, row1_col2 = st.columns(2)
    with row1_col1:
        st.markdown(
            '''
            <div class="info-card">
                <h4>📝 Project Summary</h4>
                <p>The goal of this project is to execute a full end-to-end machine learning workflow to forecast the <strong>average daily temperature</strong> for Ho Chi Minh City over the <strong>next 5 days</strong>.</p>
                <ul>
                    <li><strong>Data Source:</strong> 10 years of historical daily and hourly weather data from <strong>Visual Crossing</strong>.</li>
                    <li><strong>Forecasting Method:</strong> We employ a <strong>Direct Forecasting Strategy</strong>, using 5 independent 'specialist' models, where each model is individually optimized to predict a specific day's temperature (T+1 to T+5).</li>
                    <li><strong>Final Champion Model:</strong> The best-performing architecture is a <strong>Stacking (Optuna) Ensemble</strong>, which synergistically combines multiple machine learning models to achieve the highest accuracy.</li>
                </ul>
            </div>
            ''',
            unsafe_allow_html=True
        )

    with row1_col2:
        st.markdown(
            '''
            <div class="info-card">
                <h4>🧬 Our 'Two-Stream' Data Strategy</h4>
                <p>To optimize performance for different model families, we applied a "Two-Stream" data preparation strategy:</p>
                <ul>
                    <li><strong>Stream 1 (For Linear Models):</strong> Features were pruned using an <strong>iterative Variance Inflation Factor (VIF)</strong> process to create a stable, non-collinear feature set.</li>
                    <li><strong>Stream 2 (For Tree-based Models):</strong> Advanced models (LightGBM, CatBoost) were trained on a <strong>comprehensive set of ~156 features</strong>, leveraging their robustness to multicollinearity.</li>
                </ul>
                <p>Our final Champion Model is a <strong>Stacking Ensemble</strong> that strategically uses components from both streams.</p>
            </div>
            ''',
            unsafe_allow_html=True
        )
    st.markdown('</div>', unsafe_allow_html=True)

    # ---- Row 2 of project info cards ----
    st.markdown('<div class="card-row">', unsafe_allow_html=True)

    row2_col1, row2_col2 = st.columns(2)
    with row2_col1:
        st.markdown(
            '''
            <div class="info-card">
                <h4>🔬 Key Analytical Findings</h4>
                <ul>
                    <li><strong>Non-Stationary Climate:</strong> Observed a long-term warming trend and extreme events (e.g., 2024 El Niño-driven heatwave) supporting adaptive modeling.</li>
                    <li><strong>Signal vs. Noise:</strong> A clean daily dataset outperformed noisier hourly-derived aggregates for daily average forecasting.</li>
                    <li><strong>Meteorological Logic:</strong> Model feature importance aligns with domain knowledge (thermal inertia, heat index, wind vectors).</li>
                </ul>
            </div>
            ''',
            unsafe_allow_html=True
        )

    with row2_col2:
        st.markdown(
            '''
            <div class="info-card" style="min-height: 265px;">
                <h4>🚀 Final MLOps Strategy</h4>
                <ul>
                    <li><strong>Retraining Trigger:</strong> Performance-based safety net (live RMSE), drift detection (PSI/KS-Test), and a fixed 90-day refresh cadence.</li>
                    <li><strong>Deployment Efficiency (ONNX):</strong> Converted scoring components to ONNX for faster, lighter inference and reduced latency.</li>
                </ul>
            </div>
            ''',
            unsafe_allow_html=True
        )
    st.markdown('</div>', unsafe_allow_html=True)
|
|
| |
| |
| |
| |
    # ---- Leaderboard card, rendered as static HTML so row heights stay stable ----
    title_html = '<h3>🏆 Final 5-Day Daily Model Leaderboard</h3>'

    if 'leaderboard_df' in globals() and not leaderboard_df.empty:
        # Rank models by absolute RMSE; 1-based index for display.
        display_df = leaderboard_df.sort_values(by='RMSE (Absolute Error)').reset_index(drop=True)
        display_df.index = range(1, len(display_df) + 1)

        # Reserve vertical space first to avoid a layout jump when the final
        # HTML replaces the placeholder.
        raw_height = 56 + len(display_df) * 48 + 40
        estimated_height = int(max(360, raw_height * 1.2))
        placeholder = st.empty()
        placeholder.markdown(f'<div style="min-height:{estimated_height}px;"></div>', unsafe_allow_html=True)

        # Build the header row (a leading '#' rank column, then data columns).
        cols = list(display_df.columns)
        header_cells = '<div class="static-row static-header">'
        header_cells += '<div class="static-cell small">#</div>'
        for c in cols:
            header_cells += f'<div class="static-cell">{c}</div>'
        header_cells += '</div>'

        # Build one row per model; floats to 3dp (0dp when very large), NaN as '--'.
        body_rows = ''
        for idx, row in display_df.iterrows():
            body_rows += '<div class="static-row">'
            body_rows += f'<div class="static-cell small">{idx}</div>'
            for c in cols:
                val = row[c]
                try:
                    if pd.isna(val):
                        cell = '--'
                    elif isinstance(val, (float, np.floating)):
                        cell = f'{val:.3f}' if abs(val) < 1000 else f'{val:.0f}'
                    else:
                        cell = str(val)
                except Exception:
                    cell = str(val)
                body_rows += f'<div class="static-cell">{cell}</div>'
            body_rows += '</div>'

        static_html = f'<div class="leaderboard-card">{header_cells}{body_rows}</div>'
        full_html = f'<div class="leaderboard-container">{title_html}{static_html}</div>'
        # Swap the reserved space for the finished card.
        placeholder.markdown(full_html, unsafe_allow_html=True)
    else:
        # No leaderboard data: keep the same footprint with an error card.
        placeholder = st.empty()
        placeholder.markdown('<div style="min-height:360px;"></div>', unsafe_allow_html=True)
        empty_html = f'<div class="leaderboard-container">{title_html}<div style="padding:18px;color:#666;">Could not load leaderboard data.</div></div>'
        placeholder.markdown(empty_html, unsafe_allow_html=True)
|
|
|
|
| |
|
|
| |
| with tab2: |
| |
| emoji = _time_of_day_emoji() |
| st.markdown(f""" |
| <div class="tab-header" style="display:flex;justify-content:space-between;align-items:center;position:relative;"> |
| <h1 style="margin:0">🔮 Live 5-Day Forecast</h1> |
| <div class="tab-emoji">{emoji}</div> |
| </div> |
| """, unsafe_allow_html=True) |
|
|
| |
| if models is None: |
| try: |
| with st.spinner("Loading champion models..."): |
| models = load_champion_models() |
| except Exception as e: |
| st.warning(f"Could not load champion models immediately: {e}") |
|
|
| |
| st.subheader("Forecast Input") |
| selected_date = None |
| |
| if not X_test.empty: |
| min_date = X_test.index.min() |
| max_date = X_test.index.max() |
| |
| selected_date = st.date_input( |
| "Select a date from the test set:", |
| value=min_date, |
| min_value=min_date, |
| max_value=max_date, |
| format="YYYY-MM-DD" |
| ) |
| else: |
| st.error("Test data could not be loaded.") |
| |
| st.divider() |
|
|
| |
| if selected_date and not X_test.empty and models: |
| st.header(f"📍 5-Day Forecast from: {selected_date.strftime('%Y-%m-%d')}") |
|
|
| |
| selected_date_ts = pd.Timestamp(selected_date) |
| |
| |
| if selected_date_ts in X_test.index: |
| input_features = X_test.loc[[selected_date_ts]] |
| else: |
| st.error("Data not found for the selected date in X_test.") |
| input_features = pd.DataFrame() |
|
|
| if input_features.empty: |
| st.error("Data not found for the selected date.") |
| else: |
| |
| predictions = [] |
| for i in range(5): |
| model = models[i] |
| pred = model.predict(input_features)[0] |
| predictions.append(pred) |
| |
| |
| forecast_dates = pd.date_range(start=selected_date, periods=6, freq='D')[1:] |
| cols = st.columns(5) |
| |
| |
| actual_values = [] |
| if selected_date_ts in all_data_df.index: |
| actual_row = all_data_df.loc[selected_date_ts] |
| for col_name in TARGET_COLS: |
| actual_values.append(actual_row[col_name]) |
| else: |
| actual_values = [float('nan')] * 5 |
| |
| is_partial_forecast = any(pd.isna(v) for v in actual_values) |
|
|
| for i in range(5): |
| with cols[i]: |
| actual_val = actual_values[i] |
| delta_text = f"Actual: {actual_val:.1f}°C" if pd.notna(actual_val) else "Actual: --" |
|
|
| st.metric( |
| label=f"Forecast for {forecast_dates[i].strftime('%b %d')}", |
| value=f"{predictions[i]:.1f}°C", |
| delta=delta_text, |
| delta_color="off" |
| ) |
| |
| |
| st.subheader("Forecast Insights (Why?)") |
| |
| |
| temp_lag_1 = input_features['temp_lag_1'].iloc[0] |
| precip_today = input_features['precip'].iloc[0] |
| |
| |
| if temp_lag_1 > 30: |
| st.info(f"💡 Insight: Yesterday was very hot ({temp_lag_1:.1f}°C). The model is using this strong 'persistence' signal for tomorrow's forecast.") |
| elif temp_lag_1 < 25: |
| st.info(f"💡 Insight: Yesterday was cool ({temp_lag_1:.1f}°C). This is likely pulling the initial forecast down.") |
| |
| if precip_today > 10: |
| st.info(f"💡 Insight: The selected day had {precip_today:.1f}mm of rain. This humidity and cloud cover is factored into the forecast.") |
| elif 'temp_lag_1' not in locals() or (temp_lag_1 >= 25 and temp_lag_1 <= 30): |
| st.info("💡 Insight: Weather conditions appear stable. The forecast is primarily driven by seasonal trends and recent temperature history.") |
| |
|
|
| |
            st.markdown("---")

            with st.expander("🔍 Feature Inspector: What the Model Saw on this Day"):
                # Raw model inputs for the selected date, grouped into three themes.
                if not input_features.empty:
                    col1, col2, col3 = st.columns(3)

                    # Theme 1: same-day observed conditions.
                    with col1:
                        st.subheader("Core Conditions")
                        st.metric(label="🌡️ Today's Avg Temp (temp)", value=f"{input_features['temp'].iloc[0]:.1f}°C")
                        st.metric(label="🌡️ Today's 'Feels Like' (feelslike)", value=f"{input_features['feelslike'].iloc[0]:.1f}°C")
                        st.metric(label="💧 Humidity", value=f"{input_features['humidity'].iloc[0]:.1f}%")
                        st.metric(label="☁️ Cloud Cover", value=f"{input_features['cloudcover'].iloc[0]:.1f}%")
                        st.metric(label="🌧️ Precipitation", value=f"{input_features['precip'].iloc[0]:.1f} mm")

                    # Theme 2: lag and rolling-window features (recent history).
                    with col2:
                        st.subheader("Recent History")
                        st.metric(label="🌡️ Temp Yesterday (temp_lag_1)", value=f"{input_features['temp_lag_1'].iloc[0]:.1f}°C")
                        st.metric(label="📈 7-Day Avg Temp (temp_roll_7d_mean)", value=f"{input_features['temp_roll_7d_mean'].iloc[0]:.1f}°C")
                        st.metric(label="🌧️ 7-Day Total Rainfall (precip_roll_7d_sum)", value=f"{input_features['precip_roll_7d_sum'].iloc[0]:.1f} mm")
                        st.metric(label="📊 14-Day Temp Volatility (temp_roll_14d_std)", value=f"{input_features['temp_roll_14d_std'].iloc[0]:.2f}°C")

                    # Theme 3: seasonal / synoptic context.
                    with col3:
                        st.subheader("Seasonal Context")
                        st.metric(label="📅 Day of Year", value=f"{input_features['day_of_year'].iloc[0]}")
                        st.metric(label="🔻 Sea Level Pressure", value=f"{input_features['sealevelpressure'].iloc[0]:.1f} hPa")
                        st.metric(label="💨 Wind Speed", value=f"{input_features['windspeed'].iloc[0]:.1f} km/h")
                        st.metric(label="🧭 Wind Direction", value=f"{input_features['winddir'].iloc[0]:.0f}°")
                else:
                    st.warning("No feature data available for the selected date.")
| |
|
|
| |
| st.subheader("Training Set Overview") |
| with st.expander("Show plot of all training data (before 2024-02-18)"): |
| train_end_date = pd.Timestamp(TEST_START_DATE) - pd.Timedelta(days=1) |
| train_df = all_data_df.loc[:train_end_date][CURRENT_TEMP_COL] |
| |
| fig_train = go.Figure() |
| fig_train.add_trace(go.Scatter( |
| x=train_df.index, y=train_df, |
| mode='lines', name='Training Data (Actual)', |
| line=dict(color='#005aa7', width=1) |
| )) |
| fig_train.update_layout( |
| title="Actual Temperature - Full Training Set", |
| xaxis_title="Date", yaxis_title="Temperature (°C)", |
| template="plotly_white", |
| xaxis_rangeslider_visible=True, |
| yaxis_fixedrange=True |
| ) |
| st.plotly_chart(fig_train, use_container_width=True) |
|
|
| |
            st.subheader("Historical Context & Forecast")

            # Two weeks of observed temperatures up to (and including) the
            # selected date give the 5-day forecast visual context.
            history_start = selected_date_ts - pd.Timedelta(days=14)
            history_end = selected_date_ts
            history_df = all_data_df.loc[history_start:history_end][CURRENT_TEMP_COL]

            forecast_df = pd.DataFrame({
                'Date': forecast_dates,
                'Forecast': predictions
            }).set_index('Date')

            fig = go.Figure()
            fig.add_trace(go.Scatter(
                x=history_df.index, y=history_df,
                mode='lines+markers', name='Past 14 Days (Actual)',
                line=dict(color='blue')
            ))
            fig.add_trace(go.Scatter(
                x=forecast_df.index, y=forecast_df['Forecast'],
                mode='lines+markers', name='5-Day Forecast',
                line=dict(color='red', dash='dot')
            ))
            fig.update_layout(
                title="Forecast vs. Historical Context",
                xaxis_title="Date", yaxis_title="Temperature (°C)",
                template="plotly_white", legend=dict(x=0.01, y=0.99)
            )
            st.plotly_chart(fig, use_container_width=True)

            st.subheader("5-Day Forecast vs. Actual Comparison")

            fig_comp = go.Figure()

            fig_comp.add_trace(go.Scatter(
                x=forecast_dates, y=predictions,
                mode='lines+markers', name='5-Day Forecast',
                line=dict(color='red', dash='dot')
            ))

            # Overlay actuals only when every horizon has ground truth; a
            # partially-NaN actual line would be misleading.
            if not is_partial_forecast:
                fig_comp.add_trace(go.Scatter(
                    x=forecast_dates, y=actual_values,
                    mode='lines+markers', name='5-Day Actual',
                    line=dict(color='blue')
                ))
                fig_comp.update_layout(title="5-Day Forecast vs. Actual Values")
            else:
                fig_comp.update_layout(title="5-Day Forecast (Actual data not yet available)")

            fig_comp.update_layout(
                xaxis_title="Date", yaxis_title="Temperature (°C)",
                template="plotly_white", legend=dict(x=0.01, y=0.99)
            )
            st.plotly_chart(fig_comp, use_container_width=True)
| |
|
|
| else: |
| |
| if not selected_date: |
| st.warning("Test data could not be loaded.") |
| else: |
| st.warning("Please wait... Loading data or models.") |
|
|
| |
|
|
| |
with tab3:
    # Tab header with a time-of-day emoji badge.
    emoji = _time_of_day_emoji()
    st.markdown(f"""
    <div class="tab-header" style="display:flex;justify-content:space-between;align-items:center;position:relative;">
        <h1 style="margin:0">📈 Model Performance & Diagnostics</h1>
        <div class="tab-emoji">{emoji}</div>
    </div>
    """, unsafe_allow_html=True)

    if not perf_df.empty and not y_test.empty:
        st.subheader("Performance Degradation over 5 Days")
        st.markdown("How model performance changes as the forecast horizon increases.")

        # Restrict the metrics table to the champion (stacking) model only.
        MODEL_NAME = 'Champion (Stacking)'
        champion_perf_df = perf_df[perf_df['Model'] == MODEL_NAME].copy()

        # Column names as written by the offline evaluation pipeline.
        RMSE_COL_NAME = 'RMSE (Absolute Error)'
        R2_COL_NAME = 'R-squared'

        col1, col2 = st.columns(2)
        with col1:
            fig_rmse = diag.plot_performance_degradation(
                champion_perf_df,
                metric_column=RMSE_COL_NAME,
                metric_name='RMSE (Temperature °C)',
                color='blue'
            )
            st.plotly_chart(fig_rmse, use_container_width=True)
        with col2:
            fig_r2 = diag.plot_performance_degradation(
                champion_perf_df,
                metric_column=R2_COL_NAME,
                metric_name='R-squared (R²)',
                color='green'
            )
            st.plotly_chart(fig_r2, use_container_width=True)

        st.subheader("Interactive Forecast vs. Actual Comparison")

        # Horizon selector: Day 1 .. Day 5 (default Day 1).
        selected_horizon = st.slider(
            "Select Forecast Horizon (Day) to inspect:",
            1, 5, 1
        )
| |
| |
| @st.cache_data |
| def get_full_test_predictions(_models, _X_test): |
| """Run predictions on the entire test set and cache the results.""" |
| all_preds = {} |
| for i in range(5): |
| model = _models[i] |
| preds = model.predict(_X_test) |
| all_preds[f'Day {i+1}'] = preds |
| return pd.DataFrame(all_preds, index=_X_test.index) |
|
|
        with st.spinner("Running predictions on entire test set... (This is cached for next time)"):
            y_pred_test = get_full_test_predictions(models, X_test)

        # Pull the truth/prediction pair for the chosen horizon.
        y_true_selected = y_test[f'Day {selected_horizon}']
        y_pred_selected = y_pred_test[f'Day {selected_horizon}']

        fig_interactive = diag.plot_forecast_vs_actual(
            y_true=y_true_selected,
            y_pred=y_pred_selected,
            day_ahead_title=f"Day {selected_horizon} Forecast"
        )
        st.plotly_chart(fig_interactive, use_container_width=True)

        with st.expander("Champion Model Diagnostics (Deep Dive)"):
            st.markdown("Detailed analysis of residuals (error = actual - predicted) for the Day 1 forecast.")

            # Residual diagnostics are shown for the shortest horizon only.
            y_true_d1 = y_test['Day 1']
            y_pred_d1 = y_pred_test['Day 1']
            dates_d1 = y_test.index

            fig_res_time = diag.plot_residuals_vs_time(
                y_true_d1, y_pred_d1, dates_d1, "Day 1"
            )
            st.plotly_chart(fig_res_time, use_container_width=True)

            fig_res_dist = diag.plot_residuals_distribution(
                y_true_d1, y_pred_d1, "Day 1"
            )
            st.plotly_chart(fig_res_dist, use_container_width=True)
            st.markdown("A good model will have residuals (errors) normally distributed (bell curve) "
                        "around 0 and show no pattern over time.")

    else:
        st.warning("Loading performance data...")
|
|
| |
with tab4:
    emoji = _time_of_day_emoji()
    st.markdown(f"""
    <div class="tab-header" style="display:flex;justify-content:space-between;align-items:center;position:relative;">
        <h1 style="margin:0">🕒 Hourly Prediction (Next 24 Hours)</h1>
        <div class="tab-emoji">{emoji}</div>
    </div>
    """, unsafe_allow_html=True)

    st.subheader("Forecast Start Time")

    if not X_test_hourly.empty:
        min_ts = X_test_hourly.index.min()
        max_ts = X_test_hourly.index.max()

        # Anchor date defaults to the newest available day in the hourly set.
        # NOTE: this rebinds `selected_date` from the daily tab; safe because
        # the tabs render sequentially, but worth keeping in mind.
        selected_date = st.date_input(
            "Select the date:",
            value=max_ts.date(),
            min_value=min_ts.date(),
            max_value=max_ts.date(),
            format="YYYY-MM-DD",
            key="hourly_date_input"
        )

        # Hours actually present for that calendar day (the set may have gaps).
        available_hours_in_day = X_test_hourly[X_test_hourly.index.date == selected_date].index.hour.unique().sort_values()

        if available_hours_in_day.empty:
            st.warning(f"No hourly data found for {selected_date}. Please select a different date.")
            st.stop()  # abort this script run -- nothing below works without data

        # Default to the latest known hour of the chosen day.
        default_hour = available_hours_in_day.max()
        default_hour_index = available_hours_in_day.get_loc(default_hour)

        selected_hour = st.selectbox(
            "Select the latest known hour:",
            options=available_hours_in_day.tolist(),
            index=default_hour_index,
            format_func=lambda x: f"{x:02d}:00:00"
        )

        # Full timestamp of the forecast anchor (last observed hour).
        latest_time_for_day = pd.to_datetime(f"{selected_date} {selected_hour:02d}:00:00")

        # Single-row feature DataFrame at the anchor hour.
        input_features_hourly = X_test_hourly.loc[[latest_time_for_day]]

        st.info(f"The model runs based on data up to the latest known hour: **{latest_time_for_day.strftime('%Y-%m-%d %H:%M:%S')}**")
        st.divider()
|
|
| |
        # One prediction per hour ahead, T+1h .. T+24h.
        predictions_24h = predict_next_24_hours(input_features_hourly, hourly_models_24h)

        # Precomputed forecasts act as a deterministic fallback when some of the
        # 24 hourly models failed to load.
        precomputed_df = load_precomputed_hourly_forecasts()

        # Count loaded hourly models; the container type is not guaranteed.
        try:
            loaded_hourly_models_count = len(hourly_models_24h) if isinstance(hourly_models_24h, dict) else (len(hourly_models_24h) if hasattr(hourly_models_24h, '__len__') else 0)
        except Exception:
            loaded_hourly_models_count = 0

        used_precomputed = False
        if loaded_hourly_models_count < 24 and not precomputed_df.empty:
            if latest_time_for_day in precomputed_df.index:
                row = precomputed_df.loc[latest_time_for_day]
                # Keep numeric, non-NaN cells; the first 24 are taken as the
                # hourly forecast values for this timestamp.
                nums = [v for v in row.values if isinstance(v, (int, float)) and not pd.isna(v)]
                if len(nums) >= 24:
                    predictions_24h = [float(nums[i]) for i in range(24)]
                    used_precomputed = True
                    st.info("Using precomputed 120h forecast for this timestamp (deterministic fallback).")
|
|
| |
| |
| |
| t_plus_24h_metric_value = predictions_24h[23] if len(predictions_24h) >= 24 else (predictions_24h[-1] if predictions_24h else float('nan')) |
| |
| |
| st.subheader(f"Summary Forecast for Next Day (Starting {latest_time_for_day.strftime('%H:%M')})") |
| |
| forecast_start_ts = latest_time_for_day + pd.Timedelta(hours=1) |
| |
| |
| t_plus_2h_value = predictions_24h[1] if len(predictions_24h) >= 2 else float('nan') |
| t_plus_3h_value = predictions_24h[2] if len(predictions_24h) >= 3 else float('nan') |
| |
| |
| avg_temp = np.nanmean(predictions_24h) |
| max_temp = np.nanmax(predictions_24h) |
| |
|
|
| |
| |
| actual_hourly_index = pd.date_range(start=forecast_start_ts, periods=24, freq='H') |
| |
| |
| try: |
| |
| actual_temps_24h_series = hourly_data_df['temp'].reindex(actual_hourly_index) |
| except Exception: |
| |
| actual_temps_24h_series = pd.Series([float('nan')] * 24, index=actual_hourly_index) |
| |
| |
| forecast_t2_ts = forecast_start_ts + pd.Timedelta(hours=1) |
| forecast_t3_ts = forecast_start_ts + pd.Timedelta(hours=2) |
| forecast_t24_ts = forecast_start_ts + pd.Timedelta(hours=23) |
| |
| actual_t2_val = actual_temps_24h_series.get(forecast_t2_ts) |
| actual_t3_val = actual_temps_24h_series.get(forecast_t3_ts) |
| actual_t24_val = actual_temps_24h_series.get(forecast_t24_ts) |
| |
| |
| is_partial_hourly = actual_temps_24h_series.isna().any() |
| |
| if is_partial_hourly: |
| actual_avg_val = float('nan') |
| actual_max_val = float('nan') |
| else: |
| actual_avg_val = np.nanmean(actual_temps_24h_series) |
| actual_max_val = np.nanmax(actual_temps_24h_series) |
|
|
| |
| delta_t2 = f"Actual: {actual_t2_val:.1f}°C" if pd.notna(actual_t2_val) else "Actual: --" |
| delta_t3 = f"Actual: {actual_t3_val:.1f}°C" if pd.notna(actual_t3_val) else "Actual: --" |
| delta_t24 = f"Actual: {actual_t24_val:.1f}°C" if pd.notna(actual_t24_val) else "Actual: --" |
| delta_avg = f"Actual: {actual_avg_val:.1f}°C" if pd.notna(actual_avg_val) else "Actual: --" |
| delta_max = f"Actual: {actual_max_val:.1f}°C" if pd.notna(actual_max_val) else "Actual: --" |
| |
|
|
|
|
| |
| col_t2, col_t3, col_t24, col_avg, col_max = st.columns(5) |
| |
| |
| with col_t2: |
| st.metric( |
| label=f"Forecast @ {forecast_t2_ts.strftime('%H:%M')} (T+2H)", |
| value=f"{t_plus_2h_value:.1f}°C", |
| delta=delta_t2, |
| delta_color="off" |
| ) |
| |
| |
| with col_t3: |
| st.metric( |
| label=f"Forecast @ {forecast_t3_ts.strftime('%H:%M')} (T+3H)", |
| value=f"{t_plus_3h_value:.1f}°C", |
| delta=delta_t3, |
| delta_color="off" |
| ) |
| |
| |
| with col_t24: |
| st.metric( |
| label=f"Forecast @ {forecast_t24_ts.strftime('%H:%M')} (T+24H)", |
| value=f"{t_plus_24h_metric_value:.1f}°C", |
| delta=delta_t24, |
| delta_color="off" |
| ) |
|
|
| |
| with col_avg: |
| st.metric( |
| label="Next 24h Average Temp", |
| value=f"{avg_temp:.1f}°C", |
| delta=delta_avg, |
| delta_color="off" |
| ) |
| |
| |
| with col_max: |
| st.metric( |
| label="Next 24h Max Temp", |
| value=f"{max_temp:.1f}°C", |
| delta=delta_max, |
| delta_color="off" |
| ) |
| |
| |
| |
| |
| |
| st.subheader("Historical Context & Forecast (Hourly)") |
| |
| |
| history_start_ts = latest_time_for_day - pd.Timedelta(hours=23) |
| history_end_ts = latest_time_for_day |
| |
| |
| history_df_hourly = hourly_data_df.loc[history_start_ts:history_end_ts]['temp'] |
| |
| |
| forecast_hourly_index = pd.date_range(start=forecast_start_ts, periods=len(predictions_24h), freq='H') |
| forecast_df_hourly = pd.DataFrame({ |
| 'Time': forecast_hourly_index, |
| 'Forecast': predictions_24h |
| }).set_index('Time') |
| |
| |
| fig_hist_hourly = go.Figure() |
| fig_hist_hourly.add_trace(go.Scatter( |
| x=history_df_hourly.index, y=history_df_hourly, |
| mode='lines+markers', name='Past 24 Hours (Actual)', |
| line=dict(color='blue') |
| )) |
| fig_hist_hourly.add_trace(go.Scatter( |
| x=forecast_df_hourly.index, y=forecast_df_hourly['Forecast'], |
| mode='lines+markers', name='Next 24 Hours (Forecast)', |
| line=dict(color='red', dash='dot') |
| )) |
| fig_hist_hourly.update_layout( |
| title="Hourly Forecast vs. Historical Context", |
| xaxis_title="Time", yaxis_title="Temperature (°C)", |
| template="plotly_white", legend=dict(x=0.01, y=0.99) |
| ) |
| st.plotly_chart(fig_hist_hourly, use_container_width=True) |
|
|
| |
        st.subheader("24-Hour Forecast vs. Actual Comparison")

        actual_values_24h = actual_temps_24h_series.values
        is_partial_hourly_forecast = is_partial_hourly

        fig_comp_hourly = go.Figure()

        fig_comp_hourly.add_trace(go.Scatter(
            x=forecast_hourly_index, y=predictions_24h,
            mode='lines+markers', name='24-Hour Forecast',
            line=dict(color='red', dash='dot')
        ))

        # Overlay the observed curve only when every hour has ground truth.
        if not is_partial_hourly_forecast:
            fig_comp_hourly.add_trace(go.Scatter(
                x=forecast_hourly_index, y=actual_values_24h,
                mode='lines+markers', name='24-Hour Actual',
                line=dict(color='blue')
            ))
            fig_comp_hourly.update_layout(title="24-Hour Forecast vs. Actual Values")
        else:
            fig_comp_hourly.update_layout(title="24-Hour Forecast (Actual data not yet available)")

        fig_comp_hourly.update_layout(
            xaxis_title="Time", yaxis_title="Temperature (°C)",
            template="plotly_white", legend=dict(x=0.01, y=0.99)
        )
        st.plotly_chart(fig_comp_hourly, use_container_width=True)

        st.subheader("Model Reliability: Error Degradation")
        if not hourly_perf_df.empty:
            # The evaluation file covers 120 horizons; this tab shows T+1..T+24.
            df_plot = hourly_perf_df.head(24)

            fig_rmse_hourly = go.Figure()
            fig_rmse_hourly.add_trace(go.Scatter(
                x=df_plot['Horizon'],
                y=df_plot['RMSE'],
                mode='lines+markers',
                name='RMSE',
                line=dict(color='#005aa7')
            ))
            fig_rmse_hourly.update_layout(
                title="RMSE Degradation: Forecast Error vs. Hour Ahead (T+1h to T+24h)",
                xaxis_title="Forecast Horizon (Hours)",
                yaxis_title="RMSE (°C)",
                template="plotly_white",
                # 10% headroom above the largest RMSE value on the y-axis.
                yaxis_range=[0, df_plot['RMSE'].max() * 1.1 if not df_plot['RMSE'].empty else 1],
                height=400
            )
            st.plotly_chart(fig_rmse_hourly, use_container_width=True)
        else:
            st.warning("Could not load Hourly RMSE Degradation data from hourly_120h_evaluation_results.csv.")
|
|
| |
        st.markdown("---")
        with st.expander("🔍 Feature Inspector: Hourly Inputs for the Forecast"):
            if not input_features_hourly.empty:
                # Curated subset of hourly features worth surfacing in the UI.
                important_hourly_features = [
                    'temp', 'humidity', 'windspeed', 'cloudcover',
                    'temp_lag_1h', 'humidity_lag_24h', 'temp_diff_24h',
                    'temp_roll_24h_mean', 'humidity_roll_24h_mean',
                    'hour_sin', 'day_of_year_sin'
                ]
                col_h1, col_h2, col_h3 = st.columns(3)

                # Emoji prefix per feature, for nicer metric labels.
                emoji_map = {
                    'temp': '🌡️', 'humidity': '💧', 'windspeed': '💨', 'cloudcover': '☁️',
                    'temp_lag_1h': '⏳', 'humidity_lag_24h': '📉', 'temp_diff_24h': '🔺',
                    'temp_roll_24h_mean': '📈', 'humidity_roll_24h_mean': '📊',
                    'hour_sin': '🕒', 'day_of_year_sin': '📅'
                }

                # Distribute the metrics round-robin across the three columns.
                for i, feature in enumerate(important_hourly_features):
                    if feature in input_features_hourly.columns:
                        raw_value = input_features_hourly[feature].iloc[0]
                        prefix = emoji_map.get(feature, '')
                        label = f"{prefix} {feature.replace('_', ' ').title()}".strip()

                        # _format_feature_value attaches the right unit (°C, %, km/h, ...).
                        formatted = _format_feature_value(feature, raw_value)
                        target_col = [col_h1, col_h2, col_h3][i % 3]
                        with target_col:
                            try:
                                st.metric(label=label, value=formatted)
                            except Exception:
                                # Non-string-renderable value: fall back to plain text.
                                st.write(f"{label}: {formatted}")
            else:
                st.warning("No hourly feature data available for the selected hour.")

    else:
        st.warning("Please wait... Loading hourly data or models.")
|
|
|
|
| |
with tab5:
    emoji = _time_of_day_emoji()
    st.markdown(f"""
    <div class="tab-header" style="display:flex;justify-content:space-between;align-items:center;position:relative;">
        <h1 style="margin:0">📡Live Weather & OpenWeather Forecast</h1>
        <div class="tab-emoji">{emoji}</div>
    </div>
    """, unsafe_allow_html=True)

    # The OpenWeather key lives in .streamlit/secrets.toml; any failure to read
    # it (missing file, missing key) simply disables the live section.
    try:
        api_key = st.secrets["OPENWEATHER_API_KEY"]
    except Exception:
        api_key = None

    if not api_key:
        st.warning("OpenWeather API key not found. To enable live forecasts, add OPENWEATHER_API_KEY to .streamlit/secrets.toml")
| else: |
| with st.spinner("Fetching live weather..."): |
| live = get_live_weather(api_key) |
| if live: |
| st.header("Live Weather in Ho Chi Minh City 🌆") |
| col1, col2, col3 = st.columns(3) |
| try: |
| col1.metric("🌡️ Current Temperature", f"{live['main']['temp']:.1f}°C") |
| col2.metric("💧 Humidity", f"{live['main']['humidity']}%") |
| |
| wind_kmh = live.get('wind', {}).get('speed', 0) * 3.6 |
| col3.metric("💨 Wind Speed", f"{wind_kmh:.1f} km/h") |
|
|
| |
| feels_like = live.get('main', {}).get('feels_like') |
| cloud_cover = None |
| try: |
| cloud_cover = live.get('clouds', {}).get('all') if isinstance(live.get('clouds'), dict) else None |
| except Exception: |
| cloud_cover = None |
|
|
| |
| precip_val = None |
| rain_block = live.get('rain') if isinstance(live.get('rain'), dict) else None |
| snow_block = live.get('snow') if isinstance(live.get('snow'), dict) else None |
| if rain_block: |
| precip_val = rain_block.get('1h') or rain_block.get('3h') |
| elif snow_block: |
| precip_val = snow_block.get('1h') or snow_block.get('3h') |
|
|
| |
| col4, col5, col6 = st.columns(3) |
| try: |
| col4.metric("🌡️ Feels Like", f"{feels_like:.1f}°C" if feels_like is not None else "--") |
| except Exception: |
| col4.metric("🌡️ Feels Like", "--") |
| try: |
| col5.metric("☁️ Cloud Cover", f"{cloud_cover:.0f}%" if cloud_cover is not None else "--") |
| except Exception: |
| col5.metric("☁️ Cloud Cover", "--") |
| try: |
| col6.metric("🌧️ Precipitation (1h)", f"{precip_val:.1f} mm" if precip_val is not None else "--") |
| except Exception: |
| col6.metric("🌧️ Precipitation (1h)", "--") |
|
|
| except Exception: |
| st.write(live) |
|
|
        st.markdown("---")
        with st.spinner("Fetching 5-day forecast from OpenWeatherMap..."):
            forecast_json = get_5_day_forecast(api_key)

        if forecast_json and 'list' in forecast_json:
            # Flatten the 3-hourly forecast entries into a time/temp frame.
            forecast_list = []
            for item in forecast_json['list']:
                forecast_list.append({
                    'time': pd.to_datetime(item['dt'], unit='s'),
                    'temp': item['main']['temp']
                })
            forecast_df = pd.DataFrame(forecast_list)

            st.header("5-Day Forecast (OpenWeatherMap)")
            fig_live = px.line(forecast_df, x='time', y='temp', title='Temperature Forecast for the Next 5 Days', markers=True)
            st.plotly_chart(fig_live, use_container_width=True)
            st.info("Note: This forecast is provided by OpenWeatherMap and is independent of our trained ML models.")

            # Secondary section: run OUR daily models on a feature vector built
            # from the live conditions, for comparison with OpenWeather.
            try:
                if models and not all_data_df.empty:
                    with st.spinner("Generating model-based 5-day forecast using live features..."):
                        # Minimal summary of current conditions for the live
                        # feature builder. NOTE(review): relies on `live` and
                        # `wind_kmh` from the block above -- if the live fetch
                        # failed, this raises and is caught by the outer except.
                        live_summary = {
                            'temp': live.get('main', {}).get('temp'),
                            'feelslike': live.get('main', {}).get('feels_like'),
                            'humidity': live.get('main', {}).get('humidity'),
                            'clouds': live.get('clouds', {}).get('all') if isinstance(live.get('clouds'), dict) else None,
                            'windspeed': wind_kmh,
                        }

                        # Rain volume if reported (1h preferred over 3h).
                        rain = live.get('rain', {}) if isinstance(live.get('rain', {}), dict) else {}
                        if '1h' in rain:
                            live_summary['precip'] = rain['1h']
                        elif '3h' in rain:
                            live_summary['precip'] = rain['3h']

                        live_feature_vector = fe_live.create_live_feature_vector(live_summary, all_data_df)

                        # Best effort: align column order with the historical frame.
                        try:
                            live_feature_vector = live_feature_vector.reindex(columns=all_data_df.columns, fill_value=np.nan)
                        except Exception:
                            pass

                        # One prediction per horizon model; a failing model
                        # degrades to NaN instead of aborting the section.
                        model_forecasts = []
                        for i, mdl in enumerate(models):
                            try:
                                X_in = _align_features_for_model(mdl, live_feature_vector, all_data_df.columns)
                                pred = mdl.predict(X_in)[0]
                            except Exception as e:
                                st.warning(f"Model for Day {i+1} prediction failed: {e}")
                                pred = float('nan')
                            model_forecasts.append(float(pred))

                        st.header("Our Model: 5-Day Forecast")
                        # Horizon dates: tomorrow through five days out, at midnight.
                        five_day_dates = [pd.Timestamp.now().normalize() + pd.Timedelta(days=i) for i in range(1, 6)]
                        fig_model = go.Figure()
                        fig_model.add_trace(go.Scatter(x=five_day_dates, y=model_forecasts, mode='lines+markers', name='Model Forecast', line=dict(color='#d62728')))
                        fig_model.update_layout(title="Model 5-Day Forecast (Our Stacking Models)", xaxis_title="Date", yaxis_title="Temperature (°C)", template='plotly_white')
                        st.plotly_chart(fig_model, use_container_width=True)

                        # Reference temperatures for the per-day delta badges.
                        current_temp = live_summary.get('temp', float('nan'))
                        prev_day_temp = all_data_df['temp'].iloc[-1] if 'temp' in all_data_df.columns else float('nan')
                        st.subheader("Model 5-Day Outlook — Champion Stacking")

                        # HTML card layout; falls back to plain text on any error.
                        try:
                            cards = st.columns(5)
                            for i, pred in enumerate(model_forecasts):
                                date = five_day_dates[i]
                                try:
                                    delta_now = pred - current_temp if pd.notna(current_temp) else float('nan')
                                except Exception:
                                    delta_now = float('nan')
                                try:
                                    delta_prev = pred - prev_day_temp if pd.notna(prev_day_temp) else float('nan')
                                except Exception:
                                    delta_prev = float('nan')

                                # Human-readable one-liner keyed off the predicted value.
                                if pd.notna(pred):
                                    if pred >= 30:
                                        summary_text = "Hot and humid. Stay hydrated and watch for heat stress."
                                    elif pred <= 20:
                                        summary_text = "Cool conditions expected; light jacket recommended."
                                    else:
                                        summary_text = "Comfortable temperatures expected."
                                else:
                                    summary_text = "Model could not produce a reliable prediction for this day."

                                card_html = f"""
                                <div style='background:#ffffff;border-radius:10px;padding:12px;border:1px solid #e6eef8;box-shadow:0 2px 8px rgba(3,37,76,0.04);'>
                                    <div style='font-size:0.95rem;color:#004080;font-weight:700;margin-bottom:6px;'>{date.strftime('%A, %b %d')}</div>
                                    <div style='font-size:28px;font-weight:800;color:#d62728;margin-bottom:6px;'>{pred:.1f}°C</div>
                                    <div style='font-size:0.85rem;color:#333;margin-bottom:6px;'>
                                        <span style='color:#2b8a3e;font-weight:600;'>{delta_now:+.1f}° vs now</span>
                                        |
                                        <span style='color:#1f77b4;font-weight:600;'>{delta_prev:+.1f}° vs prev</span>
                                    </div>
                                    <div style='font-size:0.85rem;color:#555;margin-top:6px;'>{summary_text}</div>
                                </div>
                                """

                                with cards[i]:
                                    st.markdown(card_html, unsafe_allow_html=True)
                        except Exception:
                            # Plain-text fallback for the daily outlook.
                            st.subheader("Daily Outlook (Model)")
                            for i, pred in enumerate(model_forecasts):
                                date = five_day_dates[i]
                                st.write(f"{date.strftime('%A, %b %d')} — Predicted Avg: {pred:.1f}°C")
                else:
                    st.info("Model-based forecast unavailable: champion models or historical data not found.")
            except Exception as e:
                st.warning(f"Could not run model-based forecast: {e}")
        else:
            st.warning("Could not retrieve 5-day forecast from OpenWeatherMap.")