"""Gradio app that compares measured and predicted tide levels by month.

Measured sea-level tables are downloaded from ioc-sealevelmonitoring.org and
predicted hourly tides from hko.gov.hk for the "Quarry Bay" and "Shek Pik"
stations; the app plots both series and their difference (residual).
"""

import calendar
import csv
from datetime import datetime

import gradio as gr
import pandas as pd
import plotly.express as px
import requests
from bs4 import BeautifulSoup

# Line colour used for each station in every figure.
color_map = {
    "Shek Pik": "blue",
    "Quarry Bay": "red"
}

# Upper bound (seconds) for each HTTP request so that a stalled remote server
# cannot block a UI callback forever.
REQUEST_TIMEOUT_SECONDS = 30

# Station display name -> station code used in the IOC request URL.
_IOC_STATION_CODES = {"Quarry Bay": "quar", "Shek Pik": "shek"}

# Column layout of the frame returned by fetch_hko_tide_data.
_PREDICTED_COLUMNS = ['Datetime', 'Tide Height (m)', 'Station']


def get_end_date_from_month(month_str):
    """Return ``(year, month, last_day_iso)`` for a ``YYYY-MM`` string.

    Args:
        month_str: Month in ``YYYY-MM`` form, e.g. ``"2023-07"``.

    Returns:
        A tuple ``(year, month, end_date)`` where ``end_date`` is the last
        calendar day of that month formatted as ``YYYY-MM-DD``.

    Raises:
        ValueError: If ``month_str`` cannot be parsed with ``%Y-%m``.
    """
    try:
        dt = datetime.strptime(month_str, "%Y-%m")
    except ValueError as err:
        raise ValueError(
            "Invalid format. Please use YYYY-MM (e.g., '2023-07')"
        ) from err
    last_day = calendar.monthrange(dt.year, dt.month)[1]
    return dt.year, dt.month, f"{dt.year}-{dt.month:02d}-{last_day:02d}"


def fetch_measured_data(station_name, endtime, period="30"):
    """Download a station's measured table and save it as a CSV file.

    Args:
        station_name: ``"Quarry Bay"`` or ``"Shek Pik"``.
        endtime: End of the requested window. A 10-character value
            (``YYYY-MM-DD``) gets ``" 23:59:59"`` appended; anything else is
            sent unchanged.
        period: Value of the ``period`` query parameter (presumably a number
            of days; confirm against the IOC service documentation).

    Returns:
        Path of the CSV file written in the current working directory,
        named ``<code>_tide_data.csv``.

    Raises:
        ValueError: If the station name is unknown or the response contains
            no ``<table>`` element.
        RuntimeError: If the HTTP request fails or returns an error status.
    """
    code = _IOC_STATION_CODES.get(station_name)
    if not code:
        raise ValueError(f"Invalid station name: {station_name}")

    endtime_full = endtime + " 23:59:59" if len(endtime) == 10 else endtime
    url = f"https://www.ioc-sealevelmonitoring.org/bgraph.php?code={code}&output=tab&period={period}&endtime={endtime_full}"

    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
    except requests.RequestException as e:
        raise RuntimeError(f"Error fetching data: {e}") from e

    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table')
    if not table:
        raise ValueError(f"No data table found in HTML for station {station_name} at {endtime_full}")

    # One CSV row per <tr>; header (<th>) and data (<td>) cells are both kept.
    data = [
        [cell.get_text(strip=True) for cell in row.find_all(['td', 'th'])]
        for row in table.find_all('tr')
    ]

    # NOTE(review): the file name depends only on the station code, so
    # concurrent requests for the same station overwrite each other's file.
    output_csv = f"{code}_tide_data.csv"
    with open(output_csv, 'w', newline='', encoding='utf-8') as f:
        csv.writer(f).writerows(data)
    return output_csv


def load_measured_csv(file_path, station_name):
    """Load a CSV written by ``fetch_measured_data`` into a tidy frame.

    Rows whose ``Time (UTC)`` value cannot be parsed are dropped. The
    timestamps are interpreted as UTC and converted to ``Asia/Hong_Kong``.

    Args:
        file_path: Path of the CSV file to read.
        station_name: Value stored in the ``Station`` column of the result.

    Returns:
        DataFrame with columns ``Time (UTC+8)``, ``Measured`` (the CSV's
        ``flt(m)`` column) and ``Station``.
    """
    df = pd.read_csv(file_path)
    df.columns = df.columns.str.strip()
    df['Time (UTC)'] = pd.to_datetime(df['Time (UTC)'], errors='coerce')
    df = df.dropna(subset=['Time (UTC)'])
    df['Time (UTC+8)'] = (
        df['Time (UTC)'].dt.tz_localize('UTC').dt.tz_convert('Asia/Hong_Kong')
    )
    df['Station'] = station_name
    return df[['Time (UTC+8)', 'flt(m)', 'Station']].rename(columns={'flt(m)': 'Measured'})


def fetch_hko_tide_data(url, station_name, year):
    """Download and parse an HKO hourly predicted-tide table.

    Args:
        url: Address of the HTML page containing the table.
        station_name: Value stored in the ``Station`` column of the result.
        year: Calendar year combined with each row's month and day.

    Returns:
        ``None`` if the HTTP request fails; otherwise a DataFrame with
        columns ``Datetime``, ``Tide Height (m)`` and ``Station``. The frame
        is empty (but still has those columns) when no row could be parsed.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
    except requests.RequestException:
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    data = []
    # The first <tr> is skipped (treated as the header row).
    for row in soup.find_all('tr')[1:]:
        cols = [td.get_text(strip=True) for td in row.find_all(['td', 'th'])]
        # A usable row holds month, day and 24 hourly values.
        if len(cols) < 26:
            continue
        mm, dd = cols[0], cols[1]
        # NOTE(review): the first hourly column is mapped to hour 0 and the
        # last to hour 23 - confirm this matches the HKO table header.
        for hour in range(24):
            tide_str = cols[hour + 2]
            if tide_str == '':
                continue
            try:
                tide = float(tide_str)
                dt = datetime(year, int(mm), int(dd), hour)
            except ValueError:
                # Non-numeric cell or impossible date: skip this value only.
                continue
            data.append({'Datetime': dt, 'Tide Height (m)': tide, 'Station': station_name})

    # Explicit columns keep the schema stable even when nothing was parsed.
    return pd.DataFrame(data, columns=_PREDICTED_COLUMNS)


def _tide_line_figure(frame, value_column, title, value_label):
    """Build a per-station line+marker figure of ``value_column`` over time."""
    fig = px.line(
        frame,
        x='Time (UTC+8)',
        y=value_column,
        color='Station',
        title=title,
        labels={value_column: value_label, 'Time (UTC+8)': 'Time (UTC+8)'},
        color_discrete_map=color_map,
    )
    fig.update_traces(mode='lines+markers')
    return fig


def _preset_month(value):
    """Return a zero-argument callable that yields ``value``.

    Binding the value through a factory avoids the late-binding closure
    pitfall when event handlers are created inside a loop.
    """
    def _get():
        return value
    return _get


def tide_analysis_for_month_gradio(month_str):
    """Run the full analysis for one month and return the UI outputs.

    Args:
        month_str: Month in ``YYYY-MM`` form.

    Returns:
        A 4-tuple ``(status_text, fig_pred, fig_meas, fig_resid)``. On any
        failure the three figures are ``None`` and ``status_text`` contains
        the progress log followed by the error description.
    """
    logs = []
    if not month_str:
        return "Please enter a month in YYYY-MM format.", None, None, None

    # Top-level UI boundary: every failure is reported in the status box
    # instead of propagating into Gradio.
    try:
        logs.append(f"Parsing input month: {month_str}")
        year, month, end_date = get_end_date_from_month(month_str)
        logs.append(f"End date calculated: {end_date}")

        # Fetch measured data
        logs.append("Fetching measured data for Shek Pik...")
        file_shek = fetch_measured_data("Shek Pik", end_date)
        logs.append("Fetching measured data for Quarry Bay...")
        file_quar = fetch_measured_data("Quarry Bay", end_date)

        logs.append("Loading and processing measured CSV data...")
        df_shek = load_measured_csv(file_shek, "Shek Pik")
        df_quar = load_measured_csv(file_quar, "Quarry Bay")
        df_measured = pd.concat([df_shek, df_quar], ignore_index=True)
        if df_measured.empty:
            # Without measurements the time window below would be NaT and
            # every plot would silently come out empty.
            logs.append("No measured data available for the requested period.")
            return "\n".join(logs), None, None, None

        min_time = df_measured['Time (UTC+8)'].min()
        max_time = df_measured['Time (UTC+8)'].max()
        logs.append(f"Measured data range: {min_time} to {max_time}")

        # Fetch predicted tide data
        logs.append("Fetching predicted tide data from HKO...")
        url_quar = f"https://www.hko.gov.hk/tide/QUBtextPH{year}.htm"
        url_shek = f"https://www.hko.gov.hk/tide/SPWtextPH{year}.htm"
        df_pred_quar = fetch_hko_tide_data(url_quar, "Quarry Bay", year)
        df_pred_shek = fetch_hko_tide_data(url_shek, "Shek Pik", year)
        if df_pred_quar is None or df_pred_shek is None:
            logs.append("Failed to fetch predicted tide data.")
            return "\n".join(logs), None, None, None

        logs.append("Processing predicted tide data...")
        df_pred = pd.concat([df_pred_quar, df_pred_shek], ignore_index=True)
        if df_pred.empty:
            logs.append("No predicted tide data could be parsed.")
            return "\n".join(logs), None, None, None
        df_pred['Time (UTC+8)'] = pd.to_datetime(df_pred['Datetime']).dt.tz_localize('Asia/Hong_Kong')
        df_pred = df_pred.rename(columns={'Tide Height (m)': 'Predicted'})
        # Keep only predictions inside the window covered by measurements.
        in_window = (df_pred['Time (UTC+8)'] >= min_time) & (df_pred['Time (UTC+8)'] <= max_time)
        df_pred = df_pred[in_window]

        logs.append("Generating plot for predicted tide...")
        fig_pred = _tide_line_figure(df_pred, 'Predicted', 'Predicted Tide', 'Tide Height (m)')

        logs.append("Generating plot for measured tide...")
        fig_meas = _tide_line_figure(df_measured, 'Measured', 'Measured Tide', 'Tide Height (m)')

        logs.append("Calculating and plotting residuals...")
        # Inner join: only timestamps present in both series yield a residual.
        df_merged = pd.merge(
            df_measured,
            df_pred[['Time (UTC+8)', 'Predicted', 'Station']],
            on=['Time (UTC+8)', 'Station'],
            how='inner',
        )
        df_merged['Residual'] = df_merged['Measured'] - df_merged['Predicted']
        fig_resid = _tide_line_figure(
            df_merged, 'Residual', 'Tide Residuals (Measured - Predicted)', 'Residual (m)'
        )

        logs.append("Analysis completed successfully.")
        return "\n".join(logs), fig_pred, fig_meas, fig_resid

    except Exception as e:
        logs.append(f"Error during processing: {e}")
        return "\n".join(logs), None, None, None


# NOTE(review): the widget nesting below was reconstructed from a source whose
# whitespace had been collapsed - confirm the intended placement of the
# status box relative to the plots.
with gr.Blocks() as demo:
    gr.Markdown("## Tide Time Series Analysis by Month")

    # --- First Row: Controls ---
    with gr.Row():
        month_input = gr.Textbox(label="Enter Month (YYYY-MM)", placeholder="e.g. 2023-07")
        run_btn = gr.Button("Run Analysis")

    # --- Sample Storm Surge Buttons (small and inline) ---
    gr.Markdown("#### Sample Storm Surge Months")
    with gr.Row():
        sample_1 = gr.Button("2025-07 (Wipha)", scale=1)
        sample_2 = gr.Button("2021-10 (Lionrock)", scale=1)
        sample_3 = gr.Button("2022-08 (Ma-on)", scale=1)
        sample_4 = gr.Button("2022-11 (Nalgae)", scale=1)

    # --- Second Row: Plot Area ---
    with gr.Row():
        with gr.Column():
            with gr.Row():
                plot_meas = gr.Plot(label="Measured Tide")
                plot_resid = gr.Plot(label="Residuals")
            with gr.Row():
                plot_pred = gr.Plot(label="Predicted Tide")
                status_output = gr.Textbox(label="Status / Error", interactive=False, lines=1)

    # Output components, in the order tide_analysis_for_month_gradio returns them.
    analysis_outputs = [status_output, plot_pred, plot_meas, plot_resid]

    # --- Main Run Button Action ---
    run_btn.click(
        fn=tide_analysis_for_month_gradio,
        inputs=month_input,
        outputs=analysis_outputs,
    )

    # --- Sample Buttons Actions ---
    # Each sample button first fills the month box, then runs the analysis.
    for sample_button, sample_month in (
        (sample_1, "2025-07"),
        (sample_2, "2021-10"),
        (sample_3, "2022-08"),
        (sample_4, "2022-11"),
    ):
        sample_button.click(
            fn=_preset_month(sample_month), inputs=[], outputs=month_input
        ).then(
            fn=tide_analysis_for_month_gradio,
            inputs=month_input,
            outputs=analysis_outputs,
        )

if __name__ == "__main__":
    demo.launch()