| import streamlit as st |
| import pandas as pd |
| import polars as pl |
| import requests |
| from datetime import datetime, timedelta |
| import plotly.express as px |
|
|
| |
# Hand-entered sample rows: one temperature reading per (city, date, hour).
data = dict(
    city=['Rexburg', 'Rexburg', 'Rexburg', 'Provo', 'Provo', 'Laie', 'Laie'],
    date=['2024-07-01', '2024-07-01', '2024-07-02', '2024-07-01', '2024-07-01', '2024-07-01', '2024-07-01'],
    hour=[0, 1, 0, 0, 1, 0, 1],
    temperature=[15, 14, 16, 20, 19, 25, 24],
)

# Build the frame and turn the ISO date strings into real Date values in one chain.
df = pl.DataFrame(data).with_columns(
    pl.col("date").str.strptime(pl.Date, "%Y-%m-%d")
)
|
|
| |
|
|
| |
# Latitude/longitude of each city that is queried from the weather API.
locations = {
    city: {"latitude": lat, "longitude": lon}
    for city, lat, lon in (
        ("Rexburg", 43.8260, -111.7897),
        ("Provo", 40.2338, -111.6585),
        ("Laie", 21.6478, -157.9234),
    )
}
|
|
| |
def get_historical_forecast_data(location, latitude, longitude, start_date, end_date):
    """Fetch hourly ``temperature_2m`` values for one location from Open-Meteo.

    Args:
        location: Label written to the ``location`` column of the result.
        latitude: Sent as the API's ``latitude`` parameter.
        longitude: Sent as the API's ``longitude`` parameter.
        start_date: Sent as the API's ``start_date`` parameter.
        end_date: Sent as the API's ``end_date`` parameter.

    Returns:
        A polars DataFrame with the columns ``location``, ``timestamp``,
        ``temperature_2m`` and ``data_type`` (always ``"historical forecast"``).

    Raises:
        KeyError: If the JSON response has no ``hourly`` section, or that
            section lacks ``time`` / ``temperature_2m``.
        requests.exceptions.RequestException: If the request fails or times out.
    """
    api_url = "https://api.open-meteo.com/v1/forecast"
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "hourly": "temperature_2m",
        "timezone": "America/Denver",
    }

    # Without a timeout a stalled connection would block the app indefinitely.
    response = requests.get(api_url, params=params, timeout=30)
    data = response.json()

    # Name the location in the error instead of surfacing a bare KeyError('hourly').
    if 'hourly' not in data:
        raise KeyError(f"'hourly' key not found in API response for {location}")

    hourly_data = data['hourly']
    timestamps = hourly_data['time']
    temperatures = hourly_data['temperature_2m']

    # The scalar values (location, data_type) are repeated on every row.
    historical_forecast_df = pl.DataFrame({
        "location": location,
        "timestamp": timestamps,
        "temperature_2m": temperatures,
        "data_type": "historical forecast",
    })

    return historical_forecast_df
|
|
| |
# Date window requested for every location.
start_date = "2024-06-01"
end_date = "2024-07-15"

# One request per location; the per-location frames are stacked into one.
forecast_dfs = [
    get_historical_forecast_data(name, coords['latitude'], coords['longitude'], start_date, end_date)
    for name, coords in locations.items()
]
forecast_combined_df = pl.concat(forecast_dfs)

# Single parse expression reused by every derived time column.
_forecast_ts = pl.col("timestamp").str.strptime(pl.Datetime)

# Add the time parts, apply the Celsius-to-Fahrenheit formula to the
# temperature in place, then keep only the columns used downstream.
forecast_combined_df = forecast_combined_df.with_columns([
    _forecast_ts.alias("datetime"),
    _forecast_ts.dt.date().alias("date"),
    _forecast_ts.dt.weekday().alias("day_of_week"),
    _forecast_ts.dt.hour().alias("hour_of_day"),
    (pl.col("temperature_2m") * (9 / 5) + 32).alias("temperature_2m"),
]).select([
    "location", "datetime", "date", "day_of_week", "hour_of_day", "temperature_2m", "data_type"
])
|
|
|
|
|
|
| |
|
|
# NOTE: repeats the city -> coordinates mapping already defined earlier in this file.
locations = {
    "Rexburg": dict(latitude=43.8260, longitude=-111.7897),
    "Provo": dict(latitude=40.2338, longitude=-111.6585),
    "Laie": dict(latitude=21.6478, longitude=-157.9234),
}
|
|
| |
def get_historical_weather_data(location, latitude, longitude, start_date, end_date):
    """Fetch a set of hourly weather variables for one location from Open-Meteo.

    Args:
        location: Label written to the ``location`` column of the result.
        latitude: Sent as the API's ``latitude`` parameter.
        longitude: Sent as the API's ``longitude`` parameter.
        start_date: Sent as the API's ``start_date`` parameter.
        end_date: Sent as the API's ``end_date`` parameter.

    Returns:
        A polars DataFrame with ``location``, ``timestamp``, one column per
        requested variable and ``data_type`` (always ``"historical"``). A
        variable absent from the response becomes an all-null column.

    Raises:
        KeyError: If the JSON response has no ``hourly`` section, or that
            section has no ``time`` entry.
        requests.exceptions.RequestException: If the request fails or times out.
    """
    api_url = "https://api.open-meteo.com/v1/forecast"

    # Output column name -> hourly variable name requested from the API.
    hourly_fields = {
        "temperature_2m": "temperature_2m",
        "dewpoint_2m": "dewpoint_2m",
        "wind_gusts_10m": "wind_gusts_10m",
        "visibility": "visibility",
        "cloudcover": "cloudcover",
        "precipitation_probability": "precipitation_probability",
        "relative_humidity_2m": "relative_humidity_2m",
        "sunshine_duration": "sunshine_duration",
        "vapor_pressure": "vapour_pressure_deficit",
        "rain": "rain",
        "soil_temp": "soil_temperature_0_to_7cm",
    }
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "hourly": ",".join(hourly_fields.values()),
        "timezone": "America/Denver",
    }

    # Without a timeout a stalled connection would block the app indefinitely.
    response = requests.get(api_url, params=params, timeout=30)
    data = response.json()

    if 'hourly' not in data:
        raise KeyError(f"'hourly' key not found in API response for {location}")

    hourly_data = data['hourly']
    timestamps = hourly_data['time']

    # A missing variable is padded with nulls to the timestamp length; an
    # empty-list default cannot be combined with a non-empty timestamp column.
    missing = [None] * len(timestamps)

    columns = {"location": location, "timestamp": timestamps}
    for column_name, api_name in hourly_fields.items():
        columns[column_name] = hourly_data.get(api_name, missing)
    columns["data_type"] = "historical"

    historical_weather_df = pl.DataFrame(columns)

    return historical_weather_df
|
|
| |
# Date window requested for every location.
start_date = "2024-06-01"
end_date = "2024-07-15"

# Fetch each location in turn; a KeyError from the fetch is printed and that
# location is left out of the combined frame.
data_frames = []
for loc, info in locations.items():
    try:
        df = get_historical_weather_data(loc, info['latitude'], info['longitude'], start_date, end_date)
    except KeyError as e:
        print(e)
    else:
        data_frames.append(df)

# Nothing to stack means nothing to show.
if not data_frames:
    raise ValueError("No data fetched for any location.")
combined_df = pl.concat(data_frames)

# Single parse expression reused by every derived time column.
_weather_ts = pl.col("timestamp").str.strptime(pl.Datetime)

# Add the time parts and a Fahrenheit copy of the temperature.
combined_df = combined_df.with_columns([
    _weather_ts.alias("datetime"),
    _weather_ts.dt.date().alias("date"),
    _weather_ts.dt.weekday().alias("day_of_week"),
    _weather_ts.dt.hour().alias("hour_of_day"),
    (pl.col("temperature_2m") * (9 / 5) + 32).alias("temperature_2m_f"),
])

# Keep only the columns used downstream (the Celsius temperature is dropped).
weather_combined_df = combined_df.select([
    "location", "datetime", "date", "day_of_week", "hour_of_day",
    "temperature_2m_f", "dewpoint_2m", "wind_gusts_10m", "visibility",
    "cloudcover", "precipitation_probability", "relative_humidity_2m",
    "sunshine_duration", 'vapor_pressure', 'rain', 'soil_temp', "data_type",
])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
|
|
# Pair each forecast row with the weather row for the same location and hour.
# Columns that exist on both sides and are not join keys get a "_right" suffix.
df = forecast_combined_df.join(
    weather_combined_df,
    on=["location", 'date', 'day_of_week', 'hour_of_day'],
    how="inner"
)

# polars' dt.weekday() is ISO-numbered (Monday = 1 ... Sunday = 7). Keying this
# lookup 0-6 shifts every name by one day and maps Sunday to null.
day_name_map = {1: "Monday", 2: "Tuesday", 3: "Wednesday", 4: "Thursday", 5: "Friday", 6: "Saturday", 7: "Sunday"}

# Give the two temperature series descriptive names and replace the numeric
# weekday with its English name.
df = df.with_columns([
    pl.col("temperature_2m").alias("historical_forecast"),
    pl.col("temperature_2m_f").alias("historical"),
    pl.col('date').dt.weekday().map_dict(day_name_map).alias('day_of_week')
])

# Drop the source temperature columns and the constant data_type labels.
df = df.drop(["temperature_2m", "temperature_2m_f", 'data_type', 'data_type_right'])
|
|
|
|
|
|
| |
|
|
def _hourly_table_for(city):
    """Return the date, hour and both temperature columns of ``df`` for one location, under display names."""
    return df.filter(pl.col('location') == city).select([
        pl.col('date').alias('Date'),
        pl.col('hour_of_day').alias('Hour'),
        pl.col('historical_forecast').alias('Historical_Forecast'),
        pl.col('historical').alias('Historical'),
    ])


# One display table per city.
rexburg = _hourly_table_for('Rexburg')
laie = _hourly_table_for('Laie')
provo = _hourly_table_for('Provo')
|
|
|
|
|
|
| |
|
|
# Highest 'historical' value per location and date, sorted by location then date.
df_streamlit_select = (
    df.groupby('location', 'date')
    .agg(pl.col('historical').max().alias('daily_high'))
    .sort(['location', 'date'])
)
|
|
|
|
|
|
|
|
|
|
|
|
| |
|
|
# pandas copies of the three per-city tables, shown by main() via st.dataframe.
rexburg_streamlit, laie_streamlit, provo_streamlit = (
    table.to_pandas() for table in (rexburg, laie, provo)
)
|
|
def main():
    """Render the page title and the three per-city data tables in three columns."""
    st.title("Weather Data: Historical Vs Historical Forecast")

    # (heading, table) pairs in left-to-right display order.
    sections = (
        ("### Rexburg Data Table", rexburg_streamlit),
        ("### Laie Data Table", laie_streamlit),
        ("### Provo Data Table", provo_streamlit),
    )
    for column, (heading, table) in zip(st.columns(3), sections):
        with column:
            st.write(heading)
            st.dataframe(table)


if __name__ == "__main__":
    main()
|
|
|
|
|
|
| |
|
|
# Table of the 'historical' temperature for every city, under display names
# and sorted by date then hour.
all_cities = (
    df.select(['location', 'date', 'hour_of_day', 'historical'])
    .rename({
        'location': 'City',
        'date': 'Date',
        'hour_of_day': 'Hour',
        'historical': 'Temperature',
    })
    .sort(by=['Date', 'Hour'])
)

# pandas copy of the same table.
cities_streamlit = all_cities.to_pandas()
|
|
|
|
|
|
| |
| |
st.sidebar.title("Filters")

# Highest 'historical' value per location and date, sorted by location then date.
df_streamlit_select = (
    df.groupby('location', 'date')
    .agg(pl.col('historical').max().alias('daily_high'))
    .sort(['location', 'date'])
)

# pandas copy of the joined frame; it feeds the city and date pickers below.
kpi_streamlit = df.to_pandas()

# City picker, offering every location present in the data.
cities = kpi_streamlit['location'].unique()
selected_city = st.sidebar.selectbox('Select a city', cities, key='city_selector')

# Date-range picker, defaulting to the full span of the data.
date_min = kpi_streamlit['date'].min()
date_max = kpi_streamlit['date'].max()
selected_dates = st.sidebar.date_input(
    'Select start and end date',
    [date_min, date_max],
    key='date_range_selector',
)
|
|
# Column names offered in the metric picker. Each entry is later used to index
# the joined frame, so it must match a column produced by the weather fetch;
# that frame has 'precipitation_probability' and no plain 'precipitation'.
metrics = [
    'dewpoint_2m',
    'wind_gusts_10m',
    'visibility',
    'cloudcover',
    'precipitation_probability',
    'relative_humidity_2m',
    'sunshine_duration',
    'vapor_pressure',
    'rain',
    'soil_temp',
]
|
|
|
|
# Inputs for the pickers below: the weekday names present in the data and the
# weather columns that can be plotted by weekday.
dow_data = df.to_pandas()
days_of_week = dow_data['day_of_week'].unique()
weather_variables = [
    "dewpoint_2m",
    "wind_gusts_10m",
    "visibility",
    "cloudcover",
    "precipitation_probability",
    "relative_humidity_2m",
    "sunshine_duration",
    'vapor_pressure',
    'soil_temp',
]

# Sidebar pickers, created in display order: metric, weekday, weather variable.
selected_metric = st.sidebar.selectbox('Select a metric', metrics, key='metric_selector')
selected_day = st.sidebar.selectbox('Select a day of the week', days_of_week)
selected_variable = st.sidebar.selectbox('Select a weather variable', weather_variables)
|
|
|
|
|
|
| |
|
|
| |
|
|
|
|
# pandas copy of the daily highs, used for the date bounds of the picker below.
df_pandas = df_streamlit_select.to_pandas()

date_min = df_pandas['date'].min()
date_max = df_pandas['date'].max()

# This picker gets its own variable: rebinding `selected_dates` here would
# overwrite the sidebar's date range and leave that widget without effect on
# the code further down that reads it.
chart_dates = st.date_input('Select date range', [date_min, date_max])

# While a range is only half picked the widget returns fewer than two dates;
# fall back to the data bounds instead of failing on a missing index.
_picked = tuple(chart_dates)
chart_start = _picked[0] if len(_picked) > 0 else date_min
chart_end = _picked[1] if len(_picked) > 1 else date_max

# Keep the daily highs inside the chosen window (both ends inclusive).
filtered_df = df_streamlit_select.filter(
    (pl.col('date') >= pl.lit(pd.to_datetime(chart_start))) &
    (pl.col('date') <= pl.lit(pd.to_datetime(chart_end)))
)

filtered_df_pandas = filtered_df.to_pandas()

# One line per location.
fig = px.line(filtered_df_pandas, x='date', y='daily_high', color='location',
              title='Daily High Temperatures by Location')

fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Daily High Temperature (°F)'
)

st.plotly_chart(fig, use_container_width=True)
|
|
|
|
|
|
| |
|
|
# Columns needed for the distribution plots, as a pandas frame for plotly.
hour_df = df.select(['location', 'datetime', 'date', 'historical'])
df_pandas = hour_df.to_pandas()

# One box per location summarising its hourly 'historical' values.
fig = px.box(
    df_pandas,
    title='Hourly Temperature Distribution by Location',
    x='location',
    y='historical',
    labels={'location': 'Location', 'historical': 'Hourly Temperature'},
)

st.plotly_chart(fig)
|
|
|
|
|
|
| |
|
|
def _facet_value_only(annotation):
    """Replace an annotation's text with the part after its last '='."""
    return annotation.update(text=annotation.text.split("=")[-1])


# One histogram panel per location over the 'historical' values.
fig = px.histogram(
    df_pandas,
    title='Histogram of Historical Temperatures by Location',
    x='historical',
    facet_col='location',
    nbins=30,
    labels={'historical': 'Historical Temperature', 'location': 'Location', 'count':'Frequency'},
)

# Shorten the panel titles before rendering.
fig.for_each_annotation(_facet_value_only)

st.plotly_chart(fig)
|
|
|
|
|
|
| |
|
|
| |
|
|
| |
|
|
| |
import plotly.graph_objects as go

# While a range is only half picked the date widget returns fewer than two
# dates; fall back to the data bounds instead of failing on a missing index.
_kpi_dates = tuple(selected_dates)
_kpi_start = pd.to_datetime(_kpi_dates[0]) if len(_kpi_dates) > 0 else kpi_streamlit['date'].min()
_kpi_end = pd.to_datetime(_kpi_dates[1]) if len(_kpi_dates) > 1 else kpi_streamlit['date'].max()

# Rows for the chosen city inside the chosen window (both ends inclusive).
filtered_df = kpi_streamlit[
    (kpi_streamlit['location'] == selected_city) &
    (kpi_streamlit['date'] >= _kpi_start) &
    (kpi_streamlit['date'] <= _kpi_end)
]

# Largest value of the chosen metric in that selection.
max_value = filtered_df[selected_metric].max()

# Gauge showing the maximum. A bar chart of the same value used to be built
# here and was overwritten before being displayed, so it is not built any more.
fig = go.Figure(go.Indicator(
    mode="gauge+number",
    value=max_value,
    domain={'x': [0, 1], 'y': [0, 1]},
    title={'text': f"Max {selected_metric.replace('_', ' ').title()}: {max_value}"}))
|
|
|
|
|
|
|
|
| |
|
|
import plotly.graph_objects as go

# Smallest value of the chosen metric in the current city/date selection.
min_value = filtered_df[selected_metric].min()

# Gauge showing the minimum. A bar chart and a first, unstyled gauge used to
# be built here and were overwritten before being displayed; only the final
# figure is built now.
# NOTE(review): for a negative minimum the upper axis bound (min_value * 1.2)
# lies below the value itself - confirm the intended axis range.
thing = go.Figure(go.Indicator(
    mode="gauge+number",
    value=min_value,
    domain={'x': [0, 1], 'y': [0, 1]},
    title={'text': f"Min {selected_metric.replace('_', ' ').title()}: {min_value}"},
    gauge={
        'axis': {'range': [None, min_value * 1.2]},
        'bar': {'color': 'red'}
    }
))
|
|
|
|
|
|
|
|
| |
|
|
# Two equal columns: `fig` on the left, `thing` on the right.
col1, col2 = st.columns(2)
col1.plotly_chart(fig)
col2.plotly_chart(thing)
|
|
|
|
|
|
| |
|
|
|
|
|
|
st.title("Average Temperature by City")

# Rows for the weekday chosen in the sidebar, then the mean 'historical'
# value per location.
filtered_df = dow_data.loc[dow_data['day_of_week'] == selected_day]
avg_temp_per_city = filtered_df.groupby('location', as_index=False)['historical'].mean()

# One bar per city, coloured by its average.
my_chart = px.bar(
    avg_temp_per_city,
    title=f"Average Temperature for Each City on {selected_day}",
    x='location',
    y='historical',
    color='historical',
    color_continuous_scale=px.colors.sequential.Plasma,
    labels={'location': 'City', 'historical': 'Average Temperature (°F)'},
)
|
|
| |
| |
|
|
|
|
|
|
| |
| |
|
|
conditions_data = df.to_pandas()

# Calendar order of the weekday names, shared by the categorical column and the x axis.
_weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
conditions_data['day_of_week'] = pd.Categorical(
    conditions_data['day_of_week'],
    categories=_weekday_order,
    ordered=True,
)

# Mean of the chosen variable for every weekday/location pair.
filtered_df = (
    conditions_data
    .groupby(['day_of_week', 'location'])[selected_variable]
    .mean()
    .reset_index()
)

# Human-readable form of the column name, e.g. 'wind_gusts_10m' -> 'Wind Gusts 10M'.
_variable_label = selected_variable.replace('_', ' ').title()

# One line per location across the week.
bobby = px.line(
    filtered_df,
    x='day_of_week',
    y=selected_variable,
    color='location',
    title=f"Average {_variable_label} by Day of the Week",
    labels={selected_variable: f'Average {_variable_label}'},
)

# Force the x axis into calendar order.
bobby.update_xaxes(
    title='Day of Week',
    categoryorder='array',
    categoryarray=_weekday_order,
)
|
|
|
|
| |
| |
|
|
|
|
|
|
| |
|
|
# Two equal columns: `my_chart` on the left, `bobby` on the right.
col1, col2 = st.columns(2)
col1.plotly_chart(my_chart)
col2.plotly_chart(bobby)