# kenny_streamlit / weather_example.py
# k32462's picture
# upload weather_example.py
# 5d94970 verified
import streamlit as st
import pandas as pd
import polars as pl
import requests
from datetime import datetime, timedelta
import plotly.express as px
# Hand-written sample of hourly temperatures, keyed by column name.
data = dict(
    city=['Rexburg', 'Rexburg', 'Rexburg', 'Provo', 'Provo', 'Laie', 'Laie'],
    date=['2024-07-01', '2024-07-01', '2024-07-02', '2024-07-01', '2024-07-01', '2024-07-01', '2024-07-01'],
    hour=[0, 1, 0, 0, 1, 0, 1],
    temperature=[15, 14, 16, 20, 19, 25, 24],
)

# Load the sample into Polars and parse the ISO date strings into Date values.
df = pl.DataFrame(data).with_columns(
    pl.col("date").str.strptime(pl.Date, "%Y-%m-%d")
)
# HISTORICAL FORECAST
# Latitude / longitude of every city that is requested from the API.
locations = dict(
    Rexburg=dict(latitude=43.8260, longitude=-111.7897),
    Provo=dict(latitude=40.2338, longitude=-111.6585),
    Laie=dict(latitude=21.6478, longitude=-157.9234),
)
# Function to get historical forecast data
def get_historical_forecast_data(location, latitude, longitude, start_date, end_date):
    """Fetch the hourly ``temperature_2m`` series for one location.

    Args:
        location: Label written to the ``location`` column of the result.
        latitude: Value sent as the ``latitude`` query parameter.
        longitude: Value sent as the ``longitude`` query parameter.
        start_date: Value sent as the ``start_date`` query parameter.
        end_date: Value sent as the ``end_date`` query parameter.

    Returns:
        A Polars DataFrame with the columns ``location``, ``timestamp``,
        ``temperature_2m`` and ``data_type`` (always ``"historical forecast"``).

    Raises:
        KeyError: If the response carries no ``hourly`` section.
        requests.RequestException: If the request fails or times out.
    """
    api_url = "https://api.open-meteo.com/v1/forecast"
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "hourly": "temperature_2m",
        "timezone": "America/Denver"  # Adjust timezone as needed
    }
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(api_url, params=params, timeout=30)
    data = response.json()

    # Fail with a descriptive KeyError (same exception type a bare lookup
    # would raise) and surface the API's own explanation when it sends one.
    if 'hourly' not in data:
        reason = data.get('reason') if isinstance(data, dict) else None
        message = f"'hourly' key not found in API response for {location}"
        if reason:
            message = f"{message}: {reason}"
        raise KeyError(message)

    # Extract hourly data
    hourly_data = data['hourly']
    timestamps = hourly_data['time']
    temperatures = hourly_data['temperature_2m']

    # Create DataFrame for historical forecast data
    historical_forecast_df = pl.DataFrame({
        "location": location,
        "timestamp": timestamps,
        "temperature_2m": temperatures,
        "data_type": "historical forecast"  # Label the data as historical forecast
    })
    return historical_forecast_df
# Define the date range for historical data
start_date = "2024-06-01"
end_date = "2024-07-15"

# One request per location; the per-city frames are stacked into one.
forecast_dfs = [
    get_historical_forecast_data(city, coords['latitude'], coords['longitude'], start_date, end_date)
    for city, coords in locations.items()
]
forecast_combined_df = pl.concat(forecast_dfs)

# Parse the timestamp string once and derive every time column from it.
forecast_ts = pl.col("timestamp").str.strptime(pl.Datetime)
forecast_combined_df = forecast_combined_df.with_columns([
    forecast_ts.alias("datetime"),
    forecast_ts.dt.date().alias("date"),
    forecast_ts.dt.weekday().alias("day_of_week"),
    forecast_ts.dt.hour().alias("hour_of_day"),
    # Overwrites temperature_2m in place with the value scaled by 9/5 plus 32.
    (pl.col("temperature_2m") * (9 / 5) + 32).alias("temperature_2m"),
])

# Select and reorder columns
forecast_columns = [
    "location", "datetime", "date", "day_of_week", "hour_of_day", "temperature_2m", "data_type"
]
forecast_combined_df = forecast_combined_df.select(forecast_columns)

# Show the updated DataFrame
print(forecast_combined_df)
# HISTORICAL DATA
# Latitude / longitude of every city that is requested from the API.
locations = dict(
    Rexburg=dict(latitude=43.8260, longitude=-111.7897),
    Provo=dict(latitude=40.2338, longitude=-111.6585),
    Laie=dict(latitude=21.6478, longitude=-157.9234),
)
# Function to get historical weather data
def get_historical_weather_data(location, latitude, longitude, start_date, end_date):
    """Fetch the hourly weather series for one location.

    Args:
        location: Label written to the ``location`` column of the result.
        latitude: Value sent as the ``latitude`` query parameter.
        longitude: Value sent as the ``longitude`` query parameter.
        start_date: Value sent as the ``start_date`` query parameter.
        end_date: Value sent as the ``end_date`` query parameter.

    Returns:
        A Polars DataFrame with ``location``, ``timestamp``, one column per
        requested hourly variable and ``data_type`` (always ``"historical"``).

    Raises:
        KeyError: If the response carries no ``hourly`` section.
        requests.RequestException: If the request fails or times out.
    """
    api_url = "https://api.open-meteo.com/v1/forecast"

    # Output column name -> key of the series in the API's "hourly" section.
    # The request list and the parsing below are both derived from this one
    # mapping so they cannot drift apart.
    hourly_fields = {
        "temperature_2m": "temperature_2m",
        "dewpoint_2m": "dewpoint_2m",
        "wind_gusts_10m": "wind_gusts_10m",
        "visibility": "visibility",
        "cloudcover": "cloudcover",
        "precipitation_probability": "precipitation_probability",
        "relative_humidity_2m": "relative_humidity_2m",
        "sunshine_duration": "sunshine_duration",
        "vapor_pressure": "vapour_pressure_deficit",
        "rain": "rain",
        "soil_temp": "soil_temperature_0_to_7cm",
    }
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "hourly": ",".join(hourly_fields.values()),
        "timezone": "America/Denver"  # Adjust timezone as needed
    }
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(api_url, params=params, timeout=30)
    data = response.json()

    # Check if 'hourly' data is available; pass on the API's own explanation
    # when it sends one so the failure is diagnosable.
    if 'hourly' not in data:
        reason = data.get('reason') if isinstance(data, dict) else None
        message = f"'hourly' key not found in API response for {location}"
        if reason:
            message = f"{message}: {reason}"
        raise KeyError(message)

    # Extract hourly data; a series missing from the response becomes [].
    hourly_data = data['hourly']
    columns = {
        "location": location,
        "timestamp": hourly_data['time'],
    }
    for column_name, api_key in hourly_fields.items():
        columns[column_name] = hourly_data.get(api_key, [])
    columns["data_type"] = "historical"  # Label the data as historical

    # Create DataFrame for historical weather data
    historical_weather_df = pl.DataFrame(columns)
    return historical_weather_df
# Define the date range for historical data
start_date = "2024-06-01"
end_date = "2024-07-15"

# Fetch historical weather data per location; a location whose response
# lacks hourly data is reported and skipped instead of aborting the run.
data_frames = []
for loc, info in locations.items():
    try:
        df = get_historical_weather_data(loc, info['latitude'], info['longitude'], start_date, end_date)
    except KeyError as e:
        print(e)
    else:
        data_frames.append(df)

# Nothing to chart if every location failed.
if not data_frames:
    raise ValueError("No data fetched for any location.")
combined_df = pl.concat(data_frames)
# Parse the timestamp string once and derive every time column from it.
weather_ts = pl.col("timestamp").str.strptime(pl.Datetime)
combined_df = combined_df.with_columns([
    weather_ts.alias("datetime"),
    weather_ts.dt.date().alias("date"),
    weather_ts.dt.weekday().alias("day_of_week"),
    weather_ts.dt.hour().alias("hour_of_day"),
    # Convert temperature to Fahrenheit; stored next to the raw column.
    (pl.col("temperature_2m") * (9 / 5) + 32).alias("temperature_2m_f"),
])

# Select and reorder columns
weather_columns = [
    "location", "datetime", "date", "day_of_week", "hour_of_day",
    "temperature_2m_f", "dewpoint_2m", "wind_gusts_10m", "visibility",
    "cloudcover", "precipitation_probability", "relative_humidity_2m",
    "sunshine_duration", 'vapor_pressure', 'rain', 'soil_temp', "data_type",
]
weather_combined_df = combined_df.select(weather_columns)

# Show the updated DataFrame
print(weather_combined_df)
# COMBINING DATA
# Both frames are matched on the same four columns; only rows present in
# both survive (inner join).
join_keys = ["location", 'date', 'day_of_week', 'hour_of_day']
df = forecast_combined_df.join(weather_combined_df, on=join_keys, how="inner")
# Polars' dt.weekday() yields ISO weekday numbers (Monday = 1 ... Sunday = 7),
# so the lookup must be keyed 1-7; a 0-6 keying labels every day one day late
# and leaves Sunday unmapped.
day_name_map = {
    1: "Monday",
    2: "Tuesday",
    3: "Wednesday",
    4: "Thursday",
    5: "Friday",
    6: "Saturday",
    7: "Sunday",
}
# Give the two temperature columns descriptive names, swap the numeric
# weekday for its name, then drop the columns that are no longer needed.
obsolete_columns = ["temperature_2m", "temperature_2m_f", 'data_type', 'data_type_right']
df = (
    df.with_columns([
        pl.col("temperature_2m").alias("historical_forecast"),
        pl.col("temperature_2m_f").alias("historical"),
        pl.col('date').dt.weekday().map_dict(day_name_map).alias('day_of_week'),
    ])
    .drop(obsolete_columns)
)
# CITY TABLES
def _city_table(city):
    """Return the Date / Hour / forecast / historical rows of ``df`` for one city."""
    return df.filter(pl.col('location') == city).select([
        pl.col('date').alias('Date'),
        pl.col('hour_of_day').alias('Hour'),
        pl.col('historical_forecast').alias('Historical_Forecast'),
        pl.col('historical').alias('Historical'),
    ])


rexburg = _city_table('Rexburg')
laie = _city_table('Laie')
provo = _city_table('Provo')
# Sidebar
# Highest "historical" temperature per location and date, in a stable order.
df_streamlit_select = (
    df.groupby('location', 'date')
    .agg(pl.col('historical').max().alias('daily_high'))
    .sort(['location', 'date'])
)

# STREAMLIT TABLES
# Pandas copies of the per-city tables for st.dataframe.
rexburg_streamlit, laie_streamlit, provo_streamlit = (
    city_table.to_pandas() for city_table in (rexburg, laie, provo)
)
def main():
    """Render the page title and the three per-city tables side by side."""
    st.title("Weather Data: Historical Vs Historical Forecast")

    # Heading and table for each of the three columns, left to right.
    panels = (
        ("### Rexburg Data Table", rexburg_streamlit),
        ("### Laie Data Table", laie_streamlit),
        ("### Provo Data Table", provo_streamlit),
    )
    for column, (heading, table) in zip(st.columns(3), panels):
        with column:
            st.write(heading)
            st.dataframe(table)


if __name__ == "__main__":
    main()
# ALL CITIES
# Every city's hourly "historical" temperature in one table, ordered by time.
all_cities = (
    df.select(['location', 'date', 'hour_of_day', 'historical'])
    .rename({
        'location': 'City',
        'date': 'Date',
        'hour_of_day': 'Hour',
        'historical': 'Temperature',
    })
    .sort(by=['Date', 'Hour'])
)
cities_streamlit = all_cities.to_pandas()
# SIDE BAR
st.sidebar.title("Filters")

# Highest "historical" temperature per location and date, in a stable order.
df_streamlit_select = (
    df.groupby('location', 'date')
    .agg(pl.col('historical').max().alias('daily_high'))
    .sort(['location', 'date'])
)

# Pandas copy of the joined frame; it feeds the widgets' options and bounds.
kpi_streamlit = df.to_pandas()
date_min, date_max = kpi_streamlit['date'].min(), kpi_streamlit['date'].max()
cities = kpi_streamlit['location'].unique()

# Create city selection widget
selected_city = st.sidebar.selectbox('Select a city', cities, key='city_selector')

# Create date range selection widget with unique key
selected_dates = st.sidebar.date_input(
    'Select start and end date',
    [date_min, date_max],
    key='date_range_selector',
)
# Columns offered by the metric selector. Every entry must be a column of the
# joined frame, because the selected name is used to index it directly; the
# precipitation column there is called 'precipitation_probability'.
metrics = [
    'dewpoint_2m',
    'wind_gusts_10m',
    'visibility',
    'cloudcover',
    'precipitation_probability',
    'relative_humidity_2m',
    'sunshine_duration',
    'vapor_pressure',
    'rain',
    'soil_temp',
]
# Metric whose extremes are shown on the gauges.
selected_metric = st.sidebar.selectbox(
    'Select a metric', metrics, key='metric_selector'
)

# Day-of-week choices are the values actually present in the data.
dow_data = df.to_pandas()
days_of_week = dow_data['day_of_week'].unique()
selected_day = st.sidebar.selectbox('Select a day of the week', days_of_week)

# Create a widget for selecting the weather variable
weather_variables = [
    "dewpoint_2m",
    "wind_gusts_10m",
    "visibility",
    "cloudcover",
    "precipitation_probability",
    "relative_humidity_2m",
    "sunshine_duration",
    'vapor_pressure',
    'soil_temp',
]
selected_variable = st.sidebar.selectbox(
    'Select a weather variable', weather_variables
)
##### Interactive Dashboard
# Line Chart
df_pandas = df_streamlit_select.to_pandas()

# Create date range selection widget
date_min = df_pandas['date'].min()
date_max = df_pandas['date'].max()
# NOTE(review): this rebinds selected_dates, so the range chosen in the
# sidebar widget is superseded by this one - confirm that is intended.
selected_dates = st.date_input('Select date range', [date_min, date_max])

# While the user is part-way through picking a range the widget hands back
# fewer than two dates; indexing [1] would then raise IndexError. Fall back
# to the full range until both ends are chosen.
if len(selected_dates) != 2:
    selected_dates = (date_min, date_max)

# Filter data based on user input
range_start = pd.to_datetime(selected_dates[0])
range_end = pd.to_datetime(selected_dates[1])
filtered_df = df_streamlit_select.filter(
    (pl.col('date') >= pl.lit(range_start)) &
    (pl.col('date') <= pl.lit(range_end))
)

# Convert filtered Polars DataFrame to Pandas DataFrame for Streamlit display
filtered_df_pandas = filtered_df.to_pandas()

# Create a line chart using Plotly Express with multiple lines
fig = px.line(filtered_df_pandas, x='date', y='daily_high', color='location',
              title='Daily High Temperatures by Location')
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Daily High Temperature (°F)'
)

# Show the Plotly chart in Streamlit
st.plotly_chart(fig, use_container_width=True)
# BOX PLOT
# Only the columns the distribution charts need, converted for Plotly.
hour_df = df.select(['location', 'datetime', 'date', 'historical'])
df_pandas = hour_df.to_pandas()

box_labels = {'location': 'Location', 'historical': 'Hourly Temperature'}
fig = px.box(
    df_pandas,
    x='location',
    y='historical',
    title='Hourly Temperature Distribution by Location',
    labels=box_labels,
)

# Display the boxplot in Streamlit
st.plotly_chart(fig)
# HISTOGRAM
histogram_labels = {
    'historical': 'Historical Temperature',
    'location': 'Location',
    'count': 'Frequency',
}
fig = px.histogram(
    df_pandas,
    x='historical',
    facet_col='location',
    title='Histogram of Historical Temperatures by Location',
    labels=histogram_labels,
    nbins=30,  # Adjust the number of bins as needed
)


def _strip_facet_prefix(annotation):
    """Keep only the text after the last "=" in a facet annotation."""
    return annotation.update(text=annotation.text.split("=")[-1])


fig.for_each_annotation(_strip_facet_prefix)

# Display the faceted histogram in Streamlit
st.plotly_chart(fig)
# MAX VALUE
import plotly.graph_objects as go

# Rows of the joined frame for the chosen city inside the chosen date range.
kpi_mask = (
    (kpi_streamlit['location'] == selected_city) &
    (kpi_streamlit['date'] >= pd.to_datetime(selected_dates[0])) &
    (kpi_streamlit['date'] <= pd.to_datetime(selected_dates[1]))
)
filtered_df = kpi_streamlit[kpi_mask]

# Get the maximum value for the selected metric
max_value = filtered_df[selected_metric].max()

# Gauge showing the maximum. The bar figure that used to be built here was
# overwritten before it was ever displayed, so only the gauge is created.
metric_title = selected_metric.replace('_', ' ').title()
fig = go.Figure(go.Indicator(
    mode="gauge+number",
    value=max_value,
    domain={'x': [0, 1], 'y': [0, 1]},
    title={'text': f"Max {metric_title}: {max_value}"},
))
# MIN VALUE
import plotly.graph_objects as go

min_value = filtered_df[selected_metric].min()

# Gauge showing the minimum. Two earlier figures assigned to the same name
# were discarded unseen, so only the final gauge is built.
# NOTE(review): for a negative minimum the axis upper bound min_value * 1.2
# lies below the value itself - confirm the intended range.
metric_title = selected_metric.replace('_', ' ').title()
thing = go.Figure(go.Indicator(
    mode="gauge+number",
    value=min_value,
    domain={'x': [0, 1], 'y': [0, 1]},
    title={'text': f"Min {metric_title}: {min_value}"},
    gauge={
        'axis': {'range': [None, min_value * 1.2]},
        'bar': {'color': 'red'},
    },
))
# MAX & MIN DISPLAY
# Max gauge on the left, min gauge on the right.
col1, col2 = st.columns(2)
for gauge_column, gauge_figure in ((col1, fig), (col2, thing)):
    with gauge_column:
        st.plotly_chart(gauge_figure)
# Additional Inputs
st.title("Average Temperature by City")

# Keep only the rows that fall on the weekday picked in the sidebar.
on_selected_day = dow_data['day_of_week'] == selected_day
filtered_df = dow_data[on_selected_day]

# Calculate average temperature for each city
avg_temp_per_city = filtered_df.groupby('location', as_index=False)['historical'].mean()

# Create bar chart; it is rendered further down next to the conditions chart.
bar_labels = {'location': 'City', 'historical': 'Average Temperature (°F)'}
my_chart = px.bar(
    avg_temp_per_city,
    x='location',
    y='historical',
    labels=bar_labels,
    title=f"Average Temperature for Each City on {selected_day}",
    color='historical',
    color_continuous_scale=px.colors.sequential.Plasma,
)
# Conditions Visualizations
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

conditions_data = df.to_pandas()
# An ordered categorical makes the grouping follow calendar order.
conditions_data['day_of_week'] = pd.Categorical(
    conditions_data['day_of_week'],
    categories=weekday_order,
    ordered=True,
)

# Average of the selected weather variable per weekday and location.
filtered_df = (
    conditions_data
    .groupby(['day_of_week', 'location'])[selected_variable]
    .mean()
    .reset_index()
)

# Create a line chart
variable_title = selected_variable.replace('_', ' ').title()
bobby = px.line(
    filtered_df,
    x='day_of_week',
    y=selected_variable,
    color='location',
    title=f"Average {variable_title} by Day of the Week",
    labels={selected_variable: f'Average {variable_title}'},
)

# Ensure the x-axis has the correct order for days of the week
bobby.update_xaxes(
    title='Day of Week',
    categoryorder='array',
    categoryarray=weekday_order,
)
# display last 2 visualizations
# Average-temperature bars on the left, conditions line chart on the right.
col1, col2 = st.columns(2)
for chart_column, chart_figure in ((col1, my_chart), (col2, bobby)):
    with chart_column:
        st.plotly_chart(chart_figure)