"""Streamlit dashboard for hourly Open-Meteo weather in Rexburg, Provo and Laie.

The script downloads two hourly datasets per city (a temperature-only
"historical forecast" series and a multi-variable "historical" series), joins
them on location/date/weekday/hour, and renders tables, charts and KPI gauges.
"""
from datetime import datetime, timedelta

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import polars as pl
import requests
import streamlit as st

# NOTE(review): both fetchers below call this same endpoint, so the
# "historical" and "historical forecast" temperatures may be identical --
# confirm whether a separate archive endpoint was intended.
API_URL = "https://api.open-meteo.com/v1/forecast"
# NOTE(review): one timezone is used for every city, including Laie --
# adjust timezone as needed.
API_TIMEZONE = "America/Denver"
REQUEST_TIMEOUT_SECONDS = 30
CACHE_TTL_SECONDS = 3600

JOIN_KEYS = ["location", "date", "day_of_week", "hour_of_day"]
WEEKDAY_NAMES = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Output column name -> Open-Meteo hourly variable requested for it.
# The insertion order fixes both the request string and the column order.
WEATHER_FIELDS = {
    "temperature_2m": "temperature_2m",
    "dewpoint_2m": "dewpoint_2m",
    "wind_gusts_10m": "wind_gusts_10m",
    "visibility": "visibility",
    "cloudcover": "cloudcover",
    "precipitation_probability": "precipitation_probability",
    "relative_humidity_2m": "relative_humidity_2m",
    "sunshine_duration": "sunshine_duration",
    "vapor_pressure": "vapour_pressure_deficit",
    "rain": "rain",
    "soil_temp": "soil_temperature_0_to_7cm",
}

# Locations with their respective latitude and longitude
locations = {
    "Rexburg": {"latitude": 43.8260, "longitude": -111.7897},
    "Provo": {"latitude": 40.2338, "longitude": -111.6585},
    "Laie": {"latitude": 21.6478, "longitude": -157.9234},
}

# Date range requested from the API
start_date = "2024-06-01"
end_date = "2024-07-15"


def _request_hourly(location, latitude, longitude, start_date, end_date, hourly):
    """Request hourly variables for one location and return the 'hourly' payload.

    Args:
        location: City name, used only in the error message.
        latitude: Latitude sent to the API.
        longitude: Longitude sent to the API.
        start_date: First day requested, as a ``YYYY-MM-DD`` string.
        end_date: Last day requested, as a ``YYYY-MM-DD`` string.
        hourly: Comma-separated hourly variable names.

    Returns:
        The dict stored under the ``'hourly'`` key of the JSON response.

    Raises:
        KeyError: If the response has no ``'hourly'`` key.
    """
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "hourly": hourly,
        "timezone": API_TIMEZONE,
    }
    # A timeout keeps a stalled connection from hanging the whole app.
    response = requests.get(API_URL, params=params, timeout=REQUEST_TIMEOUT_SECONDS)
    payload = response.json()
    if 'hourly' not in payload:
        raise KeyError(f"'hourly' key not found in API response for {location}")
    return payload['hourly']


# Cached because Streamlit re-executes the script on every widget interaction;
# without it each click would repeat every HTTP request.
@st.cache_data(ttl=CACHE_TTL_SECONDS, show_spinner=False)
def get_historical_forecast_data(location, latitude, longitude, start_date, end_date):
    """Fetch hourly ``temperature_2m`` for one location.

    Returns:
        A Polars DataFrame with columns ``location``, ``timestamp``,
        ``temperature_2m`` and ``data_type`` (always "historical forecast").

    Raises:
        KeyError: If the API response has no ``'hourly'`` key.
    """
    hourly_data = _request_hourly(
        location, latitude, longitude, start_date, end_date, "temperature_2m"
    )
    return pl.DataFrame({
        "location": location,
        "timestamp": hourly_data['time'],
        "temperature_2m": hourly_data['temperature_2m'],
        "data_type": "historical forecast",  # Label the data as historical forecast
    })


@st.cache_data(ttl=CACHE_TTL_SECONDS, show_spinner=False)
def get_historical_weather_data(location, latitude, longitude, start_date, end_date):
    """Fetch the hourly variables listed in ``WEATHER_FIELDS`` for one location.

    Returns:
        A Polars DataFrame with ``location``, ``timestamp``, one column per
        ``WEATHER_FIELDS`` key, and ``data_type`` (always "historical").

    Raises:
        KeyError: If the API response has no ``'hourly'`` key.
    """
    hourly_data = _request_hourly(
        location, latitude, longitude, start_date, end_date,
        ",".join(WEATHER_FIELDS.values()),
    )
    columns = {"location": location, "timestamp": hourly_data['time']}
    for column_name, api_name in WEATHER_FIELDS.items():
        # A variable absent from the response becomes an empty list, as before.
        columns[column_name] = hourly_data.get(api_name, [])
    columns["data_type"] = "historical"  # Label the data as historical
    return pl.DataFrame(columns)


def _with_time_parts(frame):
    """Parse ``timestamp`` once and add datetime/date/day_of_week/hour_of_day."""
    parsed = pl.col("timestamp").str.strptime(pl.Datetime)
    return frame.with_columns([
        parsed.alias("datetime"),
        parsed.dt.date().alias("date"),
        parsed.dt.weekday().alias("day_of_week"),
        parsed.dt.hour().alias("hour_of_day"),
    ])


def _to_fahrenheit(celsius):
    """Return an expression converting a Celsius expression to Fahrenheit."""
    return celsius * (9 / 5) + 32


def _city_table(frame, city):
    """Return the Date/Hour/forecast/actual table for a single city."""
    return frame.filter(pl.col('location') == city).select([
        pl.col('date').alias('Date'),
        pl.col('hour_of_day').alias('Hour'),
        pl.col('historical_forecast').alias('Historical_Forecast'),
        pl.col('historical').alias('Historical'),
    ])


def _resolve_date_range(selection, default_start, default_end):
    """Return a ``(start, end)`` pair from a ``st.date_input`` range selection.

    The widget can hand back fewer than two dates while the user is still
    picking a range; missing ends fall back to the supplied defaults so the
    caller never indexes past the end of the selection.
    """
    picked = list(selection) if isinstance(selection, (list, tuple)) else [selection]
    start = picked[0] if picked else default_start
    end = picked[1] if len(picked) > 1 else default_end
    return start, end


def _pretty(name):
    """Turn a snake_case column name into a display title."""
    return name.replace('_', ' ').title()


# HISTORICAL FORECAST
forecast_dfs = [
    get_historical_forecast_data(loc, info['latitude'], info['longitude'], start_date, end_date)
    for loc, info in locations.items()
]
forecast_combined_df = (
    _with_time_parts(pl.concat(forecast_dfs))
    .with_columns(_to_fahrenheit(pl.col("temperature_2m")).alias("temperature_2m"))
    .select([
        "location", "datetime", "date", "day_of_week", "hour_of_day",
        "temperature_2m", "data_type",
    ])
)
print(forecast_combined_df)

# HISTORICAL DATA
data_frames = []
for loc, info in locations.items():
    try:
        location_df = get_historical_weather_data(
            loc, info['latitude'], info['longitude'], start_date, end_date
        )
    except KeyError as e:
        # Best effort: report the location that failed and keep the others.
        print(e)
        continue
    data_frames.append(location_df)

if not data_frames:
    raise ValueError("No data fetched for any location.")

combined_df = _with_time_parts(pl.concat(data_frames)).with_columns(
    _to_fahrenheit(pl.col("temperature_2m")).alias("temperature_2m_f")
)
weather_combined_df = combined_df.select([
    "location", "datetime", "date", "day_of_week", "hour_of_day", "temperature_2m_f",
    "dewpoint_2m", "wind_gusts_10m", "visibility", "cloudcover",
    "precipitation_probability", "relative_humidity_2m", "sunshine_duration",
    'vapor_pressure', 'rain', 'soil_temp', "data_type",
])
print(weather_combined_df)

# COMBINING DATA
df = forecast_combined_df.join(weather_combined_df, on=JOIN_KEYS, how="inner")
df = df.with_columns([
    pl.col("temperature_2m").alias("historical_forecast"),
    pl.col("temperature_2m_f").alias("historical"),
    # Polars' dt.weekday() is ISO-numbered (Monday=1 ... Sunday=7), so a
    # 0-based lookup table mislabels every day; format the name from the date.
    pl.col('date').dt.strftime('%A').alias('day_of_week'),
])
df = df.drop(["temperature_2m", "temperature_2m_f", 'data_type', 'data_type_right'])

# CITY TABLES
rexburg = _city_table(df, 'Rexburg')
laie = _city_table(df, 'Laie')
provo = _city_table(df, 'Provo')

# Daily high temperature per location
# NOTE(review): `groupby` is kept as the file's existing spelling; newer
# Polars releases name this `group_by` -- confirm against the installed version.
df_streamlit_select = (
    df.groupby('location', 'date')
    .agg(pl.col('historical').max().alias('daily_high'))
    .sort(['location', 'date'])
)

# STREAMLIT TABLES
rexburg_streamlit = rexburg.to_pandas()
laie_streamlit = laie.to_pandas()
provo_streamlit = provo.to_pandas()


def main():
    """Render the page title and the three per-city tables side by side."""
    st.title("Weather Data: Historical Vs Historical Forecast")

    col1, col2, col3 = st.columns(3)
    with col1:
        st.write("### Rexburg Data Table")
        st.dataframe(rexburg_streamlit)
    with col2:
        st.write("### Laie Data Table")
        st.dataframe(laie_streamlit)
    with col3:
        st.write("### Provo Data Table")
        st.dataframe(provo_streamlit)


if __name__ == "__main__":
    main()

# ALL CITIES
all_cities = df.select([
    pl.col('location').alias('City'),
    pl.col('date').alias('Date'),
    pl.col('hour_of_day').alias('Hour'),
    pl.col('historical').alias('Temperature'),
]).sort(by=['Date', 'Hour'])
cities_streamlit = all_cities.to_pandas()

# SIDE BAR
st.sidebar.title("Filters")

kpi_streamlit = df.to_pandas()
date_min = df['date'].min()
date_max = df['date'].max()

cities = kpi_streamlit['location'].unique()
selected_city = st.sidebar.selectbox('Select a city', cities, key='city_selector')

# Kept under its own name so the main-page date picker further down cannot
# overwrite it; this range drives the KPI gauges.
sidebar_dates = st.sidebar.date_input(
    'Select start and end date', [date_min, date_max], key='date_range_selector'
)

metrics = [
    'dewpoint_2m', 'wind_gusts_10m', 'visibility', 'cloudcover',
    'precipitation_probability', 'relative_humidity_2m', 'sunshine_duration',
    'vapor_pressure', 'rain', 'soil_temp',
]
selected_metric = st.sidebar.selectbox('Select a metric', metrics, key='metric_selector')

# Offer only weekdays present in the data, in calendar order.
present_days = set(kpi_streamlit['day_of_week'].dropna().unique())
days_of_week = [day for day in WEEKDAY_NAMES if day in present_days]
selected_day = st.sidebar.selectbox('Select a day of the week', days_of_week)

weather_variables = [
    "dewpoint_2m", "wind_gusts_10m", "visibility", "cloudcover",
    "precipitation_probability", "relative_humidity_2m", "sunshine_duration",
    'vapor_pressure', 'soil_temp',
]
selected_variable = st.sidebar.selectbox('Select a weather variable', weather_variables)

##### Interactive Dashboard
# LINE CHART
selected_dates = st.date_input('Select date range', [date_min, date_max])
chart_start, chart_end = _resolve_date_range(selected_dates, date_min, date_max)

filtered_df = df_streamlit_select.filter(
    (pl.col('date') >= pl.lit(chart_start)) & (pl.col('date') <= pl.lit(chart_end))
)
filtered_df_pandas = filtered_df.to_pandas()

fig = px.line(
    filtered_df_pandas,
    x='date',
    y='daily_high',
    color='location',
    title='Daily High Temperatures by Location',
)
fig.update_layout(xaxis_title='Date', yaxis_title='Daily High Temperature (°F)')
st.plotly_chart(fig, use_container_width=True)

# BOX PLOT
hour_df = df.select(
    pl.col('location'), pl.col('datetime'), pl.col('date'), pl.col('historical')
)
df_pandas = hour_df.to_pandas()

box_fig = px.box(
    df_pandas,
    x='location',
    y='historical',
    title='Hourly Temperature Distribution by Location',
    labels={'location': 'Location', 'historical': 'Hourly Temperature'},
)
st.plotly_chart(box_fig)

# HISTOGRAM
hist_fig = px.histogram(
    df_pandas,
    x='historical',
    facet_col='location',
    title='Histogram of Historical Temperatures by Location',
    labels={'historical': 'Historical Temperature', 'location': 'Location', 'count': 'Frequency'},
    nbins=30,  # Adjust the number of bins as needed
)
# Strip the "location=" prefix from each facet title.
hist_fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
st.plotly_chart(hist_fig)

# MAX & MIN VALUE
kpi_start, kpi_end = _resolve_date_range(sidebar_dates, date_min, date_max)
kpi_filtered = kpi_streamlit[
    (kpi_streamlit['location'] == selected_city)
    & (kpi_streamlit['date'] >= pd.to_datetime(kpi_start))
    & (kpi_streamlit['date'] <= pd.to_datetime(kpi_end))
]

if kpi_filtered.empty:
    # max()/min() of an empty selection are NaN, which renders meaningless gauges.
    st.info("No data for the selected city and date range.")
else:
    max_value = kpi_filtered[selected_metric].max()
    min_value = kpi_filtered[selected_metric].min()

    max_gauge = go.Figure(go.Indicator(
        mode="gauge+number",
        value=max_value,
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': f"Max {_pretty(selected_metric)}: {max_value}"},
    ))
    min_gauge = go.Figure(go.Indicator(
        mode="gauge+number",
        value=min_value,
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': f"Min {_pretty(selected_metric)}: {min_value}"},
        gauge={
            'axis': {'range': [None, min_value * 1.2]},
            'bar': {'color': 'red'},
        },
    ))

    col1, col2 = st.columns(2)
    with col1:
        st.plotly_chart(max_gauge)
    with col2:
        st.plotly_chart(min_gauge)

# AVERAGE TEMPERATURE BY CITY
st.title("Average Temperature by City")

day_filtered = kpi_streamlit[kpi_streamlit['day_of_week'] == selected_day]
avg_temp_per_city = day_filtered.groupby('location')['historical'].mean().reset_index()

avg_temp_chart = px.bar(
    avg_temp_per_city,
    x='location',
    y='historical',
    labels={'location': 'City', 'historical': 'Average Temperature (°F)'},
    title=f"Average Temperature for Each City on {selected_day}",
    color='historical',
    color_continuous_scale=px.colors.sequential.Plasma,
)

# CONDITIONS BY DAY OF WEEK
# assign() returns a new frame, so kpi_streamlit keeps its plain string column.
conditions_data = kpi_streamlit.assign(
    day_of_week=pd.Categorical(
        kpi_streamlit['day_of_week'], categories=WEEKDAY_NAMES, ordered=True
    )
)
# observed=False keeps every weekday category in the result, including days
# with no rows, matching the historical pandas default.
conditions_avg = (
    conditions_data
    .groupby(['day_of_week', 'location'], observed=False)[selected_variable]
    .mean()
    .reset_index()
)

conditions_chart = px.line(
    conditions_avg,
    x='day_of_week',
    y=selected_variable,
    color='location',
    title=f"Average {_pretty(selected_variable)} by Day of the Week",
    labels={selected_variable: f'Average {_pretty(selected_variable)}'},
)
# Force Monday-to-Sunday ordering on the x-axis.
conditions_chart.update_xaxes(
    title='Day of Week',
    categoryorder='array',
    categoryarray=WEEKDAY_NAMES,
)

# Display the last two visualizations side by side
col1, col2 = st.columns(2)
with col1:
    st.plotly_chart(avg_temp_chart)
with col2:
    st.plotly_chart(conditions_chart)