Spaces:
No application file
No application file
| import streamlit as st | |
| import pandas as pd | |
| import polars as pl | |
| import requests | |
| from datetime import datetime, timedelta | |
| import plotly.express as px | |
# Small hand-written sample of hourly readings, one tuple per reading:
# (city, ISO date string, hour of day, temperature).
# NOTE(review): nothing visible later in this script reads this frame before
# `df` is reassigned -- confirm whether it is still needed.
_sample_columns = ("city", "date", "hour", "temperature")
_sample_rows = [
    ("Rexburg", "2024-07-01", 0, 15),
    ("Rexburg", "2024-07-01", 1, 14),
    ("Rexburg", "2024-07-02", 0, 16),
    ("Provo", "2024-07-01", 0, 20),
    ("Provo", "2024-07-01", 1, 19),
    ("Laie", "2024-07-01", 0, 25),
    ("Laie", "2024-07-01", 1, 24),
]
# Transpose the rows into the column-oriented dict Polars is built from.
data = {
    name: list(values)
    for name, values in zip(_sample_columns, zip(*_sample_rows))
}
# Build the Polars frame and parse the date strings into a Date column.
df = pl.DataFrame(data).with_columns(
    pl.col("date").str.strptime(pl.Date, "%Y-%m-%d")
)
# HISTORICAL FORECAST
# Latitude/longitude of each city queried below, keyed by city name.
locations = {
    city: {"latitude": lat, "longitude": lon}
    for city, lat, lon in (
        ("Rexburg", 43.8260, -111.7897),
        ("Provo", 40.2338, -111.6585),
        ("Laie", 21.6478, -157.9234),
    )
}
# Function to get historical forecast data
def get_historical_forecast_data(location, latitude, longitude, start_date, end_date, timeout=30):
    """Fetch hourly ``temperature_2m`` for one location from Open-Meteo.

    Args:
        location: Label written to the ``location`` column of every row.
        latitude: Latitude sent to the API.
        longitude: Longitude sent to the API.
        start_date: First day requested, passed to the API unchanged
            (this file passes ``YYYY-MM-DD`` strings).
        end_date: Last day requested, passed to the API unchanged.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A Polars DataFrame with the columns ``location``, ``timestamp``
        (the API's ``time`` strings), ``temperature_2m`` (as returned by
        the API) and ``data_type`` (always ``"historical forecast"``).

    Raises:
        KeyError: The response has no ``hourly`` block, or that block lacks
            ``time`` / ``temperature_2m``.
        requests.exceptions.RequestException: The request failed or timed out.
    """
    api_url = "https://api.open-meteo.com/v1/forecast"
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "hourly": "temperature_2m",
        "timezone": "America/Denver"  # Adjust timezone as needed
    }
    # Without a timeout a stalled connection would block the app forever.
    response = requests.get(api_url, params=params, timeout=timeout)
    data = response.json()

    # Same failure mode as get_historical_weather_data: a descriptive KeyError.
    hourly_data = data.get("hourly") if isinstance(data, dict) else None
    if hourly_data is None:
        # Error responses carry a human-readable "reason"; surface it if present.
        reason = data.get("reason") if isinstance(data, dict) else None
        detail = f": {reason}" if reason else ""
        raise KeyError(f"'hourly' key not found in API response for {location}{detail}")

    # Create DataFrame for historical forecast data; the scalar values are
    # repeated for every row.
    return pl.DataFrame({
        "location": location,
        "timestamp": hourly_data["time"],
        "temperature_2m": hourly_data["temperature_2m"],
        "data_type": "historical forecast"  # Label the data as historical forecast
    })
# Date range requested for every location.
start_date = "2024-06-01"
end_date = "2024-07-15"

# One frame per location, stacked into a single frame.
forecast_dfs = [
    get_historical_forecast_data(
        city, coords["latitude"], coords["longitude"], start_date, end_date
    )
    for city, coords in locations.items()
]
forecast_combined_df = pl.concat(forecast_dfs)

# Parse the timestamp string once and derive every time column from it.
_forecast_ts = pl.col("timestamp").str.strptime(pl.Datetime)

# Build the final column set in its final order: parsed datetime, its date,
# weekday number and hour, plus the temperature rescaled with x * 9/5 + 32.
forecast_combined_df = forecast_combined_df.select([
    pl.col("location"),
    _forecast_ts.alias("datetime"),
    _forecast_ts.dt.date().alias("date"),
    _forecast_ts.dt.weekday().alias("day_of_week"),
    _forecast_ts.dt.hour().alias("hour_of_day"),
    (pl.col("temperature_2m") * (9 / 5) + 32).alias("temperature_2m"),
    pl.col("data_type"),
])

# Show the updated DataFrame
print(forecast_combined_df)
# HISTORICAL DATA
# Latitude/longitude of each city, keyed by city name (the same three cities
# and coordinates as the mapping declared for the forecast section above).
locations = dict(
    Rexburg=dict(latitude=43.8260, longitude=-111.7897),
    Provo=dict(latitude=40.2338, longitude=-111.6585),
    Laie=dict(latitude=21.6478, longitude=-157.9234),
)
# Function to get historical weather data
def get_historical_weather_data(location, latitude, longitude, start_date, end_date, timeout=30):
    """Fetch a set of hourly weather variables for one location from Open-Meteo.

    Args:
        location: Label written to the ``location`` column of every row.
        latitude: Latitude sent to the API.
        longitude: Longitude sent to the API.
        start_date: First day requested, passed to the API unchanged
            (this file passes ``YYYY-MM-DD`` strings).
        end_date: Last day requested, passed to the API unchanged.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A Polars DataFrame with ``location``, ``timestamp`` (the API's
        ``time`` strings), one column per requested variable (named as in
        ``hourly_columns`` below) and ``data_type`` (always ``"historical"``).

    Raises:
        KeyError: The response has no ``hourly`` block, or that block lacks
            ``time`` or any requested variable. Callers in this file catch
            ``KeyError`` to skip the location.
        requests.exceptions.RequestException: The request failed or timed out.
    """
    # API hourly variable name -> column name in the returned frame. The
    # request string and the extraction below are both driven from this
    # mapping so the two cannot drift apart.
    hourly_columns = {
        "temperature_2m": "temperature_2m",
        "dewpoint_2m": "dewpoint_2m",
        "wind_gusts_10m": "wind_gusts_10m",
        "visibility": "visibility",
        "cloudcover": "cloudcover",
        "precipitation_probability": "precipitation_probability",
        "relative_humidity_2m": "relative_humidity_2m",
        "sunshine_duration": "sunshine_duration",
        "vapour_pressure_deficit": "vapor_pressure",
        "rain": "rain",
        "soil_temperature_0_to_7cm": "soil_temp",
    }
    api_url = "https://api.open-meteo.com/v1/forecast"
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "hourly": ",".join(hourly_columns),
        "timezone": "America/Denver"  # Adjust timezone as needed
    }
    # Without a timeout a stalled connection would block the app forever.
    response = requests.get(api_url, params=params, timeout=timeout)
    data = response.json()

    # Check if 'hourly' data is available
    hourly_data = data.get("hourly") if isinstance(data, dict) else None
    if hourly_data is None:
        # Error responses carry a human-readable "reason"; surface it if present.
        reason = data.get("reason") if isinstance(data, dict) else None
        detail = f": {reason}" if reason else ""
        raise KeyError(f"'hourly' key not found in API response for {location}{detail}")

    # A missing variable used to be replaced by an empty list, which cannot
    # be combined with the full-length timestamp column. Report it as the
    # KeyError the caller already handles instead.
    missing = [name for name in ("time", *hourly_columns) if name not in hourly_data]
    if missing:
        raise KeyError(f"hourly variables {missing} not found in API response for {location}")

    # Create DataFrame for historical weather data; the scalar values are
    # repeated for every row.
    columns = {"location": location, "timestamp": hourly_data["time"]}
    for variable, column in hourly_columns.items():
        columns[column] = hourly_data[variable]
    columns["data_type"] = "historical"  # Label the data as historical
    return pl.DataFrame(columns)
# Date range requested for every location.
start_date = "2024-06-01"
end_date = "2024-07-15"

# Fetch each location in turn; a location whose response raises KeyError is
# reported on stdout and skipped.
data_frames = []
for loc, info in locations.items():
    try:
        df = get_historical_weather_data(
            loc, info["latitude"], info["longitude"], start_date, end_date
        )
    except KeyError as e:
        print(e)
        continue
    else:
        data_frames.append(df)

# Nothing to show if every location was skipped.
if not data_frames:
    raise ValueError("No data fetched for any location.")
combined_df = pl.concat(data_frames)

# Parse the timestamp string once and derive every time column from it.
_weather_ts = pl.col("timestamp").str.strptime(pl.Datetime)
combined_df = combined_df.with_columns([
    _weather_ts.alias("datetime"),
    _weather_ts.dt.date().alias("date"),
    _weather_ts.dt.weekday().alias("day_of_week"),
    _weather_ts.dt.hour().alias("hour_of_day"),
    # Convert temperature to Fahrenheit
    (pl.col("temperature_2m") * (9 / 5) + 32).alias("temperature_2m_f"),
])

# Keep only the reporting columns, in display order.
weather_combined_df = combined_df.select([
    "location",
    "datetime",
    "date",
    "day_of_week",
    "hour_of_day",
    "temperature_2m_f",
    "dewpoint_2m",
    "wind_gusts_10m",
    "visibility",
    "cloudcover",
    "precipitation_probability",
    "relative_humidity_2m",
    "sunshine_duration",
    "vapor_pressure",
    "rain",
    "soil_temp",
    "data_type",
])

# Show the updated DataFrame
print(weather_combined_df)
# COMBINING DATA
# Pair each forecast row with the weather row that has the same location,
# date, weekday number and hour. Non-key columns that exist on both sides
# get the "_right" suffix on the weather side (e.g. data_type_right).
# NOTE(review): both fetch functions in this file call the same endpoint
# (https://api.open-meteo.com/v1/forecast) for the same dates, so the
# "historical_forecast" and "historical" temperatures are presumably
# identical -- confirm which data source each was meant to use.
df = forecast_combined_df.join(
    weather_combined_df,
    left_on=["location", "date", "day_of_week", "hour_of_day"],
    right_on=["location", "date", "day_of_week", "hour_of_day"],
    how="inner"
)

# Weekday number -> name, keyed by the ISO numbering (Monday=1 .. Sunday=7)
# that Polars' dt.weekday() returns. The previous 0..6 keys shifted every
# name by one day and left Sunday unmapped.
day_name_map = {1: "Monday", 2: "Tuesday", 3: "Wednesday", 4: "Thursday", 5: "Friday", 6: "Saturday", 7: "Sunday"}

df = df.with_columns([
    pl.col("temperature_2m").alias("historical_forecast"),
    pl.col("temperature_2m_f").alias("historical"),
    # Format the date as its full weekday name directly; this replaces the
    # integer day_of_week column and does not depend on any number mapping.
    pl.col("date").dt.strftime("%A").alias("day_of_week"),
])

# Drop the source temperature columns (now renamed) and both data_type labels.
df = df.drop(["temperature_2m", "temperature_2m_f", "data_type", "data_type_right"])
# CITY TABLES
def _city_table(city):
    """Return the rows of ``df`` for one city as Date/Hour/forecast/actual columns."""
    return df.filter(pl.col("location") == city).select([
        pl.col("date").alias("Date"),
        pl.col("hour_of_day").alias("Hour"),
        pl.col("historical_forecast").alias("Historical_Forecast"),
        pl.col("historical").alias("Historical"),
    ])


# One table per city, each with the same four renamed columns.
rexburg = _city_table("Rexburg")
laie = _city_table("Laie")
provo = _city_table("Provo")
# Sidebar
# Daily high: the maximum "historical" temperature per (location, date),
# ordered by location then date.
# Current Polars names this method `group_by`; the older `groupby` spelling
# is only used as a fallback where `group_by` is not available.
_group_by = getattr(df, "group_by", None) or df.groupby
df_streamlit_select = (
    _group_by("location", "date")
    .agg(pl.col("historical").max().alias("daily_high"))
    .sort(["location", "date"])
)

# STREAMLIT TABLES
# Pandas copies of the per-city tables for st.dataframe.
rexburg_streamlit = rexburg.to_pandas()
laie_streamlit = laie.to_pandas()
provo_streamlit = provo.to_pandas()
def main():
    """Render the page title and the three per-city tables side by side."""
    st.title("Weather Data: Historical Vs Historical Forecast")

    # (heading, table) pairs in left-to-right display order.
    city_tables = (
        ("### Rexburg Data Table", rexburg_streamlit),
        ("### Laie Data Table", laie_streamlit),
        ("### Provo Data Table", provo_streamlit),
    )
    # One column per table.
    for column, (heading, table) in zip(st.columns(3), city_tables):
        with column:
            st.write(heading)
            st.dataframe(table)


# The rest of the script below this guard is module-level code and runs
# whether or not main() is called.
if __name__ == "__main__":
    main()
# ALL CITIES
# Every city's hourly "historical" temperature, ordered by date then hour.
all_cities = df.select([
    pl.col('location').alias('City'),
    pl.col('date').alias('Date'),
    pl.col('hour_of_day').alias('Hour'),
    pl.col('historical').alias('Temperature')
])
all_cities = all_cities.sort(by=['Date', 'Hour'])
cities_streamlit = all_cities.to_pandas()

# SIDE BAR
st.sidebar.title("Filters")

# Daily high per (location, date), ordered by location then date.
# Current Polars names this method `group_by`; the older `groupby` spelling
# is only used as a fallback where `group_by` is not available.
_group_by = getattr(df, "group_by", None) or df.groupby
df_streamlit_select = _group_by('location', 'date').agg(
    pl.col('historical').max().alias('daily_high')
)
df_streamlit_select = df_streamlit_select.sort(['location', 'date'])

# Pandas copy of the joined data; the KPI gauges filter this frame.
kpi_streamlit = df.to_pandas()

# Full date span of the data, used as the default date selection.
date_min = kpi_streamlit['date'].min()
date_max = kpi_streamlit['date'].max()

# Create city selection widget
cities = kpi_streamlit['location'].unique()
selected_city = st.sidebar.selectbox('Select a city', cities, key='city_selector')

# Create date range selection widget with unique key
# NOTE(review): the line-chart section further down assigns `selected_dates`
# again from its own date picker, so this sidebar selection is overwritten
# before the KPI gauges read it -- confirm which picker should drive them.
selected_dates = st.sidebar.date_input('Select start and end date', [date_min, date_max], key='date_range_selector')

# Metrics offered for the max/min gauges. Every entry must be a column of
# the joined frame, because the gauges index the frame with the selection.
# The former 'precipitation' entry matched no column; the fetched variable
# is 'precipitation_probability'.
metrics = [
    'dewpoint_2m',
    'wind_gusts_10m',
    'visibility',
    'cloudcover',
    'precipitation_probability',
    'relative_humidity_2m',
    'sunshine_duration',
    'vapor_pressure',
    'rain',
    'soil_temp'
]
selected_metric = st.sidebar.selectbox('Select a metric', metrics, key='metric_selector')

# Weekday filter for the average-temperature bar chart.
dow_data = df.to_pandas()
days_of_week = dow_data['day_of_week'].unique()
selected_day = st.sidebar.selectbox('Select a day of the week', days_of_week)

# Create a widget for selecting the weather variable
weather_variables = [
    "dewpoint_2m", "wind_gusts_10m", "visibility",
    "cloudcover", "precipitation_probability",
    "relative_humidity_2m", "sunshine_duration",
    'vapor_pressure', 'soil_temp',
]
selected_variable = st.sidebar.selectbox('Select a weather variable', weather_variables)
##### Interactive Dashboard
# Line Chart
df_pandas = df_streamlit_select.to_pandas()

# Create date range selection widget, defaulting to the full span of the data.
date_min = df_pandas['date'].min()
date_max = df_pandas['date'].max()
selected_dates = st.date_input('Select date range', [date_min, date_max])

# A range picker can hand back fewer than two dates while the user is still
# choosing (for example only the start date), and indexing [1] would then
# raise IndexError. Fill any missing end with the bounds of the data so
# `selected_dates` always holds exactly (start, end) for the code below.
if not isinstance(selected_dates, (list, tuple)):
    selected_dates = (selected_dates,)
if len(selected_dates) == 0:
    selected_dates = (date_min, date_max)
elif len(selected_dates) == 1:
    selected_dates = (selected_dates[0], date_max)

# Filter data based on user input (both ends inclusive).
range_start = pd.to_datetime(selected_dates[0])
range_end = pd.to_datetime(selected_dates[1])
filtered_df = df_streamlit_select.filter(
    (pl.col('date') >= pl.lit(range_start)) &
    (pl.col('date') <= pl.lit(range_end))
)

# Convert filtered Polars DataFrame to Pandas DataFrame for Streamlit display
filtered_df_pandas = filtered_df.to_pandas()

# Create a line chart using Plotly Express with one line per location
fig = px.line(filtered_df_pandas, x='date', y='daily_high', color='location',
              title='Daily High Temperatures by Location')
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Daily High Temperature (°F)'
)

# Show the Plotly chart in Streamlit
st.plotly_chart(fig, use_container_width=True)
# BOX PLOT
# Hourly "historical" temperatures with their location and time columns,
# converted to pandas for Plotly.
hour_df = df.select(['location', 'datetime', 'date', 'historical'])
df_pandas = hour_df.to_pandas()

_box_labels = {'location': 'Location', 'historical': 'Hourly Temperature'}
fig = px.box(
    df_pandas,
    x='location',
    y='historical',
    labels=_box_labels,
    title='Hourly Temperature Distribution by Location',
)
# Display the boxplot in Streamlit
st.plotly_chart(fig)

# HISTOGRAM
# One facet per location over the same hourly temperatures.
_hist_labels = {'historical': 'Historical Temperature', 'location': 'Location', 'count': 'Frequency'}
fig = px.histogram(
    df_pandas,
    x='historical',
    facet_col='location',
    nbins=30,  # Adjust the number of bins as needed
    labels=_hist_labels,
    title='Histogram of Historical Temperatures by Location',
)


def _strip_facet_prefix(annotation):
    """Keep only the text after the last '=' in a facet annotation."""
    return annotation.update(text=annotation.text.split("=")[-1])


fig.for_each_annotation(_strip_facet_prefix)
# Display the faceted histogram in Streamlit
st.plotly_chart(fig)
# MAX & MIN VALUE
import plotly.graph_objects as go

# `selected_dates` comes from a range date picker, which can hold fewer than
# two dates while the user is still choosing. Fall back to the bounds of the
# data for any missing end instead of raising IndexError.
_picked = tuple(selected_dates) if isinstance(selected_dates, (list, tuple)) else (selected_dates,)
_range_start = pd.to_datetime(_picked[0]) if len(_picked) >= 1 else kpi_streamlit['date'].min()
_range_end = pd.to_datetime(_picked[1]) if len(_picked) >= 2 else kpi_streamlit['date'].max()

# Rows for the selected city inside the selected date range (inclusive).
filtered_df = kpi_streamlit[
    (kpi_streamlit['location'] == selected_city) &
    (kpi_streamlit['date'] >= _range_start) &
    (kpi_streamlit['date'] <= _range_end)
]

# Extremes of the selected metric over those rows.
max_value = filtered_df[selected_metric].max()
min_value = filtered_df[selected_metric].min()


def _metric_gauge(prefix, value, gauge=None):
    """Build a gauge+number indicator titled '<prefix> <Metric Name>: <value>'."""
    indicator_args = {
        'mode': "gauge+number",
        'value': value,
        'domain': {'x': [0, 1], 'y': [0, 1]},
        'title': {'text': f"{prefix} {selected_metric.replace('_', ' ').title()}: {value}"},
    }
    if gauge is not None:
        indicator_args['gauge'] = gauge
    return go.Figure(go.Indicator(**indicator_args))


# Only the indicator figures were ever displayed; the bar charts that used
# to be built first were overwritten before use and have been removed.
fig = _metric_gauge("Max", max_value)

# The min gauge has a red bar. Its axis upper bound is pinned to 120% of the
# value only when that is a positive number: for a zero, negative or NaN
# minimum the same expression gives a bound at or below the value, so the
# axis range is then left to Plotly's default.
_min_gauge = {'bar': {'color': 'red'}}
if min_value > 0:
    _min_gauge['axis'] = {'range': [None, min_value * 1.2]}
thing = _metric_gauge("Min", min_value, gauge=_min_gauge)

# MAX & MIN DISPLAY
col1, col2 = st.columns(2)
with col1:
    st.plotly_chart(fig)
with col2:
    st.plotly_chart(thing)
# Additional Inputs
st.title("Average Temperature by City")

# Weekday names in calendar order, shared by the categorical column and the
# x-axis ordering below.
_weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
# Human-readable form of the selected variable, e.g. "wind_gusts_10m" -> "Wind Gusts 10M".
_variable_label = selected_variable.replace('_', ' ').title()

# Bar chart: mean "historical" temperature per city on the selected weekday.
filtered_df = dow_data[dow_data['day_of_week'] == selected_day]
avg_temp_per_city = filtered_df.groupby('location')['historical'].mean().reset_index()
my_chart = px.bar(
    avg_temp_per_city,
    x='location',
    y='historical',
    color='historical',
    color_continuous_scale=px.colors.sequential.Plasma,
    title=f"Average Temperature for Each City on {selected_day}",
    labels={'location': 'City', 'historical': 'Average Temperature (°F)'},
)

# Conditions Visualizations
# Line chart: mean of the selected weather variable per weekday and city.
conditions_data = df.to_pandas()
# An ordered categorical keeps the weekdays in calendar order when grouped.
conditions_data['day_of_week'] = pd.Categorical(
    conditions_data['day_of_week'], categories=_weekday_order, ordered=True
)
filtered_df = (
    conditions_data
    .groupby(['day_of_week', 'location'])[selected_variable]
    .mean()
    .reset_index()
)
bobby = px.line(
    filtered_df,
    x='day_of_week',
    y=selected_variable,
    color='location',
    labels={selected_variable: f'Average {_variable_label}'},
    title=f"Average {_variable_label} by Day of the Week",
)
# Ensure the x-axis has the correct order for days of the week
bobby.update_xaxes(
    title='Day of Week',
    categoryorder='array',
    categoryarray=_weekday_order,
)

# Display the two charts side by side.
col1, col2 = st.columns(2)
with col1:
    st.plotly_chart(my_chart)
with col2:
    st.plotly_chart(bobby)