import streamlit as st
import pandas as pd
import polars as pl
import requests
from datetime import datetime, timedelta
import plotly.express as px

# Sample data: a few hourly temperature readings per city.
# NOTE(review): `df` is rebound further down this script (by the weather fetch
# loop and again by the forecast/weather join), so this frame looks like
# leftover scaffolding -- confirm before relying on it.
data = dict(
    city=['Rexburg', 'Rexburg', 'Rexburg', 'Provo', 'Provo', 'Laie', 'Laie'],
    date=['2024-07-01', '2024-07-01', '2024-07-02', '2024-07-01', '2024-07-01', '2024-07-01', '2024-07-01'],
    hour=[0, 1, 0, 0, 1, 0, 1],
    temperature=[15, 14, 16, 20, 19, 25, 24],
)

# Build the Polars frame and parse the ISO date strings into Date values in one chain
df = pl.DataFrame(data).with_columns(
    pl.col("date").str.strptime(pl.Date, "%Y-%m-%d")
)

# HISTORICAL FORECAST

# Cities to query, listed as (name, latitude, longitude)
_FORECAST_SITES = (
    ("Rexburg", 43.8260, -111.7897),
    ("Provo", 40.2338, -111.6585),
    ("Laie", 21.6478, -157.9234),
)

# Map each city name to its coordinates; the fetch code iterates this mapping in order
locations = {
    name: {"latitude": lat, "longitude": lon}
    for name, lat, lon in _FORECAST_SITES
}

# Function to get historical forecast data
def get_historical_forecast_data(location, latitude, longitude, start_date, end_date,
                                 timezone="America/Denver", timeout=30):
    """Fetch the hourly ``temperature_2m`` series for one location from Open-Meteo.

    Args:
        location: Label stored in the ``location`` column of the result.
        latitude: Latitude sent to the API.
        longitude: Longitude sent to the API.
        start_date: First date of the requested range, as sent to the API.
        end_date: Last date of the requested range, as sent to the API.
        timezone: Timezone name sent to the API. Defaults to the value that was
            previously hard-coded.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A Polars DataFrame with the columns ``location``, ``timestamp``,
        ``temperature_2m`` and ``data_type`` (always ``"historical forecast"``).

    Raises:
        KeyError: If the response has no ``'hourly'`` section. The message
            includes the API's ``reason`` field when one is present.
        requests.exceptions.RequestException: If the request fails or times out.
    """
    # NOTE(review): this is the same endpoint that get_historical_weather_data
    # uses, so both functions would return the same temperature series.
    # Open-Meteo appears to offer separate historical-forecast and archive
    # endpoints -- confirm which one was intended here.
    api_url = "https://api.open-meteo.com/v1/forecast"
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "hourly": "temperature_2m",
        "timezone": timezone,
    }

    # Without a timeout a stalled connection would hang the whole app indefinitely
    response = requests.get(api_url, params=params, timeout=timeout)
    data = response.json()

    # Fail with a descriptive KeyError (mirroring get_historical_weather_data)
    # instead of a bare KeyError: 'hourly' that hides why the API refused.
    if 'hourly' not in data:
        reason = data.get('reason') if isinstance(data, dict) else None
        detail = f": {reason}" if reason else ""
        raise KeyError(f"'hourly' key not found in API response for {location}{detail}")

    # Extract hourly data
    hourly_data = data['hourly']
    timestamps = hourly_data['time']
    temperatures = hourly_data['temperature_2m']

    # Scalar values (location, data_type) are repeated for every timestamp row
    historical_forecast_df = pl.DataFrame({
        "location": location,
        "timestamp": timestamps,
        "temperature_2m": temperatures,
        "data_type": "historical forecast"  # Label the data as historical forecast
    })

    return historical_forecast_df

# Date range requested from the API
start_date = "2024-06-01"
end_date = "2024-07-15"

# One forecast frame per location, stacked into a single frame
forecast_dfs = [
    get_historical_forecast_data(loc, info['latitude'], info['longitude'], start_date, end_date)
    for loc, info in locations.items()
]
forecast_combined_df = pl.concat(forecast_dfs)

# The timestamp strings are parsed by one shared expression; date, weekday and
# hour are all derived from it.
forecast_timestamp = pl.col("timestamp").str.strptime(pl.Datetime)

forecast_combined_df = (
    forecast_combined_df
    .with_columns([
        forecast_timestamp.alias("datetime"),
        forecast_timestamp.dt.date().alias("date"),
        forecast_timestamp.dt.weekday().alias("day_of_week"),
        forecast_timestamp.dt.hour().alias("hour_of_day"),
        # Overwrites temperature_2m in place with value * 9/5 + 32
        (pl.col("temperature_2m") * (9 / 5) + 32).alias("temperature_2m"),
    ])
    # Keep only the derived columns, in display order
    .select([
        "location", "datetime", "date", "day_of_week", "hour_of_day", "temperature_2m", "data_type"
    ])
)

# Dump the processed frame to stdout
print(forecast_combined_df)


# HISTORICAL DATA

# City name -> coordinates used for the historical weather requests
locations = {
    "Rexburg": dict(latitude=43.8260, longitude=-111.7897),
    "Provo": dict(latitude=40.2338, longitude=-111.6585),
    "Laie": dict(latitude=21.6478, longitude=-157.9234),
}

# Function to get historical weather data
def get_historical_weather_data(location, latitude, longitude, start_date, end_date,
                                timezone="America/Denver", timeout=30):
    """Fetch a set of hourly weather variables for one location from Open-Meteo.

    Args:
        location: Label stored in the ``location`` column of the result.
        latitude: Latitude sent to the API.
        longitude: Longitude sent to the API.
        start_date: First date of the requested range, as sent to the API.
        end_date: Last date of the requested range, as sent to the API.
        timezone: Timezone name sent to the API. Defaults to the value that was
            previously hard-coded.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A Polars DataFrame with ``location``, ``timestamp``, one column per
        requested variable, and ``data_type`` (always ``"historical"``).

    Raises:
        KeyError: If the response has no ``'hourly'`` section. The message
            includes the API's ``reason`` field when one is present.
        requests.exceptions.RequestException: If the request fails or times out.
    """
    api_url = "https://api.open-meteo.com/v1/forecast"

    # (output column name, API variable name), in request and column order
    hourly_variables = [
        ("temperature_2m", "temperature_2m"),
        ("dewpoint_2m", "dewpoint_2m"),
        ("wind_gusts_10m", "wind_gusts_10m"),
        ("visibility", "visibility"),
        ("cloudcover", "cloudcover"),
        ("precipitation_probability", "precipitation_probability"),
        ("relative_humidity_2m", "relative_humidity_2m"),
        ("sunshine_duration", "sunshine_duration"),
        ("vapor_pressure", "vapour_pressure_deficit"),
        ("rain", "rain"),
        ("soil_temp", "soil_temperature_0_to_7cm"),
    ]

    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "hourly": ",".join(api_name for _, api_name in hourly_variables),
        "timezone": timezone,
    }

    # Without a timeout a stalled connection would hang the whole app indefinitely
    response = requests.get(api_url, params=params, timeout=timeout)
    data = response.json()

    # Callers catch KeyError to skip a location, so keep that exception type
    # and surface the API's explanation when it supplies one.
    if 'hourly' not in data:
        reason = data.get('reason') if isinstance(data, dict) else None
        detail = f": {reason}" if reason else ""
        raise KeyError(f"'hourly' key not found in API response for {location}{detail}")

    hourly_data = data['hourly']

    # Scalar values (location, data_type) are repeated for every timestamp row
    columns = {
        "location": location,
        "timestamp": hourly_data['time'],
    }
    for column_name, api_name in hourly_variables:
        # A variable absent from the response falls back to an empty list, as before
        columns[column_name] = hourly_data.get(api_name, [])
    columns["data_type"] = "historical"  # Label the data as historical

    historical_weather_df = pl.DataFrame(columns)

    return historical_weather_df

# Date range requested from the API
start_date = "2024-06-01"
end_date = "2024-07-15"

# Collect one weather frame per location, skipping any location whose response
# had no hourly section (reported as KeyError by the fetch function).
data_frames = []
for loc, info in locations.items():
    try:
        df = get_historical_weather_data(loc, info['latitude'], info['longitude'], start_date, end_date)
    except KeyError as e:
        print(e)
    else:
        data_frames.append(df)

# Nothing to build a dashboard from if every location failed
if not data_frames:
    raise ValueError("No data fetched for any location.")
combined_df = pl.concat(data_frames)

# The timestamp strings are parsed by one shared expression; date, weekday and
# hour are all derived from it.
weather_timestamp = pl.col("timestamp").str.strptime(pl.Datetime)

combined_df = combined_df.with_columns([
    weather_timestamp.alias("datetime"),
    weather_timestamp.dt.date().alias("date"),
    weather_timestamp.dt.weekday().alias("day_of_week"),
    weather_timestamp.dt.hour().alias("hour_of_day"),
    # New column: temperature_2m * 9/5 + 32 (the original column is left untouched)
    (pl.col("temperature_2m") * (9 / 5) + 32).alias("temperature_2m_f"),
])

# Keep the derived columns plus every weather variable, in display order
weather_columns = [
    "location", "datetime", "date", "day_of_week", "hour_of_day", "temperature_2m_f", "dewpoint_2m", "wind_gusts_10m", "visibility", "cloudcover", "precipitation_probability", "relative_humidity_2m", "sunshine_duration", 'vapor_pressure', 'rain', 'soil_temp', "data_type"
]
weather_combined_df = combined_df.select(weather_columns)

# Dump the processed frame to stdout
print(weather_combined_df)


# COMBINING DATA

# Rows are matched when city, calendar date, weekday and hour all agree.
# Right-hand columns whose names clash with the left frame (for example
# data_type) are suffixed with "_right" by the join.
join_keys = ["location", 'date', 'day_of_week', 'hour_of_day']

df = forecast_combined_df.join(
    weather_combined_df,
    left_on=join_keys,
    right_on=join_keys,
    how="inner",
)

# Polars `dt.weekday()` returns the ISO weekday number (Monday = 1 ... Sunday = 7),
# so the lookup must be keyed 1-7. With 0-based keys every name is shifted by one
# day and Sunday (7) has no entry at all.
day_name_map = {1: "Monday", 2: "Tuesday", 3: "Wednesday", 4: "Thursday", 5: "Friday", 6: "Saturday", 7: "Sunday"}

# Give the two temperature series descriptive names and replace the numeric
# weekday with its name looked up in day_name_map.
derived_columns = [
    pl.col("temperature_2m").alias("historical_forecast"),
    pl.col("temperature_2m_f").alias("historical"),
    pl.col('date').dt.weekday().map_dict(day_name_map).alias('day_of_week'),
]

# The source temperature columns and both data_type labels are no longer needed
superseded_columns = ["temperature_2m", "temperature_2m_f", 'data_type', 'data_type_right']

df = df.with_columns(derived_columns).drop(superseded_columns)


# CITY TABLES

def _city_table(city):
    """Return the date, hour and both temperature columns of ``df`` for one city."""
    return df.filter(pl.col('location') == city).select([
        pl.col('date').alias('Date'),
        pl.col('hour_of_day').alias('Hour'),
        pl.col('historical_forecast').alias('Historical_Forecast'),
        pl.col('historical').alias('Historical'),
    ])


rexburg = _city_table('Rexburg')
laie = _city_table('Laie')
provo = _city_table('Provo')


# Sidebar

# Highest `historical` value per location and date, ordered by location then date
df_streamlit_select = (
    df.groupby('location', 'date')
    .agg(pl.col('historical').max().alias('daily_high'))
    .sort(['location', 'date'])
)


# STREAMLIT TABLES

# Pandas copies of the per-city tables, used by main() for display
rexburg_streamlit, laie_streamlit, provo_streamlit = (
    city_table.to_pandas() for city_table in (rexburg, laie, provo)
)

def main():
    """Render the page title and the three per-city tables side by side."""
    st.title("Weather Data: Historical Vs Historical Forecast")

    # (heading, table) for each column, left to right
    tables = (
        ("### Rexburg Data Table", rexburg_streamlit),
        ("### Laie Data Table", laie_streamlit),
        ("### Provo Data Table", provo_streamlit),
    )

    # One Streamlit column per table
    for column, (heading, table) in zip(st.columns(3), tables):
        with column:
            st.write(heading)
            st.dataframe(table)


# main() is invoked here, part-way through the script, so the tables are
# rendered before the statements below this point run.
if __name__ == "__main__":
    main()


# ALL CITIES

# Long-form table of every hourly `historical` reading, ordered by date then hour
all_cities = df.select([
    pl.col('location').alias('City'),
    pl.col('date').alias('Date'),
    pl.col('hour_of_day').alias('Hour'),
    pl.col('historical').alias('Temperature'),
]).sort(by=['Date', 'Hour'])

# Pandas copy of the table
cities_streamlit = all_cities.to_pandas()


# SIDE BAR

st.sidebar.title("Filters")

# Highest `historical` value per location and date, ordered by location then date
df_streamlit_select = (
    df.groupby('location', 'date')
    .agg(pl.col('historical').max().alias('daily_high'))
    .sort(['location', 'date'])
)

# Pandas copy of the joined data, used for the KPI widgets and gauges
kpi_streamlit = df.to_pandas()

# City picker
cities = kpi_streamlit['location'].unique()
selected_city = st.sidebar.selectbox('Select a city', cities, key='city_selector')

# Date-range picker, pre-filled with the full span of the data
kpi_dates_column = kpi_streamlit['date']
date_min = kpi_dates_column.min()
date_max = kpi_dates_column.max()
selected_dates = st.sidebar.date_input(
    'Select start and end date',
    [date_min, date_max],
    key='date_range_selector',
)

# Columns offered in the "Select a metric" picker. Every entry must be a column
# of the joined frame, because the chosen name is used to index it directly.
# The frame has `precipitation_probability`, not `precipitation`; the old
# entry raised KeyError when selected.
metrics = [
    'dewpoint_2m',
    'wind_gusts_10m',
    'visibility',
    'cloudcover',
    'precipitation_probability',
    'relative_humidity_2m',
    'sunshine_duration',
    'vapor_pressure',
    'rain',
    'soil_temp'
]


# Metric whose max/min gauges are shown
selected_metric = st.sidebar.selectbox('Select a metric', metrics, key='metric_selector')

# Pandas copy of the joined data for the day-of-week chart
dow_data = df.to_pandas()

# Weekday picker, limited to the weekday values present in the data
days_of_week = dow_data['day_of_week'].unique()
selected_day = st.sidebar.selectbox('Select a day of the week', days_of_week)


# Weather variable plotted in the by-weekday line chart
weather_variables = [
    "dewpoint_2m",
    "wind_gusts_10m",
    "visibility",
    "cloudcover",
    "precipitation_probability",
    "relative_humidity_2m",
    "sunshine_duration",
    'vapor_pressure',
    'soil_temp',
]
selected_variable = st.sidebar.selectbox('Select a weather variable', weather_variables)


##### Interactive Dashboard

# Line Chart

df_pandas = df_streamlit_select.to_pandas()

# Date-range picker, pre-filled with the full span of the data
date_min = df_pandas['date'].min()
date_max = df_pandas['date'].max()
selected_dates = st.date_input('Select date range', [date_min, date_max])

# A range-style date_input yields fewer than two dates while the user is still
# picking (after the first click it holds only the start date). Indexing [1]
# would then raise IndexError, so fill the missing ends from the data's span.
if not isinstance(selected_dates, (list, tuple)):
    selected_dates = (selected_dates,)
if len(selected_dates) < 2:
    range_start = selected_dates[0] if selected_dates else date_min
    selected_dates = (range_start, date_max)

# Keep only the rows inside the chosen range (both ends inclusive)
filtered_df = df_streamlit_select.filter(
    (pl.col('date') >= pl.lit(pd.to_datetime(selected_dates[0]))) &
    (pl.col('date') <= pl.lit(pd.to_datetime(selected_dates[1])))
)

# Plotly Express works on pandas data
filtered_df_pandas = filtered_df.to_pandas()

# One line per location showing its daily high
fig = px.line(filtered_df_pandas, x='date', y='daily_high', color='location',
              title='Daily High Temperatures by Location')

fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Daily High Temperature (°F)'
)

# Show the Plotly chart in Streamlit
st.plotly_chart(fig, use_container_width=True)


# BOX PLOT

# Columns needed for the distribution charts
hour_df = df.select(['location', 'datetime', 'date', 'historical'])

df_pandas = hour_df.to_pandas()

# One box per location over all hourly readings
fig = px.box(
    df_pandas,
    x='location',
    y='historical',
    title='Hourly Temperature Distribution by Location',
    labels={'location': 'Location', 'historical': 'Hourly Temperature'},
)
st.plotly_chart(fig)


# HISTOGRAM

# One histogram panel per location
fig = px.histogram(
    df_pandas,
    x='historical',
    facet_col='location',
    title='Histogram of Historical Temperatures by Location',
    labels={'historical': 'Historical Temperature', 'location': 'Location', 'count':'Frequency'},
    nbins=30,
)


def _strip_facet_prefix(annotation):
    """Keep only the text after the last '=' in a facet annotation."""
    annotation.update(text=annotation.text.split("=")[-1])


fig.for_each_annotation(_strip_facet_prefix)

st.plotly_chart(fig)


# MAX VALUE

import plotly.graph_objects as go

# Restrict the KPI rows to the selected city and date range. The date column is
# normalised with pd.to_datetime so the comparison works whether it holds
# datetime64 values or plain date objects.
kpi_dates = pd.to_datetime(kpi_streamlit['date'])
filtered_df = kpi_streamlit[
    (kpi_streamlit['location'] == selected_city) &
    (kpi_dates >= pd.to_datetime(selected_dates[0])) &
    (kpi_dates <= pd.to_datetime(selected_dates[1]))
]

# Human-readable metric name, e.g. "wind_gusts_10m" -> "Wind Gusts 10M"
metric_label = selected_metric.replace('_', ' ').title()

# Largest value of the selected metric in the filtered rows
max_value = filtered_df[selected_metric].max()

# Gauge for the maximum. The earlier Plotly Express bar figure was discarded
# without being shown, so only the gauge is built.
fig = go.Figure(go.Indicator(
    mode="gauge+number",
    value=max_value,
    domain={'x': [0, 1], 'y': [0, 1]},
    title={'text': f"Max {metric_label}: {max_value}"},
))


# MIN VALUE

# Smallest value of the selected metric in the filtered rows
min_value = filtered_df[selected_metric].min()

# Gauge for the minimum. Only the last of the three previous definitions of
# `thing` was ever displayed, so that is the one kept.
thing = go.Figure(go.Indicator(
    mode="gauge+number",
    value=min_value,
    domain={'x': [0, 1], 'y': [0, 1]},
    title={'text': f"Min {metric_label}: {min_value}"},
    gauge={
        'axis': {'range': [None, min_value * 1.2]},
        'bar': {'color': 'red'}
    }
))


# MAX & MIN DISPLAY

# Max gauge in the left column, min gauge in the right
col1, col2 = st.columns(2)
col1.plotly_chart(fig)
col2.plotly_chart(thing)


# Additional Inputs

st.title("Average Temperature by City")

# Keep only the rows that fall on the weekday chosen in the sidebar
filtered_df = dow_data[dow_data['day_of_week'] == selected_day]

# Mean `historical` temperature per city, as a two-column frame
city_means = filtered_df.groupby('location')['historical'].mean()
avg_temp_per_city = city_means.reset_index()

# Bar per city, coloured by its average temperature. The chart is built here
# and rendered later in the script.
my_chart = px.bar(
    avg_temp_per_city,
    x='location',
    y='historical',
    labels={'location': 'City', 'historical': 'Average Temperature (°F)'},
    title=f"Average Temperature for Each City on {selected_day}",
    color='historical',
    color_continuous_scale=px.colors.sequential.Plasma,
)


# Conditions Visualizations

# Calendar order for weekday names, shared by the categorical column and the x-axis
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

conditions_data = df.to_pandas()

# An ordered categorical makes grouping follow calendar order rather than alphabetical
conditions_data['day_of_week'] = pd.Categorical(
    conditions_data['day_of_week'],
    categories=weekday_order,
    ordered=True,
)

# Average of the selected variable for every weekday/location pair
weekday_means = conditions_data.groupby(['day_of_week', 'location'])[selected_variable].mean()
filtered_df = weekday_means.reset_index()

# One line per location across the week. The chart is built here and rendered
# later in the script.
variable_label = selected_variable.replace('_', ' ').title()
bobby = px.line(
    filtered_df,
    x='day_of_week',
    y=selected_variable,
    color='location',
    title=f"Average {variable_label} by Day of the Week",
    labels={selected_variable: f'Average {variable_label}'},
)

# Pin the x-axis to calendar order
bobby.update_xaxes(
    title='Day of Week',
    categoryorder='array',
    categoryarray=weekday_order,
)


# Display the last two visualizations

# Average-temperature bars in the left column, by-weekday lines in the right
col1, col2 = st.columns(2)
col1.plotly_chart(my_chart)
col2.plotly_chart(bobby)