# IS445_Final / app.py
# (page header captured with the file: "mihir-s's picture")
# Last commit: "refined the plots" (9636c0b, verified)
import openmeteo_requests
import requests_cache
from retry_requests import retry
import streamlit as st
import altair as alt
import numpy as np
import pandas as pd
import time
# 1. Data Extraction and Data Transformation
# Fixed input parameters shared by the API request and the visualizations.
# These are plain module-level assignments; a `global` statement is only
# meaningful inside a function, so none is needed here.

# Hourly variables requested from the air-quality API. The same names are used
# as the DataFrame column names, in this order.
air_quality_vars = [
    "pm10",
    "pm2_5",
    "carbon_monoxide",
    "carbon_dioxide",
    "nitrogen_dioxide",
    "sulphur_dioxide",
    "ozone",
    "aerosol_optical_depth",
    "dust",
    "uv_index",
    "uv_index_clear_sky",
    "ammonia",
    "methane",
    "alder_pollen",
    "birch_pollen",
    "grass_pollen",
    "mugwort_pollen",
    "olive_pollen",
    "ragweed_pollen",
]

# Latitude / longitude sent with every request.
lat = 40.11
lng = -88.24

# US AQI bands: (lower, upper) inclusive bounds -> (category label, CSS colour).
aqi_dict = {
    (0, 50): ("Good", "green"),
    (51, 100): ("Moderate", "yellow"),
    (101, 150): ("Unhealthy for Sensitive Groups", "orange"),
    (151, 200): ("Unhealthy", "red"),
    (201, 300): ("Very Unhealthy", "purple"),
    (301, 500): ("Hazardous", "maroon"),
}

# Reference picture of the AQI colour scale displayed next to the current AQI.
image_url = 'https://mayor.dc.gov/sites/default/files/dc/sites/mayormb/release_content/images/AQ-June9.png'
# Request air quality data
def air_api_request():
    """Request hourly air-quality data and the current US AQI from Open-Meteo.

    The request uses the module-level ``lat``, ``lng`` and ``air_quality_vars``
    and asks for one forecast day from the ``cams_global`` domain.

    Returns:
        tuple: ``(hourly_dataframe, current_us_aqi)``. ``hourly_dataframe`` is a
        ``pandas.DataFrame`` with a ``date`` column followed by one column per
        entry of ``air_quality_vars`` (same order). ``current_us_aqi`` is the
        value of the single ``current`` variable requested (``us_aqi``).
    """
    # Cached session (entries expire after 3600 s) wrapped in a retrying
    # session (5 retries, backoff factor 0.2).
    cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
    retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
    openmeteo = openmeteo_requests.Client(session=retry_session)

    url = "https://air-quality-api.open-meteo.com/v1/air-quality"
    params = {
        "latitude": lat,
        "longitude": lng,
        "current": "us_aqi",
        "hourly": air_quality_vars,
        "timezone": "America/Chicago",
        "forecast_days": 1,
        "domains": "cams_global",
    }
    responses = openmeteo.weather_api(url, params=params)
    # Only one location is requested, so only the first response is used.
    response = responses[0]

    # "current" holds a single variable (us_aqi), hence index 0.
    current_us_aqi = response.Current().Variables(0).Value()

    hourly = response.Hourly()
    # NOTE(review): the time axis is built as UTC although the request asks
    # for "America/Chicago" — confirm which timezone the UI should display.
    hourly_data = {
        "date": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True) - pd.Timedelta(seconds=1),
            freq=pd.Timedelta(seconds=hourly.Interval()),
        )
    }
    # Hourly variables are read back by position, in the order they were
    # requested. Driving the loop from air_quality_vars keeps each index tied
    # to its column name, so editing that list cannot mislabel a column.
    for position, variable_name in enumerate(air_quality_vars):
        hourly_data[variable_name] = hourly.Variables(position).ValuesAsNumpy()

    hourly_dataframe = pd.DataFrame(data=hourly_data)
    return hourly_dataframe, current_us_aqi
# 2. Refresh Button
st.title('Streamlit App for Final Project Group 1')
st.markdown('**Group Member: Jingyi Huang, Ethan Shin, Mihir Sahasrabudhe, Arjav Malay Parekh, Yu Huang**')
st.markdown('Air Quality Analysis for ***Champaign, IL*** (latitude, longitude: **40.11,-88.24**)')


def refresh_data():
    """Fetch the air-quality data and rebind the module-level results.

    Rebinds both ``df_air_quality`` and ``us_aqi`` so that the table and the
    AQI headline rendered later in the script come from the same request, and
    adds the ``datetime`` display column (``'%d %b %H:%M'``) to the table.
    """
    # Both names must be declared global: without `us_aqi` here the refreshed
    # AQI would only be bound to a local and silently dropped.
    global df_air_quality, us_aqi
    df_air_quality, us_aqi = air_api_request()
    df_air_quality['datetime'] = pd.to_datetime(df_air_quality['date']).dt.strftime('%d %b %H:%M')


# Initial Request
refresh_data()

st.subheader('Updated Air Quality Data')
if st.button('Refresh Data', type='primary'):
    # The request itself is what can fail, so it has to live inside the try;
    # on failure the data from the initial request stays in place.
    try:
        refresh_data()
    except Exception as e:
        st.error(f'An error occurred: {e}')
    else:
        alert = st.success('Data refreshed successfully! The alert will disappear after 3 seconds.')
        time.sleep(3)
        alert.empty()
st.dataframe(df_air_quality[['datetime'] + air_quality_vars])
# 3. Data Visualizations
## US Air Quality Index Scale
# Map the current AQI to its category label and display colour.
# Fallback used when the value lies outside every band (or is NaN).
category = "Unknown"
color = "black"
# The bands in aqi_dict are inclusive integer ranges (0-50, 51-100, ...), but
# us_aqi is a float. Testing `low <= value <= high` per band would leave a
# value such as 50.5 unmatched, so walk the bands in ascending order and take
# the first one whose upper bound is not exceeded.
aqi_bands = sorted(aqi_dict.items())
if aqi_bands and us_aqi >= aqi_bands[0][0][0]:
    for (_band_low, band_high), (cat, col) in aqi_bands:
        if us_aqi <= band_high:
            category, color = cat, col
            break
st.subheader('US Air Quality Index (AQI)')
st.markdown(
    f"#### Today's Air Quality Index in Champaign, IL is <span style='color:{color}'>{us_aqi:.2f} ({category})</span> ####",
    unsafe_allow_html=True,
)
st.image(image_url, caption="Air Quality Index Scale", use_container_width=True)
# Citation for the AQI scale image shown above.
st.markdown(
    """
District of Columbia Mayor’s Office. (2023, June 9). *Air quality continues to improve in DC: Air Quality Index now downgraded to Code Yellow.* Retrieved November 30, 2024, from [https://mayor.dc.gov/release/air-quality-continues-improve-dc-air-quality-index-now-downgraded-code-yellow](https://mayor.dc.gov/release/air-quality-continues-improve-dc-air-quality-index-now-downgraded-code-yellow)
"""
)
# -----------------------------------------------------------
# Additional Trial and Error Visualizations (Unpolished/Cluttered)
# Early prototypes kept on purpose to document the design process.
# (altair is already imported as `alt` at the top of the file.)
# -----------------------------------------------------------
st.markdown("""
### Early Trial Visualizations (Cluttered Prototypes)
Below are some of our initial attempts at visualizing all pollutants together.
These attempts are intentionally left here to demonstrate the "scaffolding" nature
of our work. They are cluttered and not very user-friendly, but they show our trial-and-error process.
""")
# Reshape the wide table (one column per pollutant) into long format: one row
# per (datetime, Pollutant) pair with its value in "Concentration", so that a
# single chart can colour-encode every pollutant at once.
long_df = df_air_quality.melt(
    id_vars="datetime",
    value_vars=air_quality_vars,
    var_name="Pollutant",
    value_name="Concentration",
)
# Attempt 1: A single line chart with ALL pollutants at once
# This leads to a very cluttered chart where it's hard to distinguish individual lines.
st.markdown("#### Attempt 1: All Pollutants in One Line Chart")
all_in_one_line = alt.Chart(long_df).mark_line().encode(
    # 'datetime' holds preformatted strings used as an ordinal axis. Ordinal
    # values are sorted ascending (lexically) by default, which misorders a
    # window that crosses a month boundary ("01 Dec" sorts before "30 Nov").
    # sort=None keeps the rows' own order instead.
    x=alt.X('datetime:O', sort=None, title='Date and Time'),
    y=alt.Y('Concentration:Q', title='Concentration'),
    color=alt.Color('Pollutant:N', legend=alt.Legend(title='Pollutant')),
    tooltip=['datetime', 'Pollutant', 'Concentration'],
).properties(
    width=700,
    height=400,
    title="A Very Overcrowded Line Chart",
)
st.altair_chart(all_in_one_line, use_container_width=True)
st.markdown("""
*As you can see, this single chart becomes difficult to interpret due to the sheer
number of lines and colors overlapping. While it technically "works," it doesn't provide
clear insights at a glance.*
""")
# Attempt 2: A scatter plot of all pollutants over time
# Again, this will be cluttered. Each pollutant on the same time axis, different colors.
# With so many pollutants, the chart becomes a mass of points.
st.markdown("#### Attempt 2: Scatter Plot of All Pollutants Over Time")
all_in_one_scatter = alt.Chart(long_df).mark_circle(size=40).encode(
    # The axis values are formatted strings on an ordinal scale; sort=None
    # keeps them in row order rather than the default lexical order, which
    # would misplace timestamps when the window crosses a month boundary.
    x=alt.X('datetime:O', sort=None, title='Date and Time'),
    y=alt.Y('Concentration:Q', title='Concentration'),
    color=alt.Color('Pollutant:N', legend=alt.Legend(title='Pollutant')),
    tooltip=['datetime', 'Pollutant', 'Concentration'],
).properties(
    width=700,
    height=400,
    title="Scatter Plot with All Pollutants",
)
st.altair_chart(all_in_one_scatter, use_container_width=True)
st.markdown("""
*This scatter plot presents all pollutants simultaneously as well. While we can see
some variance in concentration over time, the chart is noisy and doesn't direct the user
to any immediate insights. It's a good reminder that "more data on one chart"
does not always mean "more understanding."*
""")
# Attempt 3: one bar per pollutant, restricted to a single timestamp.
st.markdown("#### Attempt 3: Bar Chart of All Pollutants at a Single Timestamp")

# Use the first row's formatted timestamp as the snapshot to plot.
first_time = df_air_quality['datetime'].iloc[0]
at_first_time = long_df['datetime'] == first_time
single_point_data = long_df[at_first_time]

# Chart-level properties are set first, then the mark and the encodings;
# the resulting chart specification is the same either way.
all_in_one_bar = (
    alt.Chart(single_point_data)
    .properties(
        width=700,
        height=400,
        title=f"Bar Chart at {first_time}",
    )
    .mark_bar()
    .encode(
        # sort=None keeps the pollutants in the order they appear in the data.
        x=alt.X('Pollutant:N', sort=None, title='Pollutant'),
        y=alt.Y('Concentration:Q', title='Concentration'),
        tooltip=['Pollutant', 'Concentration'],
    )
)
st.altair_chart(all_in_one_bar, use_container_width=True)

# Closing commentary for the prototype section.
st.markdown("""
*At a single timestamp, a bar chart of all pollutants quickly becomes unwieldy if
we have too many pollutants. Even though it's simpler than a time-series plot,
it's still not very informative due to the volume of categories.*
---
These attempts illustrate that while we can technically display all the data
at once, it's not always the most practical or insightful approach.
This helps us understand which visualizations to refine
and which ones to discard or simplify in future iterations.
""")