Spaces:
Sleeping
Sleeping
| import openmeteo_requests | |
| import requests_cache | |
| from retry_requests import retry | |
| import streamlit as st | |
| import altair as alt | |
| import numpy as np | |
| import pandas as pd | |
| import time | |
# 1. Data Extraction and Data Transformation
# Fixed input parameters shared by the API request and the page sections below.
# Plain module-level assignments are already global, so no `global` statement
# is needed (or meaningful) at module scope.

# Hourly variables requested from the air-quality API. The same names are
# reused as DataFrame column names, so the order here fixes the column order.
air_quality_vars = [
    "pm10", "pm2_5", "carbon_monoxide",
    "carbon_dioxide", "nitrogen_dioxide", "sulphur_dioxide", "ozone",
    "aerosol_optical_depth", "dust", "uv_index", "uv_index_clear_sky",
    "ammonia", "methane", "alder_pollen", "birch_pollen",
    "grass_pollen", "mugwort_pollen", "olive_pollen", "ragweed_pollen",
]

# Coordinates sent with the request (the page text labels them Champaign, IL).
lat = 40.11
lng = -88.24

# US AQI bands: (lower, upper) whole-number bounds -> (category label, colour
# name used to style the reading on the page).
aqi_dict = {
    (0, 50): ("Good", "green"),
    (51, 100): ("Moderate", "yellow"),
    (101, 150): ("Unhealthy for Sensitive Groups", "orange"),
    (151, 200): ("Unhealthy", "red"),
    (201, 300): ("Very Unhealthy", "purple"),
    (301, 500): ("Hazardous", "maroon"),
}

# Reference image of the AQI colour scale displayed under the current reading.
image_url = 'https://mayor.dc.gov/sites/default/files/dc/sites/mayormb/release_content/images/AQ-June9.png'
| # Request air quality data | |
def air_api_request():
    """Request today's air-quality data and return it in tabular form.

    Sends one request to the Open-Meteo air-quality endpoint for the
    module-level ``lat``/``lng``, asking for the current ``us_aqi`` value and
    every hourly variable named in ``air_quality_vars``.

    HTTP responses go through a ``requests_cache`` session (``'.cache'``,
    ``expire_after=3600``) wrapped in a retrying session (5 retries,
    backoff factor 0.2).

    Returns:
        tuple: ``(hourly_dataframe, current_us_aqi)`` where
        ``hourly_dataframe`` has a ``date`` column followed by one column per
        entry of ``air_quality_vars``, and ``current_us_aqi`` is the value of
        the single "current" variable requested.
    """
    cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
    retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
    openmeteo = openmeteo_requests.Client(session=retry_session)

    url = "https://air-quality-api.open-meteo.com/v1/air-quality"
    params = {
        "latitude": lat,
        "longitude": lng,
        "current": "us_aqi",
        "hourly": air_quality_vars,
        "timezone": "America/Chicago",
        "forecast_days": 1,
        "domains": "cams_global",
    }
    responses = openmeteo.weather_api(url, params=params)
    response = responses[0]  # a single location was requested

    # Only one "current" variable (us_aqi) is requested, so it sits at index 0.
    current_us_aqi = response.Current().Variables(0).Value()

    hourly = response.Hourly()
    # NOTE(review): the timestamps are built as UTC even though the request
    # passes timezone "America/Chicago" - confirm whether the labels shown to
    # users are meant to be local time.
    hourly_data = {
        "date": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True) - pd.Timedelta(seconds=1),
            freq=pd.Timedelta(seconds=hourly.Interval()),
        )
    }
    # Variable i of the hourly block is read as the i-th requested name, so
    # walking the request list keeps the column mapping in sync with the
    # request instead of relying on hand-maintained indices.
    for index, name in enumerate(air_quality_vars):
        hourly_data[name] = hourly.Variables(index).ValuesAsNumpy()

    hourly_dataframe = pd.DataFrame(data=hourly_data)
    return hourly_dataframe, current_us_aqi
# 2. Refresh Button
st.title('Streamlit App for Final Project Group 1')
st.markdown('**Group Member: Jingyi Huang, Ethan Shin, Mihir Sahasrabudhe, Arjav Malay Parekh, Yu Huang**')
st.markdown('Air Quality Analysis for ***Champaign, IL*** (latitude, longitude: **40.11,-88.24**)')


def _load_air_quality():
    """Request the data and add the display-friendly ``datetime`` label column.

    Returns:
        tuple: ``(frame, aqi)`` as returned by ``air_api_request()``, with an
        extra ``datetime`` string column formatted like ``'30 Nov 14:00'``.
    """
    frame, aqi = air_api_request()
    frame['datetime'] = pd.to_datetime(frame['date']).dt.strftime('%d %b %H:%M')
    return frame, aqi


# Initial Request
df_air_quality, us_aqi = _load_air_quality()


def refresh_data():
    """Re-request the data and replace the module-level table and AQI value.

    Both names are declared global: without ``us_aqi`` in the declaration the
    refreshed AQI would only be bound locally and then thrown away.
    """
    global df_air_quality, us_aqi
    df_air_quality, us_aqi = _load_air_quality()


st.subheader('Updated Air Quality Data')
if st.button('Refresh Data', type='primary'):
    # The request is the step that can fail, so it is the part guarded here.
    # NOTE: air_api_request() uses an HTTP cache with a 3600 s expiry, so a
    # refresh inside that window may be served from the cache.
    try:
        refresh_data()
    except Exception as e:  # UI boundary: show the failure instead of crashing the page
        st.error(f'An error occurred: {e}')
    else:
        alert = st.success('Data refreshed successfully! The alert will disappear after 3 seconds.')
        time.sleep(3)
        alert.empty()
st.dataframe(df_air_quality[['datetime'] + air_quality_vars])
# 3. Data Visualizations
## US Air Quality Index Scale
# Look up the category label and colour for the current reading. The bands in
# aqi_dict are whole-number ranges with gaps between them (50 -> 51, ...), but
# us_aqi is a float, so each band is widened by 0.5 on both sides. Whole-number
# readings classify exactly as before; a fractional reading such as 50.4 now
# falls into the nearest band instead of "Unknown". Anything outside every
# band (including NaN, for which all comparisons are False) keeps the defaults.
category = "Unknown"
color = "black"
for (lower, upper), (cat, col) in aqi_dict.items():
    if lower - 0.5 <= us_aqi < upper + 0.5:
        category, color = cat, col
        break

st.subheader('US Air Quality Index (AQI)')
# unsafe_allow_html is needed for the coloured <span>; the interpolated colour
# and category come from the fixed aqi_dict, not from user input.
st.markdown(
    f"#### Today's Air Quality Index in Champaign, IL is <span style='color:{color}'>{us_aqi:.2f} ({category})</span> ####",
    unsafe_allow_html=True,
)
st.image(image_url, caption="Air Quality Index Scale", use_container_width=True)
# Citation for the AQI scale image shown above.
st.markdown(
    """
District of Columbia Mayor’s Office. (2023, June 9). *Air quality continues to improve in DC: Air Quality Index now downgraded to Code Yellow.* Retrieved November 30, 2024, from [https://mayor.dc.gov/release/air-quality-continues-improve-dc-air-quality-index-now-downgraded-code-yellow](https://mayor.dc.gov/release/air-quality-continues-improve-dc-air-quality-index-now-downgraded-code-yellow)
"""
)
# -----------------------------------------------------------
# Additional Trial and Error Visualizations (Unpolished/Cluttered)
# Kept at the bottom of the page to document the prototyping process.
# (altair is already imported as `alt` at the top of the file.)
# -----------------------------------------------------------
st.markdown("""
### Early Trial Visualizations (Cluttered Prototypes)
Below are some of our initial attempts at visualizing all pollutants together.
These attempts are intentionally left here to demonstrate the "scaffolding" nature
of our work. They are cluttered and not very user-friendly, but they show our trial-and-error process.
""")

# Melt the dataframe to a long format for plotting multiple pollutants at once
long_df = df_air_quality.melt(id_vars="datetime", value_vars=air_quality_vars,
                              var_name="Pollutant", value_name="Concentration")

# The x axis uses the formatted 'datetime' strings as an ordinal field. Left to
# the default ordering these labels sort as text, so a day that crosses a month
# boundary would show e.g. "01 Dec ..." before "30 Nov ...". Passing the labels
# in their row order pins the axis (and the line path) to the order of the data.
time_order = df_air_quality['datetime'].tolist()

# Encoding shared by the two "everything at once" time-series attempts.
all_pollutants_encoding = dict(
    x=alt.X('datetime:O', sort=time_order, title='Date and Time'),
    y=alt.Y('Concentration:Q', title='Concentration'),
    color=alt.Color('Pollutant:N', legend=alt.Legend(title='Pollutant')),
    tooltip=['datetime', 'Pollutant', 'Concentration'],
)

# Attempt 1: A single line chart with ALL pollutants at once
# This leads to a very cluttered chart where it's hard to distinguish individual lines.
st.markdown("#### Attempt 1: All Pollutants in One Line Chart")
all_in_one_line = alt.Chart(long_df).mark_line().encode(
    **all_pollutants_encoding
).properties(
    width=700,
    height=400,
    title="A Very Overcrowded Line Chart"
)
st.altair_chart(all_in_one_line, use_container_width=True)
st.markdown("""
*As you can see, this single chart becomes difficult to interpret due to the sheer
number of lines and colors overlapping. While it technically "works," it doesn't provide
clear insights at a glance.*
""")

# Attempt 2: A scatter plot of all pollutants over time
# Again, this will be cluttered. Each pollutant on the same time axis, different colors.
# With so many pollutants, the chart becomes a mass of points.
st.markdown("#### Attempt 2: Scatter Plot of All Pollutants Over Time")
all_in_one_scatter = alt.Chart(long_df).mark_circle(size=40).encode(
    **all_pollutants_encoding
).properties(
    width=700,
    height=400,
    title="Scatter Plot with All Pollutants"
)
st.altair_chart(all_in_one_scatter, use_container_width=True)
st.markdown("""
*This scatter plot presents all pollutants simultaneously as well. While we can see
some variance in concentration over time, the chart is noisy and doesn't direct the user
to any immediate insights. It's a good reminder that "more data on one chart"
does not always mean "more understanding."*
""")

# Attempt 3: A bar chart of every pollutant at the first timestamp in the table.
# sort=None keeps the bars in the order the pollutants appear in the data.
st.markdown("#### Attempt 3: Bar Chart of All Pollutants at a Single Timestamp")
first_time = df_air_quality['datetime'].iloc[0]
single_point_data = long_df[long_df['datetime'] == first_time]
all_in_one_bar = alt.Chart(single_point_data).mark_bar().encode(
    x=alt.X('Pollutant:N', sort=None, title='Pollutant'),
    y=alt.Y('Concentration:Q', title='Concentration'),
    tooltip=['Pollutant', 'Concentration']
).properties(
    width=700,
    height=400,
    title=f"Bar Chart at {first_time}"
)
st.altair_chart(all_in_one_bar, use_container_width=True)
# The blank lines around '---' matter: a '---' line directly under a paragraph
# turns that paragraph into a heading instead of drawing a horizontal rule.
st.markdown("""
*At a single timestamp, a bar chart of all pollutants quickly becomes unwieldy if
we have too many pollutants. Even though it's simpler than a time-series plot,
it's still not very informative due to the volume of categories.*

---

These attempts illustrate that while we can technically display all the data
at once, it's not always the most practical or insightful approach.
This helps us understand which visualizations to refine
and which ones to discard or simplify in future iterations.
""")