Spaces:

mihir-s
/

IS445_Final

Sleeping

App Files Files Community

IS445_Final / app.py

mihir-s

refined the plots

9636c0b verified over 1 year ago

raw

history blame contribute delete

10.3 kB

	import openmeteo_requests

	import requests_cache
	from retry_requests import retry

	import streamlit as st
	import altair as alt

	import numpy as np
	import pandas as pd
	import time


	# 1. Data Extraction and Data Transformation

	# Fixed input parameters, assigned once at module level and read by the API
	# request and the charts. No `global` statement is used here: at module scope
	# it has no effect, names are already module-level.

	# Hourly variables to request. air_api_request reads the response variables
	# by position, so the order of this list matters.
	air_quality_vars = [
	    "pm10", "pm2_5", "carbon_monoxide",
	    "carbon_dioxide", "nitrogen_dioxide", "sulphur_dioxide", "ozone",
	    "aerosol_optical_depth", "dust", "uv_index", "uv_index_clear_sky",
	    "ammonia", "methane", "alder_pollen", "birch_pollen",
	    "grass_pollen", "mugwort_pollen", "olive_pollen", "ragweed_pollen",
	]

	# Coordinates sent with the request (the page text labels them Champaign, IL).
	lat = 40.11
	lng = -88.24

	# Inclusive (low, high) US AQI bands -> (category label, CSS colour name).
	aqi_dict = {
	    (0, 50): ("Good", "green"),
	    (51, 100): ("Moderate", "yellow"),
	    (101, 150): ("Unhealthy for Sensitive Groups", "orange"),
	    (151, 200): ("Unhealthy", "red"),
	    (201, 300): ("Very Unhealthy", "purple"),
	    (301, 500): ("Hazardous", "maroon"),
	}

	# Image of the AQI scale displayed under the AQI headline.
	image_url = 'https://mayor.dc.gov/sites/default/files/dc/sites/mayormb/release_content/images/AQ-June9.png'

	# Request air quality data
	def air_api_request():
	    """Fetch one day of hourly air-quality data plus the current US AQI.

	    Calls the Open-Meteo air-quality endpoint for the module-level
	    ``lat``/``lng`` and requests every variable in ``air_quality_vars``.
	    The HTTP session is a ``requests_cache`` session (``expire_after=3600``)
	    wrapped with ``retry`` (5 retries, backoff factor 0.2).

	    Returns:
	        tuple: ``(hourly_dataframe, current_us_aqi)``. The DataFrame has a
	        ``date`` column followed by one column per entry of
	        ``air_quality_vars`` (same order); ``current_us_aqi`` is the value
	        of the single ``current`` variable requested (``us_aqi``).
	    """
	    cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
	    retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
	    openmeteo = openmeteo_requests.Client(session=retry_session)

	    url = "https://air-quality-api.open-meteo.com/v1/air-quality"
	    params = {
	        "latitude": lat,
	        "longitude": lng,
	        "current": "us_aqi",
	        "hourly": air_quality_vars,
	        "timezone": "America/Chicago",
	        "forecast_days": 1,
	        "domains": "cams_global",
	    }

	    responses = openmeteo.weather_api(url, params=params)

	    # Only one location is requested, so only the first response is used.
	    response = responses[0]

	    # "current" asks for a single variable, hence index 0.
	    current_us_aqi = response.Current().Variables(0).Value()

	    hourly = response.Hourly()

	    # NOTE(review): the timestamps are built as UTC even though the request
	    # asks for "America/Chicago" -- confirm whether the displayed hours
	    # should be converted to local time.
	    hourly_data = {
	        "date": pd.date_range(
	            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
	            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True) - pd.Timedelta(seconds=1),
	            freq=pd.Timedelta(seconds=hourly.Interval()),
	        )
	    }

	    # Variables are read by position in the order they were requested, so
	    # walk the request list itself rather than hand-numbering 19 indices
	    # that would have to be kept in sync with air_quality_vars.
	    for position, variable_name in enumerate(air_quality_vars):
	        hourly_data[variable_name] = hourly.Variables(position).ValuesAsNumpy()

	    hourly_dataframe = pd.DataFrame(data=hourly_data)
	    return hourly_dataframe, current_us_aqi


	# 2. Refresh Button
	# Page header: title first, then the two descriptive markdown lines.
	st.title('Streamlit App for Final Project Group 1')
	for _header_text in (
	    'Group Member: Jingyi Huang, Ethan Shin, Mihir Sahasrabudhe, Arjav Malay Parekh, Yu Huang',
	    'Air Quality Analysis for *Champaign, IL* (latitude, longitude: 40.11,-88.24)',
	):
	    st.markdown(_header_text)

	# Initial Request
	# Load the data once at script start and add a display-friendly timestamp
	# column ("DD Mon HH:MM") that the table and charts use.
	df_air_quality, us_aqi = air_api_request()
	df_air_quality['datetime'] = (
	    df_air_quality['date']
	    .pipe(pd.to_datetime)
	    .dt.strftime('%d %b %H:%M')
	)

	def refresh_data():
	    """Re-run the API request and replace the module-level data.

	    Rebinds both ``df_air_quality`` and ``us_aqi`` at module level and
	    recomputes the ``datetime`` display column. ``us_aqi`` must be declared
	    ``global`` too; otherwise the fresh value is bound to a local name and
	    discarded, leaving the AQI headline stale after a refresh.
	    """
	    global df_air_quality, us_aqi
	    df_air_quality, us_aqi = air_api_request()
	    df_air_quality['datetime'] = pd.to_datetime(df_air_quality['date']).dt.strftime('%d %b %H:%M')

	st.subheader('Updated Air Quality Data')

	if st.button('Refresh Data', type='primary'):
	    # The refresh itself is the step that can fail, so it must sit inside
	    # the try; otherwise the error message below can never be reached by a
	    # failed request.
	    try:
	        refresh_data()
	    except Exception as e:  # UI boundary: report any failure to the user
	        st.error(f'An error occurred: {e}')
	    else:
	        # Show a temporary confirmation, then clear it after 3 seconds.
	        alert = st.success('Data refreshed successfully! The alert will disappear after 3 seconds.')
	        time.sleep(3)
	        alert.empty()

	# Table of the display timestamp plus every requested variable.
	st.dataframe(df_air_quality[['datetime'] + air_quality_vars])


	# 3. Data Visualizations

	## US Air Quality Index Scale
	# Look up the category and colour for the current AQI. The bands in aqi_dict
	# are inclusive whole-number ranges, so a fractional reading such as 50.4
	# would fall in the gap between (0, 50) and (51, 100) and be reported as
	# "Unknown". Round to the nearest integer for the lookup only; the headline
	# still prints the unrounded value.
	category = "Unknown"
	color = "black"
	# round() raises on NaN/inf, so non-finite readings keep the "Unknown" default.
	if np.isfinite(us_aqi):
	    rounded_aqi = round(us_aqi)
	    for (low, high), (cat, col) in aqi_dict.items():
	        if low <= rounded_aqi <= high:
	            category, color = cat, col
	            break

	st.subheader('US Air Quality Index (AQI)')
	# unsafe_allow_html is needed for the coloured <span>; color and category
	# come only from aqi_dict or the defaults above.
	st.markdown(
	    f"#### Today's Air Quality Index in Champaign, IL is <span style='color:{color}'>{us_aqi:.2f} ({category})</span> ####",
	    unsafe_allow_html=True,
	)

	st.image(image_url, caption="Air Quality Index Scale", use_container_width=True)

	# Citation for the AQI scale image shown above.
	st.markdown(
	    """
	District of Columbia Mayor’s Office. (2023, June 9). Air quality continues to improve in DC: Air Quality Index now downgraded to Code Yellow. Retrieved November 30, 2024, from [https://mayor.dc.gov/release/air-quality-continues-improve-dc-air-quality-index-now-downgraded-code-yellow](https://mayor.dc.gov/release/air-quality-continues-improve-dc-air-quality-index-now-downgraded-code-yellow)
	"""
	)

	# -----------------------------------------------------------
	# Early trial-and-error visualizations (deliberately unpolished)
	# Kept on purpose to show the iteration process; see the intro text
	# rendered just below.
	# -----------------------------------------------------------

	# Repeats the top-of-file import; harmless, kept so this section can be read
	# on its own.
	import altair as alt

	st.markdown("""
	### Early Trial Visualizations (Cluttered Prototypes)

	Below are some of our initial attempts at visualizing all pollutants together.
	These attempts are intentionally left here to demonstrate the "scaffolding" nature
	of our work. They are cluttered and not very user-friendly, but they show our trial-and-error process.
	""")

	# Melt the dataframe to a long format (one row per timestamp/pollutant pair)
	# for plotting multiple pollutants at once.
	long_df = df_air_quality.melt(
	    id_vars="datetime",
	    value_vars=air_quality_vars,
	    var_name="Pollutant",
	    value_name="Concentration",
	)

	# Attempt 1: A single line chart with ALL pollutants at once
	# This leads to a very cluttered chart where it's hard to distinguish individual lines.
	st.markdown("#### Attempt 1: All Pollutants in One Line Chart")
	all_in_one_line = alt.Chart(long_df).mark_line().encode(
	    # 'datetime' holds "DD Mon HH:MM" strings. An ordinal axis sorts them
	    # lexicographically by default, which puts "01 Dec" before "30 Nov";
	    # sort=None keeps the chronological order of the data instead.
	    x=alt.X('datetime:O', sort=None, title='Date and Time'),
	    y=alt.Y('Concentration:Q', title='Concentration'),
	    color=alt.Color('Pollutant:N', legend=alt.Legend(title='Pollutant')),
	    tooltip=['datetime', 'Pollutant', 'Concentration'],
	).properties(
	    width=700,
	    height=400,
	    title="A Very Overcrowded Line Chart",
	)
	st.altair_chart(all_in_one_line, use_container_width=True)

	st.markdown("""
	*As you can see, this single chart becomes difficult to interpret due to the sheer
	number of lines and colors overlapping. While it technically "works," it doesn't provide
	clear insights at a glance.*
	""")

	# Attempt 2: A scatter plot of all pollutants over time
	# Again, this will be cluttered. Each pollutant on the same time axis, different colors.
	# With so many pollutants, the chart becomes a mass of points.
	st.markdown("#### Attempt 2: Scatter Plot of All Pollutants Over Time")
	all_in_one_scatter = alt.Chart(long_df).mark_circle(size=40).encode(
	    # 'datetime' holds "DD Mon HH:MM" strings. An ordinal axis sorts them
	    # lexicographically by default, which puts "01 Dec" before "30 Nov";
	    # sort=None keeps the chronological order of the data instead.
	    x=alt.X('datetime:O', sort=None, title='Date and Time'),
	    y=alt.Y('Concentration:Q', title='Concentration'),
	    color=alt.Color('Pollutant:N', legend=alt.Legend(title='Pollutant')),
	    tooltip=['datetime', 'Pollutant', 'Concentration'],
	).properties(
	    width=700,
	    height=400,
	    title="Scatter Plot with All Pollutants",
	)
	st.altair_chart(all_in_one_scatter, use_container_width=True)

	st.markdown("""
	*This scatter plot presents all pollutants simultaneously as well. While we can see
	some variance in concentration over time, the chart is noisy and doesn't direct the user
	to any immediate insights. It's a good reminder that "more data on one chart"
	does not always mean "more understanding."*
	""")

	st.markdown("#### Attempt 3: Bar Chart of All Pollutants at a Single Timestamp")
	# Snapshot: every pollutant's value at the timestamp of the first data row.
	first_time = df_air_quality['datetime'].iat[0]
	single_point_data = long_df.loc[long_df['datetime'].eq(first_time)]

	# One bar per pollutant, kept in data order (sort=None) rather than sorted.
	all_in_one_bar = (
	    alt.Chart(single_point_data)
	    .mark_bar()
	    .encode(
	        x=alt.X('Pollutant:N', sort=None, title='Pollutant'),
	        y=alt.Y('Concentration:Q', title='Concentration'),
	        tooltip=['Pollutant', 'Concentration'],
	    )
	    .properties(width=700, height=400, title=f"Bar Chart at {first_time}")
	)
	st.altair_chart(all_in_one_bar, use_container_width=True)

	# Closing commentary for the prototype section.
	st.markdown("""
	*At a single timestamp, a bar chart of all pollutants quickly becomes unwieldy if
	we have too many pollutants. Even though it's simpler than a time-series plot,
	it's still not very informative due to the volume of categories.*

	---

	These attempts illustrate that while we can technically display all the data
	at once, it's not always the most practical or insightful approach.
	This helps us understand which visualizations to refine
	and which ones to discard or simplify in future iterations.
	""")