# Hosting-page residue (not code): uploaded by SparshSG in "Upload 18 files", commit 3d6943b (verified).
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
def plot_aqi_distribution(df: pd.DataFrame):
    """Show a 40-bin histogram of the ``AQI`` column with a KDE curve overlaid.

    Args:
        df: Frame that provides an ``"AQI"`` column.
    """
    fig, axes = plt.subplots(figsize=(8, 5))
    sns.histplot(df["AQI"], bins=40, kde=True, ax=axes)

    axes.set_title("AQI Distribution")
    axes.set_xlabel("AQI")
    axes.set_ylabel("Count")

    fig.tight_layout()
    plt.show()
def plot_city_aqi(df: pd.DataFrame):
    """Show a horizontal bar chart of mean AQI per city, highest mean first.

    Every bar is annotated with its mean formatted to one decimal place.

    Args:
        df: Frame that provides ``"City"`` and ``"AQI"`` columns.
    """
    mean_by_city = df.groupby("City")["AQI"].mean()
    ranked = mean_by_city.sort_values(ascending=False).reset_index()

    fig, axes = plt.subplots(figsize=(8, 6))
    sns.barplot(data=ranked, y="City", x="AQI", ax=axes)

    # reset_index() leaves a 0..n-1 index, so the enumerate position is the
    # same row number the bar is drawn at.
    for bar_row, mean_aqi in enumerate(ranked["AQI"]):
        axes.text(mean_aqi + 2, bar_row, f"{mean_aqi:.1f}", va="center", fontsize=7)

    axes.set_title("Average AQI by City")
    axes.set_xlabel("AQI")
    axes.set_ylabel("City")

    fig.tight_layout()
    plt.show()
def plot_season_aqi(df: pd.DataFrame):
    """Show a vertical bar chart of mean AQI per season.

    Every bar is annotated above its top with the mean to one decimal place.

    Args:
        df: Frame that provides ``"Season"`` and ``"AQI"`` columns.
    """
    per_season = df.groupby("Season")["AQI"].mean().reset_index()

    fig, axes = plt.subplots(figsize=(6, 4))
    sns.barplot(data=per_season, x="Season", y="AQI", ax=axes)

    # reset_index() leaves a 0..n-1 index, so the enumerate position is the
    # bar's x coordinate.
    for bar_col, mean_aqi in enumerate(per_season["AQI"]):
        axes.text(bar_col, mean_aqi + 3, f"{mean_aqi:.1f}", ha="center", fontsize=7)

    axes.set_title("Average AQI by Season")
    axes.set_xlabel("Season")
    axes.set_ylabel("AQI")

    fig.tight_layout()
    plt.show()
def plot_yearly_trend(df: pd.DataFrame):
    """Show a line chart of mean AQI per year, with a marker at each year.

    Args:
        df: Frame that provides ``"Year"`` and ``"AQI"`` columns.
    """
    per_year = df.groupby("Year")["AQI"].mean().reset_index()

    fig, axes = plt.subplots(figsize=(8, 5))
    sns.lineplot(data=per_year, x="Year", y="AQI", marker="o", ax=axes)

    axes.set_title("Yearly AQI Trend")
    axes.set_xlabel("Year")
    axes.set_ylabel("AQI")

    fig.tight_layout()
    plt.show()
def plot_monthly_trend(df: pd.DataFrame):
    """Show a line chart of mean AQI per calendar month across all rows.

    The x axis carries one tick per year, placed at the first month of that
    year that has data and labelled with the year number.

    The caller's frame is left untouched: the helper ``"year_month"`` column
    is built on a private copy instead of being written into ``df``.

    Args:
        df: Frame that provides a datetime-like ``"Date"`` column (the ``.dt``
            accessor is used on it) and an ``"AQI"`` column.
    """
    # Work on a two-column copy so the temporary month key never leaks into
    # the caller's DataFrame (and no SettingWithCopyWarning fires on slices).
    work = df[["Date", "AQI"]].copy()
    work["year_month"] = work["Date"].dt.to_period("M").dt.to_timestamp()
    monthly_aqi = work.groupby("year_month")["AQI"].mean().reset_index()

    plt.figure(figsize=(12, 5))
    ax = sns.lineplot(data=monthly_aqi, x="year_month", y="AQI")

    # One tick per year, anchored on that year's first available month.
    year_of_month = monthly_aqi["year_month"].dt.year
    years = year_of_month.unique()
    tick_positions = monthly_aqi.groupby(year_of_month).first()["year_month"]
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(years)

    plt.title("Monthly AQI Trend")
    plt.xlabel("Year")
    plt.ylabel("AQI")
    plt.tight_layout()
    plt.show()
def plot_correlation_heatmap(df: pd.DataFrame):
    """Show an annotated heatmap of pairwise correlations of numeric columns.

    Args:
        df: Frame whose numeric-dtype columns are correlated; other columns
            are ignored.
    """
    numeric_only = df.select_dtypes(include="number")
    corr_matrix = numeric_only.corr()

    fig, axes = plt.subplots(figsize=(10, 8))
    heatmap_options = {
        "annot": True,
        "fmt": ".2f",
        "cmap": "coolwarm",
        "linewidths": 0.5,
    }
    sns.heatmap(corr_matrix, ax=axes, **heatmap_options)

    axes.set_title("Correlation Heatmap")
    fig.tight_layout()
    plt.show()
def plot_corr_with_aqi(df: pd.DataFrame):
    """Show a bar chart of each numeric column's correlation with ``AQI``.

    Bars are ordered from the highest correlation to the lowest. The ``AQI``
    column itself is part of the correlation matrix, so it appears as a bar.

    Args:
        df: Frame with numeric-dtype columns, one of which is ``"AQI"``.
    """
    numeric_only = df.select_dtypes(include="number")
    ranked = numeric_only.corr()["AQI"].sort_values(ascending=False)

    fig, axes = plt.subplots(figsize=(6, 5))
    sns.barplot(x=ranked.values, y=ranked.index, ax=axes)

    axes.set_title("Correlation with AQI")
    axes.set_xlabel("Correlation")

    fig.tight_layout()
    plt.show()
def plot_delhi_trend(df: pd.DataFrame):
    """Show a red line chart of mean monthly AQI for rows whose city is Delhi.

    The x axis carries one tick per year, placed at the first month of that
    year that has data and labelled with the year number.

    Args:
        df: Frame that provides ``"City"``, ``"AQI"`` and a datetime-like
            ``"Date"`` column (the ``.dt`` accessor is used on it).
    """
    is_delhi = df["City"] == "Delhi"
    # Copy before adding the month key so the caller's frame is not modified.
    delhi_rows = df[is_delhi].copy()
    delhi_rows["year_month"] = delhi_rows["Date"].dt.to_period("M").dt.to_timestamp()
    per_month = delhi_rows.groupby("year_month")["AQI"].mean().reset_index()

    fig, _ = plt.subplots(figsize=(12, 5))
    axes = sns.lineplot(data=per_month, x="year_month", y="AQI", color="red")

    # One tick per year, anchored on that year's first available month.
    year_of_month = per_month["year_month"].dt.year
    year_labels = year_of_month.unique()
    year_starts = per_month.groupby(year_of_month).first()["year_month"]
    axes.set_xticks(year_starts)
    axes.set_xticklabels(year_labels)

    axes.set_title("Monthly AQI Trend - Delhi")
    axes.set_xlabel("Year")
    axes.set_ylabel("AQI")

    fig.tight_layout()
    plt.show()
def _delhi_monthly_mean(df: pd.DataFrame, year: int) -> pd.Series:
    """Return Delhi's mean AQI per calendar month of ``year``.

    The result is indexed by month number (1-12). Months with no rows, or
    whose AQI values are all missing, are absent from the result.
    """
    in_scope = (df["City"] == "Delhi") & (df["Date"].dt.year == year)
    subset = df.loc[in_scope]
    # Grouping by month number (rather than a 'ME' Grouper) keeps the calendar
    # month attached to every value and does not depend on the pandas version
    # recognising the 'ME' frequency alias.
    return subset.groupby(subset["Date"].dt.month)["AQI"].mean().dropna()


def plot_pandemic_effect(df: pd.DataFrame):
    """Compare Delhi's mean monthly AQI in 2019 against 2020 on one chart.

    Each value is drawn at its own calendar month, so a month without data
    leaves a gap in that position instead of shifting the later points under
    the wrong month label.

    Args:
        df: Frame that provides ``"City"``, ``"AQI"`` and a datetime-like
            ``"Date"`` column (the ``.dt`` accessor is used on it).
    """
    month_labels = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

    d_2019 = _delhi_monthly_mean(df, 2019)
    d_2020 = _delhi_monthly_mean(df, 2020)

    plt.figure(figsize=(12, 5))
    # Month numbers are 1-based; the tick positions below are 0-based.
    plt.plot(d_2019.index - 1, d_2019.values, label='2019 (Pre-Pandemic)', marker='o')
    plt.plot(d_2020.index - 1, d_2020.values, label='2020 (Pandemic)', marker='o', linestyle='--')
    plt.xticks(range(12), month_labels)
    plt.title('Delhi AQI: 2019 vs 2020 Pandemic')
    # Axis labels added for consistency with the other plots in this module.
    plt.xlabel("Month")
    plt.ylabel("AQI")
    plt.legend()
    plt.tight_layout()
    plt.show()