import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np

# Columns the model is trained on; prediction inputs must use the same names
# and order so scikit-learn can match them against the fitted features.
FEATURE_COLUMNS = ["Temperature", "Humidity"]


# Load sample data (replace with real pollution dataset)
@st.cache_data
def load_sample_data():
    """Return a 100-row DataFrame of randomly generated pollution readings.

    Columns:
        Date: consecutive daily timestamps starting 2023-01-01.
        Location: one of five city names, drawn at random per row.
        AQI: random integers in [50, 200) (upper bound exclusive).
        Temperature: random floats drawn uniformly from 20 to 35.
        Humidity: random floats drawn uniformly from 30 to 80.

    The result is cached with ``st.cache_data`` because Streamlit re-executes
    the script on every widget interaction; without caching the random data
    (and therefore the model trained on it) would change on every rerun.
    """
    n_rows = 100
    data = {
        "Date": pd.date_range(start="2023-01-01", periods=n_rows, freq="D"),
        "Location": np.random.choice(
            ["Karachi", "Lahore", "Islamabad", "Peshawar", "Quetta"], size=n_rows
        ),
        "AQI": np.random.randint(50, 200, size=n_rows),  # Random AQI values
        "Temperature": np.random.uniform(20, 35, size=n_rows),
        "Humidity": np.random.uniform(30, 80, size=n_rows),
    }
    return pd.DataFrame(data)


# Train a simple model to predict AQI
@st.cache_resource
def train_model(data):
    """Fit a random-forest regressor that predicts AQI from weather features.

    Args:
        data: DataFrame containing the ``Temperature``, ``Humidity`` and
            ``AQI`` columns.

    Returns:
        A ``(model, mse)`` tuple: the fitted ``RandomForestRegressor`` and its
        mean squared error on a held-out 20% test split.

    The result is cached with ``st.cache_resource`` so the forest is not
    refitted on every Streamlit rerun for the same input data.
    """
    X = data[FEATURE_COLUMNS]
    y = data["AQI"]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    return model, mse


# Predict AQI for a given input
def predict_aqi(model, temperature, humidity):
    """Return the model's AQI prediction for one reading, rounded to 2 places.

    Args:
        model: fitted estimator exposing ``predict`` (as returned by
            ``train_model``).
        temperature: temperature value for the reading.
        humidity: humidity value for the reading.

    Returns:
        The predicted AQI as a plain ``float`` rounded to two decimals.
    """
    # The model is fitted on a DataFrame with named columns; passing a bare
    # nested list makes scikit-learn warn about missing feature names.
    features = pd.DataFrame([[temperature, humidity]], columns=FEATURE_COLUMNS)
    prediction = model.predict(features)
    return round(float(prediction[0]), 2)


# Visualization of historical trends
def plot_trends(data, location, output_path="trends.png"):
    """Plot AQI, temperature and humidity over time for one location.

    Args:
        data: DataFrame with ``Date``, ``Location``, ``AQI``, ``Temperature``
            and ``Humidity`` columns.
        location: value of the ``Location`` column to filter on.
        output_path: file the chart is written to (defaults to
            ``"trends.png"``, the path the original implementation used).

    Returns:
        ``output_path``, i.e. the path of the saved image.
    """
    filtered_data = data[data["Location"] == location]

    # Use an explicit figure/axes instead of pyplot's implicit global state,
    # and always close the figure: this function runs on every Streamlit
    # rerun, so unclosed figures would accumulate in memory.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        for column in ("AQI", "Temperature", "Humidity"):
            sns.lineplot(data=filtered_data, x="Date", y=column, label=column, ax=ax)
        ax.set_title(f"Historical Data Trends for {location}")
        ax.set_xlabel("Date")
        ax.set_ylabel("Values")
        ax.legend()
        ax.grid(True)
        fig.tight_layout()

        # Save the plot to a file
        fig.savefig(output_path)
    finally:
        plt.close(fig)
    return output_path


# Load data and train model
data = load_sample_data()
model, mse = train_model(data)

# Streamlit app
st.title("🌍 Pollution Data Analysis Tool")
st.markdown(
    "This app predicts air pollution levels (AQI) based on temperature and humidity. "
    "It also provides a visualization of historical trends."
)

# Sidebar inputs
st.sidebar.header("Input Parameters")
# Present the locations in sorted order. unique() returns values in order of
# first appearance, which is not stable for randomly generated data; a
# changing option list can make Streamlit treat the selectbox as a new widget
# and drop the user's current selection.
location = st.sidebar.selectbox(
    "Select Location", sorted(data["Location"].dropna().unique())
)
# NOTE(review): both sliders extend past the ranges load_sample_data draws
# from (temperature 20-35, humidity 30-80), so the top of each slider asks the
# model about values it never saw during training.
temperature = st.sidebar.slider("Temperature (°C)", 20, 40, 25)
humidity = st.sidebar.slider("Humidity (%)", 30, 90, 50)

# Prediction
st.subheader("Predicted AQI")
# Only temperature and humidity are passed to the model; the selected location
# appears in the message but does not influence the predicted value.
prediction = predict_aqi(model, temperature, humidity)
st.write(f"The predicted AQI for {location} is: {prediction}")

# Historical trends visualization
st.subheader("Historical Data Trends")
trends_image = plot_trends(data, location)
st.image(trends_image)

# Model performance
st.sidebar.subheader("Model Performance")
st.sidebar.write(f"Mean Squared Error: {mse:.2f}")