Spaces:
No application file
No application file
File size: 11,730 Bytes
80710a2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 |
# Importing libraries-----------------------------------------------------------------------------------------
import streamlit as st
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from scipy import stats
from sklearn.ensemble import RandomForestRegressor
# Sidebar: app heading plus the selector that decides which view the script renders.
with st.sidebar:
    st.markdown("# CO2 Emissions by Vehicle")
    # The chosen label is compared verbatim further down the script, so the
    # option strings must stay exactly as written here.
    user_input = st.selectbox(
        label='Please select',
        options=('Visulization', 'Model'),
    )
# Load the vehicle dataset (path is relative to the working directory of the app).
df = pd.read_csv('co2 Emissions.csv')

# Replace the single-letter fuel codes with readable names.
# Codes that are not keys of this mapping become NaN after .map().
fuel_type_mapping = {
    "Z": "Premium Gasoline",
    "X": "Regular Gasoline",
    "D": "Diesel",
    "E": "Ethanol(E85)",
    "N": "Natural Gas",
}
df["Fuel Type"] = df["Fuel Type"].map(fuel_type_mapping)

# Drop rows with natural gas as fuel type.
# An exact comparison is used instead of `~ ... .str.contains(...)`: with a NaN
# in the column, str.contains() yields an object-dtype mask and unary `~` on it
# raises TypeError. `!=` treats NaN as "not natural gas" and keeps such rows.
df_natural = df[df["Fuel Type"] != "Natural Gas"].reset_index(drop=True)

# Remove outliers from the data: keep only the numeric columns used for the box
# plots and the model, then keep the rows whose z-score is below 1.9 in absolute
# value in every one of those columns.
df_new = df_natural[['Engine Size(L)', 'Cylinders', 'Fuel Consumption Comb (L/100 km)', 'CO2 Emissions(g/km)']]
df_new_model = df_new[(np.abs(stats.zscore(df_new)) < 1.9).all(axis=1)]
# Visualization and model views ---------------------------------------------------------------------------
# NOTE: the label 'Visulization' below is spelled exactly as the sidebar selectbox
# supplies it; the comparison only works while both strings stay identical.
#
# Every figure is created explicitly and handed to st.pyplot(fig), so the script no
# longer depends on the global pyplot figure and no longer needs to set the
# 'deprecation.showPyplotGlobalUse' option.
# NOTE(review): newer Streamlit releases reportedly reject that option - confirm
# against the Streamlit version this app is pinned to.


def _count_table(frame, column):
    """Return one row per distinct value of `column` with its row count in 'Count'.

    The index and the counts are named explicitly instead of renaming whatever
    default name `value_counts()` assigns, so the result always has the two
    columns `column` and 'Count' (most frequent value first).
    """
    return frame[column].value_counts().rename_axis(column).reset_index(name='Count')


def _mean_co2_by(frame, column):
    """Return the mean 'CO2 Emissions(g/km)' per value of `column`, sorted ascending."""
    return frame.groupby([column])['CO2 Emissions(g/km)'].mean().sort_values().reset_index()


def _bar_chart(data, x, y, *, title, xlabel, ylabel, figsize, rotation=None, **label_kwargs):
    """Draw a labelled seaborn bar chart on a new figure and return that figure.

    `rotation` (degrees) is applied to the x tick labels when given.
    `label_kwargs` are forwarded to `Axes.bar_label` (e.g. `fontsize`, `fmt`).
    """
    fig, ax = plt.subplots(figsize=figsize)
    sns.barplot(data=data, x=x, y=y, ax=ax)
    if rotation is not None:
        ax.tick_params(axis='x', labelrotation=rotation)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    # Write each bar's value on top of the bar.
    ax.bar_label(ax.containers[0], **label_kwargs)
    return fig


def plot_bar(data, x_label, y_label, title):
    """Draw a 23x5 bar chart of `y_label` against `x_label` and return the figure.

    The column names double as the axis labels. The figure is also left as the
    current pyplot figure, as before; returning it lets callers pass it to
    `st.pyplot` explicitly.
    """
    return _bar_chart(
        data, x_label, y_label,
        title=title, xlabel=x_label, ylabel=y_label,
        figsize=(23, 5), rotation=90, fontsize=9,
    )


def _box_plots(frame, columns):
    """Return a 2x2 figure holding one box plot per entry of `columns` (up to four)."""
    fig, axes = plt.subplots(2, 2, figsize=(20, 10))
    for ax, column in zip(axes.flat, columns):
        ax.boxplot(frame[column])
        ax.set_title(column)
    return fig


def _show(fig):
    """Render `fig` in the page, then close it so pyplot stops holding on to it."""
    st.pyplot(fig)
    plt.close(fig)


if user_input == 'Visulization':
    # Showing Dataset --------------------------------------------------------------------------------------
    st.title('CO2 Emissions by Vehicle')
    st.header("Data We collected from the source")
    st.write(df)

    # Brands of Cars ---------------------------------------------------------------------------------------
    st.subheader('Brands of Cars')
    df_brand = _count_table(df, 'Make')
    _show(_bar_chart(
        df_brand, "Make", "Count",
        title="All Car Companies and their Cars", xlabel="Companies", ylabel="Cars",
        figsize=(15, 6), rotation=75, fontsize=7,
    ))
    st.write(df_brand)

    # Top 25 Models of Cars --------------------------------------------------------------------------------
    st.subheader('Top 25 Models of Cars')
    df_model = _count_table(df, 'Model')
    _show(_bar_chart(
        df_model[:25], "Model", "Count",
        title="Top 25 Car Models", xlabel="Models", ylabel="Cars",
        figsize=(20, 6), rotation=75,
    ))
    st.write(df_model)

    # Vehicle Class ----------------------------------------------------------------------------------------
    st.subheader('Vehicle Class')
    df_vehicle_class = _count_table(df, 'Vehicle Class')
    _show(_bar_chart(
        df_vehicle_class, "Vehicle Class", "Count",
        title="All Vehicle Class", xlabel="Vehicle Class", ylabel="Cars",
        figsize=(20, 5), rotation=75,
    ))
    st.write(df_vehicle_class)

    # Engine Sizes of Cars ---------------------------------------------------------------------------------
    st.subheader('Engine Sizes of Cars')
    df_engine_size = _count_table(df, 'Engine Size(L)')
    _show(_bar_chart(
        df_engine_size, "Engine Size(L)", "Count",
        title="All Engine Sizes", xlabel="Engine Size(L)", ylabel="Cars",
        figsize=(20, 6), rotation=90,
    ))
    st.write(df_engine_size)

    # Cylinders --------------------------------------------------------------------------------------------
    st.subheader('Cylinders')
    df_cylinders = _count_table(df, 'Cylinders')
    _show(_bar_chart(
        df_cylinders, "Cylinders", "Count",
        title="All Cylinders", xlabel="Cylinders", ylabel="Cars",
        figsize=(20, 6), rotation=90,
    ))
    st.write(df_cylinders)

    # Transmission of Cars ---------------------------------------------------------------------------------
    # Collapse the individual gearbox codes into their transmission family.
    # This overwrites the column on the shared `df`; codes that are not keys
    # of the mapping become NaN.
    transmission_mapping = {
        "A4": "Automatic", "A5": "Automatic", "A6": "Automatic", "A7": "Automatic",
        "A8": "Automatic", "A9": "Automatic", "A10": "Automatic",
        "AM5": "Automated Manual", "AM6": "Automated Manual", "AM7": "Automated Manual",
        "AM8": "Automated Manual", "AM9": "Automated Manual",
        "AS4": "Automatic with Select Shift", "AS5": "Automatic with Select Shift",
        "AS6": "Automatic with Select Shift", "AS7": "Automatic with Select Shift",
        "AS8": "Automatic with Select Shift", "AS9": "Automatic with Select Shift",
        "AS10": "Automatic with Select Shift",
        "AV": "Continuously Variable", "AV6": "Continuously Variable",
        "AV7": "Continuously Variable", "AV8": "Continuously Variable",
        "AV10": "Continuously Variable",
        "M5": "Manual", "M6": "Manual", "M7": "Manual",
    }
    df["Transmission"] = df["Transmission"].map(transmission_mapping)
    st.subheader('Transmission')
    df_transmission = _count_table(df, 'Transmission')
    _show(_bar_chart(
        df_transmission, "Transmission", "Count",
        title="All Transmissions", xlabel="Transmissions", ylabel="Cars",
        figsize=(20, 5),
    ))
    st.write(df_transmission)

    # Fuel Type of Cars ------------------------------------------------------------------------------------
    st.subheader('Fuel Type')
    df_fuel_type = _count_table(df, 'Fuel Type')
    _show(_bar_chart(
        df_fuel_type, "Fuel Type", "Count",
        title="All Fuel Types", xlabel="Fuel Types", ylabel="Cars",
        figsize=(20, 5),
    ))
    st.text("We have only one data on natural gas. So we cannot predict anything using only one data. That's why we have to drop this row.")
    st.write(df_fuel_type)

    # Removing Natural Gas ---------------------------------------------------------------------------------
    st.subheader('After removing Natural Gas data')
    df_ftype = _count_table(df_natural, 'Fuel Type')
    _show(_bar_chart(
        df_ftype, "Fuel Type", "Count",
        title="All Fuel Types", xlabel="Fuel Types", ylabel="Cars",
        figsize=(20, 5),
    ))
    st.write(df_ftype)

    # CO2 Emission variation with Brand --------------------------------------------------------------------
    st.header('Variation in CO2 emissions with different features')
    st.subheader('CO2 Emission with Brand ')
    df_co2_make = _mean_co2_by(df, 'Make')
    _show(_bar_chart(
        df_co2_make, "Make", "CO2 Emissions(g/km)",
        title="CO2 Emissions variation with Brand", xlabel="Brands", ylabel="CO2 Emissions(g/km)",
        figsize=(20, 5), rotation=90, fontsize=8, fmt='%.1f',
    ))

    # CO2 Emissions variation with Vehicle Class -----------------------------------------------------------
    st.subheader('CO2 Emissions variation with Vehicle Class')
    df_co2_vehicle_class = _mean_co2_by(df, 'Vehicle Class')
    _show(plot_bar(df_co2_vehicle_class, "Vehicle Class", "CO2 Emissions(g/km)", "CO2 Emissions variation with Vehicle Class"))

    # CO2 Emission variation with Transmission -------------------------------------------------------------
    st.subheader('CO2 Emission variation with Transmission')
    df_co2_transmission = _mean_co2_by(df, 'Transmission')
    _show(plot_bar(df_co2_transmission, "Transmission", "CO2 Emissions(g/km)", "CO2 Emission variation with Transmission"))

    # CO2 Emissions variation with Fuel Type ---------------------------------------------------------------
    st.subheader('CO2 Emissions variation with Fuel Type')
    df_co2_fuel_type = _mean_co2_by(df, 'Fuel Type')
    _show(plot_bar(df_co2_fuel_type, "Fuel Type", "CO2 Emissions(g/km)", "CO2 Emissions variation with Fuel Type"))

    # Box Plots --------------------------------------------------------------------------------------------
    st.header("Box Plots")
    features = ['Engine Size(L)', 'Cylinders', 'Fuel Consumption Comb (L/100 km)', 'CO2 Emissions(g/km)']
    _show(_box_plots(df_new, features))

    # Outliers ---------------------------------------------------------------------------------------------
    st.text("As we can see there are some outliers present in our Dataset")
    st.subheader("After removing outliers")
    # NOTE(review): len(df) counts the raw rows, including any natural-gas rows
    # that were dropped before the outlier filter ran.
    st.write("Before removing outliers we have", len(df), "data")
    st.write("After removing outliers we have", len(df_new_model), "data")

    # Boxplot after removing outliers ----------------------------------------------------------------------
    st.subheader("Boxplot after removing outliers")
    _show(_box_plots(df_new_model, features))
else:
    # Prepare the data for modeling ------------------------------------------------------------------------
    # Single source of truth for the feature order: used both for fitting and
    # for building the prediction row below.
    feature_columns = ['Engine Size(L)', 'Cylinders', 'Fuel Consumption Comb (L/100 km)']
    X = df_new_model[feature_columns]
    y = df_new_model['CO2 Emissions(g/km)']

    # Train the random forest regression model -------------------------------------------------------------
    # The script body runs again on every widget change, which refits the
    # forest; a fixed seed keeps the prediction for a given input stable.
    model = RandomForestRegressor(random_state=42).fit(X, y)

    # Create the Streamlit web app -------------------------------------------------------------------------
    st.title('CO2 Emission Prediction')
    st.write('Enter the vehicle specifications to predict CO2 emissions.')

    # Input fields for user --------------------------------------------------------------------------------
    engine_size = st.number_input('Engine Size(L)', step=0.1, format="%.1f")
    cylinders = st.number_input('Cylinders', min_value=2, max_value=16, step=1)
    fuel_consumption = st.number_input('Fuel Consumption Comb (L/100 km)', step=0.1, format="%.1f")

    # Predict CO2 emissions --------------------------------------------------------------------------------
    # The row must follow the training column order (engine size, cylinders,
    # fuel consumption). The previous list put cylinders first, so the model
    # read the cylinder count as engine size and vice versa. Naming the columns
    # also gives predict() the same feature names it was fitted with.
    input_data = pd.DataFrame(
        [[engine_size, cylinders, fuel_consumption]],
        columns=feature_columns,
    )
    predicted_co2 = model.predict(input_data)

    # Display the prediction -------------------------------------------------------------------------------
    st.write(f'Predicted CO2 Emissions: {predicted_co2[0]:.2f} g/km')
|