Spaces:
No application file
No application file
| # Importing libraries----------------------------------------------------------------------------------------- | |
| import streamlit as st | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import pandas as pd | |
| import numpy as np | |
| from scipy import stats | |
| from sklearn.ensemble import RandomForestRegressor | |
# Creating Sidebar -----------------------------------------------------------
# NOTE(review): indentation was lost in this copy of the file; the title and
# the page selector are both assumed to live inside the sidebar -- confirm.
with st.sidebar:
    st.markdown("# CO2 Emissions by Vehicle")
    # The option strings (including the 'Visulization' spelling) are compared
    # verbatim further down the script, so they are kept exactly as they are.
    user_input = st.selectbox('Please select', ('Visulization', 'Model'))

# Load the vehicle dataset
df = pd.read_csv('co2 Emissions.csv')

# Replace the single-letter fuel codes with readable labels.
fuel_type_mapping = {
    "Z": "Premium Gasoline",
    "X": "Regular Gasoline",
    "D": "Diesel",
    "E": "Ethanol(E85)",
    "N": "Natural Gas",
}
df["Fuel Type"] = df["Fuel Type"].map(fuel_type_mapping)

# Drop rows with natural gas as fuel type.
# An exact, NaN-safe comparison is used instead of `~...str.contains(...)`:
# a fuel code missing from the mapping becomes NaN, and negating a boolean
# mask that contains NaN can raise instead of filtering.
df_natural = df[df["Fuel Type"].ne("Natural Gas")].reset_index(drop=True)

# Remove outliers from the data: keep only rows whose z-score is below the
# limit in every one of the numeric columns used for plotting and modelling.
ZSCORE_LIMIT = 1.9
df_new = df_natural[['Engine Size(L)', 'Cylinders', 'Fuel Consumption Comb (L/100 km)', 'CO2 Emissions(g/km)']]
df_new_model = df_new[(np.abs(stats.zscore(df_new)) < ZSCORE_LIMIT).all(axis=1)]
# Visualization page ---------------------------------------------------------
# Every chart is drawn on its own explicit Figure/Axes and handed to
# `st.pyplot(fig)`. The script previously called `st.pyplot()` with no figure
# (global pyplot state) and silenced the resulting warning through
# `st.set_option('deprecation.showPyplotGlobalUse', False)`; with explicit
# figures that option is no longer needed.
# NOTE(review): newer Streamlit releases may not recognise that option at all
# -- confirm against the deployed version.
if user_input == 'Visulization':

    def _show(fig):
        """Render *fig* in the app, then close it.

        Streamlit re-runs the whole script on every interaction; closing the
        figure keeps pyplot from accumulating open figures across reruns.
        """
        st.pyplot(fig)
        plt.close(fig)

    def _count_table(frame, column):
        """Return a frame with `column` and a 'Count' of rows per value.

        `rename_axis(...).reset_index(name=...)` produces the same two
        columns regardless of how the installed pandas names the
        `value_counts()` result.
        """
        return frame[column].value_counts().rename_axis(column).reset_index(name='Count')

    def _mean_co2_by(column):
        """Return the mean CO2 emissions per value of `column`, ascending."""
        return df.groupby([column])['CO2 Emissions(g/km)'].mean().sort_values().reset_index()

    def plot_bar(data, x_label, y_label, title, *, figsize=(23, 5), xlabel=None,
                 ylabel=None, rotation=90, label_kwargs=None):
        """Draw a labelled bar chart and return its Figure.

        Args:
            data: DataFrame holding the columns to plot.
            x_label: Column used for the x axis (and its caption by default).
            y_label: Column used for the bar heights (and its caption by default).
            title: Chart title.
            figsize: Figure size in inches.
            xlabel: Optional x-axis caption overriding `x_label`.
            ylabel: Optional y-axis caption overriding `y_label`.
            rotation: Rotation of the x tick labels in degrees.
            label_kwargs: Keyword arguments for `Axes.bar_label`; defaults to
                a font size of 9.

        Returns:
            The matplotlib Figure; the caller is responsible for showing it.
        """
        if label_kwargs is None:
            label_kwargs = {"fontsize": 9}
        fig, ax = plt.subplots(figsize=figsize)
        sns.barplot(data=data, x=x_label, y=y_label, ax=ax)
        ax.tick_params(axis="x", labelrotation=rotation)
        ax.set_title(title)
        ax.set_xlabel(x_label if xlabel is None else xlabel)
        ax.set_ylabel(y_label if ylabel is None else ylabel)
        ax.bar_label(ax.containers[0], **label_kwargs)
        return fig

    def _count_section(frame, column, subheader, title, xlabel, figsize,
                       rotation=0, top=None, note=None, label_kwargs=None):
        """Show one 'how many cars per <column>' section: chart, then table.

        `top` limits the chart (not the table) to the first rows; `note` is
        an optional text shown between the chart and the table.
        """
        st.subheader(subheader)
        table = _count_table(frame, column)
        charted = table if top is None else table[:top]
        _show(plot_bar(charted, column, "Count", title, figsize=figsize,
                       xlabel=xlabel, ylabel="Cars", rotation=rotation,
                       label_kwargs={} if label_kwargs is None else label_kwargs))
        if note is not None:
            st.text(note)
        st.write(table)

    def _box_grid(frame, columns):
        """Show a 2x2 grid of box plots, one per entry in `columns`."""
        fig, axes = plt.subplots(2, 2, figsize=(20, 10))
        for ax, column in zip(axes.flat, columns):
            ax.boxplot(frame[column])
            ax.set_title(column)
        _show(fig)

    # Showing Dataset --------------------------------------------------------
    st.title('CO2 Emissions by Vehicle')
    st.header("Data We collected from the source")
    st.write(df)

    # Brands of Cars ---------------------------------------------------------
    _count_section(df, 'Make', 'Brands of Cars', "All Car Companies and their Cars",
                   "Companies", (15, 6), rotation=75, label_kwargs={"fontsize": 7})

    # Top 25 Models of Cars --------------------------------------------------
    _count_section(df, 'Model', 'Top 25 Models of Cars', "Top 25 Car Models",
                   "Models", (20, 6), rotation=75, top=25)

    # Vehicle Class ----------------------------------------------------------
    _count_section(df, 'Vehicle Class', 'Vehicle Class', "All Vehicle Class",
                   "Vehicle Class", (20, 5), rotation=75)

    # Engine Sizes of Cars ---------------------------------------------------
    _count_section(df, 'Engine Size(L)', 'Engine Sizes of Cars', "All Engine Sizes",
                   "Engine Size(L)", (20, 6), rotation=90)

    # Cylinders --------------------------------------------------------------
    _count_section(df, 'Cylinders', 'Cylinders', "All Cylinders",
                   "Cylinders", (20, 6), rotation=90)

    # Transmission of Cars ---------------------------------------------------
    # Collapse the detailed gearbox codes into five transmission families.
    # `df` itself is updated, so the CO2-by-transmission chart further down
    # groups by these family names as well.
    transmission_mapping = {
        "A4": "Automatic", "A5": "Automatic", "A6": "Automatic", "A7": "Automatic",
        "A8": "Automatic", "A9": "Automatic", "A10": "Automatic",
        "AM5": "Automated Manual", "AM6": "Automated Manual", "AM7": "Automated Manual",
        "AM8": "Automated Manual", "AM9": "Automated Manual",
        "AS4": "Automatic with Select Shift", "AS5": "Automatic with Select Shift",
        "AS6": "Automatic with Select Shift", "AS7": "Automatic with Select Shift",
        "AS8": "Automatic with Select Shift", "AS9": "Automatic with Select Shift",
        "AS10": "Automatic with Select Shift",
        "AV": "Continuously Variable", "AV6": "Continuously Variable",
        "AV7": "Continuously Variable", "AV8": "Continuously Variable",
        "AV10": "Continuously Variable",
        "M5": "Manual", "M6": "Manual", "M7": "Manual",
    }
    df["Transmission"] = df["Transmission"].map(transmission_mapping)
    _count_section(df, 'Transmission', 'Transmission', "All Transmissions",
                   "Transmissions", (20, 5))

    # Fuel Type of Cars ------------------------------------------------------
    _count_section(
        df, 'Fuel Type', 'Fuel Type', "All Fuel Types", "Fuel Types", (20, 5),
        note="We have only one data on natural gas. So we cannot predict anything using only one data. That's why we have to drop this row.",
    )

    # Removing Natural Gas ---------------------------------------------------
    _count_section(df_natural, 'Fuel Type', 'After removing Natural Gas data',
                   "All Fuel Types", "Fuel Types", (20, 5))

    # CO2 Emission variation with Brand --------------------------------------
    st.header('Variation in CO2 emissions with different features')
    st.subheader('CO2 Emission with Brand ')
    _show(plot_bar(_mean_co2_by('Make'), "Make", "CO2 Emissions(g/km)",
                   "CO2 Emissions variation with Brand", figsize=(20, 5),
                   xlabel="Brands", label_kwargs={"fontsize": 8, "fmt": '%.1f'}))

    # CO2 Emissions variation with Vehicle Class -----------------------------
    st.subheader('CO2 Emissions variation with Vehicle Class')
    _show(plot_bar(_mean_co2_by('Vehicle Class'), "Vehicle Class", "CO2 Emissions(g/km)",
                   "CO2 Emissions variation with Vehicle Class"))

    # CO2 Emission variation with Transmission -------------------------------
    st.subheader('CO2 Emission variation with Transmission')
    _show(plot_bar(_mean_co2_by('Transmission'), "Transmission", "CO2 Emissions(g/km)",
                   "CO2 Emission variation with Transmission"))

    # CO2 Emissions variation with Fuel Type ---------------------------------
    st.subheader('CO2 Emissions variation with Fuel Type')
    _show(plot_bar(_mean_co2_by('Fuel Type'), "Fuel Type", "CO2 Emissions(g/km)",
                   "CO2 Emissions variation with Fuel Type"))

    # Box Plots --------------------------------------------------------------
    st.header("Box Plots")
    features = ['Engine Size(L)', 'Cylinders', 'Fuel Consumption Comb (L/100 km)', 'CO2 Emissions(g/km)']
    _box_grid(df_new, features)

    # Outliers ---------------------------------------------------------------
    st.text("As we can see there are some outliers present in our Dataset")
    st.subheader("After removing outliers")
    # NOTE(review): `len(df)` still counts the natural-gas rows, which are
    # filtered out before `df_new_model` is built -- confirm this is the
    # intended "before" figure.
    st.write("Before removing outliers we have", len(df), "data")
    st.write("After removing outliers we have", len(df_new_model), "data")

    # Boxplot after removing outliers ----------------------------------------
    st.subheader("Boxplot after removing outliers")
    _box_grid(df_new_model, features)
| else: | |
| # Prepare the data for modeling-------------------------------------------------------------------- | |
| X = df_new_model[['Engine Size(L)', 'Cylinders', 'Fuel Consumption Comb (L/100 km)']] | |
| y = df_new_model['CO2 Emissions(g/km)'] | |
| # Train the random forest regression model--------------------------------------------------------- | |
| model = RandomForestRegressor().fit(X, y) | |
| # Create the Streamlit web app--------------------------------------------------------------------- | |
| st.title('CO2 Emission Prediction') | |
| st.write('Enter the vehicle specifications to predict CO2 emissions.') | |
| # Input fields for user---------------------------------------------------------------------------- | |
| engine_size = st.number_input('Engine Size(L)', step=0.1, format="%.1f") | |
| cylinders = st.number_input('Cylinders', min_value=2, max_value=16, step=1) | |
| fuel_consumption = st.number_input('Fuel Consumption Comb (L/100 km)', step=0.1, format="%.1f") | |
| # Predict CO2 emissions---------------------------------------------------------------------------- | |
| input_data = [[cylinders, engine_size, fuel_consumption]] | |
| predicted_co2 = model.predict(input_data) | |
| # Display the prediction--------------------------------------------------------------------------- | |
| st.write(f'Predicted CO2 Emissions: {predicted_co2[0]:.2f} g/km') | |