| import streamlit as st |
| import pandas as pd |
| import numpy as np |
| import matplotlib.pyplot as plt |
| import seaborn as sns |
| import plotly.express as px |
|
|
| |
@st.cache_data
def fetch_data():
    """Load the laptop dataset from ``laptop_updated.csv``.

    The result is wrapped in ``st.cache_data``, so the CSV is read through
    Streamlit's data cache rather than on every call.

    Returns:
        pandas.DataFrame: The contents of the CSV file.
    """
    return pd.read_csv("laptop_updated.csv")
|
|
| |
def _show_figure(fig):
    """Render a Matplotlib figure in Streamlit, then close it."""
    st.pyplot(fig)
    # Figures created through pyplot stay registered until explicitly closed;
    # without this, every rerun of the page would leak one figure per chart.
    plt.close(fig)


def _plot_company_charts(df):
    """Draw the average price and the laptop count per company."""
    company_counts = df['Company'].value_counts()
    # Only the 20 most frequent companies are shown in the price chart.
    company_list = company_counts.index[:20].tolist()

    fig, ax = plt.subplots(figsize=(9, 5))
    sns.barplot(
        x='Company',
        y='price',
        data=df[df['Company'].isin(company_list)],
        order=company_list,
        palette='Spectral',
        # No error bars wanted: None skips the bootstrap entirely instead of
        # computing a zero-width confidence interval.
        errorbar=None,
        edgecolor="black",
        ax=ax,
    )
    ax.tick_params(axis='x', labelrotation=80)
    ax.set_title('Average Price of Laptops by Company')
    ax.set_xlabel('')
    ax.set_ylabel('Price')
    _show_figure(fig)

    fig, ax = plt.subplots(figsize=(10, 5))
    sns.countplot(
        x='Company',
        data=df,
        palette='mako_r',
        order=company_counts.index,
        ax=ax,
    )
    ax.tick_params(axis='x', labelrotation=80)
    ax.set_title('Number of Laptops by Company')
    ax.set_ylabel('Counts')
    ax.set_xlabel('')
    _show_figure(fig)


def _plot_cpu_chart(df):
    """Draw the average price for the five most frequent CPU names."""
    cpu_list = df['cpu_name'].value_counts().index[:5].tolist()
    cpu_avg_price = (
        df[df['cpu_name'].isin(cpu_list)]
        .groupby('cpu_name')['price']
        .mean()
        .sort_values(ascending=False)
        .reset_index()
    )

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.barplot(
        x='cpu_name',
        y='price',
        data=cpu_avg_price,
        palette='viridis',
        # The data is already one mean per CPU, so an interval is meaningless.
        errorbar=None,
        ax=ax,
    )
    ax.tick_params(axis='x', labelrotation=80)

    # Annotate every bar with its value.
    for container in ax.containers:
        ax.bar_label(container)

    ax.set_title('Average Price of Laptop based on Common CPUs')
    ax.set_xlabel('')
    ax.set_ylabel('')
    _show_figure(fig)


def _plot_gpu_chart(df):
    """Draw the average price per GPU brand with brand-specific colours."""
    custom_palette = {'Intel': 'blue', 'Nvidia': 'green', 'AMD': 'red', 'ARM': 'yellow'}

    # Compute the brand order once and hand it to seaborn explicitly so the
    # colour list is guaranteed to line up with the bars; brands missing from
    # the palette fall back to grey.
    gpu_brands = df['gpu_brand'].dropna().unique().tolist()
    colors = [custom_palette.get(brand, 'grey') for brand in gpu_brands]

    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(
        x='gpu_brand',
        y='price',
        data=df,
        order=gpu_brands,
        estimator=np.mean,
        palette=colors,
        ax=ax,
    )
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.set_title('Average Laptop Price by GPU Brand')
    ax.tick_params(axis='x', labelrotation=90)
    _show_figure(fig)


def _plot_ram_chart(df):
    """Draw a pie chart with the percentage of laptops per RAM value."""
    ram = df.groupby("Ram").size() * 100 / len(df)
    ram.name = "percentage"
    ram = ram.reset_index()

    fig = px.pie(ram, names="Ram", values="percentage", title="Amount of RAMs is the Most Common in Laptops")
    fig.update_layout({"legend_title": "RAM (GB)"})
    st.plotly_chart(fig)


def _plot_type_charts(df):
    """Draw the laptop count and the average price per laptop type."""
    fig, ax = plt.subplots(figsize=(6, 5))
    sns.countplot(
        x='TypeName',
        data=df,
        palette='tab20c',
        order=df['TypeName'].value_counts().index,
        ax=ax,
    )
    ax.tick_params(axis='x', labelrotation=80)
    ax.set_title('Most Common Laptops Type')
    ax.set_xlabel('')
    ax.set_ylabel('Counts')
    _show_figure(fig)

    # The price chart needs its own figure; otherwise it would be drawn on
    # top of the count plot above.
    fig, ax = plt.subplots(figsize=(6, 5))
    sns.barplot(
        x='TypeName',
        y='price',
        data=df,
        palette='plasma',
        ax=ax,
    )
    ax.tick_params(axis='x', labelrotation=90)
    ax.set_ylabel('Price')
    ax.set_title('Average Laptop Price by Types')
    ax.set_xlabel('')
    _show_figure(fig)


def _plot_screen_size_chart(df):
    """Draw the laptop count for the six most frequent screen sizes."""
    inches_list = df['Inches'].value_counts().index[:6].tolist()
    df_clean = df[df['Inches'].isin(inches_list)]

    fig, ax = plt.subplots(figsize=(6, 5))
    sns.countplot(x='Inches', data=df_clean, palette='cool', ax=ax)
    ax.tick_params(axis='x', labelrotation=80)
    ax.set_title('Most Common Laptop Screen Size')
    ax.set_ylabel('')
    _show_figure(fig)


def _plot_os_charts(df):
    """Draw the laptop count and the average price per operating system."""
    os_counts = df.groupby("OpSys").size().sort_values(ascending=False)
    os_counts.name = "Jumlah laptop"
    os_counts = os_counts.reset_index()

    fig = px.bar(os_counts, x="Jumlah laptop", y="OpSys", color="OpSys", title="Most Used Operating Systems in Laptops")
    fig.update_yaxes(title_text='')
    fig.update_xaxes(title_text='')
    fig.update_layout(legend_title_text='Operating Systems')
    st.plotly_chart(fig)

    os_prices = df.groupby("OpSys")['price'].mean().sort_values(ascending=False)
    os_prices.name = "Rata-rata harga"
    os_prices = os_prices.reset_index()

    fig = px.bar(os_prices, x="Rata-rata harga", y="OpSys", color="OpSys", title="Average Price of Laptops based on Operating Systems")
    fig.update_yaxes(title_text='')
    fig.update_xaxes(title_text='')
    fig.update_layout(legend_title_text='Operating Systems')
    st.plotly_chart(fig)


def run():
    """Render the "Exploratory Data Analysis" page.

    Loads the dataset via ``fetch_data()``, shows the raw table, and then
    renders the charts in a fixed order: company, CPU, GPU brand, RAM,
    laptop type, screen size, and operating system.

    Every Matplotlib chart is drawn on its own explicit figure, which is
    passed to ``st.pyplot`` and closed afterwards, so charts never bleed
    into each other and figures do not accumulate across reruns.
    """
    st.title('Exploratory Data Analysis')

    df = fetch_data()
    st.write(df)

    _plot_company_charts(df)
    _plot_cpu_chart(df)
    _plot_gpu_chart(df)
    _plot_ram_chart(df)
    _plot_type_charts(df)
    _plot_screen_size_chart(df)
    _plot_os_charts(df)
|
|
|
|
| |
# Render the page only when this file is executed as the main module,
# not when it is imported from elsewhere.
if __name__ == "__main__":
    run()
|
|