Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import seaborn as sns | |
| import matplotlib.pyplot as plt | |
| from scipy.cluster.hierarchy import linkage, dendrogram, fcluster | |
| from sklearn.preprocessing import StandardScaler | |
# Load dataset
def load_data(path="kpopidolsv3.csv"):
    """Read the idol dataset from a CSV file into a DataFrame.

    Args:
        path: Location of the CSV file. Defaults to ``"kpopidolsv3.csv"``
            (the file this app has always loaded), so existing
            zero-argument calls keep working.

    Returns:
        pandas.DataFrame: The parsed contents of the file, unmodified.
    """
    return pd.read_csv(path)
# Module-level dataset: loaded when the script runs and read by main().
data = load_data()
# Preprocess data
def preprocess_data(data):
    """Keep rows with both height and weight, and standardize those columns.

    Args:
        data: DataFrame that contains at least the ``Height`` and ``Weight``
            columns. It is not modified.

    Returns:
        tuple: ``(scaled_features, df)`` where ``scaled_features`` is the
        output of ``StandardScaler.fit_transform`` on the two feature
        columns, and ``df`` is an independent copy of the rows of ``data``
        in which neither feature is missing. Rows of ``scaled_features``
        line up with rows of ``df``.
    """
    features = ['Height', 'Weight']
    # Take an explicit copy: callers add columns to the returned frame, and
    # writing to the raw result of dropna() can raise SettingWithCopyWarning
    # on pandas versions without copy-on-write.
    df = data.dropna(subset=features).copy()
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(df[features])
    return scaled_features, df
# Perform hierarchical clustering
def apply_hierarchical_clustering(scaled_features, method='ward'):
    """Build the hierarchical-clustering linkage matrix for the observations.

    Args:
        scaled_features: Observation matrix handed straight to
            ``scipy.cluster.hierarchy.linkage``.
        method: Linkage method name forwarded to ``linkage``; ``'ward'``
            unless overridden.

    Returns:
        The linkage matrix returned by ``linkage``.
    """
    return linkage(scaled_features, method=method)
# Sidebar controls
st.sidebar.header("Clustering Parameters")
# Slider from 2 to 10, starting at 3; main() reads this value when it cuts
# the dendrogram into flat clusters.
num_clusters = st.sidebar.slider(
    "Number of Clusters",
    min_value=2,
    max_value=10,
    value=3,
)
def main():
    """Render the Streamlit page: a title and three tabs.

    Reads the module-level ``data`` frame and the ``num_clusters`` sidebar
    value. The tabs are:

    * About: static description of the app.
    * Dataset & Results: data preview, dendrogram, and a sample of rows
      labelled with their flat cluster.
    * Explore Idols: case-insensitive substring filter on ``Company`` or
      ``Nationality`` over the clustered rows.
    """
    # NOTE(review): the leading glyphs in the title, tab and header strings
    # look like mis-decoded emoji. They are kept exactly as found; confirm the
    # intended characters before changing them.
    st.title("π€ K-Pop Idol Clustering using Hierarchical Clustering")

    # Tabs for Navigation
    tab1, tab2, tab3 = st.tabs(["π About the App", "π Dataset & Results", "π Explore Idols"])

    with tab1:
        st.header("π About the App")
        st.markdown(
            "This app groups K-pop idols based on their physical features (height, weight), company, and debut information using **Hierarchical Clustering with Ward's Method**."
        )
        # Adjacent literals joined with explicit newlines: a single-quoted
        # string cannot span several source lines.
        st.markdown(
            "### How It Works:\n"
            "- **Dendrogram Visualization:** Explore hierarchical clusters.\n"
            "- **Dynamic Cluster Cutting:** Set the number of clusters dynamically.\n"
            "- **Idol Comparison:** Analyze clusters by different features."
        )

    with tab2:
        st.header("π Dataset Overview and Results")
        st.write("### Sample Data")
        st.dataframe(data.head())

        # Preprocess and cluster
        scaled_features, df_processed = preprocess_data(data)
        Z = apply_hierarchical_clustering(scaled_features)

        # Dendrogram
        st.write("### Dendrogram")
        # Draw on an explicit Figure and close it after rendering, so figures
        # do not pile up in pyplot's global state on every rerun.
        fig, ax = plt.subplots(figsize=(12, 6))
        dendrogram(Z, labels=df_processed['Stage Name'].values, leaf_rotation=90, ax=ax)
        st.pyplot(fig)
        plt.close(fig)

        # Cut the dendrogram
        cluster_labels = fcluster(Z, num_clusters, criterion='maxclust')
        # assign() builds a new frame, so the labelled result never writes
        # into a frame that may be flagged as a copy of the raw data.
        df_processed = df_processed.assign(Cluster=cluster_labels)

        st.write("### Clustered Data Sample")
        st.dataframe(df_processed[['Stage Name', 'Company', 'Nationality', 'Cluster']].head(10))

    with tab3:
        st.header("π Explore Idols by Company or Nationality")
        option = st.selectbox("Filter idols by:", ["Company", "Nationality"])
        selected_value = st.text_input(f"Enter {option} name:")

        if selected_value:
            # regex=False: match the typed text literally, so characters such
            # as "(" or "+" cannot raise a regex error or change the match.
            matches = df_processed[option].str.contains(
                selected_value, na=False, case=False, regex=False
            )
            filtered_data = df_processed[matches]
            if not filtered_data.empty:
                st.dataframe(filtered_data[['Stage Name', 'Company', 'Nationality', 'Cluster']])
            else:
                st.warning(f"No idols found for {option}: {selected_value}")
# Entry point: build the page only when this file is executed as the main script.
if __name__ == "__main__":
    main()