Spaces:

reysarms
/

all_about_kpop

Sleeping

App Files Files Community

reysarms committed on Mar 30, 2025

Commit

dfaa369

1 Parent(s): c9a36a4

updated files

Browse files

Files changed (3) hide show

.gitignore +4 -0
app.py +86 -0
requirements.txt +7 -0

.gitignore CHANGED Viewed

	@@ -0,0 +1,4 @@

+kpop_venv/
+.venv/
+env/
+venv/

app.py CHANGED Viewed

	@@ -0,0 +1,86 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import seaborn as sns
+import matplotlib.pyplot as plt
+from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
+from sklearn.preprocessing import StandardScaler
+# Load dataset
+@st.cache_data
+def load_data():
+    data = pd.read_csv("kpopidolsv3.csv")
+    return data
+# Module-level DataFrame; main() reads it when rendering the dataset tab.
+data = load_data()
+# Preprocess data
+def preprocess_data(data):
+    features = ['Height', 'Weight']
+    df = data.dropna(subset=features)
+    scaler = StandardScaler()
+    scaled_features = scaler.fit_transform(df[features])
+    return scaled_features, df
+# Perform hierarchical clustering
+def apply_hierarchical_clustering(scaled_features, method='ward'):
+    Z = linkage(scaled_features, method=method)
+    return Z
+# Sidebar controls
+st.sidebar.header("Clustering Parameters")
+num_clusters = st.sidebar.slider("Number of Clusters", 2, 10, 3)
+def main():
+    st.title("🎤 K-Pop Idol Clustering using Hierarchical Clustering")
+    # Tabs for Navigation
+    tab1, tab2, tab3 = st.tabs(["📚 About the App", "📊 Dataset & Results", "🔎 Explore Idols"])
+    with tab1:
+        st.header("📚 About the App")
+        st.markdown(
+            "This app groups K-pop idols based on their physical features (height, weight), company, and debut information using **Hierarchical Clustering with Ward's Method**."
+        )
+        st.markdown(
+            "### How It Works:
+            - **Dendrogram Visualization:** Explore hierarchical clusters.
+            - **Dynamic Cluster Cutting:** Set the number of clusters dynamically.
+            - **Idol Comparison:** Analyze clusters by different features."
+        )
+    with tab2:
+        st.header("📊 Dataset Overview and Results")
+        st.write("### Sample Data")
+        st.dataframe(data.head())
+        # Preprocess and cluster
+        scaled_features, df_processed = preprocess_data(data)
+        Z = apply_hierarchical_clustering(scaled_features)
+        # Dendrogram
+        st.write("### Dendrogram")
+        plt.figure(figsize=(12, 6))
+        dendrogram(Z, labels=df_processed['Stage Name'].values, leaf_rotation=90)
+        st.pyplot(plt)
+        # Cut the dendrogram
+        cluster_labels = fcluster(Z, num_clusters, criterion='maxclust')
+        df_processed['Cluster'] = cluster_labels
+        st.write("### Clustered Data Sample")
+        st.dataframe(df_processed[['Stage Name', 'Company', 'Nationality', 'Cluster']].head(10))
+    with tab3:
+        st.header("🔎 Explore Idols by Company or Nationality")
+        option = st.selectbox("Filter idols by:", ["Company", "Nationality"])
+        selected_value = st.text_input(f"Enter {option} name:")
+        if selected_value:
+            filtered_data = df_processed[df_processed[option].str.contains(selected_value, na=False, case=False)]
+            if not filtered_data.empty:
+                st.dataframe(filtered_data[['Stage Name', 'Company', 'Nationality', 'Cluster']])
+            else:
+                st.warning(f"No idols found for {option}: {selected_value}")
+# Build the page only when this file is executed as the entry-point script.
+if __name__ == "__main__":
+    main()

requirements.txt CHANGED Viewed

	@@ -0,0 +1,7 @@

+streamlit
+pandas
+numpy
+seaborn
+matplotlib
+scipy
+scikit-learn