reysarms committed on
Commit
dfaa369
·
1 Parent(s): c9a36a4

updated files

Browse files
Files changed (3) hide show
  1. .gitignore +4 -0
  2. app.py +86 -0
  3. requirements.txt +7 -0
.gitignore CHANGED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ kpop_venv/
2
+ .venv/
3
+ env/
4
+ venv/
app.py CHANGED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import seaborn as sns
5
+ import matplotlib.pyplot as plt
6
+ from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
7
+ from sklearn.preprocessing import StandardScaler
8
+
9
+ # Load dataset
10
@st.cache_data
def load_data():
    """Read the idol dataset from ``kpopidolsv3.csv``.

    The path is relative, so it is resolved against the process's current
    working directory. The function is wrapped in ``st.cache_data``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    return pd.read_csv("kpopidolsv3.csv")


# Module-level dataset read by main().
data = load_data()
16
+
17
+ # Preprocess data
18
def preprocess_data(data):
    """Drop rows lacking height/weight and standardize those two columns.

    Args:
        data: DataFrame containing at least 'Height' and 'Weight' columns.

    Returns:
        A ``(scaled_features, df)`` tuple: the result of
        ``StandardScaler.fit_transform`` on the two feature columns, and the
        filtered DataFrame those rows were taken from (same row order).
    """
    features = ['Height', 'Weight']
    # Return an independent copy: the caller adds a column to this frame, and
    # assigning into a dropna() result can raise SettingWithCopyWarning.
    df = data.dropna(subset=features).copy()
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(df[features])
    return scaled_features, df
24
+
25
+ # Perform hierarchical clustering
26
def apply_hierarchical_clustering(scaled_features, method='ward'):
    """Build the linkage matrix for agglomerative clustering.

    Args:
        scaled_features: Observations to cluster; forwarded unchanged to
            SciPy's ``linkage``.
        method: Linkage criterion name passed to ``linkage``. Defaults to
            ``'ward'``.

    Returns:
        The linkage matrix returned by ``linkage``.
    """
    return linkage(scaled_features, method=method)
29
+
30
+ # Sidebar controls
31
# Sidebar widget that picks how many flat clusters to cut the tree into.
st.sidebar.header("Clustering Parameters")
num_clusters = st.sidebar.slider(
    "Number of Clusters",
    min_value=2,
    max_value=10,
    value=3,
)
33
+
34
def main():
    """Render the Streamlit page: a title followed by three tabs.

    Tab 1 describes the app, tab 2 shows a data sample, the dendrogram and
    the flat cluster assignment, and tab 3 filters the clustered idols by
    company or nationality. Reads the module-level ``data`` and
    ``num_clusters``.
    """
    st.title("🎀 K-Pop Idol Clustering using Hierarchical Clustering")

    # Tabs for Navigation
    tab1, tab2, tab3 = st.tabs(["📚 About the App", "📊 Dataset & Results", "🔎 Explore Idols"])

    with tab1:
        st.header("📚 About the App")
        # NOTE(review): this copy mentions company and debut information, but
        # preprocess_data() only uses Height and Weight -- confirm the wording.
        st.markdown(
            "This app groups K-pop idols based on their physical features (height, weight), company, and debut information using **Hierarchical Clustering with Ward's Method**."
        )
        # A plain double-quoted literal cannot span physical lines; build the
        # multi-line markdown from adjacent literals with explicit newlines.
        st.markdown(
            "### How It Works:\n"
            "- **Dendrogram Visualization:** Explore hierarchical clusters.\n"
            "- **Dynamic Cluster Cutting:** Set the number of clusters dynamically.\n"
            "- **Idol Comparison:** Analyze clusters by different features."
        )

    with tab2:
        st.header("📊 Dataset Overview and Results")
        st.write("### Sample Data")
        st.dataframe(data.head())

        # Preprocess and cluster
        scaled_features, df_processed = preprocess_data(data)
        Z = apply_hierarchical_clustering(scaled_features)

        # Dendrogram: draw on an explicit Figure and hand that to Streamlit
        # instead of the pyplot module, then close it so figures do not
        # accumulate each time the script is re-executed.
        st.write("### Dendrogram")
        fig, ax = plt.subplots(figsize=(12, 6))
        dendrogram(Z, labels=df_processed['Stage Name'].values, leaf_rotation=90, ax=ax)
        st.pyplot(fig)
        plt.close(fig)

        # Cut the dendrogram
        cluster_labels = fcluster(Z, num_clusters, criterion='maxclust')
        # assign() returns a new frame, so no column is written into a frame
        # that may be a filtered view of the loaded data.
        df_processed = df_processed.assign(Cluster=cluster_labels)
        st.write("### Clustered Data Sample")
        st.dataframe(df_processed[['Stage Name', 'Company', 'Nationality', 'Cluster']].head(10))

    with tab3:
        st.header("🔎 Explore Idols by Company or Nationality")
        option = st.selectbox("Filter idols by:", ["Company", "Nationality"])
        selected_value = st.text_input(f"Enter {option} name:")

        if selected_value:
            # regex=False: the text box holds a literal substring; as a regex,
            # input such as "(" or "+" would raise re.error.
            matches = df_processed[option].str.contains(
                selected_value, na=False, case=False, regex=False
            )
            filtered_data = df_processed[matches]
            if not filtered_data.empty:
                st.dataframe(filtered_data[['Stage Name', 'Company', 'Nationality', 'Cluster']])
            else:
                st.warning(f"No idols found for {option}: {selected_value}")


if __name__ == "__main__":
    main()
requirements.txt CHANGED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ numpy
4
+ seaborn
5
+ matplotlib
6
+ scipy
7
+ scikit-learn