"""Streamlit app: explore a music dataset and cluster its tracks with DBSCAN.

The script loads a CSV of tracks, keeps the numeric columns, and renders three
tabs: a dataset overview, correlation/pairplot visualizations, and an
interactive DBSCAN clustering run on user-selected features.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

# Preferred pairplot columns; only those actually present are pre-selected.
DEFAULT_PAIRPLOT_FEATURES = ["Danceability", "Energy", "Tempo", "Loudness", "Valence"]

# Fixed DBSCAN hyper-parameters; adjust here as needed.
DBSCAN_EPS = 1.0
DBSCAN_MIN_SAMPLES = 10


def _show_figure(fig):
    """Render *fig* in the app, then close it.

    Passing an explicit figure avoids relying on pyplot's global "current
    figure", and closing it keeps figures from piling up across reruns.
    """
    st.pyplot(fig)
    plt.close(fig)


st.title("Music Genre Clustering with DBSCAN")

# Load dataset directly
file_path = "top_10000_1950-now.csv"
df = pd.read_csv(file_path)

# Remove non-numeric columns
df_numeric = df.select_dtypes(include=[np.number])
numeric_columns = df_numeric.columns.tolist()

# Create tabs
tab1, tab2, tab3 = st.tabs(["Overview", "Visualization Matrix", "User Input"])

with tab1:
    st.write("### Dataset Overview")
    st.dataframe(df.head())

    st.write("### Dataset Information")
    st.write(df_numeric.describe())

with tab2:
    st.write("### Correlation Matrix")
    heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 6))
    sns.heatmap(
        df_numeric.corr(), annot=True, cmap="coolwarm", fmt=".2f", ax=heatmap_ax
    )
    _show_figure(heatmap_fig)

    st.write("### Pairplot Visualization")
    # st.multiselect rejects defaults that are not among the options, so only
    # pre-select the preferred columns that exist in this dataset.
    pairplot_defaults = [
        name for name in DEFAULT_PAIRPLOT_FEATURES if name in numeric_columns
    ]
    pairplot_features = st.multiselect(
        "Select Features for Pairplot",
        numeric_columns,
        default=pairplot_defaults,
    )
    if pairplot_features:
        # pairplot builds its own figure; render that one rather than
        # whatever pyplot currently considers the active figure.
        pair_grid = sns.pairplot(df[pairplot_features])
        _show_figure(pair_grid.figure)

with tab3:
    st.write("### Clustering Settings")
    n_numeric = len(numeric_columns)

    if n_numeric < 2:
        st.warning("The dataset needs at least two numeric columns for clustering.")
    else:
        if n_numeric > 2:
            # Clamp the initial value so it never exceeds the slider maximum.
            num_features = st.slider(
                "Select Number of Features", 2, n_numeric, min(5, n_numeric)
            )
        else:
            # A slider needs a non-empty range; with exactly two columns
            # there is nothing to choose.
            num_features = 2

        features = st.multiselect(
            "Select Features for Clustering",
            numeric_columns,
            default=numeric_columns[:num_features],
        )

        if st.button("Run Clustering"):
            if len(features) >= 2:
                df_filtered = df_numeric[features].dropna()

                if df_filtered.empty:
                    # StandardScaler cannot be fitted on zero rows.
                    st.warning(
                        "No rows remain after dropping missing values "
                        "for the selected features."
                    )
                else:
                    X_scaled = StandardScaler().fit_transform(df_filtered)

                    dbscan = DBSCAN(eps=DBSCAN_EPS, min_samples=DBSCAN_MIN_SAMPLES)
                    labels = dbscan.fit_predict(X_scaled)

                    # Attach labels to the matching rows without mutating
                    # `df` or writing into a slice of `df_numeric`.
                    clustered = df.loc[
                        df_filtered.index, ["Track Name", "Artist Name(s)"]
                    ].assign(Cluster=labels)

                    st.write("### Clustered Data:")
                    st.dataframe(clustered.dropna().head(20))

                    st.write("### Cluster Visualization:")
                    # Only the first two selected (scaled) features are plotted.
                    fig, ax = plt.subplots()
                    scatter = ax.scatter(
                        X_scaled[:, 0],
                        X_scaled[:, 1],
                        c=labels,
                        cmap="viridis",
                        alpha=0.7,
                    )
                    legend1 = ax.legend(*scatter.legend_elements(), title="Clusters")
                    ax.add_artist(legend1)
                    _show_figure(fig)
            else:
                st.warning("Please select at least two features for clustering.")