Spaces:

michaelryt
/

City_Clustering_Using_DBSCAN

Build error

App Files Files Community

Michael Rey committed on Apr 8, 2025

Commit

d3bd225

1 Parent(s): d74d55e

added latest changes

Browse files

Files changed (4) hide show

README.md +1 -1
app.py +66 -0
movies.csv +0 -0
requirements.txt +4 -0

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: City Clustering Using DBSCAN
 emoji: 🔥
 colorFrom: green
 colorTo: red

 ---
+title: Movie Recommender Using K-Means Clustering
 emoji: 🔥
 colorFrom: green
 colorTo: red

app.py ADDED Viewed

	@@ -0,0 +1,66 @@

+import streamlit as st
+import pandas as pd
+from sklearn.preprocessing import MultiLabelBinarizer
+from sklearn.cluster import KMeans
+from sklearn.decomposition import PCA
+import matplotlib.pyplot as plt
+# Title
+st.title("Movie Recommendation")
+st.markdown("#### Recommend Movies in the Same Cluster of Genre using K-Means Clustering", unsafe_allow_html=True)
+# Load dataset
+movies = pd.read_csv('movies.csv')
+movies = movies[movies['genres'] != '(no genres listed)']
+movies['genres'] = movies['genres'].apply(lambda x: x.split('|'))
+# One-hot encode genres
+mlb = MultiLabelBinarizer()
+genre_matrix = mlb.fit_transform(movies['genres'])
+# Apply KMeans
+k = 10
+model = KMeans(n_clusters=k, random_state=42)
+movies['cluster'] = model.fit_predict(genre_matrix)
+# Add PCA for 2D visualization
+pca = PCA(n_components=2)
+pca_result = pca.fit_transform(genre_matrix)
+movies['pca_x'] = pca_result[:, 0]
+movies['pca_y'] = pca_result[:, 1]
+# Streamlit Tabs
+tab1, tab2, tab3 = st.tabs(["📄 Dataset Overview", "📊 Clustering Visualization", "🎬 Movie Recommender"])
+# Tab 1: Dataset Overview
+with tab1:
+    st.header("🎥 Movie Dataset Overview")
+    st.write("Total Movies:", len(movies))
+    st.dataframe(movies[['title', 'genres', 'cluster']].head(10))
+# Tab 2: Visualization
+with tab2:
+    st.header("🧠 Genre-Based Clustering (PCA Projection)")
+    fig, ax = plt.subplots()
+    scatter = ax.scatter(movies['pca_x'], movies['pca_y'], c=movies['cluster'], cmap='tab10', alpha=0.6)
+    ax.set_xlabel("PCA 1")
+    ax.set_ylabel("PCA 2")
+    ax.set_title("Movie Genre Clusters")
+    st.pyplot(fig)
+# Tab 3: Movie Recommender
+with tab3:
+    st.header("🎬 Movie Recommender (Unsupervised KMeans)")
+    movie_options = sorted(movies['title'].unique())
+    selected_title = st.selectbox("Choose a movie:", movie_options)
+    selected_movie = movies[movies['title'] == selected_title].iloc[0]
+    st.success(f"You selected: {selected_movie['title']}")
+    cluster_id = selected_movie['cluster']
+    recs = movies[(movies['cluster'] == cluster_id) & (movies['title'] != selected_movie['title'])]
+    st.subheader("📍 Recommended Movies (Same Cluster):")
+    for title in recs['title'].head(10):
+        st.write(f"- {title}")

movies.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+streamlit
+pandas
+scikit-learn
+matplotlib