Spaces:
Build error
Build error
| import streamlit as st | |
| import pandas as pd | |
| from sklearn.preprocessing import MultiLabelBinarizer | |
| from sklearn.cluster import KMeans | |
| from sklearn.decomposition import PCA | |
| import matplotlib.pyplot as plt | |
# Title
# Page heading followed by a level-4 markdown subtitle describing the approach.
st.title("Movie Recommendation")
# The subtitle is plain markdown with no embedded HTML, so raw-HTML rendering
# is left at its default (escaped) rather than being switched on needlessly.
st.markdown("#### Recommend Movies in the Same Cluster of Genre using K-Means Clustering")
# Load dataset
@st.cache_data
def _load_clustered_movies(csv_path, n_clusters):
    """Load the movie table and attach a genre cluster and 2-D PCA coordinates.

    Streamlit re-executes the whole script on every widget interaction, so the
    CSV parse and both model fits are cached; they only run again when the
    arguments change. Edits to the CSV on disk are NOT detected by the cache.

    Args:
        csv_path: Path of the CSV file; it must provide a ``genres`` column
            holding ``|``-separated genre names.
        n_clusters: Number of KMeans clusters to fit.

    Returns:
        A DataFrame of the rows that have genres, where ``genres`` is a list
        of genre names and the columns ``cluster``, ``pca_x`` and ``pca_y``
        have been added.
    """
    raw = pd.read_csv(csv_path)

    # Drop rows without usable genres (missing value or the placeholder text).
    has_genres = raw['genres'].notna() & (raw['genres'] != '(no genres listed)')
    # .copy() makes the filtered frame independent, so the column assignments
    # below do not write to a slice of `raw`.
    catalogue = raw[has_genres].copy()
    catalogue['genres'] = catalogue['genres'].str.split('|')

    # One-hot encode genres
    genre_matrix = MultiLabelBinarizer().fit_transform(catalogue['genres'])

    # Apply KMeans
    # n_init is given explicitly because its default differs between
    # scikit-learn releases; pinning it keeps the clustering reproducible.
    model = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
    catalogue['cluster'] = model.fit_predict(genre_matrix)

    # Add PCA for 2D visualization
    pca_result = PCA(n_components=2).fit_transform(genre_matrix)
    catalogue['pca_x'] = pca_result[:, 0]
    catalogue['pca_y'] = pca_result[:, 1]
    return catalogue


k = 10
movies = _load_clustered_movies('movies.csv', k)
# Streamlit Tabs
# NOTE(review): the leading "π…" characters in the labels below look like
# mis-encoded emoji; confirm against the original file before changing them.
tab_labels = ["π Dataset Overview", "π Clustering Visualization", "π¬ Movie Recommender"]
tab1, tab2, tab3 = st.tabs(tab_labels)

# Tab 1: Dataset Overview
# Shows the number of rows in `movies` and a ten-row preview of the title,
# genres and assigned cluster columns.
with tab1:
    st.header("π₯ Movie Dataset Overview")
    st.write("Total Movies:", movies.shape[0])
    preview = movies[['title', 'genres', 'cluster']].head(10)
    st.dataframe(preview)
# Tab 2: Visualization
# Scatter plot of the two PCA coordinates of every movie, coloured by the
# movie's cluster id.
with tab2:
    st.header("π§ Genre-Based Clustering (PCA Projection)")
    fig, ax = plt.subplots()
    try:
        ax.scatter(movies['pca_x'], movies['pca_y'], c=movies['cluster'], cmap='tab10', alpha=0.6)
        ax.set_xlabel("PCA 1")
        ax.set_ylabel("PCA 2")
        ax.set_title("Movie Genre Clusters")
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates registered until it is closed.
        # The script reruns on each interaction, so without this the figures
        # would pile up in memory.
        plt.close(fig)
# Tab 3: Movie Recommender
# Lets the user pick a title and lists up to ten other titles that were
# assigned to the same cluster.
with tab3:
    st.header("π¬ Movie Recommender (Unsupervised KMeans)")
    # dropna() keeps sorted() from comparing a missing (float NaN) title with
    # strings, which would raise TypeError.
    movie_options = sorted(movies['title'].dropna().unique())
    selected_title = st.selectbox("Choose a movie:", movie_options)

    if selected_title is None:
        # selectbox returns None when it has no options to offer; indexing
        # the empty match with .iloc[0] would raise IndexError.
        st.warning("No movies are available to choose from.")
    else:
        # A title can occur on several rows; the first match decides the cluster.
        selected_movie = movies[movies['title'] == selected_title].iloc[0]
        st.success(f"You selected: {selected_movie['title']}")
        cluster_id = selected_movie['cluster']

        same_cluster = movies['cluster'] == cluster_id
        other_title = movies['title'] != selected_title
        recs = movies[same_cluster & other_title]

        st.subheader("π Recommended Movies (Same Cluster):")
        # Drop repeated titles so the same name is not listed twice.
        rec_titles = recs['title'].drop_duplicates().head(10)
        if rec_titles.empty:
            st.info("No other movies were found in this cluster.")
        for title in rec_titles:
            st.write(f"- {title}")