# Spaces: Sleeping | File size: 2,483 Bytes | 404e73a
import streamlit as st
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
def load_data(uploaded_file):
    """Read an uploaded movie CSV and keep only its title and genre columns.

    Args:
        uploaded_file: Anything ``pd.read_csv`` accepts (path or file-like
            object), or ``None`` when nothing has been uploaded yet.

    Returns:
        A DataFrame with the columns ``Title`` and ``Genre`` (renamed from
        ``title`` / ``genres``), with rows missing either value dropped.
        ``None`` when no file was given, when the file cannot be parsed, or
        when the required columns are absent; the latter two cases are
        reported to the user through ``st.error``.
    """
    if uploaded_file is None:
        return None

    # An empty, malformed, or wrongly-encoded upload should produce a readable
    # message, not an unhandled traceback in the app.
    try:
        movies_df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded CSV file: {exc}")
        return None

    if 'title' not in movies_df.columns or 'genres' not in movies_df.columns:
        st.error("The dataset must contain 'title' and 'genres' columns.")
        return None

    movies_df = movies_df[['title', 'genres']].dropna()
    return movies_df.rename(columns={'title': 'Title', 'genres': 'Genre'})
def train_model(movies_df, n_clusters=10):
    """Cluster movies by the TF-IDF representation of their genre strings.

    Args:
        movies_df: DataFrame with a ``Genre`` column of strings. It is
            modified in place: a ``Cluster`` column holding each row's
            cluster label is added (or overwritten).
        n_clusters: Desired number of clusters. Defaults to 10.

    Returns:
        A ``(movies_df, kmeans, vectorizer)`` tuple: the same DataFrame with
        the ``Cluster`` column, the fitted ``KMeans`` model, and the fitted
        ``TfidfVectorizer``.

    Raises:
        ValueError: Propagated from scikit-learn, e.g. when ``movies_df`` has
            no rows to build a vocabulary from.
    """
    vectorizer = TfidfVectorizer()
    genre_matrix = vectorizer.fit_transform(movies_df['Genre'])

    # KMeans refuses to fit when asked for more clusters than there are
    # samples, so small datasets need the cluster count capped.
    effective_clusters = min(n_clusters, genre_matrix.shape[0])

    # Fixed random_state keeps cluster assignments reproducible across reruns.
    kmeans = KMeans(n_clusters=effective_clusters, random_state=42, n_init=10)
    movies_df['Cluster'] = kmeans.fit_predict(genre_matrix)
    return movies_df, kmeans, vectorizer
def recommend_movies(selected_genre, movies_df, kmeans, vectorizer, limit=10):
    """Return titles from the cluster that the selected genre falls into.

    Args:
        selected_genre: Genre text to look up.
        movies_df: DataFrame with ``Title`` and ``Cluster`` columns.
        kmeans: Fitted model exposing ``predict``; its first prediction for
            the vectorized genre selects the cluster.
        vectorizer: Fitted vectorizer exposing ``transform``.
        limit: Maximum number of titles to return. Defaults to 10.

    Returns:
        A list of at most ``limit`` titles, in the DataFrame's row order.
        Empty when no row belongs to the predicted cluster.

    Raises:
        ValueError: If ``limit`` is negative.
    """
    if limit < 0:
        # A negative value would make ``head`` drop rows from the end instead
        # of limiting the result, which is never what a caller wants here.
        raise ValueError("limit must be non-negative")

    genre_vector = vectorizer.transform([selected_genre])
    cluster = kmeans.predict(genre_vector)[0]

    # Truncate before converting so only the needed titles are materialised.
    in_cluster = movies_df.loc[movies_df['Cluster'] == cluster, 'Title']
    return in_cluster.head(limit).tolist()
def _unique_genres(genre_column):
    """Return the sorted distinct genre names from pipe-delimited genre strings."""
    genres = set()
    for entry in genre_column:
        # Split each row on its own: joining rows first would fuse the last
        # genre of one movie with the first genre of the next.
        for part in str(entry).split('|'):
            name = part.strip()
            if name:
                genres.add(name)
    return sorted(genres)


def main():
    """Render the Streamlit page: upload a CSV, pick a genre, list movies.

    The page shows usage instructions and a CSV uploader. Once a valid
    dataset is loaded, the clustering model is trained, a genre dropdown is
    filled from the dataset's ``Genre`` column, and pressing the button lists
    the recommended titles for the chosen genre.
    """
    st.title("Movie Recommendation System")
    st.write("### Instructions:")
    st.write("1. Download the dataset")
    st.write("2. Upload the dataset using the uploader below.")
    st.write("3. Select a genre from the dropdown.")
    st.write("4. Click 'Get Recommendations' to see recommended movies.")

    uploaded_file = st.file_uploader("Upload CSV", type=["csv"])
    if not uploaded_file:
        return

    movies_df = load_data(uploaded_file)
    if movies_df is None:
        # load_data has already reported the problem to the user.
        return
    if movies_df.empty:
        # Nothing to cluster; training would fail on an empty vocabulary.
        st.error("The dataset contains no rows with both a title and genres.")
        return

    movies_df, kmeans, vectorizer = train_model(movies_df)

    selected_genre = st.selectbox("Select a Genre", _unique_genres(movies_df['Genre']))
    if not st.button("Get Recommendations"):
        return

    recommendations = recommend_movies(selected_genre, movies_df, kmeans, vectorizer)
    if recommendations:
        st.write("### Recommended Movies:")
        for movie in recommendations:
            st.write(f"- {movie}")
    else:
        st.write("No recommendations found for the selected genre.")
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|