File size: 2,483 Bytes
404e73a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import streamlit as st
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans

def load_data(uploaded_file):
    """Read an uploaded movie CSV and return its title/genre columns.

    Args:
        uploaded_file: A path or file-like object accepted by
            ``pandas.read_csv``, or ``None`` when nothing has been uploaded.

    Returns:
        A DataFrame with the columns ``'Title'`` and ``'Genre'`` (renamed from
        ``'title'`` and ``'genres'``) with rows containing missing values
        removed, or ``None`` when no file was given, the file could not be
        parsed, or the required columns are absent. Problems with the file
        are reported to the user through ``st.error``.
    """
    if uploaded_file is None:
        return None

    try:
        raw_df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # An empty or malformed upload is a user error, not a reason to crash
        # the app; report it the same way as a missing-column problem.
        st.error(f"Could not read the uploaded CSV file: {exc}")
        return None

    if 'title' not in raw_df.columns or 'genres' not in raw_df.columns:
        st.error("The dataset must contain 'title' and 'genres' columns.")
        return None

    movies_df = raw_df[['title', 'genres']].dropna()
    return movies_df.rename(columns={'title': 'Title', 'genres': 'Genre'})

def train_model(movies_df, n_clusters=10):
    """Cluster movies by the TF-IDF representation of their genre strings.

    Args:
        movies_df: DataFrame with a ``'Genre'`` column of genre strings. A
            ``'Cluster'`` column holding each row's cluster label is added to
            this same DataFrame (it is modified in place).
        n_clusters: Upper bound on the number of K-Means clusters. The number
            actually used is reduced to the count of distinct genre strings
            when the dataset has fewer than ``n_clusters`` of them.

    Returns:
        A tuple ``(movies_df, kmeans, vectorizer)``: the labelled DataFrame,
        the fitted ``KMeans`` model and the fitted ``TfidfVectorizer``.

    Raises:
        ValueError: If ``movies_df`` has no rows or ``n_clusters`` is less
            than 1.
    """
    if movies_df.empty:
        raise ValueError("Cannot train the model on an empty dataset.")
    if n_clusters < 1:
        raise ValueError("n_clusters must be at least 1.")

    vectorizer = TfidfVectorizer()
    genre_matrix = vectorizer.fit_transform(movies_df['Genre'])

    # KMeans refuses to fit when asked for more clusters than there are
    # samples, so small uploads must use fewer clusters than the default.
    distinct_genres = movies_df['Genre'].nunique()
    effective_clusters = max(1, min(n_clusters, distinct_genres))

    # Fixed random_state keeps cluster assignments reproducible across reruns.
    kmeans = KMeans(n_clusters=effective_clusters, random_state=42, n_init=10)
    movies_df['Cluster'] = kmeans.fit_predict(genre_matrix)
    return movies_df, kmeans, vectorizer

def recommend_movies(selected_genre, movies_df, kmeans, vectorizer):
    """Return up to ten titles from the cluster predicted for a genre.

    Args:
        selected_genre: Genre text to look up.
        movies_df: DataFrame with ``'Title'`` and ``'Cluster'`` columns.
        kmeans: Fitted model exposing ``predict``.
        vectorizer: Fitted vectorizer exposing ``transform``.

    Returns:
        A list of at most ten titles, in DataFrame order, whose ``'Cluster'``
        value equals the cluster predicted for ``selected_genre``.
    """
    max_results = 10
    query_vector = vectorizer.transform([selected_genre])
    predicted_cluster = kmeans.predict(query_vector)[0]
    in_cluster = movies_df['Cluster'] == predicted_cluster
    matching_titles = movies_df.loc[in_cluster, 'Title']
    return matching_titles.head(max_results).tolist()

def _extract_genres(genre_series):
    """Return the set of individual genre names in pipe-delimited strings."""
    return {
        genre.strip()
        for entry in genre_series
        for genre in str(entry).split('|')
        if genre.strip()
    }


def main():
    """Render the Streamlit page: upload a CSV, pick a genre, list movies.

    Shows usage instructions, loads the uploaded dataset, clusters it by
    genre, and on button press lists the movies recommended for the genre
    chosen in the dropdown. Problems with the dataset are reported on the
    page instead of raising.
    """
    st.title("Movie Recommendation System")
    st.write("### Instructions:")
    st.write("1. Download the dataset")
    st.write("2. Upload the dataset using the uploader below.")
    st.write("3. Select a genre from the dropdown.")
    st.write("4. Click 'Get Recommendations' to see recommended movies.")

    uploaded_file = st.file_uploader("Upload CSV", type=["csv"])
    if not uploaded_file:
        return

    movies_df = load_data(uploaded_file)
    if movies_df is None:
        return

    try:
        movies_df, kmeans, vectorizer = train_model(movies_df)
    except ValueError as exc:
        # Clustering can reject datasets that are empty or too small; tell
        # the user rather than showing a traceback.
        st.error(f"Could not train the recommendation model: {exc}")
        return

    # Split each row's genre string on its own. Joining all rows first and
    # then splitting would fuse the last genre of one row with the first
    # genre of the next.
    unique_genres = _extract_genres(movies_df['Genre'])
    if not unique_genres:
        st.error("No genres were found in the dataset.")
        return

    selected_genre = st.selectbox("Select a Genre", sorted(unique_genres))

    if st.button("Get Recommendations"):
        recommendations = recommend_movies(selected_genre, movies_df, kmeans, vectorizer)
        if recommendations:
            st.write("### Recommended Movies:")
            for movie in recommendations:
                st.write(f"- {movie}")
        else:
            st.write("No recommendations found for the selected genre.")

# Build the page only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()