File size: 3,162 Bytes
1910e89
 
 
 
b81e062
 
1910e89
 
e749a79
1910e89
68ef21a
cbacd69
 
 
 
 
 
0e7aaa0
b81e062
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b9f5b0f
b81e062
4790583
 
 
 
e660b3f
e93cb38
67ec571
769923e
3d7e042
913f7bf
67ec571
 
 
 
 
 
 
5d5108c
67ec571
e749a79
88d878a
 
 
 
67ec571
5cb64bc
5d5108c
0930037
b81e062
b9f5b0f
a9a7c3a
68ef21a
df88715
62e18d0
5aabe52
7080b44
 
188f228
36d4ffb
54a1f7c
62e18d0
59e0b08
 
280ca3b
59e0b08
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import streamlit as st
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer


# Load the two CSV inputs (paths are relative to the working directory).
movies_df = pd.read_csv("movies.csv")
# NOTE(review): presumably one row per movie, in the same row order as
# movies.csv -- later code indexes both frames with the same positions. Confirm.
links = pd.read_csv("imdbLinks.csv")
# Split the '|'-separated 'genres' strings into one indicator column per genre.
genre_df = movies_df['genres'].str.get_dummies("|")
st.title("Movie Recommendation System")
@st.cache_data
def calculate_similarity_matrix(genre_df):
    """Return the pairwise cosine similarity between the rows of ``genre_df``.

    The result is memoised by ``st.cache_data``, so it is not recomputed on
    every script rerun while ``genre_df`` is unchanged.

    Args:
        genre_df: Table with one row per movie and one column per genre
            indicator.

    Returns:
        Whatever ``cosine_similarity(genre_df)`` yields: a square matrix
        whose entry ``[i, j]`` is the similarity of rows ``i`` and ``j``.
    """
    return cosine_similarity(genre_df)

# Genre-based movie-to-movie similarity; row/column order follows genre_df.
similarity_matrix = calculate_similarity_matrix(genre_df)

@st.cache_data
def vectorize_data(movies):
    """Fit a TF-IDF model on the ``'title'`` column of ``movies``.

    The result is memoised by ``st.cache_data``.

    Args:
        movies: Table exposing a ``'title'`` column of strings.

    Returns:
        A ``(matrix, vectorizer)`` tuple: the TF-IDF matrix of the titles
        (one row per title) and the fitted ``TfidfVectorizer`` that produced
        it, so callers can transform new queries into the same space.
    """
    tfidf = TfidfVectorizer()
    title_matrix = tfidf.fit_transform(movies['title'])
    return title_matrix, tfidf

def search_movie(title):
    """Return the title in ``movies_df`` that best matches the query *title*.

    The query is projected into the TF-IDF space fitted on all movie titles
    and compared to every title by cosine similarity.

    Args:
        title: Free-text search string.

    Returns:
        The ``'title'`` value of the highest-scoring row. When several rows
        share the top score (including the case where nothing matches and
        every score is equal), the earliest row wins.
    """
    vectorized_data, vec = vectorize_data(movies_df)
    title_vector = vec.transform([title])

    similarity_scores = cosine_similarity(title_vector, vectorized_data).flatten()

    # Only the single best match is needed, so take the arg-max directly
    # instead of sorting every score and materialising a (title, score) list.
    best_row = int(similarity_scores.argmax())

    # Scores are ordered by row position in the TF-IDF matrix, so look the
    # title up positionally rather than by index label.
    return movies_df['title'].iloc[best_row]

def _row_of(title):
    """Return the positional row in ``movies_df`` of the first movie named *title*.

    A position (not an index label) is returned because ``similarity_matrix``
    and ``.iloc`` are both addressed by position.

    Raises:
        IndexError: If no row has that title.
    """
    matches = np.flatnonzero((movies_df['title'] == title).to_numpy())
    return int(matches[0])


def _recommend(movie_row, limit=5):
    """Return up to *limit* ``(title, link_row)`` pairs most similar to *movie_row*.

    Titles and link rows are selected with the same positions, so each title
    stays paired with its own link row.
    """
    # Best match first. Movies with identical genre vectors tie with the
    # queried movie, so it is not guaranteed to sit at rank 0; drop it by
    # position instead of blindly skipping the first entry.
    ranked = similarity_matrix[movie_row].argsort()[::-1]
    top = ranked[ranked != movie_row][:limit]
    titles = movies_df['title'].iloc[top].to_numpy()
    # NOTE(review): assumes `links` rows are aligned with `movies_df` rows -- confirm.
    link_rows = links.iloc[top].to_numpy()
    return list(zip(titles, link_rows))


def _show_recommendations(movie_row):
    """Render the "Recommended Movies" list for the movie at *movie_row*."""
    st.subheader("Recommended Movies")
    for rank, (title, link_row) in enumerate(_recommend(movie_row), 1):
        # NOTE(review): the first links column is used as the URL and its last
        # character is dropped, as in the original code -- presumably a
        # trailing separator; confirm against imdbLinks.csv.
        st.write(f"\t{rank}.{title}:[IMDb]({link_row[0][:-1]})")


# The text input is bound to session state under 'search' so that the
# Refresh button below can reset it.
if 'search' not in st.session_state:
    st.session_state['search'] = ""

movie = st.text_input("Search", placeholder='Search for movies', key="search")

# Keep the same five random suggestions across reruns until Refresh is pressed.
if 'random_movies' not in st.session_state:
    st.session_state['random_movies'] = movies_df['title'].sample(5)
col1, col2 = st.columns(2)
random_movies = st.session_state['random_movies']

if not movie:
    # No search text: offer the random suggestions as buttons in the left
    # column and show recommendations for the clicked one in the right column.
    with col1:
        for suggestion in random_movies:
            if st.button(suggestion):
                st.write("Selected Movie:", suggestion)
                with col2:
                    _show_recommendations(_row_of(suggestion))
else:
    # Search text present: resolve it to the closest catalogue title and
    # recommend from that movie.
    _show_recommendations(_row_of(search_movie(movie)))

if st.button("Refresh", key='refresh'):
    # Draw new suggestions and drop the stored search text, then rerun so the
    # page is rebuilt from the fresh state.
    st.session_state['random_movies'] = movies_df['title'].sample(5)
    del st.session_state['search']
    st.rerun()