Shrikrishna committed on
Commit
735fa00
·
verified ·
1 Parent(s): c0a52c4

Upload 6 files

Browse files
Files changed (6) hide show
  1. app.py +163 -0
  2. movie_list.pkl +3 -0
  3. movies_df.pkl +3 -0
  4. requirements.txt +4 -0
  5. similarity.pkl +3 -0
  6. top_popular.pkl +3 -0
app.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pickle
3
+ import requests
4
+ import pandas as pd
5
+
6
# HTML/CSS snippet for the page footer. It is injected verbatim with
# st.markdown(..., unsafe_allow_html=True) at the end of the script: the
# <style> part pins a full-width bar to the bottom of the viewport and sets
# the link colours, the <div> part holds the credit line.
footer = """<style>
a:link , a:visited{
color: black;
background-color: transparent;
}

a:hover, a:active {
color: red;
background-color: transparent;
}

.footer {
position: fixed;
left: 0;
bottom: 0;
width: 100%;
background-color: white;
color: black;
text-align: center;
}
</style>
<div class="footer">
<p>Developed with <span style ='color:red'>❤</span> by <a href="https://shrikrishnaparab.tech/" target="_blank">Shrikrishna Parab</a></p>
</div>
"""
31
+
32
def fetch_poster(movie_id):
    """Return the TMDB poster image URL for ``movie_id``.

    Queries the TMDB ``/3/movie/{id}`` endpoint and builds a ``w500`` image
    URL from the ``poster_path`` field of the JSON response.

    Args:
        movie_id: TMDB movie identifier; formatted into the request URL.

    Returns:
        The poster URL as a string, or ``None`` when the response carries
        no ``poster_path`` (previously this case crashed with a TypeError
        or KeyError).

    Raises:
        requests.RequestException: on network failure, timeout, or a
            non-2xx HTTP status.
    """
    import os

    # NOTE(review): this key is committed to a public repository. Prefer
    # supplying TMDB_API_KEY through the environment and rotating the
    # literal below; it is kept only as a backward-compatible fallback.
    api_key = os.environ.get("TMDB_API_KEY", "8265bd1679663a7ea12ac168da84d2e8")
    url = "https://api.themoviedb.org/3/movie/{}".format(movie_id)

    # A timeout keeps one stalled request from hanging the whole page.
    response = requests.get(
        url,
        params={"api_key": api_key, "language": "en-US"},
        timeout=10,
    )
    response.raise_for_status()

    poster_path = response.json().get('poster_path')
    if not poster_path:
        return None

    # Strip any leading "/" so the joined URL has no doubled slash.
    return "https://image.tmdb.org/t/p/w500/" + poster_path.lstrip('/')
39
+
40
def get_popular(qualified, n=5):
    """Return the first ``n`` rows of an already-ranked movie table.

    Args:
        qualified: DataFrame (or any object exposing ``head``) whose rows
            are already ordered from most to least popular.
        n: Number of rows to keep. Defaults to 5, the value that used to
            be hard-coded.

    Returns:
        The result of ``qualified.head(n)``; fewer than ``n`` rows when the
        input is shorter.
    """
    return qualified.head(n)
43
+
44
+
45
def top_genre_based_movies(genre, percentile=0.95):
    """Rank the movies of one genre by a vote-weighted rating.

    Reads the module-level ``genre_df`` table. Movies whose ``genres``
    value contains ``genre`` are kept, then filtered to those with at
    least the ``percentile`` quantile of vote counts (``m``). Each
    survivor is scored as

        wr = v / (v + m) * R + m / (m + v) * C

    where ``v`` is its vote count, ``R`` its vote average and ``C`` the
    mean vote average of the genre.

    Args:
        genre: Genre name, matched as a literal substring.
        percentile: Quantile of vote counts used as the cut-off ``m``.

    Returns:
        DataFrame with columns ``movie_id``, ``title``, ``vote_count``,
        ``vote_average``, ``genres`` and ``wr``, sorted by ``wr``
        descending and capped at 250 rows.
    """
    # regex=False: the genre is a plain name, not a pattern.
    # na=False: rows without a genre would otherwise produce a NaN mask,
    # which pandas refuses to index with.
    in_genre = genre_df['genres'].str.contains(genre, regex=False, na=False)
    df = genre_df[in_genre]

    vote_counts = df['vote_count'].astype('int')
    # NOTE(review): the int cast truncates averages (7.9 -> 7) before they
    # feed into C and wr. Kept to preserve the existing ranking; confirm
    # whether that truncation is intended.
    vote_averages = df['vote_average'].astype('int')
    C = vote_averages.mean()
    m = vote_counts.quantile(percentile)

    columns = ['movie_id', 'title', 'vote_count', 'vote_average', 'genres']
    # .copy() so the assignments below modify an independent frame rather
    # than a view of genre_df (avoids SettingWithCopyWarning).
    qualified = df.loc[df['vote_count'] >= m, columns].copy()
    qualified['vote_count'] = qualified['vote_count'].astype('int')
    qualified['vote_average'] = qualified['vote_average'].astype('int')

    # Same formula as the former row-wise lambda, computed on whole
    # columns; this also works when the selection is empty.
    v = qualified['vote_count']
    R = qualified['vote_average']
    qualified['wr'] = (v / (v + m) * R) + (m / (m + v) * C)

    return qualified.sort_values('wr', ascending=False).head(250)
59
+
60
def recommend(movie):
    """Return the five movies most similar to ``movie``.

    Uses the module-level ``movies`` table and the ``similarity`` matrix,
    in which row ``i`` holds the similarity scores of the ``i``-th movie
    against every movie.

    Args:
        movie: Title to look up in ``movies['title']`` (exact match; the
            first match wins when the title is duplicated).

    Returns:
        A ``(names, posters)`` tuple of two parallel lists with up to five
        entries each: recommended titles and the values returned by
        ``fetch_poster`` for them.

    Raises:
        IndexError: if ``movie`` is not present in ``movies['title']``.
    """
    import heapq

    # Work with the positional row number: ``similarity`` and ``.iloc`` are
    # both positional, while ``.index`` labels need not be 0..n-1.
    matches = (movies['title'] == movie).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise IndexError("movie title not found: {!r}".format(movie))
    index = int(matches[0])

    # Exclude the query movie explicitly instead of assuming it sorts
    # first; a tie on the top score could otherwise recommend it back.
    candidates = (
        (position, score)
        for position, score in enumerate(similarity[index])
        if position != index
    )
    closest = heapq.nlargest(5, candidates, key=lambda pair: pair[1])

    recommended_movie_names = []
    recommended_movie_posters = []
    for position, _score in closest:
        row = movies.iloc[position]
        recommended_movie_posters.append(fetch_poster(row['movie_id']))
        recommended_movie_names.append(row['title'])

    return recommended_movie_names, recommended_movie_posters
72
+
73
+
74
st.title("Movie Recommender System")


def _load_pickle(path):
    """Load one pickled object from ``path``, closing the file afterwards."""
    # NOTE: unpickling executes arbitrary code; only load files that ship
    # with the app, never user-supplied ones.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


movies = _load_pickle('movie_list.pkl')
similarity = _load_pickle('similarity.pkl')
all_movies = _load_pickle('movies_df.pkl')
top_popular = _load_pickle('top_popular.pkl')

# One row per (movie, genre) pair: each entry of the list-valued 'genres'
# column becomes its own row, with the other columns repeated. Movies with
# no genre keep a single row whose 'genres' is NaN.
genre_df = all_movies.explode('genres')
84
+
85
# Movie picker: every known title, used as the seed for the content-based
# recommendations.
movie_list = movies['title'].values
option_selected = st.selectbox('Type or Select Movie Name from Dropdown', movie_list)

# Genre picker: fixed shortlist used for the genre-based chart.
genre_list = [
    'Action',
    'Romance',
    'Adventure',
    'Science Fiction',
    'Comedy',
]
genre_selected = st.selectbox('Type or Select Genre from Dropdown', genre_list)
96
+
97
def _render_poster_row(titles, posters):
    """Show up to five captioned posters side by side.

    ``titles`` and ``posters`` are parallel sequences. Fewer than five
    entries leave the remaining columns empty instead of raising
    IndexError, and an entry without a poster shows its title as text.
    """
    if len(titles) == 0:
        st.write("No movies to show.")
        return
    for column, title, poster in zip(st.columns(5), titles, posters):
        with column:
            if poster:
                st.image(poster, caption=title)
            else:
                st.write(title)


if st.button('Show Recommendation'):
    recommended_movie_names, recommended_movie_posters = recommend(option_selected)
    top_popular_movies = get_popular(top_popular)

    st.header("Movies Based on Content: Similar Movies")
    _render_poster_row(recommended_movie_names, recommended_movie_posters)

    st.header("Movies Based on Popularity: Top Popular")
    # Each row is a (title, movie_id) pair.
    popular = list(top_popular_movies.loc[:, ['title', 'movie_id']].values)
    _render_poster_row(
        [row[0] for row in popular],
        [fetch_poster(row[1]) for row in popular],
    )

    st.header("Movies Based on Genre: Top "+str(genre_selected)+" Movies")
    top_gener_based = top_genre_based_movies(genre_selected).head(5)
    genre_popular = list(top_gener_based.loc[:, ['title', 'movie_id']].values)
    _render_poster_row(
        [row[0] for row in genre_popular],
        [fetch_poster(row[1]) for row in genre_popular],
    )

# The footer is rendered on every run, whether or not the button was pressed.
st.markdown(footer, unsafe_allow_html=True)
movie_list.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7be347c4e22aae6549aafb013a4172287511db29112d87ad08b12b6abb1e41e
3
+ size 2294433
movies_df.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc6b18a914e04d6f46f759ee1a380e3a3ee8b15cc866684bfb1ff79976e5deee
3
+ size 226351096
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit
2
+ scikit-learn
3
+ altair==4.0
4
+ pandas<2.0.0
similarity.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb54916c165572df7b87afe95cf7b56241383c4c13255031d1a4bccdb9231dce
3
+ size 184781248
top_popular.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7eb0f806da0e9489f8729479b2eabd1354e3d34e532069a54210a23b965af482
3
+ size 35348