Lakshy75 committed on
Commit
27c4cfb
·
verified ·
1 Parent(s): ac7a2ed

Upload 8 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tmdb_5000_credits[[:space:]](1).csv filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,13 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Movie Recommendation System
3
+ emoji: 😻
4
+ colorFrom: gray
5
+ colorTo: red
6
+ sdk: streamlit
7
+ sdk_version: 1.40.2
8
+ app_file: app.py
9
+ pinned: false
10
+ short_description: Movie recommendation system built with Streamlit
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import requests
4
+ import pickle
5
+ import os
6
+ from dotenv import load_dotenv
7
+
8
# Pull settings from a local .env file into the process environment
# (fetch_poster later reads API_KEY via os.getenv).
load_dotenv()


# movie_data.pkl is unpickled into a two-item payload: the movies table and
# the similarity matrix used for recommendations.
# NOTE(review): pickle executes arbitrary code on load; only ship a
# movie_data.pkl that was produced by a trusted pipeline.
with open('movie_data.pkl', 'rb') as pickle_file:
    payload = pickle.load(pickle_file)
movies, cosine_sim = payload
15
+
16
+ # Function to get movie recommendations
17
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the movies most similar to ``title``.

    Args:
        title: Exact title to look up in the module-level ``movies`` frame.
            If several rows share the title, the first one is used.
        cosine_sim: Pairwise similarity matrix. Row ``i`` is read as the
            similarity scores of the movie at row *position* ``i`` of
            ``movies`` (presumably built in the same row order as the
            frame; verify against the notebook that writes the pickle).
        top_n: Maximum number of recommendations to return (default 10).

    Returns:
        A DataFrame with the ``title`` and ``movie_id`` columns of the
        ``top_n`` highest-scoring movies, best match first, never including
        the query movie itself.

    Raises:
        IndexError: If ``title`` does not occur in ``movies['title']``.
    """
    # Use the row *position*, not the index label: both ``cosine_sim[idx]``
    # and ``.iloc`` below are positional, and the two only coincide when the
    # frame has a pristine 0..n-1 index (not guaranteed after dropna/merge).
    positions = (movies['title'] == title).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise IndexError(f"title not found in movies: {title!r}")
    idx = int(positions[0])

    # Stable sort, highest score first.
    ranked = sorted(
        enumerate(cosine_sim[idx]),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Exclude the query movie explicitly instead of assuming it sorts first;
    # ties or duplicate rows can otherwise push it out of slot 0.
    movie_indices = [pos for pos, _ in ranked if pos != idx][:max(top_n, 0)]
    return movies[['title', 'movie_id']].iloc[movie_indices]
24
+
25
+ # Fetch movie poster from TMDB API
26
def fetch_poster(
    movie_id,
    *,
    timeout=10,
    fallback_url="https://placehold.co/500x750?text=No+Poster",
):
    """Return the poster image URL for a TMDB movie.

    Args:
        movie_id: TMDB movie identifier, inserted into the movie-details
            endpoint path.
        timeout: Seconds to wait for the TMDB API before giving up.
        fallback_url: URL returned when the API response has no poster
            for this movie.

    Returns:
        The ``w500`` image URL built from the response's ``poster_path``,
        or ``fallback_url`` when ``poster_path`` is missing or null.

    Raises:
        RuntimeError: If the ``API_KEY`` environment variable is not set.
        requests.RequestException: On network failure, timeout, or a
            non-2xx response from the API.
    """
    # The key comes from the environment (populated via load_dotenv at
    # start-up); fail early with a clear message rather than sending "None".
    api_key = os.getenv("API_KEY")
    if not api_key:
        raise RuntimeError(
            "API_KEY environment variable is not set; cannot query TMDB"
        )

    response = requests.get(
        f"https://api.themoviedb.org/3/movie/{movie_id}",
        params={"api_key": api_key},  # let requests do the URL encoding
        timeout=timeout,  # without this a stalled connection hangs the app
    )
    # Surface API errors directly instead of as KeyError on 'poster_path'.
    response.raise_for_status()

    poster_path = response.json().get("poster_path")
    if not poster_path:
        # Avoid building the bogus ".../w500None" URL for poster-less movies.
        return fallback_url
    return f"https://image.tmdb.org/t/p/w500{poster_path}"
34
+
35
# Streamlit page: choose a title, then show its recommendations as posters.
st.title("Movie Recommendation System")

selected_movie = st.selectbox("Select a movie:", movies['title'].values)

if st.button('Recommend'):
    recommendations = get_recommendations(selected_movie)
    st.write("Top 10 recommended movies:")

    # Lay the results out as two rows of five columns each.
    total = len(recommendations)
    for row_start in (0, 5):
        columns = st.columns(5)
        for offset, column in enumerate(columns):
            position = row_start + offset
            if position >= total:
                # Fewer results than grid cells: leave the cell empty.
                continue
            entry = recommendations.iloc[position]
            movie_title = entry['title']
            movie_id = entry['movie_id']
            poster_url = fetch_poster(movie_id)
            with column:
                st.image(poster_url, width=130)
                st.write(movie_title)
gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tmdb_5000_credits.csv filter=lfs diff=lfs merge=lfs -text
movie_data.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee875ac979bc56a80e843eb9cb92960426d17640940fe962474d50e0c632095a
3
+ size 187413682
notebook86c26b4f17.ipynb ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.7.10","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load\n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n\n# Input data files are available in the read-only \"../input/\" directory\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n for filename in filenames:\n print(os.path.join(dirname, filename))\n\n# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2021-06-21T12:32:55.393741Z","iopub.execute_input":"2021-06-21T12:32:55.394360Z","iopub.status.idle":"2021-06-21T12:32:55.415999Z","shell.execute_reply.started":"2021-06-21T12:32:55.394257Z","shell.execute_reply":"2021-06-21T12:32:55.414664Z"},"trusted":true},"execution_count":1,"outputs":[{"name":"stdout","text":"/kaggle/input/tmdb-movie-metadata/tmdb_5000_movies.csv\n/kaggle/input/tmdb-movie-metadata/tmdb_5000_credits.csv\n","output_type":"stream"}]},{"cell_type":"code","source":"movies = 
pd.read_csv('/kaggle/input/tmdb-movie-metadata/tmdb_5000_movies.csv')\ncredits = pd.read_csv('/kaggle/input/tmdb-movie-metadata/tmdb_5000_credits.csv') ","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:36:31.347717Z","iopub.execute_input":"2021-06-21T12:36:31.348105Z","iopub.status.idle":"2021-06-21T12:36:32.003856Z","shell.execute_reply.started":"2021-06-21T12:36:31.348073Z","shell.execute_reply":"2021-06-21T12:36:32.002744Z"},"trusted":true},"execution_count":30,"outputs":[]},{"cell_type":"code","source":"movies.head(2)","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:36:33.757346Z","iopub.execute_input":"2021-06-21T12:36:33.757778Z","iopub.status.idle":"2021-06-21T12:36:33.781729Z","shell.execute_reply.started":"2021-06-21T12:36:33.757743Z","shell.execute_reply":"2021-06-21T12:36:33.780738Z"},"trusted":true},"execution_count":31,"outputs":[{"execution_count":31,"output_type":"execute_result","data":{"text/plain":" budget genres \\\n0 237000000 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n1 300000000 [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"... \n\n homepage id \\\n0 http://www.avatarmovie.com/ 19995 \n1 http://disney.go.com/disneypictures/pirates/ 285 \n\n keywords original_language \\\n0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... en \n1 [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na... en \n\n original_title \\\n0 Avatar \n1 Pirates of the Caribbean: At World's End \n\n overview popularity \\\n0 In the 22nd century, a paraplegic Marine is di... 150.437577 \n1 Captain Barbossa, long believed to be dead, ha... 139.082615 \n\n production_companies \\\n0 [{\"name\": \"Ingenious Film Partners\", \"id\": 289... \n1 [{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"... \n\n production_countries release_date revenue \\\n0 [{\"iso_3166_1\": \"US\", \"name\": \"United States o... 2009-12-10 2787965087 \n1 [{\"iso_3166_1\": \"US\", \"name\": \"United States o... 
2007-05-19 961000000 \n\n runtime spoken_languages status \\\n0 162.0 [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso... Released \n1 169.0 [{\"iso_639_1\": \"en\", \"name\": \"English\"}] Released \n\n tagline \\\n0 Enter the World of Pandora. \n1 At the end of the world, the adventure begins. \n\n title vote_average vote_count \n0 Avatar 7.2 11800 \n1 Pirates of the Caribbean: At World's End 6.9 4500 ","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>budget</th>\n <th>genres</th>\n <th>homepage</th>\n <th>id</th>\n <th>keywords</th>\n <th>original_language</th>\n <th>original_title</th>\n <th>overview</th>\n <th>popularity</th>\n <th>production_companies</th>\n <th>production_countries</th>\n <th>release_date</th>\n <th>revenue</th>\n <th>runtime</th>\n <th>spoken_languages</th>\n <th>status</th>\n <th>tagline</th>\n <th>title</th>\n <th>vote_average</th>\n <th>vote_count</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>237000000</td>\n <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n <td>http://www.avatarmovie.com/</td>\n <td>19995</td>\n <td>[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...</td>\n <td>en</td>\n <td>Avatar</td>\n <td>In the 22nd century, a paraplegic Marine is di...</td>\n <td>150.437577</td>\n <td>[{\"name\": \"Ingenious Film Partners\", \"id\": 289...</td>\n <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n <td>2009-12-10</td>\n <td>2787965087</td>\n <td>162.0</td>\n <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...</td>\n <td>Released</td>\n <td>Enter the World of Pandora.</td>\n <td>Avatar</td>\n <td>7.2</td>\n <td>11800</td>\n </tr>\n <tr>\n <th>1</th>\n <td>300000000</td>\n <td>[{\"id\": 
12, \"name\": \"Adventure\"}, {\"id\": 14, \"...</td>\n <td>http://disney.go.com/disneypictures/pirates/</td>\n <td>285</td>\n <td>[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...</td>\n <td>en</td>\n <td>Pirates of the Caribbean: At World's End</td>\n <td>Captain Barbossa, long believed to be dead, ha...</td>\n <td>139.082615</td>\n <td>[{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"...</td>\n <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n <td>2007-05-19</td>\n <td>961000000</td>\n <td>169.0</td>\n <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n <td>Released</td>\n <td>At the end of the world, the adventure begins.</td>\n <td>Pirates of the Caribbean: At World's End</td>\n <td>6.9</td>\n <td>4500</td>\n </tr>\n </tbody>\n</table>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"movies.shape","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:33:06.307921Z","iopub.execute_input":"2021-06-21T12:33:06.308311Z","iopub.status.idle":"2021-06-21T12:33:06.316052Z","shell.execute_reply.started":"2021-06-21T12:33:06.308279Z","shell.execute_reply":"2021-06-21T12:33:06.314903Z"},"trusted":true},"execution_count":4,"outputs":[{"execution_count":4,"output_type":"execute_result","data":{"text/plain":"(4803, 20)"},"metadata":{}}]},{"cell_type":"code","source":"credits.head()","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:33:08.787389Z","iopub.execute_input":"2021-06-21T12:33:08.787791Z","iopub.status.idle":"2021-06-21T12:33:08.800864Z","shell.execute_reply.started":"2021-06-21T12:33:08.787758Z","shell.execute_reply":"2021-06-21T12:33:08.799834Z"},"trusted":true},"execution_count":5,"outputs":[{"execution_count":5,"output_type":"execute_result","data":{"text/plain":" movie_id title \\\n0 19995 Avatar \n1 285 Pirates of the Caribbean: At World's End \n2 206647 Spectre \n3 49026 The Dark Knight Rises \n4 49529 John Carter \n\n cast \\\n0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... 
\n1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n2 [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... \n3 [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... \n4 [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... \n\n crew \n0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... ","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>movie_id</th>\n <th>title</th>\n <th>cast</th>\n <th>crew</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>19995</td>\n <td>Avatar</td>\n <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n </tr>\n <tr>\n <th>1</th>\n <td>285</td>\n <td>Pirates of the Caribbean: At World's End</td>\n <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n </tr>\n <tr>\n <th>2</th>\n <td>206647</td>\n <td>Spectre</td>\n <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n </tr>\n <tr>\n <th>3</th>\n <td>49026</td>\n <td>The Dark Knight Rises</td>\n <td>[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...</td>\n <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n </tr>\n <tr>\n <th>4</th>\n <td>49529</td>\n <td>John Carter</td>\n <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n 
</tr>\n </tbody>\n</table>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"movies = movies.merge(credits,on='title')","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:36:38.686521Z","iopub.execute_input":"2021-06-21T12:36:38.687005Z","iopub.status.idle":"2021-06-21T12:36:38.713343Z","shell.execute_reply.started":"2021-06-21T12:36:38.686963Z","shell.execute_reply":"2021-06-21T12:36:38.711939Z"},"trusted":true},"execution_count":32,"outputs":[]},{"cell_type":"code","source":"movies.head()\n# budget\n# homepage\n# id\n# original_language\n# original_title\n# popularity\n# production_comapny\n# production_countries\n# release-date(not sure)","metadata":{"execution":{"iopub.status.busy":"2021-06-19T12:48:23.470187Z","iopub.execute_input":"2021-06-19T12:48:23.470754Z","iopub.status.idle":"2021-06-19T12:48:23.5041Z","shell.execute_reply.started":"2021-06-19T12:48:23.470724Z","shell.execute_reply":"2021-06-19T12:48:23.503327Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"movies = movies[['movie_id','title','overview','genres','keywords','cast','crew']]","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:36:41.393326Z","iopub.execute_input":"2021-06-21T12:36:41.393733Z","iopub.status.idle":"2021-06-21T12:36:41.406883Z","shell.execute_reply.started":"2021-06-21T12:36:41.393699Z","shell.execute_reply":"2021-06-21T12:36:41.405822Z"},"trusted":true},"execution_count":33,"outputs":[]},{"cell_type":"code","source":"movies.head()","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:33:39.516481Z","iopub.execute_input":"2021-06-21T12:33:39.517061Z","iopub.status.idle":"2021-06-21T12:33:39.532499Z","shell.execute_reply.started":"2021-06-21T12:33:39.517012Z","shell.execute_reply":"2021-06-21T12:33:39.531584Z"},"trusted":true},"execution_count":8,"outputs":[{"execution_count":8,"output_type":"execute_result","data":{"text/plain":" movie_id title \\\n0 19995 Avatar \n1 285 Pirates of the Caribbean: At World's 
End \n2 206647 Spectre \n3 49026 The Dark Knight Rises \n4 49529 John Carter \n\n overview \\\n0 In the 22nd century, a paraplegic Marine is di... \n1 Captain Barbossa, long believed to be dead, ha... \n2 A cryptic message from Bond’s past sends him o... \n3 Following the death of District Attorney Harve... \n4 John Carter is a war-weary, former military ca... \n\n genres \\\n0 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n1 [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"... \n2 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n3 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam... \n4 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n\n keywords \\\n0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... \n1 [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na... \n2 [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name... \n3 [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,... \n4 [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":... \n\n cast \\\n0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n2 [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... \n3 [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... \n4 [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... \n\n crew \n0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... 
","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>movie_id</th>\n <th>title</th>\n <th>overview</th>\n <th>genres</th>\n <th>keywords</th>\n <th>cast</th>\n <th>crew</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>19995</td>\n <td>Avatar</td>\n <td>In the 22nd century, a paraplegic Marine is di...</td>\n <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n <td>[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...</td>\n <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n </tr>\n <tr>\n <th>1</th>\n <td>285</td>\n <td>Pirates of the Caribbean: At World's End</td>\n <td>Captain Barbossa, long believed to be dead, ha...</td>\n <td>[{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...</td>\n <td>[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...</td>\n <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n </tr>\n <tr>\n <th>2</th>\n <td>206647</td>\n <td>Spectre</td>\n <td>A cryptic message from Bond’s past sends him o...</td>\n <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n <td>[{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...</td>\n <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n </tr>\n <tr>\n <th>3</th>\n <td>49026</td>\n <td>The Dark Knight Rises</td>\n <td>Following the death of District Attorney Harve...</td>\n <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...</td>\n <td>[{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...</td>\n <td>[{\"cast_id\": 2, 
\"character\": \"Bruce Wayne / Ba...</td>\n <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n </tr>\n <tr>\n <th>4</th>\n <td>49529</td>\n <td>John Carter</td>\n <td>John Carter is a war-weary, former military ca...</td>\n <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n <td>[{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...</td>\n <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n </tr>\n </tbody>\n</table>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"import ast","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:34:19.382331Z","iopub.execute_input":"2021-06-21T12:34:19.382856Z","iopub.status.idle":"2021-06-21T12:34:19.387416Z","shell.execute_reply.started":"2021-06-21T12:34:19.382822Z","shell.execute_reply":"2021-06-21T12:34:19.386451Z"},"trusted":true},"execution_count":12,"outputs":[]},{"cell_type":"code","source":"def convert(text):\n L = []\n for i in ast.literal_eval(text):\n L.append(i['name']) \n return L ","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:36:47.539904Z","iopub.execute_input":"2021-06-21T12:36:47.540453Z","iopub.status.idle":"2021-06-21T12:36:47.545014Z","shell.execute_reply.started":"2021-06-21T12:36:47.540418Z","shell.execute_reply":"2021-06-21T12:36:47.544243Z"},"trusted":true},"execution_count":34,"outputs":[]},{"cell_type":"code","source":"movies.dropna(inplace=True)","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:36:50.059006Z","iopub.execute_input":"2021-06-21T12:36:50.059538Z","iopub.status.idle":"2021-06-21T12:36:50.073095Z","shell.execute_reply.started":"2021-06-21T12:36:50.059504Z","shell.execute_reply":"2021-06-21T12:36:50.071577Z"},"trusted":true},"execution_count":35,"outputs":[]},{"cell_type":"code","source":"movies['genres'] = 
movies['genres'].apply(convert)\nmovies.head()","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:36:52.343645Z","iopub.execute_input":"2021-06-21T12:36:52.344121Z","iopub.status.idle":"2021-06-21T12:36:52.523910Z","shell.execute_reply.started":"2021-06-21T12:36:52.344082Z","shell.execute_reply":"2021-06-21T12:36:52.522805Z"},"trusted":true},"execution_count":36,"outputs":[{"execution_count":36,"output_type":"execute_result","data":{"text/plain":" movie_id title \\\n0 19995 Avatar \n1 285 Pirates of the Caribbean: At World's End \n2 206647 Spectre \n3 49026 The Dark Knight Rises \n4 49529 John Carter \n\n overview \\\n0 In the 22nd century, a paraplegic Marine is di... \n1 Captain Barbossa, long believed to be dead, ha... \n2 A cryptic message from Bond’s past sends him o... \n3 Following the death of District Attorney Harve... \n4 John Carter is a war-weary, former military ca... \n\n genres \\\n0 [Action, Adventure, Fantasy, Science Fiction] \n1 [Adventure, Fantasy, Action] \n2 [Action, Adventure, Crime] \n3 [Action, Crime, Drama, Thriller] \n4 [Action, Adventure, Science Fiction] \n\n keywords \\\n0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... \n1 [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na... \n2 [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name... \n3 [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,... \n4 [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":... \n\n cast \\\n0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n2 [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... \n3 [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... \n4 [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... \n\n crew \n0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... 
\n4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... ","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>movie_id</th>\n <th>title</th>\n <th>overview</th>\n <th>genres</th>\n <th>keywords</th>\n <th>cast</th>\n <th>crew</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>19995</td>\n <td>Avatar</td>\n <td>In the 22nd century, a paraplegic Marine is di...</td>\n <td>[Action, Adventure, Fantasy, Science Fiction]</td>\n <td>[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...</td>\n <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n </tr>\n <tr>\n <th>1</th>\n <td>285</td>\n <td>Pirates of the Caribbean: At World's End</td>\n <td>Captain Barbossa, long believed to be dead, ha...</td>\n <td>[Adventure, Fantasy, Action]</td>\n <td>[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...</td>\n <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n </tr>\n <tr>\n <th>2</th>\n <td>206647</td>\n <td>Spectre</td>\n <td>A cryptic message from Bond’s past sends him o...</td>\n <td>[Action, Adventure, Crime]</td>\n <td>[{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...</td>\n <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n </tr>\n <tr>\n <th>3</th>\n <td>49026</td>\n <td>The Dark Knight Rises</td>\n <td>Following the death of District Attorney Harve...</td>\n <td>[Action, Crime, Drama, Thriller]</td>\n <td>[{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...</td>\n <td>[{\"cast_id\": 2, \"character\": \"Bruce Wayne / 
Ba...</td>\n <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n </tr>\n <tr>\n <th>4</th>\n <td>49529</td>\n <td>John Carter</td>\n <td>John Carter is a war-weary, former military ca...</td>\n <td>[Action, Adventure, Science Fiction]</td>\n <td>[{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...</td>\n <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n </tr>\n </tbody>\n</table>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"movies['keywords'] = movies['keywords'].apply(convert)\nmovies.head()","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:36:55.304070Z","iopub.execute_input":"2021-06-21T12:36:55.304439Z","iopub.status.idle":"2021-06-21T12:36:55.738525Z","shell.execute_reply.started":"2021-06-21T12:36:55.304408Z","shell.execute_reply":"2021-06-21T12:36:55.737123Z"},"trusted":true},"execution_count":37,"outputs":[{"execution_count":37,"output_type":"execute_result","data":{"text/plain":" movie_id title \\\n0 19995 Avatar \n1 285 Pirates of the Caribbean: At World's End \n2 206647 Spectre \n3 49026 The Dark Knight Rises \n4 49529 John Carter \n\n overview \\\n0 In the 22nd century, a paraplegic Marine is di... \n1 Captain Barbossa, long believed to be dead, ha... \n2 A cryptic message from Bond’s past sends him o... \n3 Following the death of District Attorney Harve... \n4 John Carter is a war-weary, former military ca... \n\n genres \\\n0 [Action, Adventure, Fantasy, Science Fiction] \n1 [Adventure, Fantasy, Action] \n2 [Action, Adventure, Crime] \n3 [Action, Crime, Drama, Thriller] \n4 [Action, Adventure, Science Fiction] \n\n keywords \\\n0 [culture clash, future, space war, space colon... \n1 [ocean, drug abuse, exotic island, east india ... \n2 [spy, based on novel, secret agent, sequel, mi... \n3 [dc comics, crime fighter, terrorist, secret i... \n4 [based on novel, mars, medallion, space travel... 
\n\n cast \\\n0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n2 [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... \n3 [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... \n4 [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... \n\n crew \n0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... ","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>movie_id</th>\n <th>title</th>\n <th>overview</th>\n <th>genres</th>\n <th>keywords</th>\n <th>cast</th>\n <th>crew</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>19995</td>\n <td>Avatar</td>\n <td>In the 22nd century, a paraplegic Marine is di...</td>\n <td>[Action, Adventure, Fantasy, Science Fiction]</td>\n <td>[culture clash, future, space war, space colon...</td>\n <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n </tr>\n <tr>\n <th>1</th>\n <td>285</td>\n <td>Pirates of the Caribbean: At World's End</td>\n <td>Captain Barbossa, long believed to be dead, ha...</td>\n <td>[Adventure, Fantasy, Action]</td>\n <td>[ocean, drug abuse, exotic island, east india ...</td>\n <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n </tr>\n <tr>\n <th>2</th>\n <td>206647</td>\n <td>Spectre</td>\n <td>A cryptic message from Bond’s past sends him o...</td>\n <td>[Action, 
Adventure, Crime]</td>\n <td>[spy, based on novel, secret agent, sequel, mi...</td>\n <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n </tr>\n <tr>\n <th>3</th>\n <td>49026</td>\n <td>The Dark Knight Rises</td>\n <td>Following the death of District Attorney Harve...</td>\n <td>[Action, Crime, Drama, Thriller]</td>\n <td>[dc comics, crime fighter, terrorist, secret i...</td>\n <td>[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...</td>\n <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n </tr>\n <tr>\n <th>4</th>\n <td>49529</td>\n <td>John Carter</td>\n <td>John Carter is a war-weary, former military ca...</td>\n <td>[Action, Adventure, Science Fiction]</td>\n <td>[based on novel, mars, medallion, space travel...</td>\n <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n </tr>\n </tbody>\n</table>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"import ast\nast.literal_eval('[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"name\": \"Fantasy\"}, {\"id\": 878, \"name\": \"Science Fiction\"}]')","metadata":{"execution":{"iopub.status.busy":"2021-06-19T13:07:21.572154Z","iopub.execute_input":"2021-06-19T13:07:21.572473Z","iopub.status.idle":"2021-06-19T13:07:21.578686Z","shell.execute_reply.started":"2021-06-19T13:07:21.572446Z","shell.execute_reply":"2021-06-19T13:07:21.577661Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"def convert3(text):\n L = []\n counter = 0\n for i in ast.literal_eval(text):\n if counter < 3:\n L.append(i['name'])\n counter+=1\n return L 
","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:36:59.510859Z","iopub.execute_input":"2021-06-21T12:36:59.511226Z","iopub.status.idle":"2021-06-21T12:36:59.517043Z","shell.execute_reply.started":"2021-06-21T12:36:59.511192Z","shell.execute_reply":"2021-06-21T12:36:59.515878Z"},"trusted":true},"execution_count":38,"outputs":[]},{"cell_type":"code","source":"movies['cast'] = movies['cast'].apply(convert)\nmovies.head()","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:37:00.828661Z","iopub.execute_input":"2021-06-21T12:37:00.829409Z","iopub.status.idle":"2021-06-21T12:37:04.117090Z","shell.execute_reply.started":"2021-06-21T12:37:00.829355Z","shell.execute_reply":"2021-06-21T12:37:04.115822Z"},"trusted":true},"execution_count":39,"outputs":[{"execution_count":39,"output_type":"execute_result","data":{"text/plain":" movie_id title \\\n0 19995 Avatar \n1 285 Pirates of the Caribbean: At World's End \n2 206647 Spectre \n3 49026 The Dark Knight Rises \n4 49529 John Carter \n\n overview \\\n0 In the 22nd century, a paraplegic Marine is di... \n1 Captain Barbossa, long believed to be dead, ha... \n2 A cryptic message from Bond’s past sends him o... \n3 Following the death of District Attorney Harve... \n4 John Carter is a war-weary, former military ca... \n\n genres \\\n0 [Action, Adventure, Fantasy, Science Fiction] \n1 [Adventure, Fantasy, Action] \n2 [Action, Adventure, Crime] \n3 [Action, Crime, Drama, Thriller] \n4 [Action, Adventure, Science Fiction] \n\n keywords \\\n0 [culture clash, future, space war, space colon... \n1 [ocean, drug abuse, exotic island, east india ... \n2 [spy, based on novel, secret agent, sequel, mi... \n3 [dc comics, crime fighter, terrorist, secret i... \n4 [based on novel, mars, medallion, space travel... \n\n cast \\\n0 [Sam Worthington, Zoe Saldana, Sigourney Weave... \n1 [Johnny Depp, Orlando Bloom, Keira Knightley, ... \n2 [Daniel Craig, Christoph Waltz, Léa Seydoux, R... 
\n3 [Christian Bale, Michael Caine, Gary Oldman, A... \n4 [Taylor Kitsch, Lynn Collins, Samantha Morton,... \n\n crew \n0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... ","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>movie_id</th>\n <th>title</th>\n <th>overview</th>\n <th>genres</th>\n <th>keywords</th>\n <th>cast</th>\n <th>crew</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>19995</td>\n <td>Avatar</td>\n <td>In the 22nd century, a paraplegic Marine is di...</td>\n <td>[Action, Adventure, Fantasy, Science Fiction]</td>\n <td>[culture clash, future, space war, space colon...</td>\n <td>[Sam Worthington, Zoe Saldana, Sigourney Weave...</td>\n <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n </tr>\n <tr>\n <th>1</th>\n <td>285</td>\n <td>Pirates of the Caribbean: At World's End</td>\n <td>Captain Barbossa, long believed to be dead, ha...</td>\n <td>[Adventure, Fantasy, Action]</td>\n <td>[ocean, drug abuse, exotic island, east india ...</td>\n <td>[Johnny Depp, Orlando Bloom, Keira Knightley, ...</td>\n <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n </tr>\n <tr>\n <th>2</th>\n <td>206647</td>\n <td>Spectre</td>\n <td>A cryptic message from Bond’s past sends him o...</td>\n <td>[Action, Adventure, Crime]</td>\n <td>[spy, based on novel, secret agent, sequel, mi...</td>\n <td>[Daniel Craig, Christoph Waltz, Léa Seydoux, R...</td>\n <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n </tr>\n 
<tr>\n <th>3</th>\n <td>49026</td>\n <td>The Dark Knight Rises</td>\n <td>Following the death of District Attorney Harve...</td>\n <td>[Action, Crime, Drama, Thriller]</td>\n <td>[dc comics, crime fighter, terrorist, secret i...</td>\n <td>[Christian Bale, Michael Caine, Gary Oldman, A...</td>\n <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n </tr>\n <tr>\n <th>4</th>\n <td>49529</td>\n <td>John Carter</td>\n <td>John Carter is a war-weary, former military ca...</td>\n <td>[Action, Adventure, Science Fiction]</td>\n <td>[based on novel, mars, medallion, space travel...</td>\n <td>[Taylor Kitsch, Lynn Collins, Samantha Morton,...</td>\n <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n </tr>\n </tbody>\n</table>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"movies['cast'] = movies['cast'].apply(lambda x:x[0:3])","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:37:07.352464Z","iopub.execute_input":"2021-06-21T12:37:07.352886Z","iopub.status.idle":"2021-06-21T12:37:07.367808Z","shell.execute_reply.started":"2021-06-21T12:37:07.352854Z","shell.execute_reply":"2021-06-21T12:37:07.366250Z"},"trusted":true},"execution_count":40,"outputs":[]},{"cell_type":"code","source":"def fetch_director(text):\n L = []\n for i in ast.literal_eval(text):\n if i['job'] == 'Director':\n L.append(i['name'])\n return L ","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:37:09.762909Z","iopub.execute_input":"2021-06-21T12:37:09.763317Z","iopub.status.idle":"2021-06-21T12:37:09.770917Z","shell.execute_reply.started":"2021-06-21T12:37:09.763278Z","shell.execute_reply":"2021-06-21T12:37:09.770002Z"},"trusted":true},"execution_count":41,"outputs":[]},{"cell_type":"code","source":"movies['crew'] = 
movies['crew'].apply(fetch_director)","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:37:12.189921Z","iopub.execute_input":"2021-06-21T12:37:12.190468Z","iopub.status.idle":"2021-06-21T12:37:15.825662Z","shell.execute_reply.started":"2021-06-21T12:37:12.190407Z","shell.execute_reply":"2021-06-21T12:37:15.824562Z"},"trusted":true},"execution_count":42,"outputs":[]},{"cell_type":"code","source":"#movies['overview'] = movies['overview'].apply(lambda x:x.split())\nmovies.sample(5)","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:34:54.994397Z","iopub.execute_input":"2021-06-21T12:34:54.994779Z","iopub.status.idle":"2021-06-21T12:34:55.019276Z","shell.execute_reply.started":"2021-06-21T12:34:54.994738Z","shell.execute_reply":"2021-06-21T12:34:55.017718Z"},"trusted":true},"execution_count":22,"outputs":[{"execution_count":22,"output_type":"execute_result","data":{"text/plain":" movie_id title \\\n1183 9889 Shallow Hal \n4215 15976 The Bubble \n1571 22947 Up in the Air \n205 58574 Sherlock Holmes: A Game of Shadows \n352 10674 Mulan \n\n overview \\\n1183 A shallow man falls in love with a 300 pound w... \n4215 The movie follows a group of young friends in ... \n1571 George Clooney plays the dry cynical character... \n205 There is a new criminal mastermind at large (P... \n352 A tomboyish girl disguises herself as a young ... \n\n genres \\\n1183 [Comedy, Romance] \n4215 [Drama, Romance] \n1571 [Drama, Romance] \n205 [Adventure, Action, Crime, Mystery] \n352 [Animation, Family, Adventure] \n\n keywords \\\n1183 [overweight, beauty, hypnosis, overweight man,... \n4215 [gay] \n1571 [suitcase, business, omaha, on the road, downs... \n205 [detective inspector, steampunk, criminal mast... \n352 [homeland, musical, training, daughter, cricke... 
\n\n cast \\\n1183 [Gwyneth Paltrow, Jack Black, Jason Alexander] \n4215 [Ohad Knoller, Yousef Sweid, Daniella Wircer] \n1571 [George Clooney, Vera Farmiga, Anna Kendrick] \n205 [Robert Downey Jr., Jude Law, Jared Harris] \n352 [Eddie Murphy, Jackie Chan, Ming-Na Wen] \n\n crew \n1183 [Bobby Farrelly, Peter Farrelly] \n4215 [Eytan Fox] \n1571 [Jason Reitman] \n205 [Guy Ritchie] \n352 [Tony Bancroft, Barry Cook] ","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>movie_id</th>\n <th>title</th>\n <th>overview</th>\n <th>genres</th>\n <th>keywords</th>\n <th>cast</th>\n <th>crew</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>1183</th>\n <td>9889</td>\n <td>Shallow Hal</td>\n <td>A shallow man falls in love with a 300 pound w...</td>\n <td>[Comedy, Romance]</td>\n <td>[overweight, beauty, hypnosis, overweight man,...</td>\n <td>[Gwyneth Paltrow, Jack Black, Jason Alexander]</td>\n <td>[Bobby Farrelly, Peter Farrelly]</td>\n </tr>\n <tr>\n <th>4215</th>\n <td>15976</td>\n <td>The Bubble</td>\n <td>The movie follows a group of young friends in ...</td>\n <td>[Drama, Romance]</td>\n <td>[gay]</td>\n <td>[Ohad Knoller, Yousef Sweid, Daniella Wircer]</td>\n <td>[Eytan Fox]</td>\n </tr>\n <tr>\n <th>1571</th>\n <td>22947</td>\n <td>Up in the Air</td>\n <td>George Clooney plays the dry cynical character...</td>\n <td>[Drama, Romance]</td>\n <td>[suitcase, business, omaha, on the road, downs...</td>\n <td>[George Clooney, Vera Farmiga, Anna Kendrick]</td>\n <td>[Jason Reitman]</td>\n </tr>\n <tr>\n <th>205</th>\n <td>58574</td>\n <td>Sherlock Holmes: A Game of Shadows</td>\n <td>There is a new criminal mastermind at large (P...</td>\n <td>[Adventure, Action, Crime, Mystery]</td>\n <td>[detective 
inspector, steampunk, criminal mast...</td>\n <td>[Robert Downey Jr., Jude Law, Jared Harris]</td>\n <td>[Guy Ritchie]</td>\n </tr>\n <tr>\n <th>352</th>\n <td>10674</td>\n <td>Mulan</td>\n <td>A tomboyish girl disguises herself as a young ...</td>\n <td>[Animation, Family, Adventure]</td>\n <td>[homeland, musical, training, daughter, cricke...</td>\n <td>[Eddie Murphy, Jackie Chan, Ming-Na Wen]</td>\n <td>[Tony Bancroft, Barry Cook]</td>\n </tr>\n </tbody>\n</table>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"def collapse(L):\n L1 = []\n for i in L:\n L1.append(i.replace(\" \",\"\"))\n return L1","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:37:20.072749Z","iopub.execute_input":"2021-06-21T12:37:20.073320Z","iopub.status.idle":"2021-06-21T12:37:20.079118Z","shell.execute_reply.started":"2021-06-21T12:37:20.073270Z","shell.execute_reply":"2021-06-21T12:37:20.077997Z"},"trusted":true},"execution_count":43,"outputs":[]},{"cell_type":"code","source":"movies['cast'] = movies['cast'].apply(collapse)\nmovies['crew'] = movies['crew'].apply(collapse)\nmovies['genres'] = movies['genres'].apply(collapse)\nmovies['keywords'] = movies['keywords'].apply(collapse)","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:37:21.282768Z","iopub.execute_input":"2021-06-21T12:37:21.283329Z","iopub.status.idle":"2021-06-21T12:37:21.486755Z","shell.execute_reply.started":"2021-06-21T12:37:21.283292Z","shell.execute_reply":"2021-06-21T12:37:21.485878Z"},"trusted":true},"execution_count":44,"outputs":[]},{"cell_type":"code","source":"movies.head()","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:35:23.278025Z","iopub.execute_input":"2021-06-21T12:35:23.278589Z","iopub.status.idle":"2021-06-21T12:35:23.311346Z","shell.execute_reply.started":"2021-06-21T12:35:23.278539Z","shell.execute_reply":"2021-06-21T12:35:23.309971Z"},"trusted":true},"execution_count":26,"outputs":[{"execution_count":26,"output_type":"execute_result","data":{"text/plain":" 
movie_id title \\\n0 19995 Avatar \n1 285 Pirates of the Caribbean: At World's End \n2 206647 Spectre \n3 49026 The Dark Knight Rises \n4 49529 John Carter \n\n overview \\\n0 In the 22nd century, a paraplegic Marine is di... \n1 Captain Barbossa, long believed to be dead, ha... \n2 A cryptic message from Bond’s past sends him o... \n3 Following the death of District Attorney Harve... \n4 John Carter is a war-weary, former military ca... \n\n genres \\\n0 [Action, Adventure, Fantasy, ScienceFiction] \n1 [Adventure, Fantasy, Action] \n2 [Action, Adventure, Crime] \n3 [Action, Crime, Drama, Thriller] \n4 [Action, Adventure, ScienceFiction] \n\n keywords \\\n0 [cultureclash, future, spacewar, spacecolony, ... \n1 [ocean, drugabuse, exoticisland, eastindiatrad... \n2 [spy, basedonnovel, secretagent, sequel, mi6, ... \n3 [dccomics, crimefighter, terrorist, secretiden... \n4 [basedonnovel, mars, medallion, spacetravel, p... \n\n cast crew \n0 [SamWorthington, ZoeSaldana, SigourneyWeaver] [JamesCameron] \n1 [JohnnyDepp, OrlandoBloom, KeiraKnightley] [GoreVerbinski] \n2 [DanielCraig, ChristophWaltz, LéaSeydoux] [SamMendes] \n3 [ChristianBale, MichaelCaine, GaryOldman] [ChristopherNolan] \n4 [TaylorKitsch, LynnCollins, SamanthaMorton] [AndrewStanton] ","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>movie_id</th>\n <th>title</th>\n <th>overview</th>\n <th>genres</th>\n <th>keywords</th>\n <th>cast</th>\n <th>crew</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>19995</td>\n <td>Avatar</td>\n <td>In the 22nd century, a paraplegic Marine is di...</td>\n <td>[Action, Adventure, Fantasy, ScienceFiction]</td>\n <td>[cultureclash, future, spacewar, spacecolony, ...</td>\n <td>[SamWorthington, 
ZoeSaldana, SigourneyWeaver]</td>\n <td>[JamesCameron]</td>\n </tr>\n <tr>\n <th>1</th>\n <td>285</td>\n <td>Pirates of the Caribbean: At World's End</td>\n <td>Captain Barbossa, long believed to be dead, ha...</td>\n <td>[Adventure, Fantasy, Action]</td>\n <td>[ocean, drugabuse, exoticisland, eastindiatrad...</td>\n <td>[JohnnyDepp, OrlandoBloom, KeiraKnightley]</td>\n <td>[GoreVerbinski]</td>\n </tr>\n <tr>\n <th>2</th>\n <td>206647</td>\n <td>Spectre</td>\n <td>A cryptic message from Bond’s past sends him o...</td>\n <td>[Action, Adventure, Crime]</td>\n <td>[spy, basedonnovel, secretagent, sequel, mi6, ...</td>\n <td>[DanielCraig, ChristophWaltz, LéaSeydoux]</td>\n <td>[SamMendes]</td>\n </tr>\n <tr>\n <th>3</th>\n <td>49026</td>\n <td>The Dark Knight Rises</td>\n <td>Following the death of District Attorney Harve...</td>\n <td>[Action, Crime, Drama, Thriller]</td>\n <td>[dccomics, crimefighter, terrorist, secretiden...</td>\n <td>[ChristianBale, MichaelCaine, GaryOldman]</td>\n <td>[ChristopherNolan]</td>\n </tr>\n <tr>\n <th>4</th>\n <td>49529</td>\n <td>John Carter</td>\n <td>John Carter is a war-weary, former military ca...</td>\n <td>[Action, Adventure, ScienceFiction]</td>\n <td>[basedonnovel, mars, medallion, spacetravel, p...</td>\n <td>[TaylorKitsch, LynnCollins, SamanthaMorton]</td>\n <td>[AndrewStanton]</td>\n </tr>\n </tbody>\n</table>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"movies['overview'] = movies['overview'].apply(lambda x:x.split())","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:37:31.936003Z","iopub.execute_input":"2021-06-21T12:37:31.936583Z","iopub.status.idle":"2021-06-21T12:37:31.975155Z","shell.execute_reply.started":"2021-06-21T12:37:31.936546Z","shell.execute_reply":"2021-06-21T12:37:31.973928Z"},"trusted":true},"execution_count":45,"outputs":[]},{"cell_type":"code","source":"movies['tags'] = movies['overview'] + movies['genres'] + movies['keywords'] + movies['cast'] + 
movies['crew']","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:37:34.465925Z","iopub.execute_input":"2021-06-21T12:37:34.466349Z","iopub.status.idle":"2021-06-21T12:37:34.572742Z","shell.execute_reply.started":"2021-06-21T12:37:34.466313Z","shell.execute_reply":"2021-06-21T12:37:34.571676Z"},"trusted":true},"execution_count":46,"outputs":[]},{"cell_type":"code","source":"new = movies.drop(columns=['overview','genres','keywords','cast','crew'])\n#new.head()","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:37:40.974644Z","iopub.execute_input":"2021-06-21T12:37:40.975174Z","iopub.status.idle":"2021-06-21T12:37:40.981843Z","shell.execute_reply.started":"2021-06-21T12:37:40.975140Z","shell.execute_reply":"2021-06-21T12:37:40.981059Z"},"trusted":true},"execution_count":47,"outputs":[]},{"cell_type":"code","source":"new['tags'] = new['tags'].apply(lambda x: \" \".join(x))\nnew.head()","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:38:00.130879Z","iopub.execute_input":"2021-06-21T12:38:00.131473Z","iopub.status.idle":"2021-06-21T12:38:00.160438Z","shell.execute_reply.started":"2021-06-21T12:38:00.131439Z","shell.execute_reply":"2021-06-21T12:38:00.159476Z"},"trusted":true},"execution_count":48,"outputs":[{"execution_count":48,"output_type":"execute_result","data":{"text/plain":" movie_id title \\\n0 19995 Avatar \n1 285 Pirates of the Caribbean: At World's End \n2 206647 Spectre \n3 49026 The Dark Knight Rises \n4 49529 John Carter \n\n tags \n0 In the 22nd century, a paraplegic Marine is di... \n1 Captain Barbossa, long believed to be dead, ha... \n2 A cryptic message from Bond’s past sends him o... \n3 Following the death of District Attorney Harve... \n4 John Carter is a war-weary, former military ca... 
","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>movie_id</th>\n <th>title</th>\n <th>tags</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>19995</td>\n <td>Avatar</td>\n <td>In the 22nd century, a paraplegic Marine is di...</td>\n </tr>\n <tr>\n <th>1</th>\n <td>285</td>\n <td>Pirates of the Caribbean: At World's End</td>\n <td>Captain Barbossa, long believed to be dead, ha...</td>\n </tr>\n <tr>\n <th>2</th>\n <td>206647</td>\n <td>Spectre</td>\n <td>A cryptic message from Bond’s past sends him o...</td>\n </tr>\n <tr>\n <th>3</th>\n <td>49026</td>\n <td>The Dark Knight Rises</td>\n <td>Following the death of District Attorney Harve...</td>\n </tr>\n <tr>\n <th>4</th>\n <td>49529</td>\n <td>John Carter</td>\n <td>John Carter is a war-weary, former military ca...</td>\n </tr>\n </tbody>\n</table>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"from sklearn.feature_extraction.text import CountVectorizer\ncv = CountVectorizer(max_features=5000,stop_words='english')\n ","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:38:06.544599Z","iopub.execute_input":"2021-06-21T12:38:06.545110Z","iopub.status.idle":"2021-06-21T12:38:07.488307Z","shell.execute_reply.started":"2021-06-21T12:38:06.545079Z","shell.execute_reply":"2021-06-21T12:38:07.487238Z"},"trusted":true},"execution_count":49,"outputs":[]},{"cell_type":"code","source":"vector = 
cv.fit_transform(new['tags']).toarray()","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:38:10.163889Z","iopub.execute_input":"2021-06-21T12:38:10.164314Z","iopub.status.idle":"2021-06-21T12:38:10.821200Z","shell.execute_reply.started":"2021-06-21T12:38:10.164279Z","shell.execute_reply":"2021-06-21T12:38:10.820175Z"},"trusted":true},"execution_count":50,"outputs":[]},{"cell_type":"code","source":"vector.shape","metadata":{"execution":{"iopub.status.busy":"2021-06-19T14:00:41.894223Z","iopub.execute_input":"2021-06-19T14:00:41.894747Z","iopub.status.idle":"2021-06-19T14:00:41.900786Z","shell.execute_reply.started":"2021-06-19T14:00:41.894699Z","shell.execute_reply":"2021-06-19T14:00:41.899989Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from sklearn.metrics.pairwise import cosine_similarity","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:38:13.339041Z","iopub.execute_input":"2021-06-21T12:38:13.339451Z","iopub.status.idle":"2021-06-21T12:38:13.390575Z","shell.execute_reply.started":"2021-06-21T12:38:13.339412Z","shell.execute_reply":"2021-06-21T12:38:13.389373Z"},"trusted":true},"execution_count":51,"outputs":[]},{"cell_type":"code","source":"similarity = 
cosine_similarity(vector)","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:38:15.769495Z","iopub.execute_input":"2021-06-21T12:38:15.770001Z","iopub.status.idle":"2021-06-21T12:38:18.170463Z","shell.execute_reply.started":"2021-06-21T12:38:15.769960Z","shell.execute_reply":"2021-06-21T12:38:18.169319Z"},"trusted":true},"execution_count":52,"outputs":[]},{"cell_type":"code","source":"similarity","metadata":{"execution":{"iopub.status.busy":"2021-06-19T14:06:45.472777Z","iopub.execute_input":"2021-06-19T14:06:45.473087Z","iopub.status.idle":"2021-06-19T14:06:45.479647Z","shell.execute_reply.started":"2021-06-19T14:06:45.473061Z","shell.execute_reply":"2021-06-19T14:06:45.478831Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"new[new['title'] == 'The Lego Movie'].index[0]","metadata":{"execution":{"iopub.status.busy":"2021-06-19T14:08:46.7159Z","iopub.execute_input":"2021-06-19T14:08:46.716222Z","iopub.status.idle":"2021-06-19T14:08:46.7239Z","shell.execute_reply.started":"2021-06-19T14:08:46.716196Z","shell.execute_reply":"2021-06-19T14:08:46.722946Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"def recommend(movie):\n index = new[new['title'] == movie].index[0]\n distances = sorted(list(enumerate(similarity[index])),reverse=True,key = lambda x: x[1])\n for i in distances[1:6]:\n print(new.iloc[i[0]].title)\n \n 
","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:38:22.542487Z","iopub.execute_input":"2021-06-21T12:38:22.542900Z","iopub.status.idle":"2021-06-21T12:38:22.549786Z","shell.execute_reply.started":"2021-06-21T12:38:22.542867Z","shell.execute_reply":"2021-06-21T12:38:22.548271Z"},"trusted":true},"execution_count":53,"outputs":[]},{"cell_type":"code","source":"recommend('Gandhi')","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:38:24.914258Z","iopub.execute_input":"2021-06-21T12:38:24.914650Z","iopub.status.idle":"2021-06-21T12:38:24.930018Z","shell.execute_reply.started":"2021-06-21T12:38:24.914616Z","shell.execute_reply":"2021-06-21T12:38:24.928876Z"},"trusted":true},"execution_count":54,"outputs":[{"name":"stdout","text":"Gandhi, My Father\nThe Wind That Shakes the Barley\nA Passage to India\nGuiana 1838\nRamanujan\n","output_type":"stream"}]},{"cell_type":"code","source":"import pickle","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:39:59.606475Z","iopub.execute_input":"2021-06-21T12:39:59.606924Z","iopub.status.idle":"2021-06-21T12:39:59.612177Z","shell.execute_reply.started":"2021-06-21T12:39:59.606890Z","shell.execute_reply":"2021-06-21T12:39:59.610858Z"},"trusted":true},"execution_count":55,"outputs":[]},{"cell_type":"code","source":"pickle.dump(new,open('movie_list.pkl','wb'))\npickle.dump(similarity,open('similarity.pkl','wb'))","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:40:53.373186Z","iopub.execute_input":"2021-06-21T12:40:53.373581Z","iopub.status.idle":"2021-06-21T12:40:53.784869Z","shell.execute_reply.started":"2021-06-21T12:40:53.373547Z","shell.execute_reply":"2021-06-21T12:40:53.783635Z"},"trusted":true},"execution_count":56,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]}
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ pandas==2.2.2
2
+ Requests==2.32.3
3
+ streamlit==1.35.0
4
+ python-dotenv
tmdb_5000_credits (1).csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d0050599ff88d40366c4841204b1489862bca346bfa46c20b05a65d14508435
3
+ size 40044293
tmdb_5000_movies (1).csv ADDED
The diff for this file is too large to render. See raw diff