Ephraimmm's picture
Update app.py
db8811e verified
# -*- coding: utf-8 -*-
"""Movie Recommendation
"""
import pandas as pd
import numpy as np
# Load the two TMDB 5000 exports; both CSV files are read from the working directory.
credits = pd.read_csv("tmdb_5000_credits.csv")
movies = pd.read_csv("tmdb_5000_movies.csv")
"""# Pre-processing"""
# Join movie metadata with its credits; the join key is the movie title.
new_movies = movies.merge(credits, on="title")
# Print a column/dtype/null-count summary of the merged frame.
new_movies.info()
# Keep only the columns used to build the recommender. `.copy()` makes this an
# independent frame, so the column assignments further down do not write into
# a slice of `new_movies`.
movies = new_movies[
    ["movie_id", "overview", "title", "genres", "keywords", "cast", "crew"]
].copy()
# Drop rows with any missing value and renumber the rows 0..n-1. The similarity
# matrices built later are addressed by row position, so index labels must not
# have gaps left behind by the dropped rows.
movies = movies.dropna().reset_index(drop=True)
# Example of a raw `genres` cell at this point (a stringified list of dicts):
# '[{"id": 28, "name": "Action"}, {"id": 12, "name": "Adventure"}, {"id": 14, "name": "Fantasy"}, {"id": 878, "name": "Science Fiction"}]'
import ast
def convert(obj):
    """Return the ``"name"`` value of every entry in a stringified list.

    ``obj`` is the text of a Python literal (parsed with ``ast.literal_eval``)
    whose items are mappings that each carry a ``"name"`` key.
    """
    return [entry["name"] for entry in ast.literal_eval(obj)]
# Replace the stringified JSON-like cells with plain lists of names.
movies["genres"] = movies["genres"].map(convert)
movies.head(1)
movies["keywords"] = movies["keywords"].map(convert)
# Peek at one raw `cast` cell (still a string at this point).
movies["cast"][1]
import ast
# Sanity check: literal_eval turns the stringified list into real Python objects.
ast.literal_eval(
    '[{"id": 28, "name": "Action"}, {"id": 12, "name": "Adventure"}, {"id": 14, "name": "Fantasy"}, {"id": 878, "name": "Science Fiction"}]'
)
def convert3(text):
    """Return the ``'name'`` of at most the first three entries parsed from *text*.

    *text* is the text of a Python literal (parsed with ``ast.literal_eval``)
    whose leading items are mappings with a ``'name'`` key.
    """
    # Pairing with range(3) caps the walk at three entries.
    return [member['name'] for _, member in zip(range(3), ast.literal_eval(text))]
# Keep only the first three listed cast members per movie. `convert3` is the
# cast-specific helper defined just above; the generic `convert` would keep the
# whole cast list and let it dominate the tags.
movies['cast'] = movies['cast'].apply(convert3)
def fetch_director(text):
    """Return the names of all crew entries whose ``'job'`` is ``'Director'``.

    *text* is the text of a Python literal (parsed with ``ast.literal_eval``)
    whose items are mappings with ``'job'`` and ``'name'`` keys.
    """
    return [
        member['name']
        for member in ast.literal_eval(text)
        if member['job'] == 'Director'
    ]
# Reduce each crew cell to the list of director names.
movies['crew'] = movies['crew'].apply(fetch_director)
movies.sample(5)
# Tokenise the overview on whitespace so it can be concatenated with the other lists.
movies['overview'] = movies['overview'].apply(lambda text: text.split())
# One combined token list per movie: overview words, then genres, keywords, cast, crew.
movies['tags'] = (
    movies['overview']
    + movies['genres']
    + movies['keywords']
    + movies['cast']
    + movies['crew']
)
movies.head()
def collapse(L):
    """Return a new list with every space removed from each string in *L*."""
    return [token.replace(" ", "") for token in L]
# Strip spaces inside multi-word names so that e.g. "Science Fiction" becomes
# one token instead of two unrelated words.
movies['cast'] = movies['cast'].apply(collapse)
movies['crew'] = movies['crew'].apply(collapse)
movies['genres'] = movies['genres'].apply(collapse)
movies['keywords'] = movies['keywords'].apply(collapse)
# Assemble the tags from the collapsed columns. Tags built before collapsing
# would still contain the space-separated names, so the collapsing above would
# never reach the text that gets vectorised.
movies['tags'] = (
    movies['overview']
    + movies['genres']
    + movies['keywords']
    + movies['cast']
    + movies['crew']
)
# `new` keeps every column except the per-feature ones that were folded into `tags`.
new = movies.drop(
    columns=['overview', 'genres', 'keywords', 'cast', 'crew'],
)
# Flatten each token list into a single space-separated string for the vectoriser.
new['tags'] = new['tags'].apply(" ".join)
new.head()
"""# Vectorization"""
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words counts over the tags: vocabulary capped at 5000 features,
# English stop words removed.
cv = CountVectorizer(
    stop_words='english',
    max_features=5000,
)
# Dense array with one row per movie in `new`.
vector = cv.fit_transform(new['tags']).toarray()
vector.shape
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize
import numpy as np
# Scale every row of `vector` to unit length.
vector_normalized = normalize(vector)
# Pairwise cosine similarity computed directly by scikit-learn ...
similarity = cosine_similarity(vector)
# ... and the same quantity as a dot product of the unit-length rows.
similarity2 = vector_normalized.dot(vector_normalized.T)
# Look up the index label of one known title.
new[new['title'] == 'The Lego Movie'].index[0]
def recommend(movie):
    """Print the titles of the five movies most similar to *movie*.

    Similarity scores are read from the module-level ``similarity`` matrix,
    whose rows and columns follow the row order of ``new``.

    Args:
        movie: Exact title to look up in ``new['title']``. When several rows
            share the title, the first one is used.

    Raises:
        IndexError: If no row of ``new`` has that title.
    """
    # Use the row *position*, not the index label: `similarity` is a plain
    # array, and labels differ from positions whenever the index has gaps.
    matches = np.flatnonzero((new['title'] == movie).to_numpy())
    if matches.size == 0:
        raise IndexError(f"movie {movie!r} not found in the movie list")
    position = int(matches[0])

    ranked = sorted(
        enumerate(similarity[position]),
        key=lambda pair: pair[1],
        reverse=True,
    )
    # Exclude the query row explicitly rather than assuming it sorts first;
    # a row with identical tags can tie with it.
    closest = [row for row, _ in ranked if row != position][:5]
    for row in closest:
        print(new['title'].iloc[row])
def recommend2(movie):
    """Print the titles of the five movies most similar to *movie*.

    Same as ``recommend`` but scores come from the module-level ``similarity2``
    matrix, whose rows and columns follow the row order of ``new``.

    Args:
        movie: Exact title to look up in ``new['title']``. When several rows
            share the title, the first one is used.

    Raises:
        IndexError: If no row of ``new`` has that title.
    """
    # Use the row *position*, not the index label: `similarity2` is a plain
    # array, and labels differ from positions whenever the index has gaps.
    matches = np.flatnonzero((new['title'] == movie).to_numpy())
    if matches.size == 0:
        raise IndexError(f"movie {movie!r} not found in the movie list")
    position = int(matches[0])

    ranked = sorted(
        enumerate(similarity2[position]),
        key=lambda pair: pair[1],
        reverse=True,
    )
    # Exclude the query row explicitly rather than assuming it sorts first;
    # a row with identical tags can tie with it.
    closest = [row for row, _ in ranked if row != position][:5]
    for row in closest:
        print(new['title'].iloc[row])
# Smoke-test both recommenders on a couple of known titles.
recommend('Gandhi')
recommend('Superman')
recommend2('Gandhi')
import pickle
# Persist the movie table and the similarity matrix. The `with` blocks make
# sure each file is flushed and closed once the dump finishes.
with open('movie_list.pkl', 'wb') as _pkl_file:
    pickle.dump(new, _pkl_file)
with open('similarity2.pkl', 'wb') as _pkl_file:
    pickle.dump(similarity2, _pkl_file)
"""# Let's Integrate AI
We will use the OpenAI API to call gpt-4o-mini so that it can explain why these movies were recommended.
We will also use DeepSeek, view its responses, and compare the two to see how they fare.
"""
# For Open ai
import openai
from openai import OpenAI
from dotenv import load_dotenv
import os
# Load environment variables through python-dotenv, then read the OpenAI key.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')
# Cheap format check only: the key must be set, start with 'sk-proj-' and be
# longer than 10 characters. Nothing is validated against the API here.
if not api_key or not api_key.startswith('sk-proj-') or len(api_key) <= 10:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
else:
    print("API key looks good so far")
MODEL = 'gpt-4o-mini'
# NOTE: from here on the name `openai` refers to this client instance, not the module.
openai = OpenAI()
#sample_call
response = openai.responses.create(
    input="Write a one-sentence bedtime story about a unicorn.",
    model="gpt-4.1",
)
print(response.output_text)
#Lets make the markdown a bit more beautiful
from IPython.display import Markdown, display
# DeepSeek is reached through the same OpenAI client class, pointed at DeepSeek's base URL.
deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')
dp = OpenAI(
    base_url="https://api.deepseek.com",
    api_key=deepseek_api_key,
)
#sample_call
def reply():
    """Send a fixed sample prompt through the ``dp`` client and print the reply text."""
    sample_messages = [
        {"role": "system", "content": "You are a helpful assistant"},
        {"role": "user", "content": "Hello, explain with markdown why life is good with LG. Two H1"},
    ]
    response_deep = dp.chat.completions.create(
        stream=False,
        messages=sample_messages,
        model="deepseek-chat",
    )
    first_choice = response_deep.choices[0]
    print(first_choice.message.content)


# Alias to the function object itself; nothing is called here.
responded = reply
def get_recommendations(movie_title):
    """Return the titles of the five movies most similar to *movie_title*.

    Similarity scores are read from the module-level ``similarity2`` matrix,
    whose rows and columns follow the row order of ``new``.

    Args:
        movie_title: Exact title to look up in ``new['title']``. When several
            rows share the title, the first one is used.

    Returns:
        A list of up to five titles, most similar first, never including the
        queried row itself.

    Raises:
        IndexError: If no row of ``new`` has that title.
    """
    # Use the row *position*, not the index label: `similarity2` is a plain
    # array, and labels differ from positions whenever the index has gaps.
    matches = np.flatnonzero((new['title'] == movie_title).to_numpy())
    if matches.size == 0:
        raise IndexError(f"movie {movie_title!r} not found in the movie list")
    position = int(matches[0])

    ranked = sorted(
        enumerate(similarity2[position]),
        key=lambda pair: pair[1],
        reverse=True,
    )
    # Exclude the query row explicitly rather than assuming it sorts first;
    # a row with identical tags can tie with it.
    closest = [row for row, _ in ranked if row != position][:5]
    return [new['title'].iloc[row] for row in closest]
from IPython.display import Markdown, display
def explain_recommendations(movie_title, recommended_movies, ai_model="openai"):
    """Ask a chat model why *recommended_movies* fit *movie_title*.

    Args:
        movie_title: Title the recommendations were generated for.
        recommended_movies: Iterable of recommended title strings.
        ai_model: ``"openai"`` (uses the ``openai`` client with gpt-4o-mini) or
            ``"deepseek"`` (uses the ``dp`` client with deepseek-chat).

    Returns:
        The model's reply text. Any exception raised while calling the API is
        caught and returned as an ``"Error with ... API: ..."`` string, and an
        unrecognised *ai_model* yields a fixed "Invalid AI model" message.
    """
    recommended_movies_str = ", ".join(recommended_movies)
    user_prompt = f"""
Explain why the following movies were recommended based on the movie
'{movie_title}': {recommended_movies_str}. Focus on potential thematic, genre, or plot similarities.
"""
    system_prompt = f"""
you are helpful assistant that understands movies and you know how to recommend and draw pointers
why movies are recommended.
Be specific. Avoid Halucinations. Focus on potential thematic, genre, or plot similarities. Make it interesting
and engaging and end with you should watch the movie.
Reply in well structured markdown and easy to understand way.
"""
    # Reject unknown back-ends up front; both valid ones share the same messages.
    if ai_model not in ("openai", "deepseek"):
        return "Invalid AI model specified. Choose 'openai' or 'deepseek'."

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    if ai_model == "deepseek":
        try:
            response_deep = dp.chat.completions.create(
                model="deepseek-chat",
                messages=conversation,
            )
            return response_deep.choices[0].message.content
        except Exception as err:
            return f"Error with Deepseek API: {err}"

    try:
        response = openai.chat.completions.create(
            model="gpt-4o-mini",  # Using gpt-4o-mini as per the user's comment
            messages=conversation,
        )
        return response.choices[0].message.content
    except Exception as err:
        return f"Error with OpenAI API: {err}"
# Demo: render the OpenAI explanation for the 'Gandhi' recommendations as Markdown.
display(
    Markdown(
        explain_recommendations(
            'Gandhi',
            get_recommendations('Gandhi'),
            ai_model="openai",
        )
    )
)
# prompt: make the display step above into a function
def display_explanation(movie_title, ai_model="openai"):
    """Fetch recommendations for *movie_title* and render their explanation as Markdown.

    Args:
        movie_title: Title passed to ``get_recommendations``.
        ai_model: Back-end name forwarded to ``explain_recommendations``.
    """
    picks = get_recommendations(movie_title)
    display(Markdown(explain_recommendations(movie_title, picks, ai_model=ai_model)))
# Example Usage:
movie_to_explain = 'Gandhi'
recommended = get_recommendations(movie_to_explain)
print(f"Recommended movies for '{movie_to_explain}': {recommended}")

# display() only renders its argument and hands back None here, so keep the
# explanation text itself under the *_explanation names, and print each
# heading before the content it introduces.
explanation = explain_recommendations(movie_to_explain, recommended, ai_model="openai")
openai_explanation = explanation
print("\nOpenAI Explanation:")
display(Markdown(openai_explanation))

explanation_deepseek = explain_recommendations(movie_to_explain, recommended, ai_model="deepseek")
deepseek_explanation = explanation_deepseek
print("\nDeepseek Explanation:")
display(Markdown(deepseek_explanation))
#!pip install gradio -q
import gradio as gr
def movie_recommendation_app(movie_title, ai_model):
    """Gradio callback: recommend movies for a title and explain the picks.

    Args:
        movie_title: Title chosen in the UI; ``None`` or ``""`` when nothing
            is selected.
        ai_model: Back-end name forwarded to ``explain_recommendations``.

    Returns:
        A ``(recommendations, explanation)`` tuple: the recommended titles
        joined one per line, and the explanation text for the Markdown output.
    """
    # An empty selection would otherwise surface as a lookup error in the UI.
    if not movie_title:
        return "", "Please select a movie to get recommendations."

    recommended_movies = get_recommendations(movie_title)
    recommendations_str = "\n".join(recommended_movies)
    # The explanation is returned to Gradio, which renders it itself; no
    # notebook-style display() call is needed inside the callback.
    explanation3 = explain_recommendations(movie_title, recommended_movies, ai_model=ai_model)
    return recommendations_str, explanation3
# Choices offered by the UI: every title in `new`, in row order.
movie_titles = new['title'].to_list()
# One end-to-end call of the callback; the returned tuple is discarded.
movie_recommendation_app(
    "Superman",
    "openai",
)
"""# Gradio App"""
# Two inputs (movie, explanation back-end) feed `movie_recommendation_app`;
# its two return values fill the textbox and the Markdown panel in order.
iface = gr.Interface(
    fn=movie_recommendation_app,
    inputs=[
        gr.Dropdown(movie_titles, label="Select a Movie"),
        # Preselect a back-end so a first submit does not arrive with no model chosen.
        gr.Radio(
            ["openai", "deepseek"],
            value="openai",
            label="Select AI Model for Explanation",
        ),
    ],
    outputs=[
        gr.Textbox(label="Recommended Movies"),
        gr.Markdown(label="Explanation from AI"),
    ],
    title="Movie Recommendation System with AI Explanation",
    description="""Select a movie to get recommendations, then select an AI model to get an explanation of why the movies are recommended.
Each title is encoded using the overview of the movie and compared by cosine similarity;
the selected AI model then explains to users why the movies are recommended.
""",
)
iface.launch()