Spaces:

Ephraimmm
/

Recommendation-app

Runtime error

App Files Files Community

Recommendation-app / app.py

Ephraimmm

Update app.py

db8811e verified 10 months ago

raw

history blame contribute delete

9.45 kB

	# -- coding: utf-8 --
	"""Movie Recommendation
	"""

	import pandas as pd
	import numpy as np

	credits = pd.read_csv("tmdb_5000_credits.csv")
	movies = pd.read_csv("tmdb_5000_movies.csv")



	"""# Pre-processing"""

	movies.head(1)

	credits.head(100)["crew"]

	new_movies = movies.merge(credits, on="title")

	movies.shape

	credits.shape

	new_movies

	new_movies.head(4)

	#genre
	#id
	####

	new_movies.head()

	#movies.head()

	movies = new_movies

	movies.head()

	movies = movies[["movie_id","overview","title","genres","keywords", "cast", "crew"]]

	new_movies["original_language"].value_counts()



	new_movies.info()



	movies.isnull().sum()

	movies.dropna(inplace=True)

	movies.duplicated().sum()

	movies.iloc[0].genres

	'[{"id": 28, "name": "Action"}, {"id": 12, "name": "Adventure"}, {"id": 14, "name": "Fantasy"}, {"id": 878, "name": "Science Fiction"}]'

	import ast

	def convert (obj):
	L=[]
	for i in ast.literal_eval(obj):
	L.append(i["name"])
	return L

	movies["genres"] =movies["genres"].apply(convert)

	movies.head(1)

	movies["keywords"] = movies["keywords"].apply(convert)

	movies["cast"][1]

	import ast
	ast.literal_eval('[{"id": 28, "name": "Action"}, {"id": 12, "name": "Adventure"}, {"id": 14, "name": "Fantasy"}, {"id": 878, "name": "Science Fiction"}]')

	def convert3(text):
	L = []
	counter = 0
	for i in ast.literal_eval(text):
	if counter < 3:
	L.append(i['name'])
	counter+=1
	return L



	movies['cast'] = movies['cast'].apply(convert)
	movies.head()

	def fetch_director(text):
	L = []
	for i in ast.literal_eval(text):
	if i['job'] == 'Director':
	L.append(i['name'])
	return L

	movies['crew'] = movies['crew'].apply(fetch_director)

	movies.sample(5)

	movies['overview'] = movies['overview'].apply(lambda x:x.split())

	movies['tags'] = movies['overview'] + movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']

	movies.head()

	def collapse(L):
	L1 = []
	for i in L:
	L1.append(i.replace(" ",""))
	return L1

	movies['cast'] = movies['cast'].apply(collapse)
	movies['crew'] = movies['crew'].apply(collapse)
	movies['genres'] = movies['genres'].apply(collapse)
	movies['keywords'] = movies['keywords'].apply(collapse)

	movies.head(1)

	new = movies.drop(columns=['overview','genres','keywords','cast','crew'])



	new['tags'] = new['tags'].apply(lambda x: " ".join(x))
	new.head()





	"""# Vectorization"""

	from sklearn.feature_extraction.text import CountVectorizer
	cv = CountVectorizer(max_features=5000,stop_words='english')

	vector = cv.fit_transform(new['tags']).toarray()

	vector.shape

	from sklearn.metrics.pairwise import cosine_similarity

	from sklearn.preprocessing import normalize
	import numpy as np

	# Normalize vectors to unit length
	vector_normalized = normalize(vector)

	similarity = cosine_similarity(vector)

	similarity2 = np.dot(vector_normalized, vector_normalized.T)

	new[new['title'] == 'The Lego Movie'].index[0]

	def recommend(movie):
	index = new[new['title'] == movie].index[0]
	distances = sorted(list(enumerate(similarity[index])),reverse=True,key = lambda x: x[1])
	for i in distances[1:6]:
	print(new.iloc[i[0]].title)

	def recommend2(movie):
	index = new[new['title'] == movie].index[0]
	distances = sorted(list(enumerate(similarity2[index])),reverse=True,key = lambda x: x[1])
	for i in distances[1:6]:
	print(new.iloc[i[0]].title)

	recommend('Gandhi')

	recommend('Superman')



	recommend2('Gandhi')

	import pickle

	pickle.dump(new,open('movie_list.pkl','wb'))
	pickle.dump(similarity2,open('similarity2.pkl','wb'))



	"""# Lets Integrate AI

	We will use Open AI api to call gpt-4o-mini to be able to give like an explainer as to why these movies were recommended.

	We will use Deepseek as well to view the responses and compare the two, and see how they fair.

	"""

	# For Open ai
	import openai
	from openai import OpenAI
	from dotenv import load_dotenv
	import os


	load_dotenv(override=True)
	api_key = os.getenv('OPENAI_API_KEY')

	if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
	print("API key looks good so far")
	else:
	print("There might be a problem with your API key? Please visit the troubleshooting notebook!")



	MODEL = 'gpt-4o-mini'
	openai = OpenAI()


	#sample_call
	response = openai.responses.create(
	model="gpt-4.1",
	input="Write a one-sentence bedtime story about a unicorn."
	)

	print(response.output_text)

	#Lets make the markdown a bit more beautiful

	from IPython.display import Markdown, display

	#for deep seek

	deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')

	dp = OpenAI(api_key=deepseek_api_key, base_url="https://api.deepseek.com")
	#sample_call


	def reply():
	response_deep = dp.chat.completions.create(
	model="deepseek-chat",
	messages=[
	{"role": "system", "content": "You are a helpful assistant"},
	{"role": "user", "content": "Hello, explain with markdown why life is good with LG. Two H1"},
	],
	stream=False
	)

	print(response_deep.choices[0].message.content)

	responded = reply





	def get_recommendations(movie_title):

	index = new[new['title'] == movie_title].index[0]
	distances = sorted(list(enumerate(similarity2[index])), reverse=True, key=lambda x: x[1])
	recommended_movies = [new.iloc[i[0]].title for i in distances[1:6]]
	return recommended_movies

	from IPython.display import Markdown, display

	def explain_recommendations(movie_title, recommended_movies, ai_model="openai"):

	recommended_movies_str = ", ".join(recommended_movies)
	user_prompt = f"""
	Explain why the following movies were recommended based on the movie
	'{movie_title}': {recommended_movies_str}. Focus on potential thematic, genre, or plot similarities.


	"""

	system_prompt = f"""
	you are helpful assistant that understands movies and you know how to recommend and draw pointers
	why movies are recommended.
	Be specific. Avoid Halucinations. Focus on potential thematic, genre, or plot similarities. Make it interesting
	and engaging and end with you should watch the movie.

	Reply in well structured markdown and easy to understand way.

	"""

	if ai_model == "openai":
	try:
	response = openai.chat.completions.create(
	model="gpt-4o-mini", # Using gpt-4o-mini as per the user's comment
	messages=[
	{"role": "system", "content": system_prompt},
	{"role": "user", "content": user_prompt},
	]
	)
	return response.choices[0].message.content
	except Exception as e:
	return f"Error with OpenAI API: {e}"
	elif ai_model == "deepseek":
	try:
	response_deep = dp.chat.completions.create(
	model="deepseek-chat",
	messages=[
	{"role": "system", "content": system_prompt},
	{"role": "user", "content": user_prompt},
	]
	)
	return response_deep.choices[0].message.content
	except Exception as e:
	return f"Error with Deepseek API: {e}"
	else:
	return "Invalid AI model specified. Choose 'openai' or 'deepseek'."

	display(Markdown(explain_recommendations('Gandhi', get_recommendations('Gandhi'), ai_model="openai")))

	# prompt: mAke display a function from above

	def display_explanation(movie_title, ai_model="openai"):
	recommended_movies = get_recommendations(movie_title)
	explanation = explain_recommendations(movie_title, recommended_movies, ai_model=ai_model)
	display(Markdown(explanation))

	# Example Usage:

	movie_to_explain = 'Gandhi'
	recommended = get_recommendations(movie_to_explain)

	print(f"Recommended movies for '{movie_to_explain}': {recommended}")

	explanation = explain_recommendations(movie_to_explain, recommended, ai_model="openai")
	openai_explanation = display(Markdown(explanation))
	print("\nOpenAI Explanation:")
	print(openai_explanation)

	explanation_deepseek = explain_recommendations(movie_to_explain, recommended, ai_model="deepseek")
	deepseek_explanation = display(Markdown(explanation_deepseek))
	print("\nDeepseek Explanation:")
	deepseek_explanation

	#!pip install gradio -q

	import gradio as gr

	def movie_recommendation_app(movie_title, ai_model):
	recommended_movies = get_recommendations(movie_title)
	recommendations_str = "\n".join(recommended_movies)
	explanation3 = explain_recommendations(movie_title, recommended_movies, ai_model=ai_model)
	explained = display(Markdown(explanation3))
	return recommendations_str, explanation3

	movie_titles = new['title'].tolist()

	movie_recommendation_app("Superman", "openai")



	"""# Gradio App"""

	iface = gr.Interface(
	fn=movie_recommendation_app,
	inputs=[
	gr.Dropdown(movie_titles, label="Select a Movie"),
	gr.Radio(["openai", "deepseek"], label="Select AI Model for Explanation")
	],
	outputs=[
	gr.Textbox(label="Recommended Movies"),
	gr.Markdown(label="Explanation from AI")
	],
	title="Movie Recommendation System with AI Explanation",
	description="""Select a movie amd get recommendations, select an AI model to get an explanation why the movies are recommended.
	The cosine similarity encode each title with the overview of each movie, then
	giving AI models to explain to users why it is recommended

	"""
	)

	iface.launch()