# Notebook-export residue: the original export recorded "Spaces:" and two
# "Runtime error" markers from cells that failed when last executed.
# -*- coding: utf-8 -*-
"""Movie Recommendation — data loading and pre-processing."""
import pandas as pd
import numpy as np

# TMDB 5000 dataset: credits holds cast/crew, movies holds metadata.
credits = pd.read_csv("tmdb_5000_credits.csv")
movies = pd.read_csv("tmdb_5000_movies.csv")

# --- Pre-processing ---
# Merge cast/crew into the movie metadata on the shared 'title' column.
new_movies = movies.merge(credits, on="title")
movies = new_movies

# Keep only the columns the recommender actually uses.
movies = movies[["movie_id", "overview", "title", "genres", "keywords", "cast", "crew"]]

# Drop the handful of rows with missing values (some overviews are NaN).
# FIX: removed the orphan string literal and the display-only expression
# statements (`.head()`, `.shape`, `.info()`, ...) — they are no-ops in a
# script and the stray sample JSON string did nothing at all.
movies.dropna(inplace=True)
import ast

def convert(obj):
    """Deserialize a JSON-like list of dicts and collect every 'name' value."""
    return [entry["name"] for entry in ast.literal_eval(obj)]
| movies["genres"] =movies["genres"].apply(convert) | |
| movies.head(1) | |
| movies["keywords"] = movies["keywords"].apply(convert) | |
| movies["cast"][1] | |
| import ast | |
| ast.literal_eval('[{"id": 28, "name": "Action"}, {"id": 12, "name": "Adventure"}, {"id": 14, "name": "Fantasy"}, {"id": 878, "name": "Science Fiction"}]') | |
def convert3(text):
    """Return at most the first three 'name' values from a serialized list of dicts."""
    names = []
    for entry in ast.literal_eval(text):
        if len(names) == 3:
            break
        names.append(entry['name'])
    return names
# NOTE(review): `convert3` (top-3 cast) is defined just above but never used —
# this line keeps every cast member via `convert`, matching the original run.
# Switch to `convert3` if limiting to the top three actors was the intent.
movies['cast'] = movies['cast'].apply(convert)
movies.head()
def fetch_director(text):
    """Extract the name of every crew member whose job is 'Director'."""
    return [member['name'] for member in ast.literal_eval(text) if member['job'] == 'Director']
# Reduce crew to director names only.
movies['crew'] = movies['crew'].apply(fetch_director)
# Tokenize the overview so every feature column is a list of strings,
# then concatenate them into a single 'tags' list per movie.
# FIX: dropped the no-op display expressions, including the
# nondeterministic `movies.sample(5)`.
movies['overview'] = movies['overview'].apply(lambda x: x.split())
movies['tags'] = movies['overview'] + movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']
def collapse(L):
    """Strip internal spaces from each string so multi-word names become single tokens."""
    return [item.replace(" ", "") for item in L]
# Collapse multi-word names into single tokens so "Sam Worthington" and
# "Sam Mendes" don't spuriously share a "Sam" feature.
movies['cast'] = movies['cast'].apply(collapse)
movies['crew'] = movies['crew'].apply(collapse)
movies['genres'] = movies['genres'].apply(collapse)
movies['keywords'] = movies['keywords'].apply(collapse)

# BUG FIX: 'tags' was originally assembled *before* collapse ran, so the
# space-stripped tokens never reached the vectorizer. Rebuild tags here from
# the collapsed columns (overview stays as plain prose tokens).
movies['tags'] = movies['overview'] + movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']

new = movies.drop(columns=['overview', 'genres', 'keywords', 'cast', 'crew'])
new['tags'] = new['tags'].apply(lambda x: " ".join(x))

# --- Vectorization ---
# Bag-of-words over the tags, capped at the 5000 most frequent terms.
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=5000, stop_words='english')
vector = cv.fit_transform(new['tags']).toarray()

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize

# NOTE(review): `similarity` and `similarity2` are the same quantity computed
# two ways (cosine_similarity vs. dot product of unit-normalized rows). Both
# names are kept because later cells reference each one.
vector_normalized = normalize(vector)
similarity = cosine_similarity(vector)
similarity2 = np.dot(vector_normalized, vector_normalized.T)
def recommend(movie):
    """Print the five titles most similar to *movie* using `similarity`.

    Prints a message instead of raising IndexError when the title is absent.
    """
    matches = new[new['title'] == movie].index
    if len(matches) == 0:
        # Robustness fix: the original raised IndexError on unknown titles.
        print(f"'{movie}' not found in the dataset.")
        return
    index = matches[0]
    # Rank every movie by similarity; entry 0 is the movie itself, so skip it.
    distances = sorted(enumerate(similarity[index]), reverse=True, key=lambda x: x[1])
    for pos, _score in distances[1:6]:
        print(new.iloc[pos].title)
def recommend2(movie):
    """Print the five titles most similar to *movie* using `similarity2`.

    Prints a message instead of raising IndexError when the title is absent.
    """
    matches = new[new['title'] == movie].index
    if len(matches) == 0:
        # Robustness fix: the original raised IndexError on unknown titles.
        print(f"'{movie}' not found in the dataset.")
        return
    index = matches[0]
    # Rank every movie by similarity; entry 0 is the movie itself, so skip it.
    distances = sorted(enumerate(similarity2[index]), reverse=True, key=lambda x: x[1])
    for pos, _score in distances[1:6]:
        print(new.iloc[pos].title)
# Smoke-test the recommenders.
recommend('Gandhi')
recommend('Superman')
recommend2('Gandhi')

# Persist the processed frame and similarity matrix for the web app.
# BUG FIX: the original passed open(...) directly to pickle.dump, leaking the
# file handles; `with` guarantees they are flushed and closed.
import pickle
with open('movie_list.pkl', 'wb') as f:
    pickle.dump(new, f)
with open('similarity2.pkl', 'wb') as f:
    pickle.dump(similarity2, f)
| """# Lets Integrate AI | |
| We will use Open AI api to call gpt-4o-mini to be able to give like an explainer as to why these movies were recommended. | |
| We will use Deepseek as well to view the responses and compare the two, and see how they fair. | |
| """ | |
# --- OpenAI client setup ---
from openai import OpenAI
from dotenv import load_dotenv
import os

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')
# Shape-check the key without ever printing the secret itself.
if api_key and api_key.startswith('sk-proj-') and len(api_key) > 10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")

MODEL = 'gpt-4o-mini'
# NOTE: rebinds the name `openai` from the module to a client instance; later
# cells call `openai.chat.completions.create(...)`, so this name must stay.
# FIX: removed the redundant `import openai` that this assignment shadowed.
openai = OpenAI()

# Sample call to confirm the client works.
# FIX: use the MODEL constant (it was defined but unused; the call
# hard-coded "gpt-4.1" instead).
response = openai.responses.create(
    model=MODEL,
    input="Write a one-sentence bedtime story about a unicorn."
)
print(response.output_text)
# Render model replies as rich Markdown in the notebook.
from IPython.display import Markdown, display

# DeepSeek exposes an OpenAI-compatible endpoint, so the same client class works.
deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')
dp = OpenAI(api_key=deepseek_api_key, base_url="https://api.deepseek.com")
# Sample call to confirm the DeepSeek client works.
def reply():
    """Send a fixed demo prompt to deepseek-chat and print the reply."""
    completion = dp.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": "You are a helpful assistant"},
            {"role": "user", "content": "Hello, explain with markdown why life is good with LG. Two H1"},
        ],
        stream=False,
    )
    print(completion.choices[0].message.content)
# BUG FIX: the original read `responded = reply` (no parentheses), which only
# aliased the function and never ran the sample call. NOTE(review): reply()
# prints and returns None, so `responded` is None — name kept for compatibility.
responded = reply()
def get_recommendations(movie_title):
    """Return the titles of the five movies most similar to *movie_title* (per `similarity2`)."""
    idx = new[new['title'] == movie_title].index[0]
    # Rank all movies by score, descending; slot 0 is the movie itself.
    ranked = sorted(enumerate(similarity2[idx]), key=lambda pair: pair[1], reverse=True)
    return [new.iloc[pos].title for pos, _ in ranked[1:6]]
| from IPython.display import Markdown, display | |
def explain_recommendations(movie_title, recommended_movies, ai_model="openai"):
    """Ask an LLM why *recommended_movies* follow from *movie_title*.

    Parameters:
        movie_title: the seed movie the user picked.
        recommended_movies: list of recommended title strings.
        ai_model: "openai" (gpt-4o-mini) or "deepseek" (deepseek-chat).

    Returns the model's markdown reply, or an error/validation string.
    """
    # Validate up front so invalid input never touches either client.
    if ai_model not in ("openai", "deepseek"):
        return "Invalid AI model specified. Choose 'openai' or 'deepseek'."

    recommended_movies_str = ", ".join(recommended_movies)
    user_prompt = f"""
    Explain why the following movies were recommended based on the movie
    '{movie_title}': {recommended_movies_str}. Focus on potential thematic, genre, or plot similarities.
    """
    # FIX: prompt typos corrected ("you are helpful" -> "You are a helpful",
    # "Halucinations" -> "hallucinations") so the instruction reads cleanly.
    system_prompt = """
    You are a helpful assistant that understands movies and you know how to recommend and draw pointers
    why movies are recommended.
    Be specific. Avoid hallucinations. Focus on potential thematic, genre, or plot similarities. Make it interesting
    and engaging and end with you should watch the movie.
    Reply in well structured markdown and easy to understand way.
    """

    # Both providers expose the OpenAI-compatible chat-completions endpoint,
    # so the two branches collapse into one call.
    if ai_model == "openai":
        client, model_name, label = openai, "gpt-4o-mini", "OpenAI"
    else:
        client, model_name, label = dp, "deepseek-chat", "Deepseek"
    try:
        response = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error with {label} API: {e}"
# Render the OpenAI explanation for 'Gandhi' inline in the notebook.
display(Markdown(explain_recommendations('Gandhi', get_recommendations('Gandhi'), ai_model="openai")))
def display_explanation(movie_title, ai_model="openai"):
    """Fetch recommendations for *movie_title* and render the AI explanation as Markdown."""
    recs = get_recommendations(movie_title)
    text = explain_recommendations(movie_title, recs, ai_model=ai_model)
    display(Markdown(text))
# Example usage: recommendations plus explanations from both providers.
movie_to_explain = 'Gandhi'
recommended = get_recommendations(movie_to_explain)
print(f"Recommended movies for '{movie_to_explain}': {recommended}")

explanation = explain_recommendations(movie_to_explain, recommended, ai_model="openai")
display(Markdown(explanation))
# BUG FIX: display() returns None, so the original `print(openai_explanation)`
# printed "None"; print the explanation string itself instead.
print("\nOpenAI Explanation:")
print(explanation)

explanation_deepseek = explain_recommendations(movie_to_explain, recommended, ai_model="deepseek")
display(Markdown(explanation_deepseek))
# BUG FIX: the original ended with a bare `deepseek_explanation` expression
# (a no-op in a script) after the same display()-returns-None mistake.
print("\nDeepseek Explanation:")
print(explanation_deepseek)
| #!pip install gradio -q | |
| import gradio as gr | |
def movie_recommendation_app(movie_title, ai_model):
    """Gradio callback: return (newline-joined recommendations, AI explanation).

    Parameters:
        movie_title: title selected in the dropdown.
        ai_model: "openai" or "deepseek", from the radio input.

    FIX: dropped the unused `explained = display(Markdown(...))` call — it only
    renders in a notebook frontend and its result was never used in the app.
    """
    recommended_movies = get_recommendations(movie_title)
    recommendations_str = "\n".join(recommended_movies)
    explanation = explain_recommendations(movie_title, recommended_movies, ai_model=ai_model)
    return recommendations_str, explanation
# All titles for the dropdown choices.
movie_titles = new['title'].tolist()

# Quick smoke test of the callback before wiring up the UI.
movie_recommendation_app("Superman", "openai")

# --- Gradio App ---
iface = gr.Interface(
    fn=movie_recommendation_app,
    inputs=[
        gr.Dropdown(movie_titles, label="Select a Movie"),
        gr.Radio(["openai", "deepseek"], label="Select AI Model for Explanation"),
    ],
    outputs=[
        gr.Textbox(label="Recommended Movies"),
        gr.Markdown(label="Explanation from AI"),
    ],
    title="Movie Recommendation System with AI Explanation",
    # FIX: corrected typos in the user-facing description ("amd" -> "and")
    # and smoothed the wording.
    description="""Select a movie and get recommendations; select an AI model to get an explanation of why the movies are recommended.
    Cosine similarity encodes each title with the overview of each movie, and the
    AI models then explain to users why each movie is recommended.
    """,
)
iface.launch()