| import pandas as pd |
| import numpy as np |
| import faiss |
| from sentence_transformers import SentenceTransformer |
| import pickle |
| import os |
|
|
class MovieRecommender:
    """Content-based movie recommender backed by a FAISS vector index.

    Movie description text (the 'soup' field) is embedded with a
    SentenceTransformer model; nearest-neighbour search over L2-normalized
    vectors answers both free-text queries and "liked movies" taste
    profiles.  A small keyword guardrail layer bans genres that clash
    with the query's mood (e.g. no Horror for a kids' query).
    """

    def __init__(self, model_name: str = 'all-MiniLM-L6-v2') -> None:
        """Load the sentence encoder and start with an empty index/catalogue.

        Args:
            model_name: Any SentenceTransformer model id.
        """
        self.encoder = SentenceTransformer(model_name)
        # Ask the model for its embedding width instead of hard-coding 384:
        # the old constant only matched MiniLM-class models.  384 is kept
        # as a fallback in case the model does not report a dimension.
        self.d = self.encoder.get_sentence_embedding_dimension() or 384

        self.index = None          # faiss.IndexFlatL2, created lazily on first add
        self.df = pd.DataFrame()   # metadata row i corresponds to index vector i

    def save(self, path: str = 'models/') -> None:
        """Persist the FAISS index and the metadata DataFrame under *path*."""
        # exist_ok avoids the check-then-create race of the old
        # os.path.exists() / os.makedirs() pair.
        os.makedirs(path, exist_ok=True)

        if self.index is not None:
            faiss.write_index(self.index, os.path.join(path, 'movie_index.faiss'))

        with open(os.path.join(path, 'metadata.pkl'), 'wb') as f:
            pickle.dump(self.df, f)

    def load(self, path: str = 'models/') -> None:
        """Restore state written by :meth:`save`; missing files are skipped."""
        index_path = os.path.join(path, 'movie_index.faiss')
        meta_path = os.path.join(path, 'metadata.pkl')

        if os.path.exists(index_path):
            self.index = faiss.read_index(index_path)

        if os.path.exists(meta_path):
            # SECURITY: pickle.load can execute arbitrary code.  Only load
            # files this application wrote itself — never untrusted uploads.
            with open(meta_path, 'rb') as f:
                self.df = pickle.load(f)

    def add_new_movie(self, movie_data: dict) -> None:
        """Embed one movie and append it to the index and metadata table.

        Args:
            movie_data: Mapping with at least a 'soup' key (text to embed);
                the whole dict is stored as the movie's metadata row.
                Rows used by recommend() also need 'id' and 'title'.
        """
        vector = self.encoder.encode([movie_data['soup']])
        # With unit vectors, L2 distance is monotonic in cosine similarity,
        # so the IndexFlatL2 ranking below is effectively cosine ranking.
        faiss.normalize_L2(vector)

        if self.index is None:
            self.index = faiss.IndexFlatL2(self.d)
        self.index.add(vector)

        # Keep self.df positionally aligned with the index: row i <-> vector i.
        new_row = pd.DataFrame([movie_data])
        if self.df.empty:
            self.df = new_row
        else:
            self.df = pd.concat([self.df, new_row], ignore_index=True)

    def get_banned_genres(self, query_text: str) -> list:
        """Return a list of genres to BAN based on the user's vibe.

        A genre is never banned when the user explicitly asked for it
        (e.g. "a happy thriller" keeps Thriller despite the happy rule).

        Args:
            query_text: Raw user query.

        Returns:
            Genre names to filter out of search results.
        """
        query_lower = query_text.lower()
        banned_genres = set()

        # Upbeat queries: keep the dark stuff out.
        if any(w in query_lower for w in ["happy", "uplifting", "comedy", "laugh", "cheerful", "funny"]):
            banned_genres.update(["Horror", "Thriller", "War", "Crime", "Tragedy"])

        # Kid-oriented queries: strict content filter.
        if any(w in query_lower for w in ["kid", "child", "animation", "disney", "pixar"]):
            banned_genres.update(["Horror", "Crime", "War", "Romance", "Adult"])
        # "family" alone gets the same filter, unless the user explicitly
        # wants a crime-family movie ("family crime saga").
        elif "family" in query_lower and "crime" not in query_lower:
            banned_genres.update(["Horror", "Crime", "War", "Romance", "Adult"])

        # Romance queries: only Horror is clearly off-vibe.
        if "romantic" in query_lower or "romance" in query_lower:
            banned_genres.update(["Horror"])

        # Explicit requests override the mood rules: drop any ban whose
        # name appears verbatim in the query.
        return [ban for ban in banned_genres if ban.lower() not in query_lower]

    def recommend(self, text_query: str, k: int = 10) -> list:
        """Semantic search with genre guardrails and title de-duplication.

        Args:
            text_query: Free-text description of what the user wants.
            k: Maximum number of recommendations to return.

        Returns:
            Up to *k* dicts of {'id', 'title', 'score'}; score is the L2
            distance between normalized embeddings (lower = more similar).
        """
        print(f"🔎 Searching for: '{text_query}'")
        if self.df.empty or self.index is None:
            return []

        query_vector = self.encoder.encode([text_query])
        faiss.normalize_L2(query_vector)

        # Over-fetch so guardrail filtering and de-duplication can still
        # leave k survivors.  The previous hard-coded 25 silently capped
        # results whenever k > 25.  ntotal caps the request at index size.
        search_k = min(max(25, k * 3), self.index.ntotal)
        distances, indices = self.index.search(query_vector, k=search_k)

        banned_genres = self.get_banned_genres(text_query)
        if banned_genres:
            print(f"🛡️ Guardrails Active! Banning: {banned_genres}")

        results = []
        seen_titles = set()

        for i, idx in enumerate(indices[0]):
            # FAISS pads with -1 when fewer than search_k vectors exist;
            # the len() check guards against index/metadata drift.
            if idx == -1 or idx >= len(self.df):
                continue

            movie_data = self.df.iloc[idx].to_dict()
            movie_soup = movie_data.get('soup', '').lower()

            # Guardrail: drop the movie if any banned genre appears in its soup.
            hit = next((ban for ban in banned_genres if ban.lower() in movie_soup), None)
            if hit is not None:
                print(f"🚫 Blocking '{movie_data['title']}' (Contains {hit})")
                continue

            # De-duplicate remakes / repeated catalogue entries by title.
            if movie_data['title'] in seen_titles:
                continue

            results.append({
                'id': int(movie_data['id']),
                'title': movie_data['title'],
                'score': float(distances[0][i]),
            })
            seen_titles.add(movie_data['title'])

            if len(results) >= k:
                break

        return results

    def recommend_on_text(self, text_query: str, k: int = 10) -> list:
        """Wrapper for the main recommend function."""
        return self.recommend(text_query, k)

    def recommend_for_user(self, liked_movie_titles: list, k: int = 10) -> list:
        """Recommend by averaging the embeddings of movies the user liked.

        Args:
            liked_movie_titles: Titles to look up (case-insensitive
                substring match against the catalogue's 'title' column).
            k: Maximum number of recommendations.

        Returns:
            Up to *k* dicts of {'id', 'title', 'score'}.
        """
        # Guard the index as well as the metadata: load() may have restored
        # metadata.pkl while the index file was missing, and searching a
        # None index would raise AttributeError.
        if self.df.empty or self.index is None:
            return []

        vectors = []
        for title in liked_movie_titles:
            # First substring match wins; unknown titles are skipped.
            movie_row = self.df[self.df['title'].str.contains(title, case=False, na=False)]
            if not movie_row.empty:
                soup = movie_row.iloc[0]['soup']
                vectors.append(self.encoder.encode(soup))

        if not vectors:
            return []

        # The mean of the liked-movie embeddings acts as a taste centroid.
        user_vector = np.mean(vectors, axis=0).reshape(1, -1)
        # faiss.normalize_L2 requires contiguous float32 — make it explicit
        # rather than relying on np.mean preserving the encoder's dtype.
        user_vector = np.ascontiguousarray(user_vector, dtype=np.float32)
        faiss.normalize_L2(user_vector)

        distances, indices = self.index.search(user_vector, k)

        results = []
        for i, idx in enumerate(indices[0]):
            if idx != -1 and idx < len(self.df):
                movie_data = self.df.iloc[idx]
                results.append({
                    'id': int(movie_data['id']),
                    'title': movie_data['title'],
                    'score': float(distances[0][i]),
                })
        return results
|
|