Spaces:
Sleeping
Sleeping
File size: 4,017 Bytes
aae5cfe c3e37b9 fd31c97 c3e37b9 fd31c97 c3e37b9 73969b9 fd31c97 73969b9 fd31c97 73969b9 fd31c97 c3e37b9 fd31c97 c3e37b9 fd31c97 c3e37b9 fd31c97 c3e37b9 fd31c97 c3e37b9 fd31c97 c3e37b9 fd31c97 c3e37b9 fd31c97 c3e37b9 fd31c97 c3e37b9 fd31c97 c3e37b9 fd31c97 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 | from course_project import data_import
from course_project import embedding
from course_project import data_persistence
from course_project import gemma_llm
from course_project import constants
import pandas as pd
import movieposters as mp
# Folder (relative path, with trailing slash) under which this module keeps its data files.
DATA_FOLDER_LOCATION = "course_project/data/"
# Pickle file handed to the Ollama embedding step as its backup location (see get_data).
OLLAMA_EMBEDDING_BK_LOCATION = f"{DATA_FOLDER_LOCATION}ollama_embedding_bk_location.pkl"
def get_movie_info(id):
    """Build a one-line textual description of a single movie.

    The movie is read from the object returned by
    ``data_persistence.get_transformed_db()`` using a positional lookup
    (``iloc``).

    Args:
        id: Positional row number of the movie in the transformed database.

    Returns:
        A string of the form ``"Title: ... Director(s): ... Genres: ...
        Cast: ... Overview: ..."`` built from the row's fields.
    """
    # NOTE(review): this lookup is positional (iloc), whereas other helpers in
    # this module use label lookups (.at / .loc) -- confirm the database index
    # is a default 0..n-1 range so both address the same rows.
    wanted_fields = ["title", "director", "genres", "cast", "overview"]
    catalogue = data_persistence.get_transformed_db()
    record = catalogue.iloc[id, :][wanted_fields]
    title, director, genres, cast, overview = (record[field] for field in wanted_fields)
    return f"Title: {title}. Director(s): {director}. Genres: {genres}. Cast: {cast}. Overview: {overview}"
def get_data(overwrite_db=False):
    """Return the movies DataFrame, (re)building and saving it when needed.

    The persisted database is returned as-is when it is a DataFrame and
    ``overwrite_db`` is falsy. Otherwise the movies data is read or generated
    through ``data_import``, a descriptive text is composed for every movie,
    that text is encoded with the Ollama embedding method, and the result is
    saved through ``data_persistence`` before being returned.

    Args:
        overwrite_db: When truthy, rebuild the database even if a persisted
            DataFrame is available.

    Returns:
        The movies DataFrame, including a ``"vector"`` column when it was
        built here.
    """
    stored = data_persistence.get_transformed_db()
    if isinstance(stored, pd.DataFrame) and not overwrite_db:
        return stored

    movies = data_import.read_or_generate_movies_data()
    as_text = data_import.format_list_of_strings

    # First pass: the "vector" column temporarily holds the text to embed.
    movies["vector"] = (
        "Title: " + movies["title"]
        + ". Director(s): " + movies["director"].apply(as_text)
        + ". Genres: " + movies["genres"].apply(as_text)
        + ". Cast: " + movies["cast"].apply(as_text)
        + ". Overview: " + movies["overview"]
    )
    # Second pass: replace that text with the output of the embedding step.
    movies["vector"] = embedding.encode_text_series_ollama_method(
        movies["vector"],
        file_backup_location=OLLAMA_EMBEDDING_BK_LOCATION,
        batch_size=500,
    )
    print(movies["vector"])
    data_persistence.save_transformed_db(movies)
    return movies
def replace_indexes_with_titles(suggestions, movies_data):
    """Return a copy of ``suggestions`` whose values are movie titles.

    Args:
        suggestions: Mapping of user -> movie index label. It is not modified.
        movies_data: DataFrame with a ``"title"`` column, addressed by index
            label through ``.at``.

    Returns:
        A copy of ``suggestions`` (made with its own ``.copy()``) in which
        every movie index has been replaced by the corresponding title.
    """
    titled = suggestions.copy()
    # Read from the untouched input and write into the copy; keys are only
    # overwritten, never added or removed.
    for user, movie_index in suggestions.items():
        titled[user] = movies_data.at[movie_index, "title"]
    return titled
def get_recommendations_df(suggestions, n_results, similarity=constants.COSINE_SIMILARITY, rating_weight=0.15):
    """Return recommended movies as a DataFrame annotated with their sources.

    Args:
        suggestions: Suggestions forwarded unchanged to
            ``embedding.find_recommendations``.
        n_results: Forwarded as ``n_results`` to the recommender.
        similarity: Forwarded as ``similarity`` to the recommender.
        rating_weight: Forwarded as ``rating_weight`` to the recommender.

    Returns:
        A DataFrame with one row per recommendation holding the selected
        movie columns plus ``"based_on_index"`` (the raw
        ``"suggestions_considered"`` entry of each recommendation) and
        ``"based_on"`` (the same entry with movie indexes replaced by titles).
    """
    catalogue = get_data()
    raw_hits = embedding.find_recommendations(
        suggestions,
        catalogue["vector"],
        n_results=n_results,
        similarity=similarity,
        rating_weight=rating_weight,
    )

    # Pick the recommended rows by index label, then keep a private copy of
    # the columns of interest so new columns can be added to it.
    hit_labels = [hit["movie_index"] for hit in raw_hits]
    kept_columns = [
        "id", "title", "director", "genres", "cast",
        "overview", "rating", "poster_path", "imdb_id",
    ]
    picked_rows = catalogue.loc[hit_labels]
    result = picked_rows[kept_columns].copy()

    sources = [hit["suggestions_considered"] for hit in raw_hits]
    result["based_on_index"] = pd.Series(sources, index=result.index)

    source_titles = [replace_indexes_with_titles(source, catalogue) for source in sources]
    result["based_on"] = pd.Series(source_titles, index=result.index)
    return result
def get_connections(row):
    """Return a new dict holding the person -> request pairs of a row.

    Args:
        row: Object supporting ``row["based_on"]``, whose value exposes
            ``.items()`` yielding (person, request) pairs.

    Returns:
        A fresh ``dict`` with the same pairs as ``row["based_on"]``.
    """
    return dict(row["based_on"].items())
def get_movie_poster(imdb_id):
    """Return the value of ``mp.get_poster`` for the given IMDb id.

    Args:
        imdb_id: Identifier passed to ``mp.get_poster`` as its ``id`` argument.

    Returns:
        Whatever ``mp.get_poster(id=imdb_id)`` returns (the original code
        names it ``link``). Exceptions raised by the lookup are not caught
        here and propagate to the caller.
    """
    return mp.get_poster(id=imdb_id)
def get_recommendations(suggestions, n_results=10, similarity=constants.COSINE_SIMILARITY, rating_weight=0.15):
    """Return the recommendations DataFrame with poster links filled in.

    Args:
        suggestions: Forwarded to ``get_recommendations_df``.
        n_results: Forwarded to ``get_recommendations_df``; defaults to 10.
        similarity: Forwarded to ``get_recommendations_df``.
        rating_weight: Forwarded to ``get_recommendations_df``.

    Returns:
        The DataFrame produced by ``get_recommendations_df`` with its
        ``"poster_path"`` column overwritten by the result of
        ``get_movie_poster`` applied to each ``"imdb_id"`` value.
    """
    table = get_recommendations_df(
        suggestions,
        n_results,
        similarity=similarity,
        rating_weight=rating_weight,
    )
    poster_links = table["imdb_id"].apply(get_movie_poster)
    table["poster_path"] = poster_links
    return table
def get_explanation_for_reco(suggestion_1_id, suggestion_2_id, recommendation_id):
    """Ask the Gemma LLM helper why a movie was recommended.

    Args:
        suggestion_1_id: Id of the first suggested movie, as accepted by
            ``get_movie_info``.
        suggestion_2_id: Id of the second suggested movie.
        recommendation_id: Id of the recommended movie.

    Returns:
        The value returned by ``gemma_llm.find_reason_for_recommendation``
        when given the three movie descriptions.
    """
    # Descriptions are built in the order: first suggestion, second
    # suggestion, recommendation -- the same order the helper receives them.
    movie_ids = (suggestion_1_id, suggestion_2_id, recommendation_id)
    descriptions = [get_movie_info(movie_id) for movie_id in movie_ids]
    return gemma_llm.find_reason_for_recommendation(*descriptions)
|