MultiplayerAiReco / course_project /application_tools.py
Javier Real
Project Improvement
c3e37b9
from course_project import data_import
from course_project import embedding
from course_project import data_persistence
from course_project import gemma_llm
from course_project import constants
import pandas as pd
import movieposters as mp
# Folder holding the project's generated/cached data artifacts.
DATA_FOLDER_LOCATION = "course_project/data/"
# Pickle file passed to the Ollama embedding step as an on-disk backup location
# (see get_data, where it is forwarded as file_backup_location).
OLLAMA_EMBEDDING_BK_LOCATION = DATA_FOLDER_LOCATION + "ollama_embedding_bk_location.pkl"
def get_movie_info(id):
    """Return a one-line textual summary of the movie at positional index *id*.

    The movie is looked up by integer position in the transformed movies
    DataFrame, and its title, director(s), genres, cast and overview are
    rendered into a single human-readable string (used as LLM prompt input
    by get_explanation_for_reco).
    """
    db = data_persistence.get_transformed_db()
    # Positional row lookup, restricted to the descriptive fields we render.
    record = db.iloc[id][["title", "director", "genres", "cast", "overview"]]
    return (
        f'Title: {record["title"]}. Director(s): {record["director"]}. '
        f'Genres: {record["genres"]}. Cast: {record["cast"]}. '
        f'Overview: {record["overview"]}'
    )
def get_data(overwrite_db=False):
    """Return the transformed movies DataFrame, building and caching it on demand.

    Loads the persisted, already-transformed database when one exists.
    Otherwise (or when *overwrite_db* is True) it re-imports the raw movies
    data, builds a per-movie description string, replaces it with its
    embedding vector, and persists the result for subsequent calls.

    Parameters:
        overwrite_db: when True, rebuild and re-persist the database even if
            a cached copy exists.

    Returns:
        pandas.DataFrame with the movie columns plus a "vector" embedding
        column.
    """
    movies_data = data_persistence.get_transformed_db()
    # Rebuild when nothing usable was persisted yet, or on explicit request.
    # NOTE(review): presumably get_transformed_db returns a non-DataFrame
    # sentinel (e.g. None) when no cache exists — confirm in data_persistence.
    if not isinstance(movies_data, pd.DataFrame) or overwrite_db:
        movies_data = data_import.read_or_generate_movies_data()
        # Concatenate the descriptive fields into one text per movie; this
        # text is overwritten below by its embedding vector.
        movies_data["vector"] = \
            "Title: " + movies_data["title"] \
            + ". Director(s): " + movies_data["director"].apply(data_import.format_list_of_strings) \
            + ". Genres: " + movies_data["genres"].apply(data_import.format_list_of_strings) \
            + ". Cast: " + movies_data["cast"].apply(data_import.format_list_of_strings) \
            + ". Overview: " + movies_data["overview"]
        movies_data["vector"] = embedding.encode_text_series_ollama_method(movies_data["vector"], file_backup_location=OLLAMA_EMBEDDING_BK_LOCATION, batch_size=500)
        data_persistence.save_transformed_db(movies_data)
    return movies_data
def replace_indexes_with_titles(suggestions, movies_data):
    """Map each user's suggested movie index to that movie's title.

    *suggestions* maps user -> movie row-label; the returned dict maps the
    same users to the corresponding "title" values from *movies_data*.
    The input mapping is not modified.
    """
    return {
        person: movies_data.at[row_label, 'title']
        for person, row_label in suggestions.items()
    }
def get_recommendations_df(suggestions, n_results, similarity=constants.COSINE_SIMILARITY, rating_weight=0.15):
    """Compute movie recommendations for a set of user suggestions.

    Runs embedding.find_recommendations over the cached movie vectors and
    returns a DataFrame of recommended movies, augmented with
    "based_on_index" (the suggestion indexes each recommendation derives
    from) and "based_on" (the same mapping with titles instead of indexes).
    """
    movies_data = get_data()
    raw = embedding.find_recommendations(
        suggestions,
        movies_data["vector"],
        n_results=n_results,
        similarity=similarity,
        rating_weight=rating_weight,
    )
    row_labels = [item["movie_index"] for item in raw]
    considered = [item["suggestions_considered"] for item in raw]
    columns_of_interest = ["id", "title", "director", "genres", "cast", "overview", "rating", "poster_path", "imdb_id"]
    recommendations = movies_data.loc[row_labels, columns_of_interest].copy()
    recommendations["based_on_index"] = pd.Series(considered, index=recommendations.index)
    # Same per-recommendation mapping, but with human-readable titles.
    considered_titles = [replace_indexes_with_titles(entry, movies_data) for entry in considered]
    recommendations["based_on"] = pd.Series(considered_titles, index=recommendations.index)
    return recommendations
def get_connections(row):
    """Return a fresh dict of *row*'s "based_on" mapping (person -> request)."""
    # dict() copies the mapping so callers cannot mutate the row's own data.
    return dict(row["based_on"])
def get_movie_poster(imdb_id):
    """Return a poster image link for *imdb_id*, or None when lookup fails.

    get_recommendations applies this per recommended row; previously a single
    failed lookup (missing poster, network error) raised and aborted the whole
    batch, so failures are now swallowed and reported as None.
    """
    try:
        return mp.get_poster(id=imdb_id)
    except Exception:
        # Broad catch is deliberate: the third-party `movieposters` library's
        # exception types are not imported here, and a missing poster should
        # not break the recommendations pipeline.
        return None
def get_recommendations(suggestions, n_results=10, similarity=constants.COSINE_SIMILARITY, rating_weight=0.15):
    """Return the recommendations DataFrame with poster links filled in.

    Thin wrapper around get_recommendations_df that replaces the
    "poster_path" column with links resolved via get_movie_poster.
    """
    result = get_recommendations_df(
        suggestions, n_results, similarity=similarity, rating_weight=rating_weight
    )
    result["poster_path"] = result["imdb_id"].apply(get_movie_poster)
    return result
def get_explanation_for_reco(suggestion_1_id, suggestion_2_id, recommendation_id):
    """Ask the LLM why *recommendation_id* fits the two suggested movies.

    Each id is resolved to a textual movie summary via get_movie_info and the
    three summaries are passed to gemma_llm.find_reason_for_recommendation.
    """
    summaries = [
        get_movie_info(movie_id)
        for movie_id in (suggestion_1_id, suggestion_2_id, recommendation_id)
    ]
    return gemma_llm.find_reason_for_recommendation(*summaries)