"""Movie recommendation pipeline.

Builds (or loads) a movie database whose rows carry an embedding vector,
finds recommendations for a set of user suggestions, and enriches the
results with poster links and LLM-generated explanations.
"""

from course_project import data_import
from course_project import embedding
from course_project import data_persistence
from course_project import gemma_llm
from course_project import constants
import pandas as pd
import movieposters as mp

DATA_FOLDER_LOCATION = "course_project/data/"
# Pickle file used by the Ollama embedding step to checkpoint its progress.
OLLAMA_EMBEDDING_BK_LOCATION = DATA_FOLDER_LOCATION + "ollama_embedding_bk_location.pkl"


def get_movie_info(id):  # NOTE(review): `id` shadows the builtin; name kept for caller compatibility.
    """Return a one-line textual description of the movie at positional
    index ``id`` in the transformed database.

    The description covers title, director(s), genres, cast and overview,
    formatted for consumption by the LLM explanation prompt.
    """
    movies_data = data_persistence.get_transformed_db()
    movie = movies_data.iloc[id, :]
    movie = movie[["title", "director", "genres", "cast", "overview"]]
    movie_info = (
        f'Title: {movie["title"]}. Director(s): {movie["director"]}. '
        f'Genres: {movie["genres"]}. Cast: {movie["cast"]}. '
        f'Overview: {movie["overview"]}'
    )
    return movie_info


def get_data(overwrite_db=False):
    """Return the transformed movie database as a DataFrame.

    If no persisted database exists (or ``overwrite_db`` is True), the raw
    data is (re)imported, a per-movie text blob is assembled in the
    ``vector`` column, that column is replaced in place by its embedding,
    and the result is persisted.
    """
    movies_data = data_persistence.get_transformed_db()
    if not isinstance(movies_data, pd.DataFrame) or overwrite_db:
        movies_data = data_import.read_or_generate_movies_data()
        # Concatenate the descriptive fields into one string per movie;
        # list-valued fields are flattened to comma-separated text first.
        movies_data["vector"] = (
            "Title: " + movies_data["title"]
            + ". Director(s): " + movies_data["director"].apply(data_import.format_list_of_strings)
            + ". Genres: " + movies_data["genres"].apply(data_import.format_list_of_strings)
            + ". Cast: " + movies_data["cast"].apply(data_import.format_list_of_strings)
            + ". Overview: " + movies_data["overview"]
        )
        movies_data["vector"] = embedding.encode_text_series_ollama_method(
            movies_data["vector"],
            file_backup_location=OLLAMA_EMBEDDING_BK_LOCATION,
            batch_size=500,
        )
        print(movies_data["vector"])
        data_persistence.save_transformed_db(movies_data)
    return movies_data


def replace_indexes_with_titles(suggestions, movies_data):
    """Return a copy of ``suggestions`` (user -> movie index) with every
    index replaced by the corresponding movie title."""
    return {
        user: movies_data.at[movie_id, 'title']
        for user, movie_id in suggestions.items()
    }


def get_recommendations_df(suggestions, n_results, similarity=constants.COSINE_SIMILARITY, rating_weight=0.15):
    """Compute ``n_results`` recommendations for ``suggestions``.

    Returns a DataFrame of recommended movies with a ``based_on_index``
    column (the suggestion indexes each recommendation derives from) and a
    ``based_on`` column (the same mapping with titles instead of indexes).
    """
    movies_data = get_data()
    recommendations_raw = embedding.find_recommendations(
        suggestions,
        movies_data["vector"],
        n_results=n_results,
        similarity=similarity,
        rating_weight=rating_weight,
    )
    recommendations_indexes = [reco["movie_index"] for reco in recommendations_raw]
    columns_of_interest = [
        "id", "title", "director", "genres", "cast",
        "overview", "rating", "poster_path", "imdb_id",
    ]
    recommendations = (movies_data.loc[recommendations_indexes])[columns_of_interest].copy()
    suggestions_considered = [reco["suggestions_considered"] for reco in recommendations_raw]
    # Align the per-recommendation metadata with the DataFrame index so the
    # assignment does not reorder or drop rows.
    recommendations["based_on_index"] = pd.Series(suggestions_considered, index=recommendations.index)
    suggestions_considered_titles = [
        replace_indexes_with_titles(suggestion, movies_data)
        for suggestion in suggestions_considered
    ]
    recommendations["based_on"] = pd.Series(suggestions_considered_titles, index=recommendations.index)
    return recommendations


def get_connections(row):
    """Return the user -> requested-movie mapping behind one recommendation
    row, as a plain (copied) dict."""
    return dict(row["based_on"])


def get_movie_poster(imdb_id):
    """Return a poster-image link for the movie with the given IMDb id."""
    return mp.get_poster(id=imdb_id)


def get_recommendations(suggestions, n_results=10, similarity=constants.COSINE_SIMILARITY, rating_weight=0.15):
    """Return the recommendations DataFrame with ``poster_path`` refreshed
    from IMDb poster links (one lookup per recommended movie)."""
    recommendations = get_recommendations_df(
        suggestions,
        n_results,
        similarity=similarity,
        rating_weight=rating_weight,
    )
    recommendations["poster_path"] = recommendations["imdb_id"].apply(get_movie_poster)
    return recommendations


def get_explanation_for_reco(suggestion_1_id, suggestion_2_id, recommendation_id):
    """Ask the LLM why ``recommendation_id`` follows from the two suggested
    movies, passing each movie's textual description to the prompt."""
    suggestion_1_info = get_movie_info(suggestion_1_id)
    suggestion_2_info = get_movie_info(suggestion_2_id)
    recommendation_info = get_movie_info(recommendation_id)
    return gemma_llm.find_reason_for_recommendation(
        suggestion_1_info,
        suggestion_2_info,
        recommendation_info,
    )