Spaces:
Sleeping
Sleeping
| from course_project import data_import | |
| from course_project import embedding | |
| from course_project import data_persistence | |
| from course_project import gemma_llm | |
| from course_project import constants | |
| import pandas as pd | |
| import movieposters as mp | |
# Folder (relative path, with trailing slash) that holds the project's data files.
DATA_FOLDER_LOCATION = "course_project/data/"
# Pickle file handed to the Ollama embedding step as its backup location.
OLLAMA_EMBEDDING_BK_LOCATION = f"{DATA_FOLDER_LOCATION}ollama_embedding_bk_location.pkl"
def get_movie_info(id):
    """Build a one-line textual description of a single movie.

    The movie table is loaded through
    ``data_persistence.get_transformed_db()`` on every call, and the row is
    picked by integer position (``iloc``), not by index label.

    Args:
        id: Position of the movie's row in the loaded table. The name
            shadows the ``id`` builtin; it is kept unchanged because it is
            part of the function's signature.

    Returns:
        A string laid out as
        ``"Title: <title>. Director(s): <director>. Genres: <genres>. Cast: <cast>. Overview: <overview>"``.
    """
    # (label shown in the text, column holding the value), in output order.
    labelled_columns = (
        ("Title", "title"),
        ("Director(s)", "director"),
        ("Genres", "genres"),
        ("Cast", "cast"),
        ("Overview", "overview"),
    )

    catalogue = data_persistence.get_transformed_db()
    row = catalogue.iloc[id, :]
    fields = row[["title", "director", "genres", "cast", "overview"]]
    return ". ".join(f"{label}: {fields[column]}" for label, column in labelled_columns)
def get_data(overwrite_db=False):
    """Return the movie table, rebuilding and re-embedding it when needed.

    The persisted table is requested first. It is returned unchanged when it
    is a ``pandas.DataFrame`` and ``overwrite_db`` is false. Otherwise the
    movie data is read or generated again, a text description is composed
    per movie, that text is encoded by the Ollama embedding helper into the
    ``"vector"`` column, and the result is saved.

    Args:
        overwrite_db: When true, rebuild the table even if a persisted
            DataFrame is available.

    Returns:
        The movie table, including its ``"vector"`` column on the rebuild
        path.
    """
    # NOTE(review): the source had lost its indentation; every rebuild step
    # below is assumed to belong to the "regenerate" branch.
    stored = data_persistence.get_transformed_db()
    if isinstance(stored, pd.DataFrame) and not overwrite_db:
        return stored

    rebuilt = data_import.read_or_generate_movies_data()
    as_text = data_import.format_list_of_strings

    # The description text is stored in "vector" first and then replaced by
    # its encoding, so the encoder receives the column as read back from the
    # frame.
    rebuilt["vector"] = (
        "Title: " + rebuilt["title"]
        + ". Director(s): " + rebuilt["director"].apply(as_text)
        + ". Genres: " + rebuilt["genres"].apply(as_text)
        + ". Cast: " + rebuilt["cast"].apply(as_text)
        + ". Overview: " + rebuilt["overview"]
    )
    rebuilt["vector"] = embedding.encode_text_series_ollama_method(
        rebuilt["vector"],
        file_backup_location=OLLAMA_EMBEDDING_BK_LOCATION,
        batch_size=500,
    )
    print(rebuilt["vector"])
    data_persistence.save_transformed_db(rebuilt)
    return rebuilt
def replace_indexes_with_titles(suggestions, movies_data):
    """Return a copy of ``suggestions`` with movie indexes swapped for titles.

    Args:
        suggestions: Mapping of user -> movie index label. It is copied, so
            the caller's object is left untouched.
        movies_data: Table with a ``"title"`` column, looked up by index
            label via ``.at``.

    Returns:
        A copy of ``suggestions`` whose values are the matching titles.
    """
    titled = suggestions.copy()
    for user, movie_id in suggestions.items():
        titled[user] = movies_data.at[movie_id, "title"]
    return titled
def get_recommendations_df(suggestions, n_results, similarity=constants.COSINE_SIMILARITY, rating_weight=0.15):
    """Build a table of recommended movies for the given suggestions.

    The movie table comes from ``get_data()``. Its ``"vector"`` column and
    the suggestions are passed to ``embedding.find_recommendations``. Each
    returned entry is read for its ``"movie_index"`` and
    ``"suggestions_considered"`` values.

    Args:
        suggestions: Forwarded unchanged to ``embedding.find_recommendations``.
        n_results: Forwarded as ``n_results``.
        similarity: Forwarded as ``similarity``.
        rating_weight: Forwarded as ``rating_weight``.

    Returns:
        A DataFrame copy of the recommended rows, limited to the descriptive
        columns, plus two added columns: ``"based_on_index"`` (the
        suggestions considered for each row) and ``"based_on"`` (the same,
        with movie indexes replaced by titles).
    """
    catalogue = get_data()
    raw_hits = embedding.find_recommendations(
        suggestions,
        catalogue["vector"],
        n_results=n_results,
        similarity=similarity,
        rating_weight=rating_weight,
    )

    # Select the recommended rows by index label, then narrow the columns.
    hit_labels = [hit["movie_index"] for hit in raw_hits]
    wanted_columns = ["id", "title", "director", "genres", "cast", "overview", "rating", "poster_path", "imdb_id"]
    hit_rows = catalogue.loc[hit_labels]
    result = hit_rows[wanted_columns].copy()

    # Attach, row by row, the suggestions that led to each recommendation.
    considered = [hit["suggestions_considered"] for hit in raw_hits]
    result["based_on_index"] = pd.Series(considered, index=result.index)

    considered_as_titles = [replace_indexes_with_titles(entry, catalogue) for entry in considered]
    result["based_on"] = pd.Series(considered_as_titles, index=result.index)
    return result
def get_connections(row):
    """Copy the ``"based_on"`` pairs of a row into a new dict.

    Args:
        row: Object indexable by ``"based_on"``, whose value exposes
            ``.items()`` yielding (person, request) pairs.

    Returns:
        A new dict mapping each person to their request.
    """
    return {person: request for person, request in row["based_on"].items()}
def get_movie_poster(imdb_id):
    """Look up a movie poster through ``movieposters``.

    Args:
        imdb_id: Identifier passed to ``mp.get_poster`` as ``id``.

    Returns:
        Whatever ``mp.get_poster`` returns (the calling code treats it as
        the poster link).
    """
    return mp.get_poster(id=imdb_id)
def get_recommendations(suggestions, n_results=10, similarity=constants.COSINE_SIMILARITY, rating_weight=0.15):
    """Return the recommendations table with poster links filled in.

    Args:
        suggestions: Forwarded to ``get_recommendations_df``.
        n_results: Forwarded to ``get_recommendations_df`` (defaults to 10).
        similarity: Forwarded as ``similarity``.
        rating_weight: Forwarded as ``rating_weight``.

    Returns:
        The DataFrame from ``get_recommendations_df`` whose ``"poster_path"``
        column has been overwritten with the result of ``get_movie_poster``
        applied to each ``"imdb_id"``.
    """
    table = get_recommendations_df(
        suggestions,
        n_results,
        similarity=similarity,
        rating_weight=rating_weight,
    )
    # One poster lookup per recommended movie.
    poster_links = table["imdb_id"].apply(get_movie_poster)
    table["poster_path"] = poster_links
    return table
def get_explanation_for_reco(suggestion_1_id, suggestion_2_id, recommendation_id):
    """Ask the Gemma helper why a movie was recommended for two suggestions.

    Each id is turned into a text description with ``get_movie_info``, in
    the order first suggestion, second suggestion, recommendation. The three
    descriptions are then passed to
    ``gemma_llm.find_reason_for_recommendation``.

    Args:
        suggestion_1_id: Row position of the first suggested movie.
        suggestion_2_id: Row position of the second suggested movie.
        recommendation_id: Row position of the recommended movie.

    Returns:
        The value returned by ``gemma_llm.find_reason_for_recommendation``.
    """
    first_info, second_info, recommended_info = [
        get_movie_info(movie_id)
        for movie_id in (suggestion_1_id, suggestion_2_id, recommendation_id)
    ]
    return gemma_llm.find_reason_for_recommendation(first_info, second_info, recommended_info)