File size: 4,017 Bytes
aae5cfe
 
 
c3e37b9
 
fd31c97
c3e37b9
fd31c97
 
 
 
c3e37b9
 
 
 
 
 
73969b9
fd31c97
73969b9
fd31c97
 
 
 
 
 
 
 
 
 
 
 
73969b9
fd31c97
 
c3e37b9
 
 
fd31c97
c3e37b9
 
fd31c97
c3e37b9
fd31c97
c3e37b9
fd31c97
c3e37b9
fd31c97
 
c3e37b9
 
 
fd31c97
 
c3e37b9
 
 
 
 
fd31c97
c3e37b9
 
 
fd31c97
c3e37b9
 
 
 
fd31c97
c3e37b9
 
 
 
 
fd31c97
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from course_project import data_import
from course_project import embedding
from course_project import data_persistence
from course_project import gemma_llm
from course_project import constants
import pandas as pd
import movieposters as mp

# Folder (relative path, with trailing slash) that holds the project's data files.
DATA_FOLDER_LOCATION = "course_project/data/"
# Pickle file passed to the Ollama embedding step as its backup location.
OLLAMA_EMBEDDING_BK_LOCATION = f"{DATA_FOLDER_LOCATION}ollama_embedding_bk_location.pkl"

def get_movie_info(id, movies_data=None):
    """Build a one-line text description of a single movie.

    Args:
        id: Positional row selector handed to ``DataFrame.iloc``. The name
            shadows the ``id`` builtin but is kept so existing keyword
            callers keep working.
        movies_data: Optional, already-loaded movies DataFrame. Pass it to
            avoid reloading the database on every call; when omitted the
            frame is obtained through ``get_data()``, which also covers the
            case where no transformed database is available yet.

    Returns:
        A string of the form
        ``"Title: ... Director(s): ... Genres: ... Cast: ... Overview: ..."``
        built from the ``title``, ``director``, ``genres``, ``cast`` and
        ``overview`` columns of the selected row.
    """
    if movies_data is None:
        movies_data = get_data()

    # NOTE(review): this lookup is positional (``iloc``) while other helpers
    # in this module use label-based ``.loc``/``.at``; the two agree only when
    # the frame has a default 0..n-1 index -- confirm against callers.
    movie = movies_data.iloc[id, :]
    return (
        f'Title: {movie["title"]}. Director(s): {movie["director"]}. '
        f'Genres: {movie["genres"]}. Cast: {movie["cast"]}. '
        f'Overview: {movie["overview"]}'
    )

def get_data(overwrite_db=False):
    """Return the movies DataFrame, building and persisting it when needed.

    The stored transformed database is returned as-is when it is a DataFrame
    and ``overwrite_db`` is false. Otherwise the movies data is read (or
    generated) through ``data_import``, a descriptive text is assembled per
    movie, that text is replaced by its Ollama embedding in the ``vector``
    column, and the result is saved through ``data_persistence``.

    Args:
        overwrite_db: When true, rebuild and re-save the database even if a
            stored DataFrame is available.

    Returns:
        The movies DataFrame (loaded or freshly built).
    """
    stored = data_persistence.get_transformed_db()
    if isinstance(stored, pd.DataFrame) and not overwrite_db:
        return stored

    movies = data_import.read_or_generate_movies_data()

    # Assemble "Title: ... Director(s): ... Genres: ... Cast: ... Overview: ..."
    # piece by piece; the three middle columns go through the list formatter.
    formatted_sections = (
        (". Director(s): ", "director"),
        (". Genres: ", "genres"),
        (". Cast: ", "cast"),
    )
    description = "Title: " + movies["title"]
    for prefix, column in formatted_sections:
        description = description + prefix + movies[column].apply(data_import.format_list_of_strings)
    description = description + ". Overview: " + movies["overview"]

    # The text is stored in the column first, then overwritten by its embedding.
    movies["vector"] = description
    movies["vector"] = embedding.encode_text_series_ollama_method(
        movies["vector"],
        file_backup_location=OLLAMA_EMBEDDING_BK_LOCATION,
        batch_size=500,
    )
    print(movies["vector"])

    data_persistence.save_transformed_db(movies)
    return movies

def replace_indexes_with_titles(suggestions, movies_data):
    """Return a copy of ``suggestions`` whose values are movie titles.

    Every value of ``suggestions`` is used as a row label of ``movies_data``
    and swapped for that row's ``title``. The keys are kept unchanged and the
    mapping passed in is not modified.
    """
    titled = suggestions.copy()
    # Read from the original mapping, write into the copy.
    for key, movie_index in suggestions.items():
        titled[key] = movies_data.at[movie_index, "title"]
    return titled

def get_recommendations_df(suggestions, n_results, similarity=constants.COSINE_SIMILARITY, rating_weight=0.15):
    """Return a DataFrame of recommended movies for the given suggestions.

    The movies data comes from ``get_data()`` and the ranking from
    ``embedding.find_recommendations`` run against its ``vector`` column.

    Args:
        suggestions: Forwarded unchanged to ``embedding.find_recommendations``.
        n_results: Forwarded as ``n_results``.
        similarity: Forwarded as ``similarity``.
        rating_weight: Forwarded as ``rating_weight``.

    Returns:
        A copy of the recommended rows restricted to the descriptive columns,
        plus ``based_on_index`` (the ``suggestions_considered`` entry of each
        raw result) and ``based_on`` (the same entry with its values replaced
        by movie titles).
    """
    movies_data = get_data()
    raw_results = embedding.find_recommendations(
        suggestions,
        movies_data["vector"],
        n_results=n_results,
        similarity=similarity,
        rating_weight=rating_weight,
    )

    # Split each raw result into the recommended row label and the
    # suggestions it was derived from.
    row_labels = []
    considered = []
    for result in raw_results:
        row_labels.append(result["movie_index"])
        considered.append(result["suggestions_considered"])

    wanted_columns = ["id", "title", "director", "genres", "cast", "overview", "rating", "poster_path", "imdb_id"]
    matching_rows = movies_data.loc[row_labels]
    recommendations = matching_rows[wanted_columns].copy()

    recommendations["based_on_index"] = pd.Series(considered, index=recommendations.index)
    considered_titles = [replace_indexes_with_titles(entry, movies_data) for entry in considered]
    recommendations["based_on"] = pd.Series(considered_titles, index=recommendations.index)
    return recommendations

def get_connections(row):
    """Return a new dict holding the key/value pairs of ``row["based_on"]``."""
    return dict(row["based_on"].items())

def get_movie_poster(imdb_id):
    """Return whatever ``movieposters.get_poster`` yields for ``imdb_id``."""
    return mp.get_poster(id=imdb_id)

def get_recommendations(suggestions, n_results=10, similarity=constants.COSINE_SIMILARITY, rating_weight=0.15):
    """Return recommendations with ``poster_path`` filled from poster lookups.

    Builds the recommendations frame with ``get_recommendations_df`` and then
    overwrites its ``poster_path`` column with the result of calling
    ``get_movie_poster`` on each row's ``imdb_id``.
    """
    recommendations = get_recommendations_df(
        suggestions,
        n_results,
        similarity=similarity,
        rating_weight=rating_weight,
    )
    # One poster lookup per recommended movie.
    poster_links = recommendations["imdb_id"].apply(get_movie_poster)
    recommendations["poster_path"] = poster_links
    return recommendations

def get_explanation_for_reco(suggestion_1_id, suggestion_2_id, recommendation_id):
    """Ask the Gemma LLM helper why a movie was recommended.

    The text description of each of the three movies is built with
    ``get_movie_info`` (in argument order) and handed to
    ``gemma_llm.find_reason_for_recommendation``, whose result is returned.
    """
    movie_ids = (suggestion_1_id, suggestion_2_id, recommendation_id)
    first_info, second_info, reco_info = [get_movie_info(movie_id) for movie_id in movie_ids]
    return gemma_llm.find_reason_for_recommendation(first_info, second_info, reco_info)