File size: 8,580 Bytes
c3c0d39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
from .places import Places
from functools import reduce
from rapidfuzz import fuzz
from .emotions import EmotionWheel


class Optimizer:
    """
    An object used to process optimization queries on a database.
    This class is general: you should consider using one of its derivatives, e.g. WeightedSumOptimizer.

    Such queries use the "Aggregate and Compare" paradigm: for each piece of art,
    a score is given, which is then compared to the scores of the other pieces.
    """

    def __init__(self, scoring_fn, sorting_fn):
        """
        Creates a new optimization query.

        "scoring_fn" is called once per dataframe row and returns the value stored
        in the "score" column; "sorting_fn" receives the scored dataframe and
        returns it ordered according to that column.
        """
        self.scoring_fn = scoring_fn
        self.sorting_fn = sorting_fn
        # The Places helper is only built on the first place comparison.
        self.places = None
        # Lower-cased related place -> set of its lower-cased alternate names.
        self.places_alt_names_cache = {}

    def optimize_max(self, database):
        """
        Returns a table containing the maximal elements w.r.t. the objective function.

        The score column is computed on a copy, so the dataframe returned by
        "database.get_dataframe()" is never modified, even if scoring or sorting raises.
        """
        scored = database.get_dataframe().copy()
        scored['score'] = scored.apply(self.scoring_fn, axis=1)
        return self.sorting_fn(scored)

    @staticmethod
    def name_similarity(target_name, related_names):
        """
        Returns a score indicating a similarity between the target name and a list of related names.
        The similarity is the maximal similarity between the target and any of the related names,
        or 0 when the target is None or the list is empty.

        The similarity between two names is computed using rapidfuzz partial ratio, scaled
        from a percentage to [0, 1]. A cutoff of 90 is passed to rapidfuzz, so weaker
        matches count as 0.
        """
        if target_name is None:
            return 0.

        def similarity(related):
            return fuzz.partial_ratio(related, target_name, score_cutoff=90)/100
        return max(map(similarity, related_names), default=0.)

    @staticmethod
    def date_similarity(target_date, related_dates):
        """
        Returns a score indicating a similarity between the target date and a list of related dates.
        The similarity is the maximal similarity between the target and any of the related dates,
        or 0 when the target is None or the list is empty.

        If days and months correspond, the similarity is maximal (1).
        If days or months correspond, the similarity is half the maximal (0.5).
        The year is not taken into account.
        """
        if target_date is None:
            return 0.

        def similarity(related):
            same_day = related.day == target_date.day
            same_month = related.month == target_date.month
            if same_day and same_month:
                return 1.
            if same_day or same_month:
                return 0.5
            return 0.
        return max(map(similarity, related_dates), default=0.)

    def place_similarity(self, target_place, related_places):
        """
        Returns a score indicating a similarity between the target place and a list of related places.

        The score is 1 as soon as the lower-cased target place is one of the alternate
        names of a related place, and 0 otherwise (including when the target is None).
        Alternate names are looked up once per related place and cached on the instance.
        """
        if target_place is None:
            return 0.
        if self.places is None:
            self.places = Places()
            self.places_alt_names_cache = {}

        target_lower = target_place.lower()
        for related_place in related_places:
            related_lower = related_place.lower()
            alt_names = self.places_alt_names_cache.get(related_lower)
            if alt_names is None:
                alt_names = {name.lower() for name in self.places.alternate_names(related_lower)}
                self.places_alt_names_cache[related_lower] = alt_names
            if target_lower in alt_names:
                return 1.
        return 0.

    # Initialize EmotionWheel for all Optimizer instances
    emotion_wheel = EmotionWheel()

    @staticmethod
    def emotion_similarity(target_emotion, related_emotions):
        """
        Returns a score indicating emotional similarity between the target emotion
        and a list of related emotions.

        The score is the average of EmotionWheel.calculate_emotion_similarity over the
        related emotions, or 0 when either the target or the list is missing or empty.
        """
        if not related_emotions or not target_emotion:
            return 0.0

        wheel = Optimizer.emotion_wheel
        scores = [wheel.calculate_emotion_similarity(target_emotion, emotion)
                  for emotion in related_emotions]

        # Return average similarity
        return sum(scores) / len(scores) if scores else 0.0


class WeightedSumOptimizer(Optimizer):
    """
    An object used to process optimization queries on a database which underlying
    optimization function is a weighted sum computing the closeness of an art piece
    to a target profile.
    """

    def __init__(self, profile, weights):
        """
        Creates a new weighted sum based optimization function.

        The "weights" parameter is a mapping from the database column names to their weights.
        Columns among 'related_names', 'related_dates', 'related_places' and
        'related_emotions' that are missing from it get a weight of 0. The mapping is
        copied, so the caller's object is left untouched.
        The "profile" parameter is the target profile.
        """
        self.profile = profile
        # Copy before filling in defaults: the caller's mapping must not gain keys.
        self.weights = dict(weights)
        for col in ('related_names', 'related_dates', 'related_places', 'related_emotions'):
            self.weights.setdefault(col, 0)

        def scoring_fn(row):
            # Each criterion contributes its similarity scaled by its weight.
            score_name = self.weights['related_names'] * Optimizer.name_similarity(
                self.profile.target_name, row['related_names'])
            score_date = self.weights['related_dates'] * Optimizer.date_similarity(
                self.profile.target_date, row['related_dates'])
            score_place = self.weights['related_places'] * self.place_similarity(
                self.profile.target_place, row['related_places'])
            score_emotion = self.weights['related_emotions'] * Optimizer.emotion_similarity(
                self.profile.target_emotion, row['related_emotions'])
            return score_name + score_date + score_place + score_emotion

        def sorting_fn(df):
            # Best (highest) scores first.
            return df.sort_values(by='score', ascending=False)
        Optimizer.__init__(self, scoring_fn, sorting_fn)


class WeightedLeximaxOptimizer(Optimizer):
    """
    An object used to process optimization queries on a database which underlying
    optimization function is a weighted leximax computing the closeness of an art
    piece to a target profile.
    """

    def __init__(self, profile, weights):
        """
        Creates a new weighted leximax based optimization function.

        The "weights" parameter is a mapping from the database column names to their weights.
        Columns among 'related_names', 'related_dates', 'related_places' and
        'related_emotions' that are missing from it get a weight of 0. The mapping is
        copied, so the caller's object is left untouched.
        The "profile" parameter is the target profile.
        """
        self.profile = profile
        # Copy before filling in defaults: the caller's mapping must not gain keys.
        self.weights = dict(weights)
        for col in ('related_names', 'related_dates', 'related_places', 'related_emotions'):
            self.weights.setdefault(col, 0)

        def scoring_fn(row):
            score_name = self.weights['related_names'] * Optimizer.name_similarity(
                self.profile.target_name, row['related_names'])
            score_date = self.weights['related_dates'] * Optimizer.date_similarity(
                self.profile.target_date, row['related_dates'])
            score_place = self.weights['related_places'] * self.place_similarity(
                self.profile.target_place, row['related_places'])
            score_emotion = self.weights['related_emotions'] * Optimizer.emotion_similarity(
                self.profile.target_emotion, row['related_emotions'])
            scores = [score_name, score_date, score_place, score_emotion]
            # Store original scores for logging before sorting
            # NOTE(review): this writes to the row object handed to scoring_fn; when
            # called through DataFrame.apply it presumably does not reach the
            # dataframe itself - confirm where the value is read.
            row['_original_scores'] = tuple(scores)
            # The score is the weighted similarities from largest to smallest, so
            # two pieces are compared on their best criterion first, then the next.
            scores.sort(reverse=True)
            return tuple(scores)

        def sorting_fn(df):
            # Tuples compare lexicographically; best tuples first.
            return df.sort_values(by='score', ascending=False)
        Optimizer.__init__(self, scoring_fn, sorting_fn)


class TargetProfile:
    """
    Describes what a query is searching for: a name, a date, a place and an emotion.
    A value left to None means that nothing was requested for that criterion.
    """

    def __init__(self):
        """
        Build a profile in which no target value has been chosen yet.
        """
        self.target_name = self.target_date = None
        self.target_place = self.target_emotion = None

    def __str__(self):
        """
        Render the four target values in a single readable line.
        """
        description = (
            f"TargetProfile(target_name={self.target_name}, "
            f"target_date={self.target_date}, "
            f"target_place={self.target_place}, "
            f"target_emotion={self.target_emotion})"
        )
        return description

    def set_target_name(self, target_name):
        """Record the name to search for."""
        self.target_name = target_name

    def set_target_date(self, target_date):
        """Record the date to search for."""
        self.target_date = target_date

    def set_target_place(self, target_place):
        """Record the place to search for."""
        self.target_place = target_place

    def set_target_emotion(self, target_emotion):
        """Record the emotion to search for."""
        self.target_emotion = target_emotion