Spaces:
Sleeping
Sleeping
File size: 8,580 Bytes
c3c0d39 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 |
from .places import Places
from functools import reduce
from rapidfuzz import fuzz
from .emotions import EmotionWheel
class Optimizer:
    """
    An object used to process optimization queries on a database.
    This class is general: you should consider using one of its derivatives,
    e.g. WeightedSumOptimizer.
    Such queries use the "Aggregate and Compare" paradigm: for each piece of art,
    a score is given, which is then compared to the scores of the other pieces.
    """

    def __init__(self, scoring_fn, sorting_fn):
        """
        Creates a new optimization query.
        "scoring_fn" receives one dataframe row and returns its score; the scores
        are stored in a column called "score".
        "sorting_fn" receives the scored dataframe and returns it ordered
        according to that column.
        """
        self.scoring_fn = scoring_fn
        self.sorting_fn = sorting_fn
        # The place resolver is created on first use by place_similarity.
        self.places = None
        # Maps a lower-cased place name to the set of its lower-cased alternate
        # names. Created here so it exists even when "places" is assigned from
        # outside before the first call to place_similarity.
        self.places_alt_names_cache = {}

    def optimize_max(self, database):
        """
        Returns the result of the sorting function applied to the database's
        dataframe, augmented with a "score" column holding the value of the
        scoring function for each row.
        The dataframe returned by database.get_dataframe() is left untouched:
        scoring happens on a copy, so a pre-existing "score" column is not lost
        and no temporary column is left behind if scoring or sorting fails.
        """
        scored = database.get_dataframe().copy()
        if len(scored.index) == 0:
            # With no rows, DataFrame.apply(axis=1) may hand back a DataFrame
            # instead of a Series, which cannot be assigned to a single column.
            scored['score'] = []
        else:
            scored['score'] = scored.apply(self.scoring_fn, axis=1)
        return self.sorting_fn(scored)

    @staticmethod
    def name_similarity(target_name, related_names):
        """
        Returns a score indicating a similarity between the target name and a list of related names.
        The similarity is the maximal similarity between the target and any of the related names.
        The similarity between two names is the rapidfuzz partial ratio divided by 100,
        computed with a score cutoff of 90.
        Returns 0 when there is no target name or no related name.
        """
        if target_name is None:
            return 0.
        return max(
            (fuzz.partial_ratio(related, target_name, score_cutoff=90) / 100
             for related in related_names),
            default=0.,
        )

    @staticmethod
    def date_similarity(target_date, related_dates):
        """
        Returns a score indicating a similarity between the target date and a list of related dates.
        The similarity is the maximal similarity between the target and any of the related dates.
        If days and months correspond, the similarity is maximal (1).
        If only the day or only the month corresponds, the similarity is half the maximal (0.5).
        Returns 0 when there is no target date or no related date.
        """
        if target_date is None:
            return 0.

        def similarity(related):
            same_day = related.day == target_date.day
            same_month = related.month == target_date.month
            if same_day and same_month:
                return 1.
            if same_day or same_month:
                return 0.5
            return 0.

        return max(map(similarity, related_dates), default=0.)

    def place_similarity(self, target_place, related_places):
        """
        Returns a score indicating a similarity between the target place and a list of related places.
        The score is 1 if the lower-cased target place is one of the alternate names
        (as given by Places.alternate_names) of any related place, and 0 otherwise,
        including when there is no target place.
        Alternate names are cached per related place on this instance.
        """
        if target_place is None:
            return 0.
        if self.places is None:
            self.places = Places()
        cache = self.places_alt_names_cache
        # Loop-invariant: lower-case the target once rather than per related place.
        wanted = target_place.lower()
        for related_place in related_places:
            key = related_place.lower()
            if key not in cache:
                cache[key] = {name.lower() for name in self.places.alternate_names(key)}
            if wanted in cache[key]:
                return 1.
        return 0.

    # A single EmotionWheel shared by all Optimizer instances
    emotion_wheel = EmotionWheel()

    @staticmethod
    def emotion_similarity(target_emotion, related_emotions):
        """
        Returns a score indicating emotional similarity between the target emotion
        and a list of related emotions.
        The score is the average of EmotionWheel.calculate_emotion_similarity over
        the related emotions.
        Returns 0 when the target emotion or the related emotions are missing or empty.
        """
        if not related_emotions or not target_emotion:
            return 0.0
        wheel = Optimizer.emotion_wheel
        scores = [
            wheel.calculate_emotion_similarity(target_emotion, emotion)
            for emotion in related_emotions
        ]
        # Return average similarity
        return sum(scores) / len(scores) if scores else 0.0
class WeightedSumOptimizer(Optimizer):
    """
    An object used to process optimization queries on a database which underlying
    optimization function is a weighted sum computing the closeness of an art piece
    to a target profile.
    """

    def __init__(self, profile, weights):
        """
        Creates a new weighted sum based optimization function.
        The "weights" parameter is a mapping from the database column names to their weights.
        Columns among 'related_names', 'related_dates', 'related_places' and
        'related_emotions' that are absent from it get a weight of 0.
        The mapping is copied: the caller's object is never modified.
        The "profile" parameter is the target profile.
        """
        self.profile = profile
        # Copy before filling in the defaults so the caller's mapping is not mutated.
        self.weights = dict(weights)
        for col in ('related_names', 'related_dates', 'related_places', 'related_emotions'):
            self.weights.setdefault(col, 0)

        def weighted(col, similarity_fn, target, row):
            # A zero weight contributes nothing to the sum, so the similarity
            # (and the row lookup) is skipped entirely for that criterion.
            weight = self.weights[col]
            if weight == 0:
                return 0.
            return weight * similarity_fn(target, row[col])

        def scoring_fn(row):
            # The score of a row is the weighted sum of its four similarities.
            score_name = weighted(
                'related_names', Optimizer.name_similarity, self.profile.target_name, row)
            score_date = weighted(
                'related_dates', Optimizer.date_similarity, self.profile.target_date, row)
            score_place = weighted(
                'related_places', self.place_similarity, self.profile.target_place, row)
            score_emotion = weighted(
                'related_emotions', Optimizer.emotion_similarity, self.profile.target_emotion, row)
            return score_name + score_date + score_place + score_emotion

        def sorting_fn(df):
            # Best (highest) scores first.
            return df.sort_values(by='score', ascending=False)

        super().__init__(scoring_fn, sorting_fn)
class WeightedLeximaxOptimizer(Optimizer):
    """
    An object used to process optimization queries on a database which underlying
    optimization function is a weighted leximax computing the closeness of an art
    piece to a target profile.
    """

    def __init__(self, profile, weights):
        """
        Creates a new weighted leximax based optimization function.
        The "weights" parameter is a mapping from the database column names to their weights.
        Columns among 'related_names', 'related_dates', 'related_places' and
        'related_emotions' that are absent from it get a weight of 0.
        The mapping is copied: the caller's object is never modified.
        The "profile" parameter is the target profile.
        """
        self.profile = profile
        # Copy before filling in the defaults so the caller's mapping is not mutated.
        self.weights = dict(weights)
        for col in ('related_names', 'related_dates', 'related_places', 'related_emotions'):
            self.weights.setdefault(col, 0)

        def weighted(col, similarity_fn, target, row):
            # A zero weight always yields a zero score, so the similarity
            # (and the row lookup) is skipped entirely for that criterion.
            weight = self.weights[col]
            if weight == 0:
                return 0.
            return weight * similarity_fn(target, row[col])

        def scoring_fn(row):
            # The score of a row is the tuple of its four weighted similarities
            # sorted in decreasing order, so that comparing two such tuples
            # compares the best criterion first, then the second best, etc.
            scores = [
                weighted('related_names', Optimizer.name_similarity,
                         self.profile.target_name, row),
                weighted('related_dates', Optimizer.date_similarity,
                         self.profile.target_date, row),
                weighted('related_places', self.place_similarity,
                         self.profile.target_place, row),
                weighted('related_emotions', Optimizer.emotion_similarity,
                         self.profile.target_emotion, row),
            ]
            # Store original scores (name, date, place, emotion order) for logging before sorting.
            # NOTE(review): this writes into the row object handed to scoring_fn;
            # whether that is visible afterwards depends on the caller -- confirm.
            row['_original_scores'] = tuple(scores)
            return tuple(sorted(scores, reverse=True))

        def sorting_fn(df):
            # Best scores first; relies on tuples being compared lexicographically.
            return df.sort_values(by='score', ascending=False)

        super().__init__(scoring_fn, sorting_fn)
class TargetProfile:
    """
    The query, in term of searched values (i.e. the name we look for, the date we look for, ...)
    A value left to None means that the corresponding criterion is not searched.
    """

    def __init__(self, *, target_name=None, target_date=None,
                 target_place=None, target_emotion=None):
        """
        Create a new target profile.
        Called without arguments, the profile is empty; each searched value may
        also be given directly as a keyword argument.
        """
        self.target_name = target_name
        self.target_date = target_date
        self.target_place = target_place
        self.target_emotion = target_emotion

    def __str__(self):
        return (f"TargetProfile(target_name={self.target_name}, "
                f"target_date={self.target_date}, "
                f"target_place={self.target_place}, "
                f"target_emotion={self.target_emotion})")

    def __repr__(self):
        # Unambiguous form (values shown with their repr) for debugging and for
        # profiles displayed inside containers.
        return (f"{type(self).__name__}(target_name={self.target_name!r}, "
                f"target_date={self.target_date!r}, "
                f"target_place={self.target_place!r}, "
                f"target_emotion={self.target_emotion!r})")

    def set_target_name(self, target_name):
        """Set the name we look for."""
        self.target_name = target_name

    def set_target_date(self, target_date):
        """Set the date we look for."""
        self.target_date = target_date

    def set_target_emotion(self, target_emotion):
        """Set the emotion we look for."""
        self.target_emotion = target_emotion

    def set_target_place(self, target_place):
        """Set the place we look for."""
        self.target_place = target_place
|