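# NOTE(review): the three lines below look like Hugging Face web-page residue
# (avatar caption, commit message, commit hash) rather than module source;
# they are kept as comments so the file remains valid Python.
# Aurel-test's picture
# Upload folder using huggingface_hub
# c3c0d39 verified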
from .places import Places
from functools import reduce
from rapidfuzz import fuzz
from .emotions import EmotionWheel
class Optimizer:
    """
    An object used to process optimization queries on a database.

    This class is general: you should consider using one of its derivatives,
    e.g. WeightedSumOptimizer.
    Such queries use the "Aggregate and Compare" paradigm: for each piece of art,
    a score is given, which is then compared to the scores of the other pieces.
    """

    def __init__(self, scoring_fn, sorting_fn):
        """
        Creates a new optimization query.

        "scoring_fn" receives one dataframe row and returns its score; the
        scores are stored in a column called "score".
        "sorting_fn" receives the scored dataframe and returns it sorted
        according to that column.
        """
        self.scoring_fn = scoring_fn
        self.sorting_fn = sorting_fn
        # The Places helper is created lazily on the first place comparison.
        self.places = None
        # Maps a lower-cased related place to its lower-cased alternate names.
        self.places_alt_names_cache = {}

    def optimize_max(self, database):
        """
        Returns a table containing the maximal elements w.r.t. the objective function.

        The scores are computed on a copy of the dataframe returned by
        "database.get_dataframe()", so the database's own dataframe is never
        modified (not even temporarily, and not if scoring or sorting raises).
        """
        scored = database.get_dataframe().copy()
        scored['score'] = scored.apply(self.scoring_fn, axis=1)
        return self.sorting_fn(scored)

    @staticmethod
    def name_similarity(target_name, related_names):
        """
        Returns a score indicating a similarity between the target name and a list of related names.

        The similarity is the maximal similarity between the target and any of the related names.
        The similarity between two names is computed using rapidfuzz partial ratio,
        rescaled by dividing by 100; a cutoff of 90 is passed to rapidfuzz.
        Returns 0. when there is no target name or no related name.
        """
        if target_name is None:
            return 0.

        def similarity(related):
            return fuzz.partial_ratio(related, target_name, score_cutoff=90) / 100

        return max(map(similarity, related_names), default=0.)

    @staticmethod
    def date_similarity(target_date, related_dates):
        """
        Returns a score indicating a similarity between the target date and a list of related dates.

        The similarity is the maximal similarity between the target and any of the related dates.
        If days and months correspond, the similarity is maximal (1.).
        If days or months correspond, the similarity is half the maximal (0.5).
        Returns 0. when there is no target date or no related date.
        """
        if target_date is None:
            return 0.

        def similarity(related):
            same_day = related.day == target_date.day
            same_month = related.month == target_date.month
            if same_day and same_month:
                return 1.
            if same_day or same_month:
                return 0.5
            return 0.

        return max(map(similarity, related_dates), default=0.)

    def place_similarity(self, target_place, related_places):
        """
        Returns a score indicating a similarity between the target place and the related places.

        The score is 1. when the lower-cased target place is one of the
        alternate names of any related place, and 0. otherwise (including
        when there is no target place).
        """
        if target_place is None:
            return 0.
        if self.places is None:
            self.places = Places()
            self.places_alt_names_cache = {}
        # Loop-invariant: lower-case the target only once.
        target_lower = target_place.lower()
        for related_place in related_places:
            related_lower = related_place.lower()
            if related_lower not in self.places_alt_names_cache:
                # Stored as a set so the membership test below is a hash lookup.
                self.places_alt_names_cache[related_lower] = set(
                    map(str.lower, self.places.alternate_names(related_lower)))
            if target_lower in self.places_alt_names_cache[related_lower]:
                return 1.
        return 0.

    # Initialize EmotionWheel for all Optimizer instances
    emotion_wheel = EmotionWheel()

    @staticmethod
    def emotion_similarity(target_emotion, related_emotions):
        """
        Returns a score indicating emotional similarity between the target
        emotion and a list of related emotions.

        Each related emotion is compared to the target with
        "EmotionWheel.calculate_emotion_similarity" and the average is returned.
        Returns 0.0 when the target emotion or the related emotions are missing.
        """
        if not related_emotions or not target_emotion:
            return 0.0
        wheel = Optimizer.emotion_wheel
        scores = [
            wheel.calculate_emotion_similarity(target_emotion, emotion)
            for emotion in related_emotions
        ]
        # Return average similarity
        return sum(scores) / len(scores) if scores else 0.0
class WeightedSumOptimizer(Optimizer):
    """
    An object used to process optimization queries on a database which underlying
    optimization function is a weighted sum computing the closeness of an art piece
    to a target profile.
    """

    def __init__(self, profile, weights):
        """
        Creates a new weighted sum based optimization function.

        The "weights" parameter is a mapping from the database column names to their weights.
        Columns missing from it ('related_names', 'related_dates',
        'related_places', 'related_emotions') get a weight of 0.
        The mapping is copied, so the caller's object is left untouched.
        The "profile" parameter is the target profile.
        """
        self.profile = profile
        # Private copy: filling in the defaults must not leak into the caller's mapping.
        self.weights = dict(weights) if weights is not None else {}
        for col in ('related_names', 'related_dates', 'related_places', 'related_emotions'):
            self.weights.setdefault(col, 0)

        def scoring_fn(row):
            # Each criterion contributes its similarity scaled by its weight.
            score_name = self.weights['related_names'] * Optimizer.name_similarity(
                self.profile.target_name, row['related_names'])
            score_date = self.weights['related_dates'] * Optimizer.date_similarity(
                self.profile.target_date, row['related_dates'])
            score_place = self.weights['related_places'] * self.place_similarity(
                self.profile.target_place, row['related_places'])
            score_emotion = self.weights['related_emotions'] * Optimizer.emotion_similarity(
                self.profile.target_emotion, row['related_emotions'])
            return score_name + score_date + score_place + score_emotion

        def sorting_fn(df):
            # Best (highest) score first.
            return df.sort_values(by='score', ascending=False)

        super().__init__(scoring_fn, sorting_fn)
class WeightedLeximaxOptimizer(Optimizer):
    """
    An object used to process optimization queries on a database which underlying
    optimization function is a weighted leximax computing the closeness of an art
    piece to a target profile.
    """

    def __init__(self, profile, weights):
        """
        Creates a new weighted leximax based optimization function.

        The "weights" parameter is a mapping from the database column names to their weights.
        Columns missing from it ('related_names', 'related_dates',
        'related_places', 'related_emotions') get a weight of 0.
        The mapping is copied, so the caller's object is left untouched.
        The "profile" parameter is the target profile.
        """
        self.profile = profile
        # Private copy: filling in the defaults must not leak into the caller's mapping.
        self.weights = dict(weights) if weights is not None else {}
        for col in ('related_names', 'related_dates', 'related_places', 'related_emotions'):
            self.weights.setdefault(col, 0)

        def scoring_fn(row):
            score_name = self.weights['related_names'] * Optimizer.name_similarity(
                self.profile.target_name, row['related_names'])
            score_date = self.weights['related_dates'] * Optimizer.date_similarity(
                self.profile.target_date, row['related_dates'])
            score_place = self.weights['related_places'] * self.place_similarity(
                self.profile.target_place, row['related_places'])
            score_emotion = self.weights['related_emotions'] * Optimizer.emotion_similarity(
                self.profile.target_emotion, row['related_emotions'])
            scores = [score_name, score_date, score_place, score_emotion]
            # Store original scores for logging before sorting
            # NOTE(review): when called through DataFrame.apply the row is
            # presumably a temporary object, so this write may not reach the
            # dataframe -- confirm who reads '_original_scores'.
            row['_original_scores'] = tuple(scores)
            # Leximax: order the weighted scores from best to worst so that
            # tuples are compared on their largest component first.
            scores.sort(reverse=True)
            return tuple(scores)

        def sorting_fn(df):
            # Best (largest) score tuple first.
            return df.sort_values(by='score', ascending=False)

        super().__init__(scoring_fn, sorting_fn)
class TargetProfile:
    """
    The query, in term of searched values (i.e. the name we look for, the date we look for, ...)
    """

    def __init__(self, target_name=None, target_date=None, target_place=None,
                 target_emotion=None):
        """
        Create a new target profile.

        Every criterion is optional and defaults to None (not searched), so
        "TargetProfile()" still creates an empty profile that can be filled
        in afterwards with the "set_target_*" methods.
        """
        self.target_name = target_name
        self.target_date = target_date
        self.target_place = target_place
        self.target_emotion = target_emotion

    def __str__(self):
        return (f"TargetProfile(target_name={self.target_name}, "
                f"target_date={self.target_date}, "
                f"target_place={self.target_place}, "
                f"target_emotion={self.target_emotion})")

    def __repr__(self):
        # Same readable form as str() so profiles are legible in logs and debuggers.
        return self.__str__()

    def set_target_name(self, target_name):
        """Set the name we look for."""
        self.target_name = target_name

    def set_target_date(self, target_date):
        """Set the date we look for."""
        self.target_date = target_date

    def set_target_emotion(self, target_emotion):
        """Set the emotion we look for."""
        self.target_emotion = target_emotion

    def set_target_place(self, target_place):
        """Set the place we look for."""
        self.target_place = target_place