"""Optimization queries ranking art pieces by closeness to a target profile."""

# `reduce` is no longer used below; the import is kept because other code may
# import it from this module.
from functools import reduce  # noqa: F401

from rapidfuzz import fuzz

from .emotions import EmotionWheel
from .places import Places

# Dataframe columns that take part in the weighted scores. Any of them missing
# from a user-supplied weight mapping is treated as having weight 0.
_SCORED_COLUMNS = (
    'related_names',
    'related_dates',
    'related_places',
    'related_emotions',
)


class Optimizer:
    """
    An object used to process optimization queries on a database.

    This class is general: you should consider using one of its derived
    classes, e.g. WeightedSumOptimizer.

    Such queries use the "Aggregate and Compare" paradigm: for each piece of
    art, a score is given, which is then compared to the scores of the other
    pieces.
    """

    # EmotionWheel shared by all Optimizer instances (built once, when the
    # class body is executed).
    emotion_wheel = EmotionWheel()

    def __init__(self, scoring_fn, sorting_fn):
        """
        Creates a new optimization query.

        "scoring_fn" is called with one dataframe row and returns the value
        stored in the "score" column for that row. "sorting_fn" is called with
        the scored dataframe and returns it sorted according to that column.
        """
        self.scoring_fn = scoring_fn
        self.sorting_fn = sorting_fn
        # The Places helper is created lazily by place_similarity().
        self.places = None
        # Maps a lowercased related place to the lowercased alternate names
        # returned by Places.alternate_names for it.
        self.places_alt_names_cache = {}

    def optimize_max(self, database):
        """
        Returns a table containing the maximal elements w.r.t. the objective
        function.

        The result is the database's dataframe plus a "score" column, passed
        through the sorting function. The database's own dataframe is left
        untouched.
        """
        df = database.get_dataframe()
        # assign() works on a copy, so no temporary column has to be added to
        # (and later removed from) the dataframe owned by the database.
        scored = df.assign(score=df.apply(self.scoring_fn, axis=1))
        return self.sorting_fn(scored)

    @staticmethod
    def name_similarity(target_name, related_names):
        """
        Returns a score indicating a similarity between the target name and a
        list of related names.

        The similarity is the maximal similarity between the target and any of
        the related names. The similarity between two names is the rapidfuzz
        partial ratio (with a score cutoff of 90) divided by 100.
        Returns 0. when there is no target name or no related name.
        """
        if target_name is None or related_names is None:
            return 0.
        return max(
            (fuzz.partial_ratio(related, target_name, score_cutoff=90) / 100
             for related in related_names),
            default=0.,
        )

    @staticmethod
    def date_similarity(target_date, related_dates):
        """
        Returns a score indicating a similarity between the target date and a
        list of related dates.

        The similarity is the maximal similarity between the target and any of
        the related dates. If days and months correspond, the similarity is
        maximal (1.). If only the days or only the months correspond, the
        similarity is half the maximal (0.5). Otherwise it is 0.
        Returns 0. when there is no target date or no related date.
        """
        if target_date is None or related_dates is None:
            return 0.

        def similarity(related):
            same_day = related.day == target_date.day
            same_month = related.month == target_date.month
            if same_day and same_month:
                return 1.
            if same_day or same_month:
                return 0.5
            return 0.

        return max(map(similarity, related_dates), default=0.)

    def place_similarity(self, target_place, related_places):
        """
        Returns a score indicating a similarity between the target place and a
        list of related places.

        The score is 1. as soon as the lowercased target place is one of the
        alternate names (as given by Places.alternate_names) of a related
        place, and 0. otherwise, including when there is no target place or no
        related place.
        """
        if target_place is None or related_places is None:
            return 0.

        if self.places is None:
            self.places = Places()
            self.places_alt_names_cache = {}

        cache = self.places_alt_names_cache
        target = target_place.lower()  # loop invariant
        for related_place in related_places:
            key = related_place.lower()
            if key not in cache:
                # A frozenset gives constant-time membership tests.
                cache[key] = frozenset(
                    name.lower()
                    for name in self.places.alternate_names(key)
                )
            if target in cache[key]:
                return 1.
        return 0.

    @staticmethod
    def emotion_similarity(target_emotion, related_emotions):
        """
        Returns a score indicating emotional similarity between the target
        emotion and a list of related emotions.

        The score is the average of
        EmotionWheel.calculate_emotion_similarity(target, related) over the
        related emotions (per the original author, a geometric distance on
        Plutchik's wheel). Returns 0.0 when the target emotion or the related
        emotions are missing or empty.
        """
        if not related_emotions or not target_emotion:
            return 0.0

        wheel = Optimizer.emotion_wheel
        scores = [
            wheel.calculate_emotion_similarity(target_emotion, emotion)
            for emotion in related_emotions
        ]
        # "scores" can still be empty if related_emotions was a truthy but
        # exhausted iterable.
        return sum(scores) / len(scores) if scores else 0.0


class _WeightedOptimizer(Optimizer):
    """
    Shared machinery of the weighted optimizers: stores the profile and the
    weights, and computes the weighted per-criterion scores of a row.
    Subclasses only define how those scores are aggregated.
    """

    def __init__(self, profile, weights):
        """
        The "weights" parameter is a mapping from the database column names to
        their weights; missing columns get weight 0.
        The "profile" parameter is the target profile.
        """
        self.profile = profile
        # Work on a copy so the caller's mapping is not modified when the
        # missing columns are filled in.
        self.weights = {**dict.fromkeys(_SCORED_COLUMNS, 0), **weights}
        super().__init__(self._score_row, self._sort_by_score)

    def _component_scores(self, row):
        """
        Returns the weighted [name, date, place, emotion] scores of a row.
        """
        weights = self.weights
        profile = self.profile
        return [
            weights['related_names'] * Optimizer.name_similarity(
                profile.target_name, row['related_names']),
            weights['related_dates'] * Optimizer.date_similarity(
                profile.target_date, row['related_dates']),
            weights['related_places'] * self.place_similarity(
                profile.target_place, row['related_places']),
            weights['related_emotions'] * Optimizer.emotion_similarity(
                profile.target_emotion, row['related_emotions']),
        ]

    def _score_row(self, row):
        """Aggregates the component scores of a row; defined by subclasses."""
        raise NotImplementedError

    @staticmethod
    def _sort_by_score(df):
        """Sorts the dataframe by decreasing "score"."""
        return df.sort_values(by='score', ascending=False)


class WeightedSumOptimizer(_WeightedOptimizer):
    """
    An object used to process optimization queries on a database whose
    underlying optimization function is a weighted sum computing the closeness
    of an art piece to a target profile.

    The constructor takes the target "profile" and the "weights", a mapping
    from the database column names to their weights.
    """

    def _score_row(self, row):
        """Returns the sum of the weighted per-criterion scores of the row."""
        return sum(self._component_scores(row))


class WeightedLeximaxOptimizer(_WeightedOptimizer):
    """
    An object used to process optimization queries on a database whose
    underlying optimization function is a weighted leximax computing the
    closeness of an art piece to a target profile.

    The constructor takes the target "profile" and the "weights", a mapping
    from the database column names to their weights.
    """

    def _score_row(self, row):
        """
        Returns the weighted per-criterion scores of the row as a tuple sorted
        in decreasing order, so that tuples compare lexicographically.
        """
        scores = self._component_scores(row)
        # Store original scores for logging before sorting.
        # NOTE(review): when called through DataFrame.apply this write does
        # not appear to reach the dataframe; it is kept for callers that pass
        # their own row object - confirm whether it is still needed.
        row['_original_scores'] = tuple(scores)
        scores.sort(reverse=True)
        return tuple(scores)


class TargetProfile:
    """
    The query, in terms of searched values (i.e. the name we look for, the
    date we look for, ...).
    """

    def __init__(self):
        """
        Create a new, empty target profile.
        """
        self.target_name = None
        self.target_date = None
        self.target_place = None
        self.target_emotion = None

    def __str__(self):
        return (f"TargetProfile(target_name={self.target_name}, "
                f"target_date={self.target_date}, "
                f"target_place={self.target_place}, "
                f"target_emotion={self.target_emotion})")

    __repr__ = __str__

    def set_target_name(self, target_name):
        self.target_name = target_name

    def set_target_date(self, target_date):
        self.target_date = target_date

    def set_target_emotion(self, target_emotion):
        self.target_emotion = target_emotion

    def set_target_place(self, target_place):
        self.target_place = target_place