Spaces:
Build error
Build error
| from fake_face_detection.utils.generation import PI_generate_sample as generate_sample | |
| from fake_face_detection.utils.acquisitions import PI_acquisition as acquisition | |
| from fake_face_detection.utils.sampling import get_random_samples | |
| from sklearn.gaussian_process import GaussianProcessRegressor | |
| from functools import partial | |
| from typing import * | |
| import pandas as pd | |
| import numpy as np | |
| import string | |
| import random | |
| import pickle | |
| import os | |
| letters = string.ascii_letters + string.digits | |
| class SimpleBayesianOptimizationForFakeReal: | |
| def __init__(self, objective: Callable, search_spaces: dict, maximize: bool = True, random_kwargs: dict = {}, kwargs: dict = {}, checkpoint: str = "data/trials/checkpoint.txt"): | |
| # recuperate the optimization strategy | |
| self.maximize = maximize | |
| # checkpoint where the data and score will be saved | |
| self.checkpoint = checkpoint | |
| # initialize the search spaces | |
| self.search_spaces = search_spaces | |
| # recuperate the random kwargs | |
| self.random_kwargs = random_kwargs | |
| # initialize the objective function | |
| self.objective = objective | |
| # initialize the kwargs | |
| self.kwargs = kwargs | |
| # initialize the model | |
| self.model = GaussianProcessRegressor() | |
| # initialize the random kwargs with a random values | |
| random_kwargs = {key: value + ''.join(random.choice(letters) for i in range(7)) for key, value in self.random_kwargs.items()} | |
| # add random kwargs to the kwargs | |
| self.kwargs.update(random_kwargs) | |
| # recuperate random sample | |
| config = get_random_samples(search_spaces) | |
| if os.path.exists(self.checkpoint): | |
| with open(self.checkpoint, 'rb') as f: | |
| pickler = pickle.Unpickler(f) | |
| checkpoint = pickler.load() | |
| self.data = checkpoint['data'] | |
| self.scores = checkpoint['scores'] | |
| self.model = checkpoint['model'] | |
| self.current_trial = checkpoint['trial'] | |
| print(f"Checkpoint loaded at trial {self.current_trial}") | |
| else: | |
| # add config to kwargs | |
| self.kwargs['config'] = config | |
| # calculate the first score | |
| score = self.objective(**self.kwargs) | |
| # initialize the input data | |
| self.data = [list(config.values())] | |
| # initialize the scores | |
| self.scores = [[score]] | |
| # fit the model with the input data and the target | |
| self.model.fit(self.data, self.scores) | |
| # initialize the number of trials to zero | |
| self.current_trial = 0 | |
| with open(self.checkpoint, 'wb') as f: | |
| pickler = pickle.Pickler(f) | |
| checkpoint = { | |
| 'data': self.data, | |
| 'scores': self.scores, | |
| 'model': self.model, | |
| 'trial': self.current_trial | |
| } | |
| pickler.dump(checkpoint) | |
| def optimize(self, n_trials: int = 50, n_tests: int = 100): | |
| """Finding the best hyperparameters with the Bayesian Optimization | |
| Args: | |
| n_trials (int, optional): The number of trials. Defaults to 50. | |
| n_tests (int, optional): The number of random samples to test for each trial. Defaults to 100. | |
| """ | |
| # let us make multiple trials in order to find the best params | |
| for trial in range(self.current_trial + 1, self.current_trial + n_trials + 1): | |
| # let us generate new samples with the acquisition and the surrogate functions | |
| new_sample = generate_sample(self.data, self.model, self.search_spaces, n_tests, maximize = self.maximize) | |
| config = {key: new_sample[i] for i, key in enumerate(self.search_spaces)} | |
| # recuperate a new score | |
| new_score = self.get_score(config) | |
| # let us add the new sample, target and score to their lists | |
| self.data.append(new_sample) | |
| self.scores.append([new_score]) | |
| # let us train again the model | |
| self.model.fit(self.data, self.scores) | |
| # recuperate the current trial | |
| self.current_trial = trial | |
| with open(self.checkpoint, 'wb') as f: | |
| pickler = pickle.Pickler(f) | |
| checkpoint = { | |
| 'data': self.data, | |
| 'scores': self.scores, | |
| 'model': self.model, | |
| 'trial': self.current_trial | |
| } | |
| pickler.dump(checkpoint) | |
| def get_score(self, config: dict): | |
| # add random seed (since we have always the same problem of randomizing the seed) | |
| random.seed(None) | |
| # initialize the random kwargs with a random values | |
| random_kwargs = {key: value + ''.join(random.choice(letters) for i in range(7)) for key, value in self.random_kwargs.items()} | |
| print(random_kwargs) | |
| # add random kwargs to the kwargs | |
| self.kwargs.update(random_kwargs) | |
| # add config to kwargs | |
| self.kwargs['config'] = config | |
| # calculate the first score | |
| new_score = self.objective(**self.kwargs) | |
| return new_score | |
| def get_results(self): | |
| """Recuperate the generated samples and the scores | |
| Returns: | |
| pd.DataFrame: A data frame containing the results | |
| """ | |
| # let us return the results as a data frame | |
| data = {key: np.array(self.data, dtype = object)[:, i] for i, key in enumerate(self.search_spaces)} | |
| data.update({'score': np.array(self.scores)[:, 0]}) | |
| return pd.DataFrame(data) | |