File size: 3,086 Bytes
b88cc47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from typing import List, Dict
import numpy as np


def parameter_range_low_high(parameter_range: List):
    """
    Convert bin midpoints into the sequence of bin boundaries.

    :param parameter_range: List[Float]--midpoints of bins, in order
    :return: numpy array holding one more entry than the input: the first
        midpoint, the average of every pair of neighbouring midpoints, and
        finally the last midpoint
    """
    upper_neighbours = np.array(parameter_range[1:])
    lower_neighbours = np.array(parameter_range[:len(upper_neighbours)])

    # Interior boundaries sit halfway between two adjacent midpoints.
    inner_edges = (upper_neighbours + lower_neighbours) * 0.5

    # The outermost boundaries are the first and last midpoints themselves.
    last_position = len(parameter_range) - 1
    pieces = [parameter_range[0], inner_edges, parameter_range[last_position]]
    return np.hstack(pieces)


class ParameterValue:
    """One concrete setting of a parameter together with its one-hot encoding."""

    # Name of the parameter this value belongs to.
    name: str
    # The value carried by this instance.
    value: float
    # One-hot vector for the selected bin.
    encoding: List[float]
    # Position of the selected bin.
    index: int

    def __init__(self, name, value, encoding, index):
        # Store the four fields exactly as given; nothing is validated or copied.
        self.name, self.value = name, value
        self.encoding, self.index = encoding, index


class ParameterDescription:
    """Recipe for producing values of one named parameter from a list of bins."""

    # With discrete=False, generate() emits values that do not sit exactly on a
    # bin, which is used to create training samples.
    def __init__(self, name, values: List[float], discrete=True):
        # Bin boundaries derived from the bin values; one entry more than `values`.
        boundaries = parameter_range_low_high(values)

        self.name = name
        self.values = values
        self.discrete = discrete
        self.parameter_low_high = boundaries

    # Builds the one-hot encoded ParameterValue for bin `index` (as per paper).
    # `value` overrides values[index]; handy for non-discrete parameters.
    # todo: is the override too ad hoc?
    def parameter_value(self, index, value=None) -> ParameterValue:
        chosen = self.values[index] if value is None else value

        one_hot = np.zeros(len(self.values), dtype=float)
        one_hot[index] = 1.0

        return ParameterValue(name=self.name, value=chosen, encoding=one_hot, index=index)

    # Draws a random ParameterValue (random even distribution as per paper).
    def generate(self) -> ParameterValue:
        bin_count = len(self.values)

        if not self.discrete:
            # Snap a uniform draw in [0, 1] to the nearest point of an evenly
            # spaced grid that has one point per bin; that point's position is
            # the selected bin.
            position = np.random.uniform(0, 1)
            grid = np.linspace(0.0, 1, bin_count)
            index = np.argmin(np.abs(grid - position))

            # Second uniform draw places the value between the bin's boundaries.
            lower = self.parameter_low_high[index]
            upper = self.parameter_low_high[index + 1]
            value = (upper - lower) * np.random.uniform(0, 1) + lower
            return self.parameter_value(index, value)

        # Discrete parameter: every bin is equally likely and its own value is used.
        return self.parameter_value(np.random.randint(0, bin_count))

    # Returns the index of the bin whose value lies closest to `value`.
    def get_bin_index(self, value):
        distances = np.abs(np.array(self.values) - value)
        return np.argmin(distances)

    # Maps an encoding vector back to the ParameterValue of its largest entry.
    def decode(self, encoding: List[float]) -> ParameterValue:
        strongest = np.array(encoding).argmax()
        return self.parameter_value(strongest)


class Sample:
    """Describes the label of one training sample."""

    # The parameter values that make up this sample's label, in encoding order.
    # Written as a string so the annotation is a lazily resolved forward reference.
    parameters: "List[ParameterValue]"

    def __init__(self, parameters):
        self.parameters = parameters

    def get_values(self) -> Dict[str, object]:
        """
        Summarise the sample as plain containers.

        :return: dict with two keys: "parameters" maps each parameter name to
            its value, and "encoding" is a flat list made by concatenating the
            encodings of all parameters in order (empty when the sample has no
            parameters)
        """
        values_by_name = {p.name: p.value for p in self.parameters}

        # np.hstack needs a real sequence: handing it a generator has been
        # deprecated since NumPy 1.16 and is rejected with a TypeError by newer
        # releases, so the encodings are collected into a list first.
        encodings = [p.encoding for p in self.parameters]

        # np.hstack raises on an empty sequence; a sample without parameters
        # simply has an empty encoding.
        flat_encoding = list(np.hstack(encodings)) if encodings else []

        return {
            "parameters": values_by_name,
            "encoding": flat_encoding
        }