WatermelonPapaya committed on
Commit
de019e5
·
verified ·
1 Parent(s): 402f7e5

Upload 2 files

Browse files
Files changed (2) hide show
  1. helper/model.py +21 -0
  2. helper/optimizer_def.py +296 -0
helper/model.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import helper.optimizer_def
2
+ from keras.models import load_model
3
+ from keras.preprocessing import image
4
+ import numpy as np
5
+
6
+ CONVERT_CLASS_PRED_TO_NAME = ["Common Rust", "Gray Leaf Spot", "Leaf Blight"]
7
+
8
def fetch_model(opt_name: str):
    """Load the saved Keras model associated with an optimizer name.

    Args:
        opt_name: Prefix of the model file name. The file that is read is
            ``models/<opt_name>-model-001.h5``, relative to the current
            working directory.

    Returns:
        The object returned by ``load_model`` for that file.
    """
    model_path = f'models/{opt_name}-model-001.h5'
    # NOTE(review): safe_mode=False relaxes Keras' deserialization safety
    # checks; presumably required for the custom optimizers -- only point
    # this at trusted model files.
    return load_model(model_path, safe_mode=False)
10
+
11
def preprocess_image(img):
    """Turn an image into a single-item batch array for prediction.

    Args:
        img: An image object exposing ``resize((width, height))``
            (presumably a PIL image -- confirm against the caller).

    Returns:
        The 224x224 resized image converted with ``image.img_to_array`` and
        given a new leading axis of length 1.
    """
    # NOTE(review): no colour-mode conversion or pixel rescaling happens
    # here; confirm the model expects the raw array as produced.
    resized = img.resize((224, 224))
    pixels = image.img_to_array(resized)
    return np.expand_dims(pixels, axis=0)
16
+
17
def classify_image(model, img):
    """Return the class name predicted by ``model`` for a batched image.

    Args:
        model: Object exposing ``predict(img)`` that returns per-class
            scores with the batch on axis 0 and classes on axis 1.
        img: Input passed straight to ``model.predict``.

    Returns:
        The entry of ``CONVERT_CLASS_PRED_TO_NAME`` at the index of the
        highest score for the first item in the batch.
    """
    scores = model.predict(img)
    best_per_item = np.argmax(scores, axis=1)
    # Only the first batch item is reported.
    return CONVERT_CLASS_PRED_TO_NAME[best_per_item[0]]
helper/optimizer_def.py ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Adan implementation based on https://github.com/cpuimage/keras-optimizer.git
2
+ import tensorflow as tf
3
+ import keras
4
+
5
+
6
+ # From https://github.com/cpuimage/keras-optimizer/blob/main/optimizer/Adan.py
7
@keras.saving.register_keras_serializable()
class Adan(tf.keras.optimizers.Optimizer):
    r"""Optimizer that implements the Adan algorithm.

    Adan: Adaptive Nesterov Momentum Algorithm for Faster Optimizing Deep Models
    https://arxiv.org/abs/2208.06677

    Four slot variables are kept per model variable:

    * ``m`` -- moving average of the gradient (``beta_1``),
    * ``v`` -- moving average of the difference between the current and the
      previous gradient (``beta_2``),
    * ``n`` -- moving average of the squared, difference-corrected gradient
      (``beta_3``),
    * ``p`` -- the previous gradient.

    Args:
        learning_rate: Step size, passed through ``_build_learning_rate``.
        weight_decay: Weight decay coefficient. Must not be ``None``.
        beta_1: Decay constant used for ``m``.
        beta_2: Decay constant used for ``v``.
        beta_3: Decay constant used for ``n``.
        epsilon: Small constant added to ``n`` before the reciprocal square
            root.
        clipnorm, clipvalue, global_clipnorm, use_ema, ema_momentum,
        ema_overwrite_frequency, jit_compile, name, **kwargs: Forwarded
            unchanged to the base optimizer constructor.

    Raises:
        ValueError: If ``weight_decay`` is ``None``.
    """

    def __init__(
            self,
            learning_rate=0.001,
            weight_decay=0.05,
            beta_1=0.98,
            beta_2=0.92,
            beta_3=0.99,
            epsilon=1e-16,
            clipnorm=None,
            clipvalue=None,
            global_clipnorm=None,
            use_ema=False,
            ema_momentum=0.99,
            ema_overwrite_frequency=None,
            jit_compile=True,
            name="Adan",
            **kwargs
    ):
        super().__init__(
            name=name,
            clipnorm=clipnorm,
            clipvalue=clipvalue,
            global_clipnorm=global_clipnorm,
            use_ema=use_ema,
            ema_momentum=ema_momentum,
            ema_overwrite_frequency=ema_overwrite_frequency,
            jit_compile=jit_compile,
            **kwargs
        )
        self._learning_rate = self._build_learning_rate(learning_rate)
        self.weight_decay = weight_decay
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.beta_3 = beta_3
        self.epsilon = epsilon
        if self.weight_decay is None:
            raise ValueError(
                "Missing value of `weight_decay` which is required and"
                " must be a float value.")

    def build(self, var_list):
        """Create the ``v``, ``m``, ``p`` and ``n`` slots for each variable.

        Does nothing beyond the base-class ``build`` when the optimizer has
        already been built.
        """
        super().build(var_list)
        if hasattr(self, "_built") and self._built:
            return
        self._built = True
        self._momentums = []
        self._beliefs = []
        self._prev_gradients = []
        self._velocities = []
        # Slot creation order (v, m, p, n) is kept as-is so that variable
        # ordering stays compatible with previously saved optimizer state.
        for var in var_list:
            self._beliefs.append(self.add_variable_from_reference(model_variable=var, variable_name="v"))
            self._momentums.append(self.add_variable_from_reference(model_variable=var, variable_name="m"))
            self._prev_gradients.append(self.add_variable_from_reference(model_variable=var, variable_name="p"))
            self._velocities.append(self.add_variable_from_reference(model_variable=var, variable_name="n"))

    def _use_weight_decay(self, variable):
        """Return False when ``variable`` was excluded from weight decay.

        A variable is excluded when it is in the list given to
        `exclude_from_weight_decay`, or when any configured name pattern
        matches its name via ``re.search``.
        """
        # Imported here because the file only imports `re` after this class
        # is defined; the local import keeps the class self-contained.
        import re

        exclude_from_weight_decay = getattr(self, "_exclude_from_weight_decay", [])
        exclude_from_weight_decay_names = getattr(self, "_exclude_from_weight_decay_names", [])
        if variable in exclude_from_weight_decay:
            return False
        for name in exclude_from_weight_decay_names:
            if re.search(name, variable.name) is not None:
                return False
        return True

    def update_step(self, gradient, variable):
        """Update step given gradient and the associated model variable."""
        var_dtype = variable.dtype
        lr = tf.cast(self.learning_rate, var_dtype)
        local_step = tf.cast(self.iterations + 1, var_dtype)
        beta_1_power = tf.pow(tf.cast(self.beta_1, var_dtype), local_step)
        beta_2_power = tf.pow(tf.cast(self.beta_2, var_dtype), local_step)
        beta_3_power = tf.pow(tf.cast(self.beta_3, var_dtype), local_step)
        # Bias-correction factors for the three moving averages.
        alpha_n = tf.sqrt(1.0 - beta_3_power)
        alpha_m = alpha_n / (1.0 - beta_1_power)
        alpha_v = alpha_n / (1.0 - beta_2_power)
        index = self._index_dict[self._var_key(variable)]
        m = self._momentums[index]
        v = self._beliefs[index]
        p = self._prev_gradients[index]
        n = self._velocities[index]
        one_minus_beta_1 = (1 - self.beta_1)
        one_minus_beta_2 = (1 - self.beta_2)
        one_minus_beta_3 = (1 - self.beta_3)
        # 0 on the very first step (there is no previous gradient yet), 1 after.
        not_first_step = tf.cast(local_step != 1.0, var_dtype)

        if isinstance(gradient, tf.IndexedSlices):
            # Sparse gradients: only the rows named by `indices` are updated.
            # The slot variables have the full variable shape, so the touched
            # rows are gathered before being combined with `values`.
            # Updates for repeated indices are accumulated by `scatter_add`.
            indices = gradient.indices
            values = gradient.values
            m.scatter_add(tf.IndexedSlices(
                (values - tf.gather(m, indices)) * one_minus_beta_1, indices))
            diff = (values - tf.gather(p, indices)) * not_first_step
            v.scatter_add(tf.IndexedSlices(
                (diff - tf.gather(v, indices)) * one_minus_beta_2, indices))
            n.scatter_add(tf.IndexedSlices(
                (tf.math.square(values + one_minus_beta_2 * diff) - tf.gather(n, indices)) * one_minus_beta_3,
                indices))
            p.scatter_update(tf.IndexedSlices(values, indices))
        else:
            # Dense gradients.
            m.assign_add((gradient - m) * one_minus_beta_1)
            diff = (gradient - p) * not_first_step
            v.assign_add((diff - v) * one_minus_beta_2)
            n.assign_add((tf.math.square(gradient + one_minus_beta_2 * diff) - n) * one_minus_beta_3)
            p.assign(gradient)
        var_t = tf.math.rsqrt(n + self.epsilon) * (alpha_m * m + one_minus_beta_2 * v * alpha_v)
        # Apply step weight decay
        if self._use_weight_decay(variable):
            wd = tf.cast(self.weight_decay, variable.dtype)
            var_updated = variable - var_t * lr
            var_updated = var_updated / (1.0 + lr * wd)
            variable.assign(var_updated)
        else:
            variable.assign_sub(var_t * lr)

    def get_config(self):
        """Return the base config extended with Adan's hyperparameters."""
        config = super().get_config()
        config.update(
            {
                "learning_rate": self._serialize_hyperparameter(self._learning_rate),
                "weight_decay": self.weight_decay,
                "beta_1": self.beta_1,
                "beta_2": self.beta_2,
                "beta_3": self.beta_3,
                "epsilon": self.epsilon,
            }
        )
        return config

    def exclude_from_weight_decay(self, var_list=None, var_names=None):
        """Exclude variables from weight decays.

        This method must be called before the optimizer's `build` method is
        called. You can set specific variables to exclude out, or set a list of
        strings as the anchor words, if any of which appear in a variable's
        name, then the variable is excluded.

        Args:
            var_list: A list of `tf.Variable`s to exclude from weight decay.
            var_names: A list of strings. If any string in `var_names` appear
                in the model variable's name, then this model variable is
                excluded from weight decay. For example, `var_names=['bias']`
                excludes all bias variables from weight decay.

        Raises:
            ValueError: If the optimizer has already been built.
        """
        if hasattr(self, "_built") and self._built:
            raise ValueError(
                "`exclude_from_weight_decay()` can only be configued before "
                "the optimizer is built."
            )

        self._exclude_from_weight_decay = var_list or []
        self._exclude_from_weight_decay_names = var_names or []
160
+
161
+
162
+ import tensorflow as tf
163
+ import re
164
+
165
@keras.saving.register_keras_serializable()
class AdaBoundOptimizer(tf.keras.optimizers.Optimizer):
    """Optimizer that implements the AdaBound algorithm.

    Adam-style first and second moment estimates are kept per parameter, and
    the resulting per-element step size is clipped between a lower and an
    upper bound that both depend on ``final_lr``, ``gamma`` and the step
    count.

    Args:
        learning_rate: Initial step size (also kept as the base rate used to
            scale ``final_lr``).
        final_lr: Learning rate the bounds are built around. Must be > 0.
        beta1: Decay rate of the first-moment estimate, in ``[0, 1)``.
        beta2: Decay rate of the second-moment estimate, in ``[0, 1)``.
        gamma: Controls how the bounds change with the step count, in
            ``[0, 1)``.
        epsilon: Constant added to the denominator. Must be > 0.
        amsbound: When true, the running maximum of the second-moment
            estimate is used in the denominator.
        decay: When > 0, the learning rate is scaled by
            ``1 / (1 + decay * step)``.
        weight_decay: Weight decay coefficient; 0 disables it.
        exclude_from_weight_decay: Optional iterable of regular expressions;
            parameters whose name matches any of them skip weight decay.
        name: Optimizer name passed to the base class.
        **kwargs: Forwarded to the base optimizer constructor.

    Raises:
        ValueError: If ``final_lr``, ``beta1``, ``beta2``, ``gamma`` or
            ``epsilon`` is outside its valid range.
    """

    def __init__(self,
                 learning_rate=0.001,
                 final_lr=0.1,
                 beta1=0.9,
                 beta2=0.999,
                 gamma=1e-3,
                 epsilon=1e-8,
                 amsbound=False,
                 decay=0.,
                 weight_decay=0.,
                 exclude_from_weight_decay=None,
                 name='AdaBound', **kwargs):
        super().__init__(name, **kwargs)

        if final_lr <= 0.:
            raise ValueError(f"Invalid final learning rate : {final_lr}")
        if not 0. <= beta1 < 1.:
            raise ValueError(f"Invalid beta1 value : {beta1}")
        if not 0. <= beta2 < 1.:
            raise ValueError(f"Invalid beta2 value : {beta2}")
        if not 0. <= gamma < 1.:
            raise ValueError(f"Invalid gamma value : {gamma}")
        if epsilon <= 0.:
            raise ValueError(f"Invalid epsilon value : {epsilon}")

        self._lr = learning_rate
        self._final_lr = final_lr
        self._beta1 = beta1
        self._beta2 = beta2
        self._gamma = gamma
        self._epsilon = epsilon
        self._amsbound = amsbound
        self._decay = decay
        self._weight_decay = weight_decay
        self._exclude_from_weight_decay = exclude_from_weight_decay

        self._base_lr = learning_rate
        self.global_step = tf.Variable(initial_value=0, trainable=False, name="global_step")
        # Per-parameter moment estimates, keyed by parameter name and created
        # lazily the first time a parameter is seen in `apply_gradients`.
        self.m_dict = {}
        self.v_dict = {}
        if amsbound:
            self.v_hat_dict = {}

    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        """Apply one AdaBound update to every ``(gradient, variable)`` pair.

        Args:
            grads_and_vars: Iterable of ``(gradient, variable)`` pairs. Pairs
                where either element is ``None`` are skipped.
            global_step: Step counter variable; defaults to
                ``self.global_step``. It is incremented by one.
            name: Optional name for the grouped operation.

        Returns:
            The result of ``tf.group`` over all assignments.
        """
        if global_step is None:
            global_step = self.global_step  # Assuming global_step is a class attribute

        lr = self._lr
        t = tf.cast(global_step, dtype=tf.float32)

        if self._decay > 0.:
            lr *= (1. / (1. + self._decay * t))

        t += 1

        bias_correction1 = 1. - (self._beta1 ** t)
        bias_correction2 = 1. - (self._beta2 ** t)
        step_size = (lr * tf.sqrt(bias_correction2) / bias_correction1)

        # Bounds on the per-element step size for this step.
        final_lr = self._final_lr * lr / self._base_lr
        lower_bound = final_lr * (1. - 1. / (self._gamma * t + 1.))
        upper_bound = final_lr * (1. + 1. / (self._gamma * t))

        assignments = []
        for grad, param in grads_and_vars:
            if grad is None or param is None:
                continue

            param_name = self._get_variable_name(param.name)

            if param_name not in self.m_dict:
                self.m_dict[param_name] = tf.Variable(tf.zeros(shape=param.shape), trainable=False)
                self.v_dict[param_name] = tf.Variable(tf.zeros(shape=param.shape), trainable=False)
                if self._amsbound:
                    self.v_hat_dict[param_name] = tf.Variable(tf.zeros(shape=param.shape), trainable=False)

            m = self.m_dict[param_name]
            v = self.v_dict[param_name]
            v_hat = self.v_hat_dict[param_name] if self._amsbound else None

            m_t = (self._beta1 * m + (1. - self._beta1) * grad)
            v_t = (self._beta2 * v + (1. - self._beta2) * tf.square(grad))

            if self._amsbound:
                v_hat_t = tf.maximum(v_hat, v_t)
                denom = (tf.sqrt(v_hat_t) + self._epsilon)
            else:
                denom = (tf.sqrt(v_t) + self._epsilon)

            step_size_p = step_size * tf.ones_like(denom)
            step_size_p_bound = step_size_p / denom

            lr_t = m_t * tf.clip_by_value(step_size_p_bound,
                                          clip_value_min=lower_bound,
                                          clip_value_max=upper_bound)
            p_t = param - lr_t

            if self._do_use_weight_decay(param_name):
                # NOTE(review): this ADDS weight_decay * param to the new
                # value, which grows the weights; conventional weight decay
                # would subtract. Left unchanged -- confirm the intended
                # sign before training with weight_decay > 0.
                p_t += self._weight_decay * param

            update_list = [param.assign(p_t), m.assign(m_t), v.assign(v_t)]
            if self._amsbound:
                update_list.append(v_hat.assign(v_hat_t))

            assignments.extend(update_list)

        # update the global step
        assignments.append(global_step.assign_add(1))

        return tf.group(*assignments, name=name)

    def _do_use_weight_decay(self, var):
        """Whether to use L2 weight decay for `var`.

        Args:
            var: Either a parameter name (string) -- which is what
                `apply_gradients` passes -- or an object with a ``name``
                attribute.

        Returns:
            False when weight decay is disabled or the name matches one of
            the exclusion patterns; True otherwise.
        """
        if not self._weight_decay:
            return False
        # The caller hands over the name string, which has no `.name`
        # attribute, so both a plain string and a variable are accepted.
        var_name = var if isinstance(var, str) else var.name
        if self._exclude_from_weight_decay:
            for r in self._exclude_from_weight_decay:
                if re.search(r, var_name) is not None:
                    return False
        return True

    @staticmethod
    def _get_variable_name(var_name):
        """Get the variable name from the tensor name.

        Strips a trailing ``:<digits>`` suffix when present; otherwise the
        input is returned unchanged.
        """
        m = re.match("^(.*):\\d+$", var_name)
        if m is not None:
            var_name = m.group(1)
        return var_name