PierreHanna commited on
Commit
8d66428
·
1 Parent(s): e628964

Delete models.py

Browse files
Files changed (1) hide show
  1. models.py +0 -382
models.py DELETED
@@ -1,382 +0,0 @@
1
- import tensorflow as tf
2
- from tensorflow.keras.layers import BatchNormalization, Concatenate
3
- from tensorflow.keras.layers import Lambda, Flatten, Dense
4
- from tensorflow.keras.initializers import glorot_uniform, RandomNormal, Zeros, HeNormal, Constant
5
- from tensorflow.keras.layers import Input, Subtract, Dense, Lambda, Dropout,LeakyReLU, ReLU, PReLU, Attention
6
- from tensorflow.keras.models import Sequential
7
- from tensorflow.keras.layers import Conv2D, Conv1D, ZeroPadding2D, Activation, Input, concatenate, ConvLSTM2D, LSTM
8
- from tensorflow.keras.layers import AveragePooling1D, MaxPooling1D, GlobalMaxPooling1D, GlobalMaxPooling2D, TimeDistributed, GlobalAveragePooling1D
9
- from tensorflow.keras.layers import MaxPooling2D, AveragePooling2D, GlobalAveragePooling2D, UpSampling1D, Reshape
10
- from tensorflow.keras.models import Model
11
- from tensorflow.keras.layers import Conv2D, Conv1D, ZeroPadding2D, Activation, Multiply, Add, MaxPool1D, Permute
12
- from keras import backend as K
13
- import tensorflow_addons as tfa
14
- import numpy as np
15
-
16
-
17
- MARGIN = 0.5
18
- DIM_OUT = 1024
19
-
20
- def triplet_loss_new(y_true, y_pred):
21
- anchor, positive, negative = y_pred[:,:DIM_OUT], y_pred[:,DIM_OUT:2*DIM_OUT], y_pred[:,2*DIM_OUT:]
22
- positive_dist = K.sum(K.square(anchor-positive), axis=-1)
23
- negative_dist = K.sum(K.square(anchor-negative), axis=-1)
24
- return K.sum(K.maximum(positive_dist - negative_dist + MARGIN, 0), axis=0)
25
-
26
-
27
-
28
- # Define the contrastive loss function, NT_Xent (Tensorflow version)
29
- def nt_xent_loss_4(y_true, y_pred, tau=0.07):
30
- '''call
31
-
32
- Calculates the infonce loss described in SimCLR
33
- https://arxiv.org/abs/2002.05709
34
-
35
- Args:
36
- z1 (tf.Tensor): The embeddings, view 1 (half of batch)
37
- z2 (tf.Tensor): The embeddings, view 2 (half of batch)
38
-
39
- Returns:
40
- tf.Tensor: The loss
41
- '''
42
- z1 = y_pred[:,:DIM_OUT]
43
- z2 = y_pred[:,DIM_OUT:2*DIM_OUT]
44
-
45
- # Combine the two embeddings
46
- z = tf.concat([z1, z2], axis=0)
47
-
48
- # Normalize each row
49
- z = tf.math.l2_normalize(z, axis=1)
50
-
51
- batch_size = tf.shape(z)[0]
52
- ones = tf.ones((batch_size // 2, ))
53
- labels = tf.experimental.numpy.diagflat(ones, batch_size // 2) + \
54
- tf.experimental.numpy.diagflat(ones, -batch_size // 2)
55
-
56
- # Similarity matrix
57
- sim_m = z @ tf.transpose(z)
58
-
59
- # Setting diagonal to -1
60
- sim_m = tf.linalg.set_diag(sim_m, -tf.ones((batch_size, )))
61
-
62
- # Crossentropy
63
- sim_m = sim_m / tau
64
- entropy = tf.multiply(-labels, tf.nn.log_softmax(sim_m, axis=1))
65
-
66
- return tf.reduce_mean(tf.reduce_sum(entropy, axis=1))
67
-
68
-
69
- # Define the contrastive loss function, NT_Xent (Tensorflow version)
70
- def nt_xent_loss_3(y_true, y_pred, tau=0.07):
71
- """ Calculates the contrastive loss of the input data using NT_Xent. The
72
- equation can be found in the paper: https://arxiv.org/pdf/2002.05709.pdf
73
- (This is the Tensorflow implementation of the standard numpy version found
74
- in the NT_Xent function).
75
-
76
- Args:
77
- zi: One half of the input data, shape = (batch_size, feature_1, feature_2, ..., feature_N)
78
- zj: Other half of the input data, must have the same shape as zi
79
- tau: Temperature parameter (a constant), default = 1.
80
-
81
- Returns:
82
- loss: The complete NT_Xent constrastive loss
83
- """
84
- zi = y_pred[:,:DIM_OUT]
85
- zj = y_pred[:,DIM_OUT:2*DIM_OUT]
86
-
87
- z = tf.cast(tf.concat((zi, zj), 0), dtype=tf.float32)
88
- loss = 0
89
- for k in range(zi.shape[0]):
90
- # Numerator (compare i,j & j,i)
91
- i = k
92
- j = k + zi.shape[0]
93
- # Instantiate the cosine similarity loss function
94
- cosine_sim = tf.keras.losses.CosineSimilarity(axis=-1, reduction=tf.keras.losses.Reduction.NONE)
95
- sim = tf.squeeze(- cosine_sim(tf.reshape(z[i], (1, -1)), tf.reshape(z[j], (1, -1))))
96
- numerator = tf.math.exp(sim / tau)
97
-
98
- # Denominator (compare i & j to all samples apart from themselves)
99
- sim_ik = - cosine_sim(tf.reshape(z[i], (1, -1)), z[tf.range(z.shape[0]) != i])
100
- sim_jk = - cosine_sim(tf.reshape(z[j], (1, -1)), z[tf.range(z.shape[0]) != j])
101
- denominator_ik = tf.reduce_sum(tf.math.exp(sim_ik / tau))
102
- denominator_jk = tf.reduce_sum(tf.math.exp(sim_jk / tau))
103
-
104
- # Calculate individual and combined losses
105
- loss_ij = - tf.math.log(numerator / denominator_ik)
106
- loss_ji = - tf.math.log(numerator / denominator_jk)
107
- loss += loss_ij + loss_ji
108
-
109
- # Divide by the total number of samples
110
- loss /= z.shape[0]
111
-
112
- return loss
113
-
114
- def nt_xent_loss_2(y_true, y_pred, temperature=0.07):
115
- # InfoNCE loss (information noise-contrastive estimation)
116
- # NT-Xent loss (normalized temperature-scaled cross entropy)
117
-
118
- projections_1 = y_pred[:,:DIM_OUT]
119
- projections_2 = y_pred[:,DIM_OUT:2*DIM_OUT]
120
-
121
- # Cosine similarity: the dot product of the l2-normalized feature vectors
122
- projections_1 = tf.math.l2_normalize(projections_1, axis=1)
123
- projections_2 = tf.math.l2_normalize(projections_2, axis=1)
124
- similarities = (
125
- tf.matmul(projections_1, projections_2, transpose_b=True) / temperature
126
- )
127
-
128
- # The similarity between the representations of two augmented views of the
129
- # same image should be higher than their similarity with other views
130
- batch_size = tf.shape(projections_1)[0]
131
- contrastive_labels = tf.range(batch_size)
132
- contrastive_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
133
- contrastive_accuracy.update_state(contrastive_labels, similarities)
134
- contrastive_accuracy.update_state(
135
- contrastive_labels, tf.transpose(similarities)
136
- )
137
-
138
- # The temperature-scaled similarities are used as logits for cross-entropy
139
- # a symmetrized version of the loss is used here
140
- loss_1_2 = tf.keras.losses.sparse_categorical_crossentropy(
141
- contrastive_labels, similarities, from_logits=True
142
- )
143
- loss_2_1 = tf.keras.losses.sparse_categorical_crossentropy(
144
- contrastive_labels, tf.transpose(similarities), from_logits=True
145
- )
146
- return (loss_1_2 + loss_2_1) / 2
147
-
148
-
149
- #def contrastive_loss(xi, xj, tau=1, normalize=False):
150
- ################# ERROR IN THIS VERSION ???
151
- def nt_xent_loss(y_true, y_pred, tau=0.07, normalize=False):
152
- ''' this loss is the modified torch implementation by M Diephuis here: https://github.com/mdiephuis/SimCLR/
153
- the inputs:
154
- xi, xj: image features extracted from a batch of images 2N, composed of N matching paints
155
- tau: temperature parameter
156
- normalize: normalize or not. seem to not be very useful, so better to try without.
157
- '''
158
-
159
- xi = y_pred[:,:DIM_OUT]
160
- xj = y_pred[:,DIM_OUT:2*DIM_OUT]
161
-
162
- #xi=tf.transpose(xi)
163
- #xj=tf.transpose(xj)
164
- x = tf.keras.backend.concatenate((xi, xj), axis=0)
165
-
166
- #print(xi.shape)
167
- #print(x.shape)
168
-
169
- sim_mat = tf.keras.backend.dot(x, tf.keras.backend.transpose(x))
170
-
171
- if normalize:
172
- sim_mat_denom = tf.keras.backend.dot(tf.keras.backend.l2_normalize(x, axis=1).unsqueeze(1), tf.keras.backend.l2_normalize(x, axis=1).unsqueeze(1).T)
173
- sim_mat = sim_mat / sim_mat_denom.clamp(min=1e-16)
174
-
175
- sim_mat = tf.keras.backend.exp(sim_mat /tau)
176
-
177
-
178
- if normalize:
179
- sim_mat_denom = tf.keras.backend.l2_normalize(xi, dim=1) * tf.keras.backend.l2_normalize(xj, axis=1)
180
- sim_match = tf.keras.backend.exp(tf.keras.backend.sum(xi * xj, axis=-1) / sim_mat_denom / tau)
181
- else:
182
- sim_match = tf.keras.backend.exp(tf.keras.backend.sum(xi * xj, axis=-1) / tau)
183
-
184
-
185
- sim_match = tf.keras.backend.concatenate((sim_match, sim_match), axis=0)
186
-
187
- #print(tf.keras.backend.shape(x)[0])
188
- norm_sum = tf.keras.backend.exp(tf.keras.backend.ones(tf.keras.backend.shape(x)[0]) / tau)
189
-
190
- #norm_sum = tf.keras.backend.ones(12) # NON
191
- #norm_sum = tf.keras.backend.exp(32/ tau) #OK
192
- #norm_sum = tf.keras.backend.shape(x)[0] #OK
193
-
194
-
195
- #return K.sum(xi)
196
- return tf.math.reduce_mean(-tf.keras.backend.log(sim_match / (tf.keras.backend.sum(sim_mat, axis=-1) - norm_sum)))
197
-
198
-
199
-
200
-
201
- def create_encoder_model_audio(in_shape, dim, final_activ):
202
- #return create_encoder_model_resnet_byte_1d(in_shape)
203
- return create_encoder_model_mlp(in_shape, dim, final_activ=final_activ) #1024
204
-
205
- def create_encoder_model_text(in_shape, dim, final_activ):
206
- #return create_encoder_model_resnet_byte_1d(in_shape)
207
- return create_encoder_model_mlp(in_shape, dim, final_activ=final_activ) #1024
208
-
209
-
210
-
211
-
212
- ######### RESNET 1D
213
- def residual_block_byte_1d(x, filters, activation="relu"):
214
- # Shortcut
215
- s = Conv1D(filters, 1, padding="same")(x)
216
- y = BatchNormalization()(s)
217
- y = Activation(activation)(y)
218
-
219
- y = Conv1D(filters, 3, padding="same")(y)
220
- y = BatchNormalization()(y)
221
- y = Conv1D(filters, 1, padding="same")(y)
222
- y = BatchNormalization()(y)
223
-
224
- y = Add()([y, s])
225
- y = Activation(activation)(y)
226
- return y
227
- #return MaxPool1D(pool_size=2, strides=2)(x)
228
-
229
- def create_encoder_model_resnet_byte_1d(input_shape):
230
-
231
- inputs = Input(shape=input_shape)
232
- x = Conv1D(32, 7, strides = 2, padding="same")(inputs)
233
- x = MaxPooling1D(pool_size=3, strides=2)(x)
234
-
235
- for i in range(3):
236
- x = residual_block_byte_1d(x, 32)
237
-
238
- for i in range(4):
239
- x = residual_block_byte_1d(x, 64)
240
-
241
- for i in range(6):
242
- x = residual_block_byte_1d(x, 128)
243
-
244
- for i in range(3):
245
- x = residual_block_byte_1d(x, 256)
246
-
247
- #print(x.shape)
248
-
249
- x = AveragePooling1D(pool_size=3, strides=3)(x)
250
-
251
- x = GlobalAveragePooling1D()(x)
252
- #x = Flatten()(x)
253
- x = Dense(DIM_OUT, activation="relu")(x)
254
-
255
- model = Dense(DIM_OUT, activation='sigmoid')(x)
256
- model = BatchNormalization()(model)
257
- model = Lambda(lambda x: K.l2_normalize(x,axis=-1))(model)
258
- model = Model(inputs=inputs,outputs=model)
259
- #model.summary()
260
-
261
- return model
262
-
263
- # simple MLP
264
- def create_encoder_model_mlp(input_shape, size1, final_activ=None):
265
-
266
- inputs = Input(shape=input_shape)
267
- x = Dense(size1, activation="relu")(inputs)
268
- x = Dropout(0.1)(x)
269
- #x = BatchNormalization()(x)
270
-
271
- '''
272
- x = Dense(1024, activation="relu")(x)
273
- x = Dropout(0.1)(x)
274
- x = Dense(1024, activation="relu")(x)
275
- x = Dropout(0.1)(x)
276
- x = Dense(1024, activation="relu")(x)
277
- x = Dropout(0.1)(x)
278
- x = Dense(1024, activation="relu")(x)
279
- x = Dropout(0.1)(x)
280
- '''
281
- #x = BatchNormalization()(x)
282
- #x = Dense(512, activation="relu")(x)
283
- #x = BatchNormalization()(x)
284
- '''
285
- if final_activ != None :
286
- model = Dense(DIM_OUT)(x)#, activation='sigmoid')(x)
287
- else :
288
- model = Dense(DIM_OUT, activation=final_activ)(x)
289
- '''
290
- model = Dense(DIM_OUT, activation=final_activ)(x)
291
- model = Dropout(0.1)(model)
292
- #model = BatchNormalization()(model)
293
- model = Lambda(lambda x: K.l2_normalize(x,axis=-1))(model)
294
- model = Model(inputs=inputs,outputs=model)
295
- model.summary()
296
-
297
- return model
298
-
299
- def make_bert_preprocess_model(sentence_features, tfhub_handle_preprocess, seq_length=128):
300
- """Returns Model mapping string features to BERT inputs.
301
- """
302
-
303
- input_segments = [
304
- tf.keras.layers.Input(shape=(), dtype=tf.string, name=ft)
305
- for ft in sentence_features]
306
-
307
- bert_preprocess = hub.load(tfhub_handle_preprocess)
308
- tokenizer = hub.KerasLayer(bert_preprocess.tokenize, name='tokenizer')
309
- segments = [tokenizer(s) for s in input_segments]
310
-
311
- truncated_segments = segments
312
- packer = hub.KerasLayer(bert_preprocess.bert_pack_inputs,
313
- arguments=dict(seq_length=seq_length),
314
- name='packer')
315
- model_inputs = packer(truncated_segments)
316
- return tf.keras.Model(input_segments, model_inputs)
317
-
318
- def process(prompt, lang):
319
-
320
- # Getting prompt user
321
- #prompt = input("Audio Search - enter text : ")
322
- #print(prompt)
323
-
324
- # prompt embedding
325
- bert_model_name = 'small_bert/bert_en_uncased_L-4_H-512_A-8'
326
- tfhub_handle_encoder = 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1'
327
- tfhub_handle_preprocess = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'
328
-
329
- MAX_LENGTH = 130 # MAX de 512 !!! TENSORFLOW !!!
330
- TOP = 10
331
-
332
-
333
- bert_preprocess_model = make_bert_preprocess_model(['my_input'], tfhub_handle_preprocess, seq_length = MAX_LENGTH)
334
- bert_model = hub.KerasLayer(tfhub_handle_encoder)
335
-
336
- now = datetime.datetime.now()
337
- print()
338
- print('*************')
339
- print("Current Time: ", str(now))
340
- print("Text input : ", prompt)
341
- print('*************')
342
- print()
343
- prompt=[prompt]
344
- text_preprocessed = bert_preprocess_model([np.array(prompt)])
345
- embed_prompt = bert_model(text_preprocessed)
346
- print(" text representation computed.")
347
-
348
- # Embed text
349
- #from models import *
350
- encoder_text = tf.keras.models.load_model(encoder_text_path)
351
- embed_query = encoder_text.predict(embed_prompt["pooled_output"])
352
- faiss.normalize_L2(embed_query)
353
- print(" text embed computed.")
354
-
355
- # load embed audio catalog
356
- index = faiss.read_index("BMG_221022.index")
357
-
358
- # distance computing
359
- D, I = index.search(embed_query, TOP)
360
-
361
- # names index
362
- import joblib
363
- audio_names = joblib.load(open('BMG_221022_names.index', 'rb'))
364
-
365
- #url
366
- url_dict={}
367
- with open("bmg_clean.csv") as csv_file:
368
- csv_reader = csv.reader(csv_file, delimiter=';')
369
- for row in csv_reader:
370
- f = row[2].split('/')[-1]
371
- url_dict[f.split('/')[-1][:-4]] = row[2]
372
-
373
- # output : top N audio file names
374
- print(I)
375
- print(D)
376
- print("----")
377
- for i in range(len(I[0])):
378
- print(audio_names[I[0][i]], " with distance ", D[0][i])
379
- print(" url : ", url_dict[audio_names[I[0][i]]])
380
-
381
-
382
- return [url_dict[audio_names[I[0][0]]], url_dict[audio_names[I[0][1]]], url_dict[audio_names[I[0][2]]], url_dict[audio_names[I[0][3]]], url_dict[audio_names[I[0][4]]]]