dixisouls committed on
Commit
8de6539
·
1 Parent(s): 2c4ca43

Math error

Browse files
Files changed (1) hide show
  1. app/image_captioning_service.py +9 -3
app/image_captioning_service.py CHANGED
@@ -6,6 +6,7 @@ import nltk
6
  import pickle
7
  import warnings
8
  import logging
 
9
  warnings.filterwarnings("ignore")
10
 
11
  # Configure logging
@@ -184,7 +185,6 @@ class EncoderCNN(torch.nn.Module):
184
  class PositionalEncoding(torch.nn.Module):
185
  def __init__(self, d_model, max_len=5000):
186
  super(PositionalEncoding, self).__init__()
187
- import math
188
 
189
  # Create positional encoding
190
  pe = torch.zeros(max_len, d_model)
@@ -208,6 +208,9 @@ class TransformerDecoder(torch.nn.Module):
208
  super(TransformerDecoder, self).__init__()
209
  import math
210
 
 
 
 
211
  # Embedding layer
212
  self.embedding = torch.nn.Embedding(vocab_size, embed_dim)
213
  self.positional_encoding = PositionalEncoding(embed_dim)
@@ -241,7 +244,7 @@ class TransformerDecoder(torch.nn.Module):
241
  tgt_mask = self.generate_square_subsequent_mask(tgt.size(1)).to(tgt.device)
242
 
243
  # Embed tokens and add positional encoding
244
- tgt = self.embedding(tgt) * math.sqrt(self.embedding.embedding_dim)
245
  tgt = self.positional_encoding(tgt)
246
  tgt = self.dropout(tgt)
247
 
@@ -262,6 +265,9 @@ class ImageCaptioningModel(torch.nn.Module):
262
  def __init__(self, vocab_size, embed_dim, hidden_dim, num_heads, num_layers):
263
  super(ImageCaptioningModel, self).__init__()
264
 
 
 
 
265
  # Image encoder
266
  self.encoder = EncoderCNN(embed_dim)
267
 
@@ -295,7 +301,7 @@ class ImageCaptioningModel(torch.nn.Module):
295
  img_features = img_features.unsqueeze(1)
296
 
297
  # Start with < SOS > token
298
- current_ids = torch.tensor([[vocab.word2idx['<SOS>']]], dtype=torch.long).to(image.device)
299
 
300
  # Generate words one by one
301
  result_caption = []
 
6
  import pickle
7
  import warnings
8
  import logging
9
+ import math
10
  warnings.filterwarnings("ignore")
11
 
12
  # Configure logging
 
185
  class PositionalEncoding(torch.nn.Module):
186
  def __init__(self, d_model, max_len=5000):
187
  super(PositionalEncoding, self).__init__()
 
188
 
189
  # Create positional encoding
190
  pe = torch.zeros(max_len, d_model)
 
208
  super(TransformerDecoder, self).__init__()
209
  import math
210
 
211
+ # Store math module as an instance variable so we can use it in forward
212
+ self.math = math
213
+
214
  # Embedding layer
215
  self.embedding = torch.nn.Embedding(vocab_size, embed_dim)
216
  self.positional_encoding = PositionalEncoding(embed_dim)
 
244
  tgt_mask = self.generate_square_subsequent_mask(tgt.size(1)).to(tgt.device)
245
 
246
  # Embed tokens and add positional encoding
247
+ tgt = self.embedding(tgt) * self.math.sqrt(self.embedding.embedding_dim)
248
  tgt = self.positional_encoding(tgt)
249
  tgt = self.dropout(tgt)
250
 
 
265
  def __init__(self, vocab_size, embed_dim, hidden_dim, num_heads, num_layers):
266
  super(ImageCaptioningModel, self).__init__()
267
 
268
+ # Make sure math is available
269
+ self.math = math
270
+
271
  # Image encoder
272
  self.encoder = EncoderCNN(embed_dim)
273
 
 
301
  img_features = img_features.unsqueeze(1)
302
 
303
  # Start with < SOS > token
304
+ current_ids = torch.tensor([[vocab.word2idx['< SOS >']]], dtype=torch.long).to(image.device)
305
 
306
  # Generate words one by one
307
  result_caption = []