Spaces:
Sleeping
Sleeping
Math error
Browse files
app/image_captioning_service.py
CHANGED
|
@@ -6,6 +6,7 @@ import nltk
|
|
| 6 |
import pickle
|
| 7 |
import warnings
|
| 8 |
import logging
|
|
|
|
| 9 |
warnings.filterwarnings("ignore")
|
| 10 |
|
| 11 |
# Configure logging
|
|
@@ -184,7 +185,6 @@ class EncoderCNN(torch.nn.Module):
|
|
| 184 |
class PositionalEncoding(torch.nn.Module):
|
| 185 |
def __init__(self, d_model, max_len=5000):
|
| 186 |
super(PositionalEncoding, self).__init__()
|
| 187 |
-
import math
|
| 188 |
|
| 189 |
# Create positional encoding
|
| 190 |
pe = torch.zeros(max_len, d_model)
|
|
@@ -208,6 +208,9 @@ class TransformerDecoder(torch.nn.Module):
|
|
| 208 |
super(TransformerDecoder, self).__init__()
|
| 209 |
import math
|
| 210 |
|
|
|
|
|
|
|
|
|
|
| 211 |
# Embedding layer
|
| 212 |
self.embedding = torch.nn.Embedding(vocab_size, embed_dim)
|
| 213 |
self.positional_encoding = PositionalEncoding(embed_dim)
|
|
@@ -241,7 +244,7 @@ class TransformerDecoder(torch.nn.Module):
|
|
| 241 |
tgt_mask = self.generate_square_subsequent_mask(tgt.size(1)).to(tgt.device)
|
| 242 |
|
| 243 |
# Embed tokens and add positional encoding
|
| 244 |
-
tgt = self.embedding(tgt) * math.sqrt(self.embedding.embedding_dim)
|
| 245 |
tgt = self.positional_encoding(tgt)
|
| 246 |
tgt = self.dropout(tgt)
|
| 247 |
|
|
@@ -262,6 +265,9 @@ class ImageCaptioningModel(torch.nn.Module):
|
|
| 262 |
def __init__(self, vocab_size, embed_dim, hidden_dim, num_heads, num_layers):
|
| 263 |
super(ImageCaptioningModel, self).__init__()
|
| 264 |
|
|
|
|
|
|
|
|
|
|
| 265 |
# Image encoder
|
| 266 |
self.encoder = EncoderCNN(embed_dim)
|
| 267 |
|
|
@@ -295,7 +301,7 @@ class ImageCaptioningModel(torch.nn.Module):
|
|
| 295 |
img_features = img_features.unsqueeze(1)
|
| 296 |
|
| 297 |
# Start with <SOS> token
|
| 298 |
-
current_ids = torch.tensor([[vocab.word2idx['<SOS>']]], dtype=torch.long).to(image.device)
|
| 299 |
|
| 300 |
# Generate words one by one
|
| 301 |
result_caption = []
|
|
|
|
| 6 |
import pickle
|
| 7 |
import warnings
|
| 8 |
import logging
|
| 9 |
+
import math
|
| 10 |
warnings.filterwarnings("ignore")
|
| 11 |
|
| 12 |
# Configure logging
|
|
|
|
| 185 |
class PositionalEncoding(torch.nn.Module):
|
| 186 |
def __init__(self, d_model, max_len=5000):
|
| 187 |
super(PositionalEncoding, self).__init__()
|
|
|
|
| 188 |
|
| 189 |
# Create positional encoding
|
| 190 |
pe = torch.zeros(max_len, d_model)
|
|
|
|
| 208 |
super(TransformerDecoder, self).__init__()
|
| 209 |
import math
|
| 210 |
|
| 211 |
+
# Store math module as an instance variable so we can use it in forward
|
| 212 |
+
self.math = math
|
| 213 |
+
|
| 214 |
# Embedding layer
|
| 215 |
self.embedding = torch.nn.Embedding(vocab_size, embed_dim)
|
| 216 |
self.positional_encoding = PositionalEncoding(embed_dim)
|
|
|
|
| 244 |
tgt_mask = self.generate_square_subsequent_mask(tgt.size(1)).to(tgt.device)
|
| 245 |
|
| 246 |
# Embed tokens and add positional encoding
|
| 247 |
+
tgt = self.embedding(tgt) * self.math.sqrt(self.embedding.embedding_dim)
|
| 248 |
tgt = self.positional_encoding(tgt)
|
| 249 |
tgt = self.dropout(tgt)
|
| 250 |
|
|
|
|
| 265 |
def __init__(self, vocab_size, embed_dim, hidden_dim, num_heads, num_layers):
|
| 266 |
super(ImageCaptioningModel, self).__init__()
|
| 267 |
|
| 268 |
+
# Make sure math is available
|
| 269 |
+
self.math = math
|
| 270 |
+
|
| 271 |
# Image encoder
|
| 272 |
self.encoder = EncoderCNN(embed_dim)
|
| 273 |
|
|
|
|
| 301 |
img_features = img_features.unsqueeze(1)
|
| 302 |
|
| 303 |
# Start with <SOS> token
|
| 304 |
+
current_ids = torch.tensor([[vocab.word2idx['<SOS>']]], dtype=torch.long).to(image.device)
|
| 305 |
|
| 306 |
# Generate words one by one
|
| 307 |
result_caption = []
|