Spaces:

AhsanAftab
/

Action-recognition-and-captioning

Sleeping

App Files Files Community

AhsanAftab committed on Jan 4

Commit

4d0c7ca

verified ·

1 Parent(s): 30dbc69

Update model_loader.py

Browse files

Files changed (1) hide show

model_loader.py +45 -65

model_loader.py CHANGED Viewed

@@ -4,72 +4,51 @@ from torchvision import models
 import pickle
 from pathlib import Path
 import sys
-# ==========================================
-# 1. DEFINE THE VOCABULARY CLASS
-# (This allows pickle to reconstruct the object)
-# ==========================================
-class Vocabulary:
-    def __init__(self, freq_threshold):
-        self.itos = {0: "<PAD>", 1: "<SOS>", 2: "<EOS>", 3: "<UNK>"}
-        self.stoi = {"<PAD>": 0, "<SOS>": 1, "<EOS>": 2, "<UNK>": 3}
-        self.freq_threshold = freq_threshold
     def __len__(self):
-        return len(self.itos)
-    @staticmethod
-    def tokenizer_eng(text):
-        return text.lower().split()
-    def build_vocabulary(self, sentence_list):
-        frequencies = {}
-        idx = 4
-        for sentence in sentence_list:
-            for word in self.tokenizer_eng(sentence):
-                if word not in frequencies:
-                    frequencies[word] = 1
-                else:
-                    frequencies[word] += 1
-                if frequencies[word] == self.freq_threshold:
-                    self.stoi[word] = idx
-                    self.itos[idx] = word
-                    idx += 1
-    def numericalize(self, text):
-        tokenized_text = self.tokenizer_eng(text)
-        return [
-            self.stoi[token] if token in self.stoi else self.stoi["<UNK>"]
-            for token in tokenized_text
-        ]
-    # Helper for inference
-    def decode(self, tokens):
-        return [self.itos[token] if token in self.itos else "<UNK>" for token in tokens]
-    @property
-    def start_token(self):
-        return "<SOS>"
-    @property
-    def end_token(self):
-        return "<EOS>"
-    @property
-    def pad_token(self):
-        return "<PAD>"
-# ==========================================
-# 2. REDIRECT __main__.Vocabulary
-# (Crucial step for pickle loading)
-# ==========================================
 import __main__
 setattr(__main__, "Vocabulary", Vocabulary)
-# ==========================================
-# MODEL CLASSES
-# ==========================================
 class EncoderCNN(nn.Module):
     def __init__(self, embed_size):
@@ -153,9 +132,6 @@ class ActionRecognitionModel(nn.Module):
     def forward(self, x):
         return self.backbone(x)
-# ==========================================
-# LOADER FUNCTIONS
-# ==========================================
 def load_caption_model(device, model_dir=None):
     if model_dir is None:
@@ -168,10 +144,14 @@ def load_caption_model(device, model_dir=None):
         config = pickle.load(f)
     # Load vocabulary
-    # The 'setattr' fix above allows this line to work
-    with open(model_dir / 'vocab.pkl', 'rb') as f:
-        vocab = pickle.load(f)
     # Create model
     model = ImageCaptioningModel(
         embed_size=config['embed_size'],

 import pickle
 from pathlib import Path
 import sys
+import logging
+# Configure logger
+logger = logging.getLogger(__name__)
+class Vocabulary:
+    def __init__(self, freq_threshold=5):
+        self.freq_threshold = freq_threshold
+        self.word2idx = {}
+        self.idx2word = {}
+        self.idx = 0
+        # Special tokens
+        self.pad_token = "<PAD>"
+        self.start_token = "<SOS>"
+        self.end_token = "<EOS>"
+        self.unk_token = "<UNK>"
+        # Add special tokens
+        for token in [self.pad_token, self.start_token, self.end_token, self.unk_token]:
+            self.add_word(token)
+    def add_word(self, word):
+        """Add a word to the vocabulary"""
+        if word not in self.word2idx:
+            self.word2idx[word] = self.idx
+            self.idx2word[self.idx] = word
+            self.idx += 1
     def __len__(self):
+        return len(self.word2idx)
+    def __call__(self, word):
+        """Convert word to index"""
+        if word not in self.word2idx:
+            return self.word2idx[self.unk_token]
+        return self.word2idx[word]
+    def decode(self, indices):
+        """Convert indices back to words"""
+        return [self.idx2word[idx] for idx in indices if idx in self.idx2word]
 import __main__
 setattr(__main__, "Vocabulary", Vocabulary)
 class EncoderCNN(nn.Module):
     def __init__(self, embed_size):
     def forward(self, x):
         return self.backbone(x)
 def load_caption_model(device, model_dir=None):
     if model_dir is None:
         config = pickle.load(f)
     # Load vocabulary
+    try:
+        with open(model_dir / 'vocab.pkl', 'rb') as f:
+            vocab = pickle.load(f)
+        logger.info(f"Vocabulary loaded successfully. Size: {len(vocab)}")
+    except Exception as e:
+        logger.error(f"Failed to load vocabulary: {e}")
+        raise e
     # Create model
     model = ImageCaptioningModel(
         embed_size=config['embed_size'],