Spaces:

dixisouls
/

image-captioning-api

Sleeping

App Files Community

dixisouls committed on Mar 18, 2025

Commit

9f11f00

1 Parent(s): ce43c34

Vocabulary class error

Browse files

Files changed (3) hide show

app.py +54 -33
app/api.py +9 -1
app/image_captioning_service.py +36 -1

app.py CHANGED Viewed

@@ -1,41 +1,59 @@
-def setup_nltk():
-    """Set up NLTK data directory and ensure punkt tokenizer is available"""
-    logger.info("Setting up NLTK...")
-    # Create potential NLTK data directories with proper permissions
-    nltk_dirs = [
-        os.path.expanduser('~/.nltk_data'),
-        './nltk_data',
-        '/usr/local/share/nltk_data'
-    ]
-    for directory in nltk_dirs:
-        try:
-            os.makedirs(directory, exist_ok=True)
-            logger.info(f"Created NLTK data directory: {directory}")
-        except Exception as e:
-            logger.warning(f"Could not create NLTK directory {directory}: {e}")
-    # Try to find punkt tokenizer
     try:
-        nltk.data.find('tokenizers/punkt')
-        logger.info("NLTK punkt tokenizer found!")
-        return
-    except LookupError:
-        # Not found, try to download to different locations
         for directory in nltk_dirs:
             try:
-                logger.info(f"Attempting to download punkt tokenizer to {directory}")
-                nltk.download('punkt', download_dir=directory)
-                logger.info(f"Successfully downloaded punkt tokenizer to {directory}")
-                return
             except Exception as e:
-                logger.warning(f"Failed to download punkt to {directory}: {e}")
-        # If we get here, we couldn't download punkt anywhere
-        logger.error("Could not download NLTK punkt tokenizer to any location")
-        logger.error("The application may not function correctly")
-"""
 Main application entry point for Image Captioning API
 """
 import os
@@ -106,6 +124,9 @@ if __name__ == "__main__":
     # Setup NLTK
     setup_nltk()
     # Ensure model files exist
     ensure_models_exist()

+def register_vocabulary_in_main():
+    """Register the Vocabulary class in __main__ to help with unpickling"""
     try:
+        logger.info("Registering Vocabulary class in __main__ module")
+        import sys
+        import __main__
+        from app.image_captioning_service import Vocabulary, ImageCaptioningModel, EncoderCNN, TransformerDecoder, PositionalEncoding
+        # Register classes in main module
+        setattr(__main__, 'Vocabulary', Vocabulary)
+        setattr(__main__, 'ImageCaptioningModel', ImageCaptioningModel)
+        setattr(__main__, 'EncoderCNN', EncoderCNN)
+        setattr(__main__, 'TransformerDecoder', TransformerDecoder)
+        setattr(__main__, 'PositionalEncoding', PositionalEncoding)
+        logger.info("Successfully registered classes in __main__")
+    except Exception as e:
+        logger.warning(f"Could not register classes in __main__: {e}")
+    def setup_nltk():
+        """Set up NLTK data directory and ensure punkt tokenizer is available"""
+        logger.info("Setting up NLTK...")
+        # Create potential NLTK data directories with proper permissions
+        nltk_dirs = [
+            os.path.expanduser('~/.nltk_data'),
+            './nltk_data',
+            '/usr/local/share/nltk_data'
+        ]
         for directory in nltk_dirs:
             try:
+                os.makedirs(directory, exist_ok=True)
+                logger.info(f"Created NLTK data directory: {directory}")
             except Exception as e:
+                logger.warning(f"Could not create NLTK directory {directory}: {e}")
+        # Try to find punkt tokenizer
+        try:
+            nltk.data.find('tokenizers/punkt')
+            logger.info("NLTK punkt tokenizer found!")
+            return
+        except LookupError:
+            # Not found, try to download to different locations
+            for directory in nltk_dirs:
+                try:
+                    logger.info(f"Attempting to download punkt tokenizer to {directory}")
+                    nltk.download('punkt', download_dir=directory)
+                    logger.info(f"Successfully downloaded punkt tokenizer to {directory}")
+                    return
+                except Exception as e:
+                    logger.warning(f"Failed to download punkt to {directory}: {e}")
+            # If we get here, we couldn't download punkt anywhere
+            logger.error("Could not download NLTK punkt tokenizer to any location")
+            logger.error("The application may not function correctly")
+        """
 Main application entry point for Image Captioning API
 """
 import os
     # Setup NLTK
     setup_nltk()
+    # Register Vocabulary in main module
+    register_vocabulary_in_main()
     # Ensure model files exist
     ensure_models_exist()

app/api.py CHANGED Viewed

@@ -9,7 +9,15 @@ from typing import Dict, Any
 import torch
 # Import image captioning service
-from app.image_captioning_service import generate_caption
 # Configure logging
 logging.basicConfig(level=logging.INFO)

 import torch
 # Import image captioning service
+from app.image_captioning_service import generate_caption, Vocabulary, ImageCaptioningModel, EncoderCNN, TransformerDecoder, PositionalEncoding
+# Register these classes in the main module to help with unpickling
+import __main__
+setattr(__main__, 'Vocabulary', Vocabulary)
+setattr(__main__, 'ImageCaptioningModel', ImageCaptioningModel)
+setattr(__main__, 'EncoderCNN', EncoderCNN)
+setattr(__main__, 'TransformerDecoder', TransformerDecoder)
+setattr(__main__, 'PositionalEncoding', PositionalEncoding)
 # Configure logging
 logging.basicConfig(level=logging.INFO)

app/image_captioning_service.py CHANGED Viewed

@@ -325,6 +325,8 @@ class ImageCaptioningModel(torch.nn.Module):
             return ' '.join(words)
 def load_image(image_path, transform=None):
     """Load and preprocess an image"""
     image = Image.open(image_path).convert('RGB')
@@ -396,7 +398,40 @@ def generate_caption(
     # Load model weights
     logger.info(f"Loading model weights from {model_path}")
     try:
-        checkpoint = torch.load(model_path, map_location=device)
         model.load_state_dict(checkpoint['model_state_dict'])
         model.eval()
         logger.info("Model loaded successfully")

             return ' '.join(words)
 def load_image(image_path, transform=None):
     """Load and preprocess an image"""
     image = Image.open(image_path).convert('RGB')
     # Load model weights
     logger.info(f"Loading model weights from {model_path}")
     try:
+        # First try our custom loader
+        try:
+            logger.info("Trying custom model loader...")
+            # Replace this with Python's built-in pickle that we can customize
+            # Define a custom unpickler
+            class CustomUnpickler(pickle.Unpickler):
+                def find_class(self, module, name):
+                    # If it's looking for the Vocabulary class in __main__
+                    if name == 'Vocabulary':
+                        # Return our current Vocabulary class
+                        return Vocabulary
+                    if module == '__main__':
+                        if name == 'ImageCaptioningModel':
+                            return ImageCaptioningModel
+                        if name == 'EncoderCNN':
+                            return EncoderCNN
+                        if name == 'TransformerDecoder':
+                            return TransformerDecoder
+                        if name == 'PositionalEncoding':
+                            return PositionalEncoding
+                    # Use the normal behavior for everything else
+                    return super().find_class(module, name)
+            # Use a custom loading approach
+            with open(model_path, 'rb') as f:
+                checkpoint = CustomUnpickler(f).load()
+            logger.info("Successfully loaded model using custom unpickler")
+        except Exception as e:
+            logger.warning(f"Custom loader failed: {str(e)}")
+            logger.info("Falling back to standard torch.load...")
+            # Fall back to standard loader
+            checkpoint = torch.load(model_path, map_location=device)
         model.load_state_dict(checkpoint['model_state_dict'])
         model.eval()
         logger.info("Model loaded successfully")