Spaces:

dixisouls
/

image-captioning-api

Running

App Files Files Community

dixisouls committed on Mar 18, 2025

Commit

ce43c34

1 Parent(s): 91a5e40

new file

Browse files

Files changed (4) hide show

Dockerfile +10 -4
app.py +54 -16
app/download_resnet.py +42 -0
app/image_captioning_service.py +70 -10

Dockerfile CHANGED Viewed

@@ -22,11 +22,14 @@ RUN mkdir -p app/models && chmod 777 app/models
 COPY app ./app
 COPY app.py .
-# Create NLTK data directory with proper permissions
-RUN mkdir -p /usr/local/share/nltk_data && chmod 777 /usr/local/share/nltk_data
-# Set NLTK_DATA environment variable
-ENV NLTK_DATA=/usr/local/share/nltk_data
 # Download NLTK data with explicit directory
 RUN python -c "import nltk; nltk.download('punkt', download_dir='/usr/local/share/nltk_data')"
@@ -34,6 +37,9 @@ RUN python -c "import nltk; nltk.download('punkt', download_dir='/usr/local/shar
 # Download model files during build
 RUN python -m app.download_model
 # Expose port
 EXPOSE 7860

 COPY app ./app
 COPY app.py .
+# Create cache directories with proper permissions
+RUN mkdir -p /.cache && chmod 777 /.cache
+RUN mkdir -p /root/.cache/torch && chmod -R 777 /root/.cache
+RUN mkdir -p /home/.cache/torch && chmod -R 777 /home/.cache
+# Set PyTorch and Transformers cache environment variables
+ENV TORCH_HOME=/home/.cache/torch
+ENV TRANSFORMERS_CACHE=/home/.cache/transformers
 # Download NLTK data with explicit directory
 RUN python -c "import nltk; nltk.download('punkt', download_dir='/usr/local/share/nltk_data')"
 # Download model files during build
 RUN python -m app.download_model
+# Download ResNet50 model to avoid permission issues at runtime
+RUN python -m app.download_resnet
 # Expose port
 EXPOSE 7860

app.py CHANGED Viewed

@@ -1,19 +1,3 @@
-"""
-Main application entry point for Image Captioning API
-"""
-import os
-import sys
-import logging
-import nltk
-# Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-)
-logger = logging.getLogger(__name__)
-# Setup NLTK data path
 def setup_nltk():
     """Set up NLTK data directory and ensure punkt tokenizer is available"""
     logger.info("Setting up NLTK...")
@@ -51,6 +35,57 @@ def setup_nltk():
         # If we get here, we couldn't download punkt anywhere
         logger.error("Could not download NLTK punkt tokenizer to any location")
         logger.error("The application may not function correctly")
 # Check if model files exist and download if needed
 def ensure_models_exist():
@@ -65,6 +100,9 @@ def ensure_models_exist():
         logger.info("Model files found.")
 if __name__ == "__main__":
     # Setup NLTK
     setup_nltk()

 def setup_nltk():
     """Set up NLTK data directory and ensure punkt tokenizer is available"""
     logger.info("Setting up NLTK...")
         # If we get here, we couldn't download punkt anywhere
         logger.error("Could not download NLTK punkt tokenizer to any location")
         logger.error("The application may not function correctly")
+"""
+Main application entry point for Image Captioning API
+"""
+import os
+import sys
+import logging
+import nltk
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+# Setup NLTK data path
+def setup_cache_directories():
+    """Create and set up cache directories for PyTorch and other libraries"""
+    cache_dirs = [
+        '/.cache',
+        '/root/.cache',
+        '/root/.cache/torch',
+        '/home/.cache',
+        '/home/.cache/torch',
+        '/tmp/.cache',
+        '/tmp/.cache/torch'
+    ]
+    for directory in cache_dirs:
+        try:
+            os.makedirs(directory, exist_ok=True)
+            # Try to set permissions
+            try:
+                os.chmod(directory, 0o777)
+                logger.info(f"Created cache directory with permissions: {directory}")
+            except Exception as e:
+                logger.warning(f"Could not set permissions for {directory}: {e}")
+        except Exception as e:
+            logger.warning(f"Could not create cache directory {directory}: {e}")
+    # Try setting environment variables for torch home
+    for cache_dir in ['/home/.cache/torch', '/tmp/.cache/torch', './torch_cache']:
+        try:
+            os.makedirs(cache_dir, exist_ok=True)
+            os.environ['TORCH_HOME'] = cache_dir
+            logger.info(f"Set TORCH_HOME to {cache_dir}")
+            break
+        except Exception as e:
+            logger.warning(f"Could not use {cache_dir} as TORCH_HOME: {e}")
+    logger.info(f"TORCH_HOME is set to: {os.environ.get('TORCH_HOME', 'Not set')}")
 # Check if model files exist and download if needed
 def ensure_models_exist():
         logger.info("Model files found.")
 if __name__ == "__main__":
+    # Setup cache directories
+    setup_cache_directories()
     # Setup NLTK
     setup_nltk()

app/download_resnet.py ADDED Viewed

	@@ -0,0 +1,42 @@

+#!/usr/bin/env python3
+"""
+Script to download ResNet50 model and save it locally to avoid
+permission issues when downloading at runtime.
+"""
+import os
+import torch
+import torchvision.models as models
+import logging
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+def download_resnet():
+    """Download ResNet50 model and save it to app/models/resnet50.pth"""
+    logger.info("Downloading ResNet50 model...")
+    # Create models directory if it doesn't exist
+    os.makedirs("app/models", exist_ok=True)
+    # Create torch cache directory with proper permissions
+    os.makedirs("/tmp/torch_cache", exist_ok=True)
+    os.environ["TORCH_HOME"] = "/tmp/torch_cache"
+    try:
+        # Load the model
+        model = models.resnet50(pretrained=True)
+        # Save the model
+        output_path = "app/models/resnet50.pth"
+        torch.save(model.state_dict(), output_path)
+        logger.info(f"ResNet50 model saved to {output_path}")
+        return True
+    except Exception as e:
+        logger.error(f"Error downloading ResNet50 model: {e}")
+        return False
+# Script entry point: run the download when this module is executed directly
+# (for example via `python -m app.download_resnet`).
+# NOTE: the boolean returned by download_resnet() is discarded here, so a
+# failed download does not change the process exit status.
+if __name__ == "__main__":
+    download_resnet()

app/image_captioning_service.py CHANGED Viewed

@@ -123,7 +123,46 @@ class EncoderCNN(torch.nn.Module):
         super(EncoderCNN, self).__init__()
         # Load pretrained ResNet
         import torchvision.models as models
-        resnet = models.resnet50(pretrained=True)
         # Remove the final FC layer
         modules = list(resnet.children())[:-1]
         self.resnet = torch.nn.Sequential(*modules)
@@ -324,6 +363,14 @@ def generate_caption(
     if not os.path.exists(vocab_path):
         raise FileNotFoundError(f"Vocabulary not found at {vocab_path}")
     # Load vocabulary
     logger.info(f"Loading vocabulary from {vocab_path}")
     vocab = Vocabulary.load(vocab_path)
@@ -348,18 +395,31 @@ def generate_caption(
     # Load model weights
     logger.info(f"Loading model weights from {model_path}")
-    checkpoint = torch.load(model_path, map_location=device)
-    model.load_state_dict(checkpoint['model_state_dict'])
-    model.eval()
     # Load and process image
     logger.info(f"Loading and processing image from {image_path}")
-    image = load_image(image_path)
-    image = image.to(device)
     # Generate caption
     logger.info("Generating caption")
-    caption = model.generate_caption(image, vocab, max_length=max_length)
-    logger.info(f"Generated caption: {caption}")
-    return caption

         super(EncoderCNN, self).__init__()
         # Load pretrained ResNet
         import torchvision.models as models
+        # Try different approaches to load ResNet50
+        resnet = None
+        # Option 1: Try to load the locally saved model
+        try:
+            logger.info("Trying to load locally saved ResNet50 model...")
+            resnet = models.resnet50(pretrained=False)
+            local_model_path = "app/models/resnet50.pth"
+            if os.path.exists(local_model_path):
+                resnet.load_state_dict(torch.load(local_model_path))
+                logger.info("Successfully loaded ResNet50 from local file")
+            else:
+                logger.warning(f"Local ResNet50 model not found at {local_model_path}")
+                # Fall back to pretrained model
+                resnet = None
+        except Exception as e:
+            logger.warning(f"Error loading local ResNet50 model: {str(e)}")
+            resnet = None
+        # Option 2: Try loading with pretrained weights
+        if resnet is None:
+            try:
+                logger.info("Trying to load ResNet50 with pretrained weights...")
+                # Set cache directory
+                os.makedirs('/tmp/torch_cache', exist_ok=True)
+                os.environ['TORCH_HOME'] = '/tmp/torch_cache'
+                resnet = models.resnet50(pretrained=True)
+                logger.info("Successfully loaded pretrained ResNet50 model")
+            except Exception as e:
+                logger.warning(f"Error loading pretrained ResNet50: {str(e)}")
+                resnet = None
+        # Option 3: Fall back to model without pretrained weights
+        if resnet is None:
+            logger.info("Falling back to ResNet50 without pretrained weights...")
+            resnet = models.resnet50(pretrained=False)
+            logger.warning("Using ResNet50 WITHOUT pretrained weights - captions may be less accurate")
         # Remove the final FC layer
         modules = list(resnet.children())[:-1]
         self.resnet = torch.nn.Sequential(*modules)
     if not os.path.exists(vocab_path):
         raise FileNotFoundError(f"Vocabulary not found at {vocab_path}")
+    # Setup temporary cache directory for torch if needed
+    try:
+        os.makedirs('/tmp/torch_cache', exist_ok=True)
+        os.environ['TORCH_HOME'] = '/tmp/torch_cache'
+        logger.info(f"Set TORCH_HOME to /tmp/torch_cache")
+    except Exception as e:
+        logger.warning(f"Could not set up temporary torch cache: {e}")
     # Load vocabulary
     logger.info(f"Loading vocabulary from {vocab_path}")
     vocab = Vocabulary.load(vocab_path)
     # Load model weights
     logger.info(f"Loading model weights from {model_path}")
+    try:
+        checkpoint = torch.load(model_path, map_location=device)
+        model.load_state_dict(checkpoint['model_state_dict'])
+        model.eval()
+        logger.info("Model loaded successfully")
+    except Exception as e:
+        logger.error(f"Error loading model: {str(e)}")
+        raise
     # Load and process image
     logger.info(f"Loading and processing image from {image_path}")
+    try:
+        image = load_image(image_path)
+        image = image.to(device)
+        logger.info("Image processed successfully")
+    except Exception as e:
+        logger.error(f"Error processing image: {str(e)}")
+        raise
     # Generate caption
     logger.info("Generating caption")
+    try:
+        caption = model.generate_caption(image, vocab, max_length=max_length)
+        logger.info(f"Generated caption: {caption}")
+        return caption
+    except Exception as e:
+        logger.error(f"Error generating caption: {str(e)}")
+        raise