File size: 4,987 Bytes
9f11f00
 
91a5e40
9f11f00
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7db49ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91a5e40
 
7db49ff
 
 
 
91a5e40
7db49ff
91a5e40
7db49ff
 
 
9f11f00
ce43c34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91a5e40
a0c5c81
 
 
 
 
 
 
 
 
 
 
 
 
ce43c34
 
 
91a5e40
 
 
9f11f00
 
 
a0c5c81
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
def register_vocabulary_in_main():
    """Expose the captioning classes as attributes of the ``__main__`` module.

    The classes are imported from ``app.image_captioning_service`` and each
    one is bound on ``__main__`` under its own name; the stated intent is to
    help unpickling of objects that refer to these classes via ``__main__``.

    This is best effort: any exception (for example a failed import) is
    logged as a warning and swallowed, so the function never raises.
    """
    try:
        logger.info("Registering Vocabulary class in __main__ module")
        import __main__ as main_module
        from app.image_captioning_service import Vocabulary, ImageCaptioningModel, EncoderCNN, TransformerDecoder, PositionalEncoding

        # Attribute name on __main__ -> class object to bind there.
        exported = {
            'Vocabulary': Vocabulary,
            'ImageCaptioningModel': ImageCaptioningModel,
            'EncoderCNN': EncoderCNN,
            'TransformerDecoder': TransformerDecoder,
            'PositionalEncoding': PositionalEncoding,
        }
        for attr_name, cls in exported.items():
            setattr(main_module, attr_name, cls)

        logger.info("Successfully registered classes in __main__")
    except Exception as err:
        logger.warning(f"Could not register classes in __main__: {err}")
def setup_nltk():
    """Ensure the NLTK punkt tokenizer is available, downloading it if needed.

    Steps:
      1. Create each candidate data directory (best effort) and add the ones
         that exist to ``nltk.data.path`` so NLTK actually searches them.
      2. If ``tokenizers/punkt`` is already resolvable, return.
      3. Otherwise try to download ``punkt`` into each candidate directory in
         turn, stopping at the first download that reports success.

    Failures are logged and never raised; if no download succeeds an error is
    logged and the function returns normally.

    Returns:
        None.
    """
    logger.info("Setting up NLTK...")

    # Create potential NLTK data directories with proper permissions
    nltk_dirs = [
        os.path.expanduser('~/.nltk_data'),
        './nltk_data',
        '/usr/local/share/nltk_data'
    ]

    for directory in nltk_dirs:
        try:
            os.makedirs(directory, exist_ok=True)
            logger.info(f"Created NLTK data directory: {directory}")
        except Exception as e:
            logger.warning(f"Could not create NLTK directory {directory}: {e}")
            continue
        # '~/.nltk_data' and './nltk_data' are not on NLTK's default search
        # path, so data stored there would otherwise never be found.
        if directory not in nltk.data.path:
            nltk.data.path.append(directory)

    # Try to find punkt tokenizer
    # NOTE(review): newer NLTK releases appear to load 'punkt_tab' rather than
    # 'punkt' -- confirm against the NLTK version this project pins.
    try:
        nltk.data.find('tokenizers/punkt')
    except LookupError:
        pass
    else:
        logger.info("NLTK punkt tokenizer found!")
        return

    # Not found, try to download to different locations
    for directory in nltk_dirs:
        logger.info(f"Attempting to download punkt tokenizer to {directory}")
        try:
            # nltk.download() signals most failures by returning False rather
            # than raising, so the return value must be checked explicitly.
            downloaded = nltk.download('punkt', download_dir=directory)
        except Exception as e:
            logger.warning(f"Failed to download punkt to {directory}: {e}")
            continue

        if not downloaded:
            logger.warning(
                f"Failed to download punkt to {directory}: downloader reported an error"
            )
            continue

        # Make sure the directory that now holds punkt is searched by NLTK.
        if directory not in nltk.data.path:
            nltk.data.path.append(directory)
        logger.info(f"Successfully downloaded punkt tokenizer to {directory}")
        return

    # If we get here, we couldn't download punkt anywhere
    logger.error("Could not download NLTK punkt tokenizer to any location")
    logger.error("The application may not function correctly")


"""
Main application entry point for Image Captioning API
"""
import os
import sys
import logging
import nltk

# Configure root logging once at import time (INFO level, timestamped
# records) and create the logger used throughout this module.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Create cache directories and point TORCH_HOME at one of them
def setup_cache_directories():
    """Create and set up cache directories for PyTorch and other libraries.

    First, every directory in a fixed list is created (best effort) and its
    mode is set to ``0o777``; failures are logged as warnings and skipped.

    Then ``TORCH_HOME`` is set to the first candidate directory that can be
    created *and* is writable by the current process. If no candidate
    qualifies, ``TORCH_HOME`` is left as it was. The final value is logged.

    Returns:
        None. The function never raises; all errors are logged.
    """
    cache_dirs = [
        '/.cache',
        '/root/.cache',
        '/root/.cache/torch',
        '/home/.cache',
        '/home/.cache/torch',
        '/tmp/.cache',
        '/tmp/.cache/torch'
    ]

    for directory in cache_dirs:
        try:
            os.makedirs(directory, exist_ok=True)
        except Exception as e:
            logger.warning(f"Could not create cache directory {directory}: {e}")
            continue

        # Try to set permissions
        # NOTE(review): 0o777 makes these directories world-writable;
        # presumably acceptable in a single-tenant container -- confirm.
        try:
            os.chmod(directory, 0o777)
            logger.info(f"Created cache directory with permissions: {directory}")
        except Exception as e:
            logger.warning(f"Could not set permissions for {directory}: {e}")

    # Try setting environment variables for torch home
    for cache_dir in ['/home/.cache/torch', '/tmp/.cache/torch', './torch_cache']:
        try:
            os.makedirs(cache_dir, exist_ok=True)
            # makedirs(exist_ok=True) also succeeds for an existing directory
            # this process cannot write to; reject those so the next
            # candidate gets a chance.
            if not os.access(cache_dir, os.W_OK | os.X_OK):
                raise PermissionError(f"{cache_dir} is not writable")
            os.environ['TORCH_HOME'] = cache_dir
            logger.info(f"Set TORCH_HOME to {cache_dir}")
            break
        except Exception as e:
            logger.warning(f"Could not use {cache_dir} as TORCH_HOME: {e}")

    logger.info(f"TORCH_HOME is set to: {os.environ.get('TORCH_HOME', 'Not set')}")

# Check if model files exist and download if needed
def ensure_models_exist():
    """Download the model artifacts unless both are already on disk.

    Checks for the model weights and the vocabulary pickle under
    ``app/models`` (paths are relative to the current working directory).
    If either is missing, ``app.download_model.download_models`` is imported
    and called; otherwise nothing is downloaded.
    """
    required_files = (
        "app/models/image_captioning_model.pth",
        "app/models/vocab.pkl",
    )

    if all(os.path.exists(path) for path in required_files):
        logger.info("Model files found.")
        return

    logger.info("Model files not found. Downloading...")
    # Imported lazily: only needed when a download is actually required.
    from app.download_model import download_models
    download_models()

if __name__ == "__main__":
    # Startup steps, executed strictly in this order:
    # cache directories, NLTK data, class registration on __main__,
    # and finally the model files.
    startup_steps = (
        setup_cache_directories,
        setup_nltk,
        register_vocabulary_in_main,
        ensure_models_exist,
    )
    for step in startup_steps:
        step()

    # The server and the FastAPI app are imported only after the steps above
    # have run (presumably so the app import sees the prepared environment).
    import uvicorn
    from app.api import app

    logger.info("Starting Image Captioning API server...")
    # Listen on all interfaces, port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)