Spaces:
Sleeping
Sleeping
nltk error
Browse files
- Dockerfile +8 -2
- app.py +44 -0
- app/fix_vocab_pickle.py +14 -1
- app/image_captioning_service.py +14 -1
Dockerfile
CHANGED
|
@@ -22,8 +22,14 @@ RUN mkdir -p app/models && chmod 777 app/models
|
|
| 22 |
COPY app ./app
|
| 23 |
COPY app.py .
|
| 24 |
|
| 25 |
-
#
|
| 26 |
-
RUN
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
# Download model files during build
|
| 29 |
RUN python -m app.download_model
|
|
|
|
| 22 |
COPY app ./app
|
| 23 |
COPY app.py .
|
| 24 |
|
| 25 |
+
# Create NLTK data directory with proper permissions
|
| 26 |
+
RUN mkdir -p /usr/local/share/nltk_data && chmod 777 /usr/local/share/nltk_data
|
| 27 |
+
|
| 28 |
+
# Set NLTK_DATA environment variable
|
| 29 |
+
ENV NLTK_DATA=/usr/local/share/nltk_data
|
| 30 |
+
|
| 31 |
+
# Download NLTK data with explicit directory
|
| 32 |
+
RUN python -c "import nltk; nltk.download('punkt', download_dir='/usr/local/share/nltk_data')"
|
| 33 |
|
| 34 |
# Download model files during build
|
| 35 |
RUN python -m app.download_model
|
app.py
CHANGED
|
@@ -2,7 +2,9 @@
|
|
| 2 |
Main application entry point for Image Captioning API
|
| 3 |
"""
|
| 4 |
import os
|
|
|
|
| 5 |
import logging
|
|
|
|
| 6 |
|
| 7 |
# Configure logging
|
| 8 |
logging.basicConfig(
|
|
@@ -11,6 +13,45 @@ logging.basicConfig(
|
|
| 11 |
)
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
# Check if model files exist and download if needed
|
| 15 |
def ensure_models_exist():
|
| 16 |
model_path = "app/models/image_captioning_model.pth"
|
|
@@ -24,6 +65,9 @@ def ensure_models_exist():
|
|
| 24 |
logger.info("Model files found.")
|
| 25 |
|
| 26 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
| 27 |
# Ensure model files exist
|
| 28 |
ensure_models_exist()
|
| 29 |
|
|
|
|
| 2 |
Main application entry point for Image Captioning API
|
| 3 |
"""
|
| 4 |
import os
|
| 5 |
+
import sys
|
| 6 |
import logging
|
| 7 |
+
import nltk
|
| 8 |
|
| 9 |
# Configure logging
|
| 10 |
logging.basicConfig(
|
|
|
|
| 13 |
)
|
| 14 |
logger = logging.getLogger(__name__)
|
| 15 |
|
| 16 |
+
# Setup NLTK data path
|
| 17 |
+
def setup_nltk():
    """Set up NLTK data directories and ensure the punkt tokenizer is available.

    Each candidate directory is created on a best-effort basis and registered
    on ``nltk.data.path`` so that lookups actually search it. If
    ``tokenizers/punkt`` is still not found, a download is attempted into each
    candidate in turn, stopping at the first one that succeeds.

    Failures are logged and never raised: the application is allowed to start
    even when the tokenizer could not be obtained.
    """
    logger.info("Setting up NLTK...")

    # Create potential NLTK data directories with proper permissions
    nltk_dirs = [
        os.path.expanduser('~/.nltk_data'),
        './nltk_data',
        '/usr/local/share/nltk_data'
    ]

    for directory in nltk_dirs:
        try:
            os.makedirs(directory, exist_ok=True)
            logger.info(f"Created NLTK data directory: {directory}")
        except OSError as e:
            logger.warning(f"Could not create NLTK directory {directory}: {e}")

        # Not every candidate is one of NLTK's default search locations, so
        # register each explicitly; otherwise data downloaded there would
        # never be found by nltk.data.find() or by the tokenizer loader.
        search_dir = os.path.abspath(directory)
        if search_dir not in nltk.data.path:
            nltk.data.path.append(search_dir)

    # Try to find punkt tokenizer
    try:
        nltk.data.find('tokenizers/punkt')
    except LookupError:
        pass
    else:
        logger.info("NLTK punkt tokenizer found!")
        return

    # Not found, try to download to different locations
    for directory in nltk_dirs:
        logger.info(f"Attempting to download punkt tokenizer to {directory}")
        try:
            downloaded = nltk.download('punkt', download_dir=directory)
        except Exception as e:  # best-effort boundary: keep trying other dirs
            logger.warning(f"Failed to download punkt to {directory}: {e}")
            continue

        # nltk.download() reports most failures by returning False instead of
        # raising, so the return value must be checked before claiming success.
        if downloaded:
            logger.info(f"Successfully downloaded punkt tokenizer to {directory}")
            return
        logger.warning(
            f"Failed to download punkt to {directory}: download reported failure"
        )

    # If we get here, we couldn't download punkt anywhere
    logger.error("Could not download NLTK punkt tokenizer to any location")
    logger.error("The application may not function correctly")
|
| 54 |
+
|
| 55 |
# Check if model files exist and download if needed
|
| 56 |
def ensure_models_exist():
|
| 57 |
model_path = "app/models/image_captioning_model.pth"
|
|
|
|
| 65 |
logger.info("Model files found.")
|
| 66 |
|
| 67 |
if __name__ == "__main__":
|
| 68 |
+
# Setup NLTK
|
| 69 |
+
setup_nltk()
|
| 70 |
+
|
| 71 |
# Ensure model files exist
|
| 72 |
ensure_models_exist()
|
| 73 |
|
app/fix_vocab_pickle.py
CHANGED
|
@@ -17,7 +17,20 @@ logger = logging.getLogger(__name__)
|
|
| 17 |
try:
|
| 18 |
nltk.data.find('tokenizers/punkt')
|
| 19 |
except LookupError:
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
# Vocabulary class for loading the vocabulary
|
| 23 |
class Vocabulary:
|
|
|
|
| 17 |
def _ensure_punkt():
    """Make sure the NLTK punkt tokenizer is available, downloading it if needed.

    Looks up ``tokenizers/punkt`` first. When it is missing, tries to download
    it into the user's home directory and then into the current directory,
    stopping at the first location that works. The directory that received the
    download is added to ``nltk.data.path`` so later lookups can find it.

    Never raises: if every attempt fails, the error is logged and execution
    continues, as the data might still be available elsewhere.
    """
    try:
        nltk.data.find('tokenizers/punkt')
        return
    except LookupError:
        pass

    # Try to download to a directory where we have write permissions
    candidates = (
        (os.path.expanduser('~/.nltk_data'), "user home directory"),
        ('./nltk_data', "current directory"),
    )
    last_error = "no download location succeeded"
    for target, label in candidates:
        try:
            os.makedirs(target, exist_ok=True)
            downloaded = nltk.download('punkt', download_dir=target)
        except Exception as e:  # best-effort: fall through to the next location
            last_error = str(e)
            continue

        # nltk.download() signals most failures by returning False rather than
        # raising, so the fallback must be driven by the return value.
        if not downloaded:
            last_error = f"download to {target} did not complete"
            continue

        # These locations are not guaranteed to be on NLTK's search path.
        search_dir = os.path.abspath(target)
        if search_dir not in nltk.data.path:
            nltk.data.path.append(search_dir)
        logger.info(f"Downloaded NLTK punkt to {label}")
        return

    logger.error(f"Failed to download NLTK punkt: {last_error}")
    # Continue anyway, as we might have the data elsewhere


_ensure_punkt()
|
| 34 |
|
| 35 |
# Vocabulary class for loading the vocabulary
|
| 36 |
class Vocabulary:
|
app/image_captioning_service.py
CHANGED
|
@@ -16,7 +16,20 @@ logger = logging.getLogger(__name__)
|
|
| 16 |
try:
|
| 17 |
nltk.data.find('tokenizers/punkt')
|
| 18 |
except LookupError:
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
# Vocabulary class for loading the vocabulary
|
| 22 |
class Vocabulary:
|
|
|
|
| 16 |
def _ensure_punkt():
    """Make sure the NLTK punkt tokenizer is available, downloading it if needed.

    Looks up ``tokenizers/punkt`` first. When it is missing, tries to download
    it into the user's home directory and then into the current directory,
    stopping at the first location that works. The directory that received the
    download is added to ``nltk.data.path`` so later lookups can find it.

    Never raises: if every attempt fails, the error is logged and execution
    continues, as the data might still be available elsewhere.
    """
    try:
        nltk.data.find('tokenizers/punkt')
        return
    except LookupError:
        pass

    # Try to download to a directory where we have write permissions
    candidates = (
        (os.path.expanduser('~/.nltk_data'), "user home directory"),
        ('./nltk_data', "current directory"),
    )
    last_error = "no download location succeeded"
    for target, label in candidates:
        try:
            os.makedirs(target, exist_ok=True)
            downloaded = nltk.download('punkt', download_dir=target)
        except Exception as e:  # best-effort: fall through to the next location
            last_error = str(e)
            continue

        # nltk.download() signals most failures by returning False rather than
        # raising, so the fallback must be driven by the return value.
        if not downloaded:
            last_error = f"download to {target} did not complete"
            continue

        # These locations are not guaranteed to be on NLTK's search path.
        search_dir = os.path.abspath(target)
        if search_dir not in nltk.data.path:
            nltk.data.path.append(search_dir)
        logger.info(f"Downloaded NLTK punkt to {label}")
        return

    logger.error(f"Failed to download NLTK punkt: {last_error}")
    # Continue anyway, as we might have the data elsewhere


_ensure_punkt()
|
| 33 |
|
| 34 |
# Vocabulary class for loading the vocabulary
|
| 35 |
class Vocabulary:
|