dixisouls committed on
Commit
91a5e40
·
1 Parent(s): 77684e5

nltk error

Browse files
Files changed (4) hide show
  1. Dockerfile +8 -2
  2. app.py +44 -0
  3. app/fix_vocab_pickle.py +14 -1
  4. app/image_captioning_service.py +14 -1
Dockerfile CHANGED
@@ -22,8 +22,14 @@ RUN mkdir -p app/models && chmod 777 app/models
22
  COPY app ./app
23
  COPY app.py .
24
 
25
- # Download NLTK data
26
- RUN python -c "import nltk; nltk.download('punkt')"
 
 
 
 
 
 
27
 
28
  # Download model files during build
29
  RUN python -m app.download_model
 
22
  COPY app ./app
23
  COPY app.py .
24
 
25
+ # Create NLTK data directory with proper permissions
26
+ RUN mkdir -p /usr/local/share/nltk_data && chmod 777 /usr/local/share/nltk_data
27
+
28
+ # Set NLTK_DATA environment variable
29
+ ENV NLTK_DATA=/usr/local/share/nltk_data
30
+
31
+ # Download NLTK data with explicit directory
32
+ RUN python -c "import nltk; nltk.download('punkt', download_dir='/usr/local/share/nltk_data')"
33
 
34
  # Download model files during build
35
  RUN python -m app.download_model
app.py CHANGED
@@ -2,7 +2,9 @@
2
  Main application entry point for Image Captioning API
3
  """
4
  import os
 
5
  import logging
 
6
 
7
  # Configure logging
8
  logging.basicConfig(
@@ -11,6 +13,45 @@ logging.basicConfig(
11
  )
12
  logger = logging.getLogger(__name__)
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  # Check if model files exist and download if needed
15
  def ensure_models_exist():
16
  model_path = "app/models/image_captioning_model.pth"
@@ -24,6 +65,9 @@ def ensure_models_exist():
24
  logger.info("Model files found.")
25
 
26
  if __name__ == "__main__":
 
 
 
27
  # Ensure model files exist
28
  ensure_models_exist()
29
 
 
2
  Main application entry point for Image Captioning API
3
  """
4
  import os
5
+ import sys
6
  import logging
7
+ import nltk
8
 
9
  # Configure logging
10
  logging.basicConfig(
 
13
  )
14
  logger = logging.getLogger(__name__)
15
 
16
+ # Set up NLTK data directories and make sure the punkt tokenizer is available
17
+ def setup_nltk():
18
+ """Set up NLTK data directory and ensure punkt tokenizer is available"""
19
+ logger.info("Setting up NLTK...")
20
+
21
+ # Create candidate NLTK data directories (default permissions; no explicit mode is set)
22
+ nltk_dirs = [
23
+ os.path.expanduser('~/.nltk_data'),
24
+ './nltk_data',
25
+ '/usr/local/share/nltk_data'
26
+ ]
27
+
28
+ for directory in nltk_dirs:
29
+ try:
30
+ os.makedirs(directory, exist_ok=True)
31
+ logger.info(f"Created NLTK data directory: {directory}")
32
+ except Exception as e:
33
+ logger.warning(f"Could not create NLTK directory {directory}: {e}")
34
+
35
+ # Try to find punkt tokenizer
36
+ try:
37
+ nltk.data.find('tokenizers/punkt')
38
+ logger.info("NLTK punkt tokenizer found!")
39
+ return
40
+ except LookupError:
41
+ # Not found, try to download to different locations
42
+ for directory in nltk_dirs:
43
+ try:
44
+ logger.info(f"Attempting to download punkt tokenizer to {directory}")
45
+ nltk.download('punkt', download_dir=directory)
46
+ logger.info(f"Successfully downloaded punkt tokenizer to {directory}")
47
+ return
48
+ except Exception as e:
49
+ logger.warning(f"Failed to download punkt to {directory}: {e}")
50
+
51
+ # If we get here, we couldn't download punkt anywhere
52
+ logger.error("Could not download NLTK punkt tokenizer to any location")
53
+ logger.error("The application may not function correctly")
54
+
55
  # Check if model files exist and download if needed
56
  def ensure_models_exist():
57
  model_path = "app/models/image_captioning_model.pth"
 
65
  logger.info("Model files found.")
66
 
67
  if __name__ == "__main__":
68
+ # Set up NLTK
69
+ setup_nltk()
70
+
71
  # Ensure model files exist
72
  ensure_models_exist()
73
 
app/fix_vocab_pickle.py CHANGED
@@ -17,7 +17,20 @@ logger = logging.getLogger(__name__)
17
  try:
18
  nltk.data.find('tokenizers/punkt')
19
  except LookupError:
20
- nltk.download('punkt')
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  # Vocabulary class for loading the vocabulary
23
  class Vocabulary:
 
17
  try:
18
  nltk.data.find('tokenizers/punkt')
19
  except LookupError:
20
+ # Try to download to a directory where we have write permissions
21
+ try:
22
+ # Try user home directory first
23
+ nltk.download('punkt', download_dir=os.path.expanduser('~/.nltk_data'))
24
+ logger.info("Downloaded NLTK punkt to user home directory")
25
+ except:
26
+ # Then try current directory
27
+ try:
28
+ os.makedirs('./nltk_data', exist_ok=True)
29
+ nltk.download('punkt', download_dir='./nltk_data')
30
+ logger.info("Downloaded NLTK punkt to current directory")
31
+ except Exception as e:
32
+ logger.error(f"Failed to download NLTK punkt: {str(e)}")
33
+ # Continue anyway, as we might have the data elsewhere
34
 
35
  # Vocabulary class for loading the vocabulary
36
  class Vocabulary:
app/image_captioning_service.py CHANGED
@@ -16,7 +16,20 @@ logger = logging.getLogger(__name__)
16
  try:
17
  nltk.data.find('tokenizers/punkt')
18
  except LookupError:
19
- nltk.download('punkt')
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  # Vocabulary class for loading the vocabulary
22
  class Vocabulary:
 
16
  try:
17
  nltk.data.find('tokenizers/punkt')
18
  except LookupError:
19
+ # Try to download to a directory where we have write permissions
20
+ try:
21
+ # Try user home directory first
22
+ nltk.download('punkt', download_dir=os.path.expanduser('~/.nltk_data'))
23
+ logger.info("Downloaded NLTK punkt to user home directory")
24
+ except:
25
+ # Then try current directory
26
+ try:
27
+ os.makedirs('./nltk_data', exist_ok=True)
28
+ nltk.download('punkt', download_dir='./nltk_data')
29
+ logger.info("Downloaded NLTK punkt to current directory")
30
+ except Exception as e:
31
+ logger.error(f"Failed to download NLTK punkt: {str(e)}")
32
+ # Continue anyway, as we might have the data elsewhere
33
 
34
  # Vocabulary class for loading the vocabulary
35
  class Vocabulary: