Soha85 committed on
Commit
a797b94
·
verified ·
1 Parent(s): 9383a15
Files changed (2) hide show
  1. Dockerfile +2 -8
  2. rag_engine.py +5 -4
Dockerfile CHANGED
@@ -12,14 +12,8 @@ COPY requirements.txt .
12
  RUN pip install --no-cache-dir -r requirements.txt
13
 
14
  # download NLTK data **after** nltk is installed
15
- RUN python - <<EOF
16
- import nltk
17
- nltk.download('punkt', download_dir='/usr/local/nltk_data')
18
- nltk.download('punkt_tab', download_dir='/usr/local/nltk_data')
19
- nltk.download('averaged_perceptron_tagger', download_dir='/usr/local/nltk_data')
20
- nltk.download('wordnet', download_dir='/usr/local/nltk_data')
21
- EOF
22
- ENV NLTK_DATA=/usr/local/nltk_data
23
 
24
  # copy application code
25
  COPY app.py rag_engine.py ./
 
12
  RUN pip install --no-cache-dir -r requirements.txt
13
 
14
  # download NLTK data **after** nltk is installed
15
+ COPY nltk_data /app/nltk_data
16
+ ENV NLTK_DATA=/app/nltk_data
 
 
 
 
 
 
17
 
18
  # copy application code
19
  COPY app.py rag_engine.py ./
rag_engine.py CHANGED
@@ -10,10 +10,11 @@ from nltk.translate.bleu_score import sentence_bleu
10
  from nltk.tokenize import word_tokenize
11
  from rouge_score import rouge_scorer
12
  import nltk
13
- try:
14
- nltk.data.find("tokenizers/punkt")
15
- except LookupError:
16
- raise RuntimeError("NLTK punkt tokenizer not found inside container. Fix Dockerfile.")
 
17
 
18
  DEFAULT_LLMS = {
19
  "gpt2": "gpt2",
 
10
  from nltk.tokenize import word_tokenize
11
  from rouge_score import rouge_scorer
12
  import nltk
13
+ import os
14
+
15
+ # Force NLTK to use the bundled data
16
+ NLTK_LOCAL = os.path.join(os.path.dirname(__file__), "nltk_data")
17
+ nltk.data.path.insert(0, NLTK_LOCAL)
18
 
19
  DEFAULT_LLMS = {
20
  "gpt2": "gpt2",