Shymaa2611 committed on
Commit
4eafc46
·
1 Parent(s): ad42335
Files changed (2) hide show
  1. Dockerfile +3 -1
  2. inference.py +7 -13
Dockerfile CHANGED
@@ -4,7 +4,9 @@ WORKDIR /app
4
  COPY requirements.txt .
5
  RUN pip install --no-cache-dir -r requirements.txt
6
  RUN mkdir -p /app/cache /app/nltk_data
7
-
 
 
8
  ENV HF_HOME=/app/cache
9
  ENV NLTK_DATA=/app/nltk_data
10
  RUN chmod -R 777 /app/cache /app/nltk_data
 
4
  COPY requirements.txt .
5
  RUN pip install --no-cache-dir -r requirements.txt
6
  RUN mkdir -p /app/cache /app/nltk_data
7
+ RUN mkdir -p /app/ChatbotCheckpoint
8
+ RUN chmod 777 /app/ChatbotCheckpoint
9
+ ENV CHECKPOINT_DIR=/app/ChatbotCheckpoint
10
  ENV HF_HOME=/app/cache
11
  ENV NLTK_DATA=/app/nltk_data
12
  RUN chmod -R 777 /app/cache /app/nltk_data
inference.py CHANGED
@@ -3,22 +3,16 @@ from dataset import clean
3
  import re
4
  import gdown
5
 
6
- import os
7
- import gdown
8
-
9
  def load_tokenizer_model():
10
- cache_dir = "/tmp/ChatbotCheckpoint"
11
  folder_url = "https://drive.google.com/drive/folders/1DDJ9t-HfMrf6OLYim5bVrP20QgyOZahc"
12
- os.makedirs(cache_dir, exist_ok=True)
13
-
14
- gdown.download_folder(folder_url, output=cache_dir)
15
- model_name = cache_dir # Update to the cache directory
16
- model = GPT2LMHeadModel.from_pretrained(model_name, cache_dir=cache_dir)
17
  model.eval()
18
- tokenizer = GPT2Tokenizer.from_pretrained(model_name)
19
- tokenizer.pad_token = tokenizer.eos_token
20
- return tokenizer, model
21
-
22
 
23
  def generate_answer(query):
24
  tokenizer,model=load_tokenizer_model()
 
3
  import re
4
  import gdown
5
 
 
 
 
6
  def load_tokenizer_model():
7
+ cache_dir="ChatbotCheckpoint"
8
  folder_url = "https://drive.google.com/drive/folders/1DDJ9t-HfMrf6OLYim5bVrP20QgyOZahc"
9
+ gdown.download_folder(folder_url, output="ChatbotCheckpoint")
10
+ model_name="ChatbotCheckpoint"
11
+ model = GPT2LMHeadModel.from_pretrained(model_name,cache_dir=cache_dir)
 
 
12
  model.eval()
13
+ tokenizer=GPT2Tokenizer.from_pretrained(model_name)
14
+ tokenizer.pad_token=tokenizer.eos_token
15
+ return tokenizer,model
 
16
 
17
  def generate_answer(query):
18
  tokenizer,model=load_tokenizer_model()