Shymaa2611 committed on
Commit ·
4eafc46
1
Parent(s): ad42335
update
Browse files
- Dockerfile +3 -1
- inference.py +7 -13
Dockerfile
CHANGED
|
@@ -4,7 +4,9 @@ WORKDIR /app
|
|
| 4 |
COPY requirements.txt .
|
| 5 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 6 |
RUN mkdir -p /app/cache /app/nltk_data
|
| 7 |
-
|
|
|
|
|
|
|
| 8 |
ENV HF_HOME=/app/cache
|
| 9 |
ENV NLTK_DATA=/app/nltk_data
|
| 10 |
RUN chmod -R 777 /app/cache /app/nltk_data
|
|
|
|
| 4 |
COPY requirements.txt .
|
| 5 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 6 |
RUN mkdir -p /app/cache /app/nltk_data
|
| 7 |
+
RUN mkdir -p /app/ChatbotCheckpoint
|
| 8 |
+
RUN chmod 777 /app/ChatbotCheckpoint
|
| 9 |
+
ENV CHECKPOINT_DIR=/app/ChatbotCheckpoint
|
| 10 |
ENV HF_HOME=/app/cache
|
| 11 |
ENV NLTK_DATA=/app/nltk_data
|
| 12 |
RUN chmod -R 777 /app/cache /app/nltk_data
|
inference.py
CHANGED
|
@@ -3,22 +3,16 @@ from dataset import clean
|
|
| 3 |
import re
|
| 4 |
import gdown
|
| 5 |
|
| 6 |
-
import os
|
| 7 |
-
import gdown
|
| 8 |
-
|
| 9 |
def load_tokenizer_model():
|
| 10 |
-
cache_dir
|
| 11 |
folder_url = "https://drive.google.com/drive/folders/1DDJ9t-HfMrf6OLYim5bVrP20QgyOZahc"
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
model_name = cache_dir # Update to the cache directory
|
| 16 |
-
model = GPT2LMHeadModel.from_pretrained(model_name, cache_dir=cache_dir)
|
| 17 |
model.eval()
|
| 18 |
-
tokenizer
|
| 19 |
-
tokenizer.pad_token
|
| 20 |
-
return tokenizer,
|
| 21 |
-
|
| 22 |
|
| 23 |
def generate_answer(query):
|
| 24 |
tokenizer,model=load_tokenizer_model()
|
|
|
|
| 3 |
import re
|
| 4 |
import gdown
|
| 5 |
|
|
|
|
|
|
|
|
|
|
| 6 |
def load_tokenizer_model():
|
| 7 |
+
cache_dir="ChatbotCheckpoint"
|
| 8 |
folder_url = "https://drive.google.com/drive/folders/1DDJ9t-HfMrf6OLYim5bVrP20QgyOZahc"
|
| 9 |
+
gdown.download_folder(folder_url, output="ChatbotCheckpoint")
|
| 10 |
+
model_name="ChatbotCheckpoint"
|
| 11 |
+
model = GPT2LMHeadModel.from_pretrained(model_name,cache_dir=cache_dir)
|
|
|
|
|
|
|
| 12 |
model.eval()
|
| 13 |
+
tokenizer=GPT2Tokenizer.from_pretrained(model_name)
|
| 14 |
+
tokenizer.pad_token=tokenizer.eos_token
|
| 15 |
+
return tokenizer,model
|
|
|
|
| 16 |
|
| 17 |
def generate_answer(query):
|
| 18 |
tokenizer,model=load_tokenizer_model()
|