no message
Browse files - model/retriever.py +3 -3
model/retriever.py
CHANGED
|
@@ -23,9 +23,9 @@ class Retriever:
|
|
| 23 |
|
| 24 |
def compute_embeddings(self):
|
| 25 |
self.model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
self.chunk_embeddings = self.model.encode(self.corpus, convert_to_tensor=True)
|
| 30 |
|
| 31 |
def chunk_text(self, text, chunk_size=CONFIG['CHUNK_SIZE']):
|
|
|
|
| 23 |
|
| 24 |
def compute_embeddings(self):
|
| 25 |
self.model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 26 |
+
tokenizer = self.model._first_module().tokenizer
|
| 27 |
+
if tokenizer.pad_token is None:
|
| 28 |
+
tokenizer.pad_token = tokenizer.eos_token
|
| 29 |
self.chunk_embeddings = self.model.encode(self.corpus, convert_to_tensor=True)
|
| 30 |
|
| 31 |
def chunk_text(self, text, chunk_size=CONFIG['CHUNK_SIZE']):
|