Update copy_of_hw1.py
Browse files- copy_of_hw1.py +10 -1
copy_of_hw1.py
CHANGED
|
@@ -230,7 +230,6 @@ class BM25Index(InvertedIndex):
|
|
| 230 |
)
|
| 231 |
return index
|
| 232 |
|
| 233 |
-
|
| 234 |
from nlp4web_codebase.ir.models import BaseRetriever
|
| 235 |
from typing import Type
|
| 236 |
from abc import abstractmethod
|
|
@@ -293,3 +292,13 @@ class BM25Retriever(BaseInvertedIndexRetriever):
|
|
| 293 |
@property
|
| 294 |
def index_class(self) -> Type[BM25Index]:
|
| 295 |
return BM25Index
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
)
|
| 231 |
return index
|
| 232 |
|
|
|
|
| 233 |
from nlp4web_codebase.ir.models import BaseRetriever
|
| 234 |
from typing import Type
|
| 235 |
from abc import abstractmethod
|
|
|
|
| 292 |
@property
|
| 293 |
def index_class(self) -> Type[BM25Index]:
|
| 294 |
return BM25Index
|
| 295 |
+
|
| 296 |
+
best_b, best_k1 = 0.8, 0.5
|
| 297 |
+
bm25_index = BM25Index.build_from_documents(
|
| 298 |
+
documents=iter(sciq.corpus),
|
| 299 |
+
ndocs=12160,
|
| 300 |
+
show_progress_bar=True,
|
| 301 |
+
k1=best_k1,
|
| 302 |
+
b=best_b
|
| 303 |
+
)
|
| 304 |
+
bm25_index.save("output/bm25_index")
|