Update BM25S model
Browse files
- README.md +3 -3
- corpus.jsonl +2 -2
- corpus.mmindex.json +0 -0
- data.csc.index.npy +2 -2
- indices.csc.index.npy +2 -2
- indptr.csc.index.npy +2 -2
- params.index.json +1 -1
- vocab.index.json +2 -2
README.md
CHANGED
|
@@ -123,9 +123,9 @@ This dataset was created using the following data:
|
|
| 123 |
|
| 124 |
| Statistic | Value |
|
| 125 |
| --- | --- |
|
| 126 |
-
| Number of documents |
|
| 127 |
-
| Number of tokens |
|
| 128 |
-
| Average tokens per document | 10.
|
| 129 |
|
| 130 |
## Parameters
|
| 131 |
|
|
|
|
| 123 |
|
| 124 |
| Statistic | Value |
|
| 125 |
| --- | --- |
|
| 126 |
+
| Number of documents | 712943 |
|
| 127 |
+
| Number of tokens | 7391911 |
|
| 128 |
+
| Average tokens per document | 10.37 |
|
| 129 |
|
| 130 |
## Parameters
|
| 131 |
|
corpus.jsonl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5869a1f3263444bb6292d30075a29405e88740a06255b9e312569b4339001230
|
| 3 |
+
size 69920920
|
corpus.mmindex.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data.csc.index.npy
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:70756fa753a7c110e506386956b0f84358d2b123175f8b2b2459b8c255d86f0a
|
| 3 |
+
size 29567772
|
indices.csc.index.npy
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9d24996451232d9b3f3a3d57eeba29f73ccfef68e3116e6b74c8626bb140d92
|
| 3 |
+
size 29567772
|
indptr.csc.index.npy
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fcb12500c5b017e78b2f608ddc910f48d876bdeeee3fabad7ebe47262e5e89c8
|
| 3 |
+
size 553400
|
params.index.json
CHANGED
|
@@ -6,7 +6,7 @@
|
|
| 6 |
"idf_method": "lucene",
|
| 7 |
"dtype": "float32",
|
| 8 |
"int_dtype": "int32",
|
| 9 |
-
"num_docs":
|
| 10 |
"version": "0.2.7post1",
|
| 11 |
"backend": "numpy"
|
| 12 |
}
|
|
|
|
| 6 |
"idf_method": "lucene",
|
| 7 |
"dtype": "float32",
|
| 8 |
"int_dtype": "int32",
|
| 9 |
+
"num_docs": 712943,
|
| 10 |
"version": "0.2.7post1",
|
| 11 |
"backend": "numpy"
|
| 12 |
}
|
vocab.index.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45500e26ddca7d47ccec5ef4dcc61036ffaecc5d2ddc99e1a1bab55d7cea10b5
|
| 3 |
+
size 2252514
|