Update BM25S model
Browse files- .gitattributes +1 -0
- README.md +3 -3
- corpus.jsonl +2 -2
- corpus.mmindex.json +0 -0
- data.csc.index.npy +2 -2
- indices.csc.index.npy +2 -2
- indptr.csc.index.npy +2 -2
- params.index.json +1 -1
- vocab.index.json +2 -2
.gitattributes
CHANGED
|
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
corpus.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 37 |
vocab.index.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
corpus.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 37 |
vocab.index.json filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
corpus.mmindex.json filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
|
@@ -123,9 +123,9 @@ This dataset was created using the following data:
|
|
| 123 |
|
| 124 |
| Statistic | Value |
|
| 125 |
| --- | --- |
|
| 126 |
-
| Number of documents |
|
| 127 |
-
| Number of tokens |
|
| 128 |
-
| Average tokens per document |
|
| 129 |
|
| 130 |
## Parameters
|
| 131 |
|
|
|
|
| 123 |
|
| 124 |
| Statistic | Value |
|
| 125 |
| --- | --- |
|
| 126 |
+
| Number of documents | 2465154 |
|
| 127 |
+
| Number of tokens | 23873995 |
|
| 128 |
+
| Average tokens per document | 9.68 |
|
| 129 |
|
| 130 |
## Parameters
|
| 131 |
|
corpus.jsonl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1398ab105ac80cf05d2fc15f4d6dc30ee4141174aece99918896eafe5e467b32
|
| 3 |
+
size 231574332
|
corpus.mmindex.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data.csc.index.npy
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:912b50a2eb4eedce97f8e28212e0431c2b07b6211f8974d3df3930d5dc80b418
|
| 3 |
+
size 95496108
|
indices.csc.index.npy
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43bf92d020f7d208dcc96e9ee218918d8532625226546d2a4be3016111baab1c
|
| 3 |
+
size 95496108
|
indptr.csc.index.npy
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e6288b33718c8ab5dbd27c3341fef867e6d320eaaefaf904ffe2e393ef608a9
|
| 3 |
+
size 2161772
|
params.index.json
CHANGED
|
@@ -6,7 +6,7 @@
|
|
| 6 |
"idf_method": "lucene",
|
| 7 |
"dtype": "float32",
|
| 8 |
"int_dtype": "int32",
|
| 9 |
-
"num_docs":
|
| 10 |
"version": "0.2.7post1",
|
| 11 |
"backend": "numpy"
|
| 12 |
}
|
|
|
|
| 6 |
"idf_method": "lucene",
|
| 7 |
"dtype": "float32",
|
| 8 |
"int_dtype": "int32",
|
| 9 |
+
"num_docs": 2465154,
|
| 10 |
"version": "0.2.7post1",
|
| 11 |
"backend": "numpy"
|
| 12 |
}
|
vocab.index.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63998ba360b473818d02c4b7511c8732c3f694ff14dc4754439ef3b346801cc2
|
| 3 |
+
size 9707975
|