tien314 committed on
Commit
9fdf1bd
·
verified ·
1 Parent(s): c81a0c9

Update BM25S model

Browse files
README.md CHANGED
@@ -123,9 +123,9 @@ This dataset was created using the following data:
123
 
124
  | Statistic | Value |
125
  | --- | --- |
126
- | Number of documents | 750312 |
127
- | Number of tokens | 7592215 |
128
- | Average tokens per document | 10.12 |
129
 
130
  ## Parameters
131
 
 
123
 
124
  | Statistic | Value |
125
  | --- | --- |
126
+ | Number of documents | 712943 |
127
+ | Number of tokens | 7391911 |
128
+ | Average tokens per document | 10.37 |
129
 
130
  ## Parameters
131
 
corpus.jsonl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b999015937322493801414a7628aea03373b140623efa2d7e247af03e4eb2b2
3
- size 73690346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5869a1f3263444bb6292d30075a29405e88740a06255b9e312569b4339001230
3
+ size 69920920
corpus.mmindex.json CHANGED
The diff for this file is too large to render. See raw diff
 
data.csc.index.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e30a1c11c7cc998cca6c57896b569da0e32ca94e6c59b92d14648706c2d670aa
3
- size 30368988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70756fa753a7c110e506386956b0f84358d2b123175f8b2b2459b8c255d86f0a
3
+ size 29567772
indices.csc.index.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b2f427f0d29bab6a4744d4d14cad7f6205617efb8f2381fa0e82664698e1f92
3
- size 30368988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9d24996451232d9b3f3a3d57eeba29f73ccfef68e3116e6b74c8626bb140d92
3
+ size 29567772
indptr.csc.index.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e01faf6b39cfa30b9e13fb00c062117e7487e6a6e8b4539522a37f6b30591a52
3
- size 559348
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcb12500c5b017e78b2f608ddc910f48d876bdeeee3fabad7ebe47262e5e89c8
3
+ size 553400
params.index.json CHANGED
@@ -6,7 +6,7 @@
6
  "idf_method": "lucene",
7
  "dtype": "float32",
8
  "int_dtype": "int32",
9
- "num_docs": 750312,
10
  "version": "0.2.7post1",
11
  "backend": "numpy"
12
  }
 
6
  "idf_method": "lucene",
7
  "dtype": "float32",
8
  "int_dtype": "int32",
9
+ "num_docs": 712943,
10
  "version": "0.2.7post1",
11
  "backend": "numpy"
12
  }
vocab.index.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d224cd8071210250b768abb4e03969c446bef1a2d382ad711325ad63c54af4c9
3
- size 2283889
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45500e26ddca7d47ccec5ef4dcc61036ffaecc5d2ddc99e1a1bab55d7cea10b5
3
+ size 2252514