tien314 committed on
Commit
f3aceda
·
verified ·
1 Parent(s): 9fdf1bd

Update BM25S model

Browse files
.gitattributes CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  corpus.jsonl filter=lfs diff=lfs merge=lfs -text
37
  vocab.index.json filter=lfs diff=lfs merge=lfs -text
 
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  corpus.jsonl filter=lfs diff=lfs merge=lfs -text
37
  vocab.index.json filter=lfs diff=lfs merge=lfs -text
38
+ corpus.mmindex.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -123,9 +123,9 @@ This dataset was created using the following data:
123
 
124
  | Statistic | Value |
125
  | --- | --- |
126
- | Number of documents | 712943 |
127
- | Number of tokens | 7391911 |
128
- | Average tokens per document | 10.37 |
129
 
130
  ## Parameters
131
 
 
123
 
124
  | Statistic | Value |
125
  | --- | --- |
126
+ | Number of documents | 2465154 |
127
+ | Number of tokens | 23873995 |
128
+ | Average tokens per document | 9.68 |
129
 
130
  ## Parameters
131
 
corpus.jsonl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5869a1f3263444bb6292d30075a29405e88740a06255b9e312569b4339001230
3
- size 69920920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1398ab105ac80cf05d2fc15f4d6dc30ee4141174aece99918896eafe5e467b32
3
+ size 231574332
corpus.mmindex.json CHANGED
The diff for this file is too large to render. See raw diff
 
data.csc.index.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70756fa753a7c110e506386956b0f84358d2b123175f8b2b2459b8c255d86f0a
3
- size 29567772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:912b50a2eb4eedce97f8e28212e0431c2b07b6211f8974d3df3930d5dc80b418
3
+ size 95496108
indices.csc.index.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9d24996451232d9b3f3a3d57eeba29f73ccfef68e3116e6b74c8626bb140d92
3
- size 29567772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43bf92d020f7d208dcc96e9ee218918d8532625226546d2a4be3016111baab1c
3
+ size 95496108
indptr.csc.index.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fcb12500c5b017e78b2f608ddc910f48d876bdeeee3fabad7ebe47262e5e89c8
3
- size 553400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e6288b33718c8ab5dbd27c3341fef867e6d320eaaefaf904ffe2e393ef608a9
3
+ size 2161772
params.index.json CHANGED
@@ -6,7 +6,7 @@
6
  "idf_method": "lucene",
7
  "dtype": "float32",
8
  "int_dtype": "int32",
9
- "num_docs": 712943,
10
  "version": "0.2.7post1",
11
  "backend": "numpy"
12
  }
 
6
  "idf_method": "lucene",
7
  "dtype": "float32",
8
  "int_dtype": "int32",
9
+ "num_docs": 2465154,
10
  "version": "0.2.7post1",
11
  "backend": "numpy"
12
  }
vocab.index.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45500e26ddca7d47ccec5ef4dcc61036ffaecc5d2ddc99e1a1bab55d7cea10b5
3
- size 2252514
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63998ba360b473818d02c4b7511c8732c3f694ff14dc4754439ef3b346801cc2
3
+ size 9707975