Upload folder using huggingface_hub
Browse files- .amlignore +6 -0
- .amlignore.amltmp +6 -0
- .gitattributes +2 -0
- artifact.metadata +63 -0
- config.json +27 -0
- model.safetensors +3 -0
- none/2026-03/17/12.51.10/checkpoints/colbert/artifact.metadata +63 -0
- none/2026-03/17/12.51.10/checkpoints/colbert/config.json +27 -0
- none/2026-03/17/12.51.10/checkpoints/colbert/model.safetensors +3 -0
- none/2026-03/17/12.51.10/checkpoints/colbert/special_tokens_map.json +51 -0
- none/2026-03/17/12.51.10/checkpoints/colbert/tokenizer.json +3 -0
- none/2026-03/17/12.51.10/checkpoints/colbert/tokenizer_config.json +56 -0
- queries_all_test.tsv +137 -0
- queries_all_train.tsv +0 -0
- special_tokens_map.json +51 -0
- tokenizer.json +3 -0
- tokenizer_config.json +56 -0
- triples_all.jsonl +0 -0
- triples_all_round_1.jsonl +0 -0
- triples_all_round_2.jsonl +0 -0
- triples_all_round_3.jsonl +0 -0
.amlignore
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## This file was auto generated by the Azure Machine Learning Studio. Please do not remove.
|
| 2 |
+
## Read more about the .amlignore file here: https://docs.microsoft.com/azure/machine-learning/how-to-save-write-experiment-files#storage-limits-of-experiment-snapshots
|
| 3 |
+
|
| 4 |
+
.ipynb_aml_checkpoints/
|
| 5 |
+
*.amltmp
|
| 6 |
+
*.amltemp
|
.amlignore.amltmp
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## This file was auto generated by the Azure Machine Learning Studio. Please do not remove.
|
| 2 |
+
## Read more about the .amlignore file here: https://docs.microsoft.com/azure/machine-learning/how-to-save-write-experiment-files#storage-limits-of-experiment-snapshots
|
| 3 |
+
|
| 4 |
+
.ipynb_aml_checkpoints/
|
| 5 |
+
*.amltmp
|
| 6 |
+
*.amltemp
|
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
none/2026-03/17/12.51.10/checkpoints/colbert/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
artifact.metadata
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"query_token_id": "[unused0]",
|
| 3 |
+
"doc_token_id": "[unused1]",
|
| 4 |
+
"query_token": "[Q]",
|
| 5 |
+
"doc_token": "[D]",
|
| 6 |
+
"ncells": null,
|
| 7 |
+
"centroid_score_threshold": null,
|
| 8 |
+
"ndocs": null,
|
| 9 |
+
"load_index_with_mmap": false,
|
| 10 |
+
"index_path": null,
|
| 11 |
+
"index_bsize": 64,
|
| 12 |
+
"nbits": 1,
|
| 13 |
+
"kmeans_niters": 4,
|
| 14 |
+
"resume": false,
|
| 15 |
+
"pool_factor": 1,
|
| 16 |
+
"clustering_mode": "hierarchical",
|
| 17 |
+
"protected_tokens": 0,
|
| 18 |
+
"similarity": "cosine",
|
| 19 |
+
"bsize": 8,
|
| 20 |
+
"accumsteps": 8,
|
| 21 |
+
"lr": 5e-6,
|
| 22 |
+
"maxsteps": 500000,
|
| 23 |
+
"save_every": 1000,
|
| 24 |
+
"warmup": null,
|
| 25 |
+
"warmup_bert": null,
|
| 26 |
+
"relu": false,
|
| 27 |
+
"nway": 2,
|
| 28 |
+
"use_ib_negatives": false,
|
| 29 |
+
"reranker": false,
|
| 30 |
+
"distillation_alpha": 1.0,
|
| 31 |
+
"ignore_scores": false,
|
| 32 |
+
"model_name": null,
|
| 33 |
+
"query_maxlen": 64,
|
| 34 |
+
"attend_to_mask_tokens": false,
|
| 35 |
+
"interaction": "colbert",
|
| 36 |
+
"dim": 256,
|
| 37 |
+
"doc_maxlen": 512,
|
| 38 |
+
"mask_punctuation": true,
|
| 39 |
+
"checkpoint": "BAAI\/bge-m3",
|
| 40 |
+
"triples": "\/mnt\/batch\/tasks\/shared\/LS_root\/mounts\/clusters\/lucas-1xh100-v1\/code\/Users\/azuadmllgg\/GECKOv2\/data\/colbert_retriever\/all-MiniLM-L12-v2-bge-m3-english_all_nodes\/triples_all.jsonl",
|
| 41 |
+
"collection": "\/mnt\/batch\/tasks\/shared\/LS_root\/mounts\/clusters\/lucas-1xh100-v1\/code\/Users\/azuadmllgg\/GECKOv2\/data\/colbert_retriever\/all-MiniLM-L12-v2-bge-m3-english_all_nodes\/collection_all.tsv",
|
| 42 |
+
"queries": "\/mnt\/batch\/tasks\/shared\/LS_root\/mounts\/clusters\/lucas-1xh100-v1\/code\/Users\/azuadmllgg\/GECKOv2\/data\/colbert_retriever\/all-MiniLM-L12-v2-bge-m3-english_all_nodes\/queries_all_train.tsv",
|
| 43 |
+
"index_name": null,
|
| 44 |
+
"overwrite": false,
|
| 45 |
+
"root": "\/mnt\/batch\/tasks\/shared\/LS_root\/mounts\/clusters\/lucas-1xh100-v1\/code\/Users\/azuadmllgg\/GECKOv2\/experiments",
|
| 46 |
+
"experiment": "\/mnt\/batch\/tasks\/shared\/LS_root\/mounts\/clusters\/lucas-1xh100-v1\/code\/Users\/azuadmllgg\/GECKOv2\/data\/colbert_retriever\/all-MiniLM-L12-v2-bge-m3-english_all_nodes",
|
| 47 |
+
"index_root": null,
|
| 48 |
+
"name": "2026-03\/17\/12.51.10",
|
| 49 |
+
"rank": 0,
|
| 50 |
+
"nranks": 1,
|
| 51 |
+
"amp": true,
|
| 52 |
+
"gpus": 1,
|
| 53 |
+
"avoid_fork_if_possible": false,
|
| 54 |
+
"meta": {
|
| 55 |
+
"hostname": "lucas-1xh100-v1",
|
| 56 |
+
"git_branch": "retriever-improvements",
|
| 57 |
+
"git_hash": "536083e0d50f983ea3576e97401eeeafd24c6f27",
|
| 58 |
+
"git_commit_datetime": "2026-03-16 16:13:04+01:00",
|
| 59 |
+
"current_datetime": "Mar 17, 2026 ; 4:03PM UTC (+0000)",
|
| 60 |
+
"cmd": "models\/retrievers\/colbert\/colbert_trainer.py --mode all --embedding_model sentence-transformers\/all-MiniLM-L12-v2 --output_name all-MiniLM-L12-v2-bge-m3-english_all_nodes -b 8",
|
| 61 |
+
"version": "colbert-v0.4"
|
| 62 |
+
}
|
| 63 |
+
}
|
config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"HF_ColBERT"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"dtype": "float32",
|
| 9 |
+
"eos_token_id": 2,
|
| 10 |
+
"hidden_act": "gelu",
|
| 11 |
+
"hidden_dropout_prob": 0.1,
|
| 12 |
+
"hidden_size": 1024,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 4096,
|
| 15 |
+
"layer_norm_eps": 1e-05,
|
| 16 |
+
"max_position_embeddings": 8194,
|
| 17 |
+
"model_type": "xlm-roberta",
|
| 18 |
+
"num_attention_heads": 16,
|
| 19 |
+
"num_hidden_layers": 24,
|
| 20 |
+
"output_past": true,
|
| 21 |
+
"pad_token_id": 1,
|
| 22 |
+
"position_embedding_type": "absolute",
|
| 23 |
+
"transformers_version": "4.57.6",
|
| 24 |
+
"type_vocab_size": 1,
|
| 25 |
+
"use_cache": true,
|
| 26 |
+
"vocab_size": 250002
|
| 27 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d8689e9dc0c1400861593932bbd9f0a12008972027bec7a4d26974b2f5b0c746
|
| 3 |
+
size 2272116256
|
none/2026-03/17/12.51.10/checkpoints/colbert/artifact.metadata
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"query_token_id": "[unused0]",
|
| 3 |
+
"doc_token_id": "[unused1]",
|
| 4 |
+
"query_token": "[Q]",
|
| 5 |
+
"doc_token": "[D]",
|
| 6 |
+
"ncells": null,
|
| 7 |
+
"centroid_score_threshold": null,
|
| 8 |
+
"ndocs": null,
|
| 9 |
+
"load_index_with_mmap": false,
|
| 10 |
+
"index_path": null,
|
| 11 |
+
"index_bsize": 64,
|
| 12 |
+
"nbits": 1,
|
| 13 |
+
"kmeans_niters": 4,
|
| 14 |
+
"resume": false,
|
| 15 |
+
"pool_factor": 1,
|
| 16 |
+
"clustering_mode": "hierarchical",
|
| 17 |
+
"protected_tokens": 0,
|
| 18 |
+
"similarity": "cosine",
|
| 19 |
+
"bsize": 8,
|
| 20 |
+
"accumsteps": 8,
|
| 21 |
+
"lr": 5e-6,
|
| 22 |
+
"maxsteps": 500000,
|
| 23 |
+
"save_every": 1000,
|
| 24 |
+
"warmup": null,
|
| 25 |
+
"warmup_bert": null,
|
| 26 |
+
"relu": false,
|
| 27 |
+
"nway": 2,
|
| 28 |
+
"use_ib_negatives": false,
|
| 29 |
+
"reranker": false,
|
| 30 |
+
"distillation_alpha": 1.0,
|
| 31 |
+
"ignore_scores": false,
|
| 32 |
+
"model_name": null,
|
| 33 |
+
"query_maxlen": 64,
|
| 34 |
+
"attend_to_mask_tokens": false,
|
| 35 |
+
"interaction": "colbert",
|
| 36 |
+
"dim": 256,
|
| 37 |
+
"doc_maxlen": 512,
|
| 38 |
+
"mask_punctuation": true,
|
| 39 |
+
"checkpoint": "BAAI\/bge-m3",
|
| 40 |
+
"triples": "\/mnt\/batch\/tasks\/shared\/LS_root\/mounts\/clusters\/lucas-1xh100-v1\/code\/Users\/azuadmllgg\/GECKOv2\/data\/colbert_retriever\/all-MiniLM-L12-v2-bge-m3-english_all_nodes\/triples_all.jsonl",
|
| 41 |
+
"collection": "\/mnt\/batch\/tasks\/shared\/LS_root\/mounts\/clusters\/lucas-1xh100-v1\/code\/Users\/azuadmllgg\/GECKOv2\/data\/colbert_retriever\/all-MiniLM-L12-v2-bge-m3-english_all_nodes\/collection_all.tsv",
|
| 42 |
+
"queries": "\/mnt\/batch\/tasks\/shared\/LS_root\/mounts\/clusters\/lucas-1xh100-v1\/code\/Users\/azuadmllgg\/GECKOv2\/data\/colbert_retriever\/all-MiniLM-L12-v2-bge-m3-english_all_nodes\/queries_all_train.tsv",
|
| 43 |
+
"index_name": null,
|
| 44 |
+
"overwrite": false,
|
| 45 |
+
"root": "\/mnt\/batch\/tasks\/shared\/LS_root\/mounts\/clusters\/lucas-1xh100-v1\/code\/Users\/azuadmllgg\/GECKOv2\/experiments",
|
| 46 |
+
"experiment": "\/mnt\/batch\/tasks\/shared\/LS_root\/mounts\/clusters\/lucas-1xh100-v1\/code\/Users\/azuadmllgg\/GECKOv2\/data\/colbert_retriever\/all-MiniLM-L12-v2-bge-m3-english_all_nodes",
|
| 47 |
+
"index_root": null,
|
| 48 |
+
"name": "2026-03\/17\/12.51.10",
|
| 49 |
+
"rank": 0,
|
| 50 |
+
"nranks": 1,
|
| 51 |
+
"amp": true,
|
| 52 |
+
"gpus": 1,
|
| 53 |
+
"avoid_fork_if_possible": false,
|
| 54 |
+
"meta": {
|
| 55 |
+
"hostname": "lucas-1xh100-v1",
|
| 56 |
+
"git_branch": "retriever-improvements",
|
| 57 |
+
"git_hash": "536083e0d50f983ea3576e97401eeeafd24c6f27",
|
| 58 |
+
"git_commit_datetime": "2026-03-16 16:13:04+01:00",
|
| 59 |
+
"current_datetime": "Mar 17, 2026 ; 4:03PM UTC (+0000)",
|
| 60 |
+
"cmd": "models\/retrievers\/colbert\/colbert_trainer.py --mode all --embedding_model sentence-transformers\/all-MiniLM-L12-v2 --output_name all-MiniLM-L12-v2-bge-m3-english_all_nodes -b 8",
|
| 61 |
+
"version": "colbert-v0.4"
|
| 62 |
+
}
|
| 63 |
+
}
|
none/2026-03/17/12.51.10/checkpoints/colbert/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"HF_ColBERT"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"dtype": "float32",
|
| 9 |
+
"eos_token_id": 2,
|
| 10 |
+
"hidden_act": "gelu",
|
| 11 |
+
"hidden_dropout_prob": 0.1,
|
| 12 |
+
"hidden_size": 1024,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 4096,
|
| 15 |
+
"layer_norm_eps": 1e-05,
|
| 16 |
+
"max_position_embeddings": 8194,
|
| 17 |
+
"model_type": "xlm-roberta",
|
| 18 |
+
"num_attention_heads": 16,
|
| 19 |
+
"num_hidden_layers": 24,
|
| 20 |
+
"output_past": true,
|
| 21 |
+
"pad_token_id": 1,
|
| 22 |
+
"position_embedding_type": "absolute",
|
| 23 |
+
"transformers_version": "4.57.6",
|
| 24 |
+
"type_vocab_size": 1,
|
| 25 |
+
"use_cache": true,
|
| 26 |
+
"vocab_size": 250002
|
| 27 |
+
}
|
none/2026-03/17/12.51.10/checkpoints/colbert/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d8689e9dc0c1400861593932bbd9f0a12008972027bec7a4d26974b2f5b0c746
|
| 3 |
+
size 2272116256
|
none/2026-03/17/12.51.10/checkpoints/colbert/special_tokens_map.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"cls_token": {
|
| 10 |
+
"content": "<s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"eos_token": {
|
| 17 |
+
"content": "</s>",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"mask_token": {
|
| 24 |
+
"content": "<mask>",
|
| 25 |
+
"lstrip": true,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"pad_token": {
|
| 31 |
+
"content": "<pad>",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
},
|
| 37 |
+
"sep_token": {
|
| 38 |
+
"content": "</s>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false
|
| 43 |
+
},
|
| 44 |
+
"unk_token": {
|
| 45 |
+
"content": "<unk>",
|
| 46 |
+
"lstrip": false,
|
| 47 |
+
"normalized": false,
|
| 48 |
+
"rstrip": false,
|
| 49 |
+
"single_word": false
|
| 50 |
+
}
|
| 51 |
+
}
|
none/2026-03/17/12.51.10/checkpoints/colbert/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:249df0778f236f6ece390de0de746838ef25b9d6954b68c2ee71249e0a9d8fd4
|
| 3 |
+
size 17082799
|
none/2026-03/17/12.51.10/checkpoints/colbert/tokenizer_config.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<s>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<pad>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "</s>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"250001": {
|
| 36 |
+
"content": "<mask>",
|
| 37 |
+
"lstrip": true,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "<s>",
|
| 45 |
+
"clean_up_tokenization_spaces": true,
|
| 46 |
+
"cls_token": "<s>",
|
| 47 |
+
"eos_token": "</s>",
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"mask_token": "<mask>",
|
| 50 |
+
"model_max_length": 8192,
|
| 51 |
+
"pad_token": "<pad>",
|
| 52 |
+
"sep_token": "</s>",
|
| 53 |
+
"sp_model_kwargs": {},
|
| 54 |
+
"tokenizer_class": "XLMRobertaTokenizer",
|
| 55 |
+
"unk_token": "<unk>"
|
| 56 |
+
}
|
queries_all_test.tsv
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
0 Total bankruptcies individuals and businesses in Feb 24
|
| 2 |
+
1 Total number of laying hens on farms in December 2021 and April 2022
|
| 3 |
+
2 What is the average business climate outlook for the next three months in the Dutch business services sector for May and June 2024 based on original data?
|
| 4 |
+
3 What was the average number of declared bankruptcies among companies, institutions, and sole proprietors in the lighting equipment manufacturing sector during the summer of 2020?
|
| 5 |
+
4 What percentage of highly educated people in Bonaire were happy in 2013?
|
| 6 |
+
5 did we import more coal of lignite in Demcember 2021?
|
| 7 |
+
6 What was the highest number of total cattle on agricultural holdings recorded between April and December of 2019 and 2020
|
| 8 |
+
7 What were the overall forecasts for business climate and selling prices in the seasonally adjusted data of the sectors industry, accomodation, specialised business services and sports of the Netherlands in October 2013?
|
| 9 |
+
8 What was the total primary energy supply of crude oil and petroleum products in 1966 and 1967?
|
| 10 |
+
9 Which type of farm in Goeree-Overflakkee had the highest number of turkeys in 2015?
|
| 11 |
+
10 How much did construction costs for new homes increase during February to June 2018 compared to the same months in 2017?
|
| 12 |
+
11 Which area had the highest average price for privately owned homes in 1995 among Smallingerland, Soest, Someren, Smilde, and Sneek?
|
| 13 |
+
12 What was the year-on-year turnover change for construction companies with 10 or more employees in utility projects during the third quarter of 2016?
|
| 14 |
+
13 pronounced bankruptcies sole proprietorship in Q1 1983
|
| 15 |
+
14 What were the turnover and price index values for large enterprises in road, railway, demolition, and specialist construction having more than 10 employees, as well as the mining sector, in 2009?
|
| 16 |
+
15 What were the volume changes and value at 2017 prices for the financial institutions sector in Bonaire from 2018 to 2021?
|
| 17 |
+
16 What is the turnover index for construction companies with 10 or more employees in utility projects in April 2024?
|
| 18 |
+
17 Total number of young goats on farms recorded in December 2019, April 2020, and December 2020
|
| 19 |
+
18 How many people received income support from AOW age in April 2018, and what were the numbers for young pigs, young cattle, and broilers on Dutch farms at the same time?
|
| 20 |
+
19 Sum of male goats at least 7 months old in periods when the livestock count was below 11 thousand between April 2020 and April 2022
|
| 21 |
+
20 What is the lowest annual supply of fossil crude oil and petroleum products from mining and quarrying between 2006 and 2008?
|
| 22 |
+
21 How much residual gas was produced as output from other energy processes between 1960 and 1964?
|
| 23 |
+
22 How much hard coal, coke-oven cokes, and lignite was imported into the country in total in 2017?
|
| 24 |
+
23 Average satisfaction Bonaire 2023 for lower educated people
|
| 25 |
+
24 How much did the construction cost index for new homes change year-over-year in August, September, October, and November 2012?
|
| 26 |
+
25 What percentage of sheep were raised by specialist mixed livestock farms in the Centraal Weidegebied region of Groningen in 2013?
|
| 27 |
+
26 happiness women Caribbean Netherlands 2013
|
| 28 |
+
27 How many young men aged 18-24 from Central and South America (excluding Dutch territories) emigrated from the Caribbean Netherlands in 2022, with no more than six people leaving?
|
| 29 |
+
28 Between 2016 and 2019, what was the average yearly wage for employees in education with a gross monthly pay of 2,500 to 2,999 dollars in the Caribbean Netherlands, and how many aged 55 and older on St. Eustatius earned more than $32,470 during those years?
|
| 30 |
+
29 How many businesses in non-conventional education sectors were declared bankrupt in the fourth quarter of 2007, broken down by type of legal entity?
|
| 31 |
+
30 How many coke-oven cokes were imported from outside the EU in Q2 of 2019?
|
| 32 |
+
31 What was the year-over-year percentage change in turnover for construction installation companies with at least one employee in March 2022?
|
| 33 |
+
32 What proportion of specialist permanent crop farms in Flevoland grew ware potatoes or mushrooms in 2009 compared to 2008?
|
| 34 |
+
33 In 2022, which source covered the highest amount for outpatient rehabilitation health expenses?
|
| 35 |
+
34 What was the total number of net migrants aged 70 to 74 in 2023 for St Eustatius from the European Union and for Macao, regardless of gender and marital status?
|
| 36 |
+
35 What was the total consumption of hard coal by industries in January 2015?
|
| 37 |
+
36 What was the highest volume of imported hard coal and coke-oven coke in January 2021?
|
| 38 |
+
37 Lowest month of bulls on farms in The Netherlands for April 2023 and 2024 and December 2023
|
| 39 |
+
38 What is the maximum number of declared insolvency reaons for self-employed individuals or businesses, being less than 559, in March 2004?
|
| 40 |
+
39 Which type of farm had the highest number of nurseries and perennial plants in the Centraal Tuinbouwgebied in Utrecht in 2012 when comparing all farms to mixed crops and livestock?
|
| 41 |
+
40 How much coke from ovens was used by the iron and steel industry and how many tons of manure were removed from farms in 2018?
|
| 42 |
+
41 Which type of coal had the lowest total imports in October 2015?
|
| 43 |
+
42 Which type of farm animal had the lowest count of livestock below 4000 in April 2019?
|
| 44 |
+
43 What percentage of the total livestock in April 2023 comprised breeding hens kept for egg production?
|
| 45 |
+
44 Which type of farm had the highest number of turkey-raising operations in Goeree-Overflakkee in 2015?
|
| 46 |
+
45 Which month saw the biggest annual change in input prices for railway and metro construction in 2016?
|
| 47 |
+
46 What was the average production of coke oven gas by coke plants from 2020 to 2022?
|
| 48 |
+
47 How many people received Wajong social security benefits and what was the count of young goats, milking sheep, and broiler breeding stock on farms in December 2022?
|
| 49 |
+
48 What were the turnover changes for construction companies of various types and the import growth, all in April 2011?
|
| 50 |
+
49 How many social assistance Wajong claims and different types of farm animals (like laying hens, calves for veal, and older female cattle) were recorded in April 2020?
|
| 51 |
+
50 What proportion of all bankruptcy declarations in 2013 involved private individuals not running a business?
|
| 52 |
+
51 Lowest average price for a home in Raalte between 2016 and 2020
|
| 53 |
+
52 In december 2023 and april of 2023 and 2024, when was the highest count of veal calves, with the total never exceeding 957, recorded?
|
| 54 |
+
53 How many bankruptcies were declared in February 2023 for companies and institutions, and for natural persons without sole proprietorship?
|
| 55 |
+
54 Tunover index for construction project development companies with 10 or more workers in 2019 compared to all branches
|
| 56 |
+
55 What was the percentage change in new dwelling building cost output prices in the third quarter of 1986 compared to the same quarter the previous year?
|
| 57 |
+
56 What were the numbers of equidae and other regularly employed persons on specialist mixed livestock farms in Drenthe from 2009 to 2012?
|
| 58 |
+
57 What was the average area of greenhouse horticulture in 2002 for farms in Westelijke Langstraat with more than zero square meters, grouped by farm type such as mixed, field crops, granivores or permanent crops?
|
| 59 |
+
58 What was the average area of greenhouse horticulture on farms with different specializations in Westelijke Langstraat in 2002, considering only farms with some greenhouse crops?
|
| 60 |
+
59 What is the average waste collection fee per person in 2024 for Agglomeratie Leiden en Bollenstreek, Groot-Amsterdam, and Het Gooi en Vechtstreek?
|
| 61 |
+
60 What is the average yearly pay in the accommodation and food services sector for employees earning less than $5,000 in Saba and St. Eustatius for 2015?
|
| 62 |
+
61 Total number of ducks for meat, older rams, young calves, and male goats on farms in December 2019
|
| 63 |
+
62 How many piglets, young pigs, and laying hens were on farms in April 2018, and what was the total number of WIA social security benefits at that time?
|
| 64 |
+
63 How many benefits under Anw were given in December 2020, and what were the counts of dairy goats between 7 months and 1 year, and young female cattle raised for milk, during that same month?
|
| 65 |
+
64 How did life satisfaction rates compare between residents of Bonaire with intermediate education in 2017 and individuals from the Netherlands in 2016?
|
| 66 |
+
65 What percentage of all bankruptcies declared in 2012 were individuals without sole proprietorship?
|
| 67 |
+
66 Which percentage of people with a high level of education in Bonaire were happy in 2013?
|
| 68 |
+
67 How many mature laying hens and large fattening pigs were on farms in April 2022?
|
| 69 |
+
68 What were the numbers of horses and other regularly employed workers on specialist mixed livestock farms in Drenthe from 2009 to 2012?
|
| 70 |
+
69 Compare the percentage of elderly in Bonaire who felt discontented in 2013 to the proportion of Dutch-born residents who were dissatisfied in 2024.
|
| 71 |
+
70 What percentage of declared bankruptcies in 1984 involved individuals running a sole proprietorship?
|
| 72 |
+
71 Which source contributed the highest funding to in-patient long-term nursing care health expenditure in 2024?
|
| 73 |
+
72 Compare the number of adult bulls on crop-focused farms with the total outside labour in 2014 for regions Zuidoost-Zuid-Holland, Overig Zeeland, and Zeeuwsch-Vlaanderen.
|
| 74 |
+
73 Total area used for fodder crops on mixed crop-livestock farms in the grazing region of Overijssel for the years 2000 and 2001
|
| 75 |
+
74 What was the lowest volume change percentage in the business services sector in the Caribbean Netherlands from 2018 to 2021?
|
| 76 |
+
75 How many young pigs, broiler chickens for breeding, and mature goats were recorded on farms in April 2018, and what was the total amount of social welfare support provided in that same month?
|
| 77 |
+
76 Which type of government-funded healthcare service had the highest spending in 2022, excluding amounts equal to 6082 million euro?
|
| 78 |
+
77 What was the relative building cost index for 1926 over the period of 1924 to 1927?
|
| 79 |
+
78 Which farm type in Zuidwest-Brabant had the highest total number of cattle in 2006?
|
| 80 |
+
79 What proportion of profit-generating public companies were overseen by local authorities in 2023 compared to other government sectors?
|
| 81 |
+
80 What percentage of females in the Caribbean Netherlands reported feeling happy in 2013?
|
| 82 |
+
81 What proportion of lignite and brown coal briquettes had an ending stock of zero or less in February 2018 compared to January and March of the same year?
|
| 83 |
+
82 In which year did Bonaire have the highest number of arrivals from another municipality within the Caribbean Netherlands between 2012 and 2015?
|
| 84 |
+
83 How does the percentage of people aged 45 and over in St. Eustatius who felt satisfied in 2017 compare with the level of satisfaction among those with a bachelor’s degree in 2014, considering the reliability and confidence margins?
|
| 85 |
+
84 What proportion of healthcare spending on day treatments covered by voluntary health insurance in 2023 compared to 2022?
|
| 86 |
+
85 Which had the highest increase in stock levels in May 2017: hard coal or coke-oven coke?
|
| 87 |
+
86 What was the output price index for new dwellings construction costs in the first quarter of 2019?
|
| 88 |
+
87 Which had a smaller population in December 2020: heavy pigs or young female cattle raised for beef?
|
| 89 |
+
88 What was the average number of young cattle under one year old, goats up to seven months, and laying hens for rearing and production on farms in April 2018?
|
| 90 |
+
89 What was the typical selling price of existing homes in Baarn in 2011?
|
| 91 |
+
90 Which year had the lowest amount of personal spending on long-term nursing care services from 2021 to 2023?
|
| 92 |
+
91 Which type of farm animal—female beef cattle aged 1-2 years, piglets under 50 kg, or breeding hens—had the highest livestock count in April 2024?
|
| 93 |
+
92 Compare total spending under compulsory private health insurance for long-term social care and for supplemental medical appliance providers in years 2021 to 2023, and find providers with yearly expenditures below 1503 million euros.
|
| 94 |
+
93 What was the lowest count among rams older than 7 months, young sows, and mated sows in December 2022?
|
| 95 |
+
94 What share of total power usage in 1980 came from coal and related fuels compared to 1980-1982?
|
| 96 |
+
95 How many bankruptcies were declared in 1990 for businesses in the industrial and fashion design sector, by type of bankruptcy?
|
| 97 |
+
96 How many women aged 69 who were born in other European countries moved to St Eustatius in 2014?
|
| 98 |
+
97 What share of specialist farms dedicated to cereals and turkeys was located in Westelijk Rijnland in 2015 compared to Goeree-Overflakkee?
|
| 99 |
+
98 Compare the typical yearly salary in the energy and utilities sector across the entire Caribbean Netherlands in 2014 with the average annual wage for workers under 16 years old in Bonaire during the same year.
|
| 100 |
+
99 Compare the material cost indices for civil engineering projects and new homes between July and October 2003 and January and April of 2004
|
| 101 |
+
100 Which area in Groningen had the highest number of sheep on mixed livestock farms in 2013?
|
| 102 |
+
101 How many goats were raised in the Rivierengebied region on all types of farms in 2004 and 2005 combined?
|
| 103 |
+
102 Which area had the highest total land used for specialist mixed livestock farms in 2015: Bollenstreek or Alblasserwaard en Vijfherenlanden?
|
| 104 |
+
103 How many young pigs under 50 kg, laying hen breeders, and IOAZ benefit recipients were there in April 2024?
|
| 105 |
+
104 What proportion of large female pigs with piglets was recorded in April 2024 compared to December 2023 and December 2024 on farms?
|
| 106 |
+
105 How many young beef calves were counted on farms in April and December of 2020, 2021, and 2022?
|
| 107 |
+
106 In which month and year between December 2019 and April 2021 did the number of breeding pigs weighing at least 50 kg fall below 1,052, and which date had the highest such count?
|
| 108 |
+
107 How many breeding chickens for broiler production were reported on farms in April 2023?
|
| 109 |
+
108 What was the average yearly salary in 2016 for employees earning between 5,000 and 9,999 dollars per month in business services on St. Eustatius?
|
| 110 |
+
109 Which sector in the Caribbean Netherlands had the highest average yearly salary under $79,070 in 2021 for people earning between 5,000 and 9,999 dollars monthly: education or the energy, water, and waste sector?
|
| 111 |
+
110 What proportion of the construction sector in Bonaire had a gross value added of no more than 37 million USD (in 2017 prices) in 2019 compared to 2020?
|
| 112 |
+
111 How did the value created by the real estate sector compare to the total GDP of Bonaire from 2018 to 2021, using 2017 prices?
|
| 113 |
+
112 How did the value added change in Caribbean Netherlands sectors like hospitality, finance, and cultural services in 2018, and how does that compare to the GDP growth rate in Zeeland province for the same year?
|
| 114 |
+
113 In which year between 2014 and 2017 was the net migration for people aged 75 to 79 in the Caribbean Netherlands lowest and below 5?
|
| 115 |
+
114 How many girls aged 4 to 11 born in Trinidad and Tobago moved to or from Saba in the Caribbean Netherlands in 2018?
|
| 116 |
+
115 What was the average number of Asian boys aged 12–17 who emigrated from Bonaire and the Caribbean Netherlands in 2013?
|
| 117 |
+
116 What was the average net migration (including administrative changes) in 2023 for 18-24 year old males in St Eustatius who were either born in America or outside the Dutch Caribbean, with figures greater than or equal to minus 22?
|
| 118 |
+
117 What was the average number of people arriving in Bonaire and St Eustatius in 2020 when total arrivals were greater than 119?
|
| 119 |
+
118 How many people moved within Caribbean Netherlands municipalities in 2024, and is that number less than or equal to 3,311?
|
| 120 |
+
119 How many people arrived in the Caribbean Netherlands in 2017?
|
| 121 |
+
120 How did the percentage change in population differ in Bonaire during 2020, 2021, and 2022?
|
| 122 |
+
121 What were the domestic net migration numbers for Bonaire, Saba, and St Eustatius in the Caribbean Netherlands in 2013?
|
| 123 |
+
122 How did turnover change from the previous year in 2008 for building construction and other specialist construction companies with at least 10 employees?
|
| 124 |
+
123 How did the turnover growth rate in January 2005 compare between large construction companies building structures and those in other civil engineering fields?
|
| 125 |
+
124 What was the turnover index for building construction companies in the first quarter of 2015?
|
| 126 |
+
125 What was the year-over-year turnover growth in December 2022 for specialised construction companies with at least 10 employees?
|
| 127 |
+
126 How did the input cost index and the year-on-year percentage change for asphalt road construction evolve in April, July, and October 2011 and in January and April 2012 (base year 2000 = 100)?
|
| 128 |
+
127 For road maintenance civil engineering works, in August 1993 and February 1994, in which months was the input price index change compared with one year earlier less than 4 percent?
|
| 129 |
+
128 In January and April 2002, how did the year-on-year change in input prices for railway and metro construction compare with the percentage change in import prices?
|
| 130 |
+
129 What was the mean buying price of existing owner-occupied homes in Aa en Hunze over the years 2018, 2019, 2020, and 2021?
|
| 131 |
+
130 What was the average purchase price of existing owner-occupied homes in Overijssel in 2004?
|
| 132 |
+
131 In which year between 2005 and 2009 did existing owner-occupied homes in Veenendaal reach the highest average purchase price that was not equal to 240,991 euros?
|
| 133 |
+
132 In 2019, how did the average purchase price of existing owner-occupied homes in Hilversum and Hoeksche Waard compare with the hourly wage in the construction sector?
|
| 134 |
+
133 Price index of building costs for new dwellings in November and December 2006 (base year 2000 = 100)
|
| 135 |
+
134 In which quarters of 2003 was the year-on-year change in construction cost price indices for new homes less than 8 percent?
|
| 136 |
+
135 How much did the construction cost index for newly built homes change year-on-year in the first, second, and third quarters of 2021?
|
| 137 |
+
136 What were the construction output price indices for new homes (2000 = 100) in the 2nd, 3rd and 4th quarter of 1987?
|
queries_all_train.tsv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"cls_token": {
|
| 10 |
+
"content": "<s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"eos_token": {
|
| 17 |
+
"content": "</s>",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"mask_token": {
|
| 24 |
+
"content": "<mask>",
|
| 25 |
+
"lstrip": true,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"pad_token": {
|
| 31 |
+
"content": "<pad>",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
},
|
| 37 |
+
"sep_token": {
|
| 38 |
+
"content": "</s>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false
|
| 43 |
+
},
|
| 44 |
+
"unk_token": {
|
| 45 |
+
"content": "<unk>",
|
| 46 |
+
"lstrip": false,
|
| 47 |
+
"normalized": false,
|
| 48 |
+
"rstrip": false,
|
| 49 |
+
"single_word": false
|
| 50 |
+
}
|
| 51 |
+
}
|
tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:249df0778f236f6ece390de0de746838ef25b9d6954b68c2ee71249e0a9d8fd4
|
| 3 |
+
size 17082799
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<s>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<pad>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "</s>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"250001": {
|
| 36 |
+
"content": "<mask>",
|
| 37 |
+
"lstrip": true,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "<s>",
|
| 45 |
+
"clean_up_tokenization_spaces": true,
|
| 46 |
+
"cls_token": "<s>",
|
| 47 |
+
"eos_token": "</s>",
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"mask_token": "<mask>",
|
| 50 |
+
"model_max_length": 8192,
|
| 51 |
+
"pad_token": "<pad>",
|
| 52 |
+
"sep_token": "</s>",
|
| 53 |
+
"sp_model_kwargs": {},
|
| 54 |
+
"tokenizer_class": "XLMRobertaTokenizer",
|
| 55 |
+
"unk_token": "<unk>"
|
| 56 |
+
}
|
triples_all.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
triples_all_round_1.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
triples_all_round_2.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
triples_all_round_3.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|