de-t5-sci-transfer-init-spm32k / tokenizer_training_metadata.json
rausch's picture
Add DE-Trans-Init paper model
456a3ba verified
{
"timestamp_utc": "2026-02-16T20:42:59.548751+00:00",
"language": "deu_Latn",
"track": "paper_spm32k",
"training_data": "/netscratch/nrauscher/projects/BA-hydra/cross_lingual_transfer_multilingual/data/languages/deu_Latn/splits/sub/sub_charcap43gb_seed42/train/docs.parquet",
"training_data_rows": 124052,
"subsplit": "sub_charcap43gb_seed42",
"sentencepiece": {
"model_type": "bpe",
"vocab_size": 32000,
"character_coverage": 1.0,
"byte_fallback": true,
"hard_vocab_limit": false,
"input_sentence_size": 0,
"shuffle_input_sentence": true,
"num_threads": 32,
"pad_id": 0,
"eos_id": 1,
"unk_id": 2,
"bos_id": -1
},
"t5": {
"extra_ids": 100,
"tokenizer_length": 32100,
"fast_tokenizer_export_ok": false
},
"duration_sec": 184.08136105537415
}