{
"_name_or_path": "pszemraj/long-t5-tglobal-base-16384-book-summary",
"architectures": [
"LongT5ForConditionalGeneration"
],
"d_ff": 2048,
"d_kv": 64,
"d_model": 768,
"decoder_start_token_id": 0,
"dense_act_fn": "gelu_new",
"dropout_rate": 0.1,
"early_stopping": true,
"encoder_attention_type": "transient-global",
"encoder_no_repeat_ngram_size": 4,
"eos_token_id": 1,
"feed_forward_proj": "gated-gelu",
"global_block_size": 16,
"initializer_factor": 1.0,
"is_encoder_decoder": true,
"is_gated_act": true,
"layer_norm_epsilon": 1e-06,
"length_penalty": 0.8,
"local_radius": 127,
"max_length": 512,
"min_length": 100,
"model_type": "longt5",
"n_positions": 4096,
"no_repeat_ngram_size": 3,
"num_beams": 2,
"num_decoder_layers": 12,
"num_heads": 12,
"num_layers": 12,
"output_past": true,
"pad_token_id": 0,
"relative_attention_max_distance": 128,
"relative_attention_num_buckets": 32,
"repetition_penalty": 3.5,
"tie_word_embeddings": false,
"torch_dtype": "float32",
"transformers_version": "4.28.0",
"use_cache": true,
"vocab_size": 32128
}
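
For context, a minimal sketch of how a config like this is typically consumed (an assumed usage example, not part of the original file): loading the checkpoint named in "_name_or_path" with Hugging Face transformers and generating a summary. The generation fields above (num_beams, no_repeat_ngram_size, repetition_penalty, length_penalty, min_length, max_length, early_stopping) then act as the defaults for generate().

# Minimal usage sketch (assumption, not part of the original config file)
from transformers import AutoTokenizer, LongT5ForConditionalGeneration

model_name = "pszemraj/long-t5-tglobal-base-16384-book-summary"  # from "_name_or_path" above
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = LongT5ForConditionalGeneration.from_pretrained(model_name)

# Tokenize a long input document; "encoder_attention_type": "transient-global"
# lets the encoder handle inputs far longer than vanilla T5's window.
text = "..."  # a long document to summarize
inputs = tokenizer(text, return_tensors="pt", truncation=True)

# generate() falls back to the defaults stored in this config.json:
# num_beams=2, no_repeat_ngram_size=3, repetition_penalty=3.5,
# length_penalty=0.8, min_length=100, max_length=512, early_stopping=True.
summary_ids = model.generate(**inputs)
print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))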