File size: 1,127 Bytes
291ee0f
83be1b4
291ee0f
306c4a2
291ee0f
83be1b4
291ee0f
83be1b4
291ee0f
306c4a2
291ee0f
83be1b4
306c4a2
83be1b4
291ee0f
306c4a2
 
291ee0f
 
306c4a2
291ee0f
0a25bbd
306c4a2
83be1b4
3d8e013
306c4a2
 
83be1b4
 
 
 
 
291ee0f
 
 
 
83be1b4
306c4a2
3d8e013
291ee0f
 
8562c05
291ee0f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
{
  "_name_or_path": "pszemraj/long-t5-tglobal-base-16384-book-summary",
  "architectures": [
    "LongT5ForConditionalGeneration"
  ],
  "d_ff": 2048,
  "d_kv": 64,
  "d_model": 768,
  "decoder_start_token_id": 0,
  "dense_act_fn": "gelu_new",
  "dropout_rate": 0.1,
  "early_stopping": true,
  "encoder_attention_type": "transient-global",
  "encoder_no_repeat_ngram_size": 4,
  "eos_token_id": 1,
  "feed_forward_proj": "gated-gelu",
  "global_block_size": 16,
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": true,
  "layer_norm_epsilon": 1e-06,
  "length_penalty": 0.8,
  "local_radius": 127,
  "max_length": 512,
  "min_length": 100,
  "model_type": "longt5",
  "n_positions": 4096,
  "no_repeat_ngram_size": 3,
  "num_beams": 2,
  "num_decoder_layers": 12,
  "num_heads": 12,
  "num_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "repetition_penalty": 3.5,
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
  "transformers_version": "4.28.0",
  "use_cache": true,
  "vocab_size": 32128
}