Q-bert's picture
Training in progress, step 100
5cd9904 verified
raw
history blame contribute delete
386 Bytes
{
"architectures": [
"CyclicFormerForCausalLM"
],
"cyclic_size": 200,
"drop_prob": 0.1,
"hidden_size": 200,
"initializer_range": 0.1,
"model_type": "cyclicformer",
"n_loop": 3,
"num_attention_heads": 4,
"num_hidden_layers": 4,
"rms_norm_eps": 1e-06,
"torch_dtype": "float32",
"transformers_version": "4.41.2",
"use_cache": false,
"vocab_size": 50257
}