{
  "activation_type": "gelu",
  "architectures": [
    "AxonForCausalLM"
  ],
  "attention_type": "standard",
  "batch_size": 15,
  "bias": true,
  "block_size": 1024,
  "block_type": "standard",
  "dropout": 0.2,
  "dropout_attention": 0.2,
  "dropout_resid": 0.2,
  "eval_interval": 25,
  "eval_iters": 200,
  "flash_attention": false,
  "grad_clip": 1.0,
  "gradient_accumulation_steps": 40,
  "layer_norm_type": "default",
  "log_interval": 10,
  "max_iters": 600000,
  "mlp_ratio": 4,
  "model_type": "gpt2",
  "n_embd": 256,
  "n_head": 4,
  "n_layer": 8,
  "out_path": "models/axon-test/",
  "rotary_embeddings": true,
  "transformers_version": "4.41.2",
  "use_cache": true,
  "vocab_size": 50304
}
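Because `model_type` is `"gpt2"`, the file can be read with the standard `transformers` config machinery, which resolves it to `GPT2Config` and keeps the extra training-only keys (`batch_size`, `eval_interval`, and so on) as plain attributes. A minimal sketch, assuming the JSON above is saved as `config.json` under `models/axon-test/` (matching `out_path`):

```python
import json

from transformers import AutoConfig

# model_type "gpt2" resolves to GPT2Config; unrecognized keys such as
# batch_size or eval_interval are stored as attributes on the config object.
config = AutoConfig.from_pretrained("models/axon-test/")
print(config.n_embd, config.n_layer, config.n_head)  # 256 8 4

# The raw JSON can also be read directly for the training-only fields.
with open("models/axon-test/config.json") as f:
    raw = json.load(f)

# Effective batch size = per-step batch * gradient accumulation steps.
effective_batch = raw["batch_size"] * raw["gradient_accumulation_steps"]
print(f"effective batch size: {effective_batch}")  # 15 * 40 = 600
```

Note that fields like `max_iters`, `grad_clip`, and `out_path` are training hyperparameters rather than architecture settings; `transformers` will carry them along but ignores them at model-construction time.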