{
  "n_layers": 2,
  "d_model": 256,
  "n_ctx": 1024,
  "d_head": 32,
  "model_name": "custom",
  "n_heads": 8,
  "d_mlp": null,
  "act_fn": null,
  "d_vocab": 5000,
  "eps": 1e-05,
  "use_attn_result": false,
  "use_attn_scale": true,
  "attn_scale": 5.656854249492381,
  "use_split_qkv_input": false,
  "use_hook_mlp_in": false,
  "use_attn_in": false,
  "use_local_attn": false,
  "original_architecture": null,
  "from_checkpoint": false,
  "checkpoint_index": null,
  "checkpoint_label_type": null,
  "checkpoint_value": null,
  "tokenizer_name": "georgeyw/TinyStories-tokenizer-5k",
  "window_size": null,
  "attn_types": null,
  "init_mode": "gpt2",
  "normalization_type": "LN",
  "device": "cuda",
  "n_devices": 1,
  "attention_dir": "causal",
  "attn_only": true,
  "seed": 1,
  "initializer_range": 0.05,
  "init_weights": true,
  "scale_attn_by_inverse_layer_idx": false,
  "positional_embedding_type": "shortformer",
  "final_rms": false,
  "d_vocab_out": 5000,
  "parallel_attn_mlp": false,
  "rotary_dim": null,
  "n_params": 524288,
  "use_hook_tokens": false,
  "gated_mlp": false,
  "default_prepend_bos": true,
  "dtype": "torch.float32",
  "tokenizer_prepends_bos": false,
  "n_key_value_heads": null,
  "post_embedding_ln": false,
  "rotary_base": 10000,
  "trust_remote_code": false,
  "rotary_adjacent_pairs": false,
  "load_in_4bit": false,
  "num_experts": null,
  "experts_per_token": null,
  "relative_attention_max_distance": null,
  "relative_attention_num_buckets": null,
  "decoder_start_token_id": null,
  "tie_word_embeddings": false,
  "use_normalization_before_and_after": false,
  "attn_scores_soft_cap": -1.0,
  "output_logits_soft_cap": -1.0
}