DevHunterAI
/

RubiRLM-1B-Base

Text Generation

Mixture of Experts

Model card Files Files and versions

RubiRLM-1B-Base / rubirlm_config.json

DevHunterAI's picture

Upload folder using huggingface_hub

cd16f07 verified about 1 month ago

history blame contribute delete

701 Bytes

	{
	"vocab_size": 50257,
	"max_seq_len": 2048,
	"d_model": 1024,
	"n_layers": 10,
	"n_heads": 16,
	"ff_mult": 4,
	"dropout": 0.1,
	"recurse_steps": 6,
	"critique_threshold": 0.2,
	"tie_embeddings": true,
	"use_moe": true,
	"moe_num_experts": 32,
	"moe_top_k": 1,
	"moe_expert_hidden": 1280,
	"moe_router_jitter": 0.01,
	"moe_aux_loss_weight": 0.01,
	"use_layer_skip": true,
	"layer_skip_threshold": 0.8,
	"layer_skip_target": 0.03,
	"layer_skip_aux_weight": 0.01,
	"use_ternary_weights": true,
	"use_flash_attention": true,
	"use_fused_ops": true,
	"packed_execution": true,
	"use_torch_compile": false,
	"moe_backend": "auto",
	"moe_ep_size": 1
	}