---
license: apache-2.0
---

```python
"""Create a tiny random Glm4Moe model for testing optimum-intel export."""
import torch
from tokenizers import Tokenizer, models, pre_tokenizers
from transformers import PreTrainedTokenizerFast
from transformers.models.glm4_moe.modeling_glm4_moe import Glm4MoeForCausalLM, Glm4MoeConfig


def create_tiny_glm4_moe():
    """Build, smoke-test, and save a tiny randomly-initialized Glm4Moe model.

    The dimensions are deliberately minuscule: the artifact is a fast-loading
    test fixture, not a usable language model. The model (plus a trivial
    WordPiece tokenizer over 1000 synthetic tokens) is written to the
    ``tiny-random-glm4-moe`` directory.

    Returns:
        tuple: ``(model, config)`` — the saved ``Glm4MoeForCausalLM`` instance
        and the ``Glm4MoeConfig`` it was built from.
    """
    config = Glm4MoeConfig(
        vocab_size=1000,
        hidden_size=64,
        intermediate_size=128,
        num_hidden_layers=2,
        num_attention_heads=4,
        num_key_value_heads=4,
        hidden_act="silu",
        max_position_embeddings=256,
        rms_norm_eps=1e-5,
        # MoE routing setup: 4 routed + 1 shared expert, top-2 routing.
        n_routed_experts=4,
        n_shared_experts=1,
        num_experts_per_tok=2,
        moe_intermediate_size=32,
        # First layer stays dense; MoE layers start after it.
        first_k_dense_replace=1,
        n_group=1,
        topk_group=1,
        norm_topk_prob=True,
        routed_scaling_factor=1.8,
        topk_method="noaux_tc",
        rope_theta=10000,
        tie_word_embeddings=False,
    )
    model = Glm4MoeForCausalLM(config)
    model.eval()

    # Smoke test: a single forward pass confirms the random weights wire up.
    input_ids = torch.randint(0, 1000, (1, 10))
    with torch.no_grad():
        outputs = model(input_ids)
    print(f"Model output shape: {outputs.logits.shape}")
    print(f"Num parameters: {sum(p.numel() for p in model.parameters()):,}")

    output_dir = "tiny-random-glm4-moe"
    model.save_pretrained(output_dir)

    # Build a minimal WordPiece tokenizer whose vocab matches vocab_size;
    # token_0 doubles as the unk/pad token, token_1/token_2 as eos/bos.
    tokenizer_model = models.WordPiece(
        vocab={f"token_{i}": i for i in range(1000)},
        unk_token="token_0",
    )
    base_tokenizer = Tokenizer(tokenizer_model)
    base_tokenizer.pre_tokenizer = pre_tokenizers.Whitespace()
    tokenizer = PreTrainedTokenizerFast(
        tokenizer_object=base_tokenizer,
        unk_token="token_0",
        pad_token="token_0",
        eos_token="token_1",
        bos_token="token_2",
    )
    tokenizer.save_pretrained(output_dir)

    print(f"Model saved to {output_dir}")
    return model, config


if __name__ == "__main__":
    create_tiny_glm4_moe()
```