"""
Test script for GptOssDense model with trust_remote_code=True

Runs five sequential smoke checks against the Hub repository named in
``REPO_ID``:

1. load the configuration with ``AutoConfig.from_pretrained``;
2. instantiate the model from that configuration with
   ``AutoModelForCausalLM.from_config``;
3. verify that the first decoder layer's MLP is dense (no ``router`` /
   ``experts`` attributes, but ``gate_up_proj`` and ``down_proj`` present);
4. run a forward pass on random token ids and verify the logits shape;
5. report the total parameter count.

Any failed check raises ``AssertionError``, so the final "All tests passed!"
banner is only printed when every check actually held.
"""
import torch
from transformers import AutoConfig, AutoModelForCausalLM

# Hub repository that hosts the remote-code config/model under test.
REPO_ID = 'marksverdhei/gpt-oss-dense'

# Shape of the random batch used for the forward-pass check.
BATCH_SIZE = 2
SEQ_LEN = 16


def _banner(title: str, *, leading_blank: bool = True) -> None:
    """Print *title* framed by two 60-character ``=`` rules.

    Args:
        title: Text printed between the two rules.
        leading_blank: When true, emit a newline before the first rule so
            consecutive sections are visually separated.
    """
    rule = "=" * 60
    print(("\n" if leading_blank else "") + rule)
    print(title)
    print(rule)


def _require(condition: bool, message: str) -> None:
    """Raise ``AssertionError(message)`` unless *condition* is truthy.

    A bare ``assert`` statement is stripped under ``python -O``; raising
    explicitly keeps the checks active however the script is launched.
    """
    if not condition:
        raise AssertionError(message)


# ---------------------------------------------------------------------------
# Test 1: the configuration can be fetched and exposes the expected fields.
# ---------------------------------------------------------------------------
_banner("Test 1: Loading config from Hub", leading_blank=False)
config = AutoConfig.from_pretrained(
    REPO_ID,
    trust_remote_code=True,
)
print(f"✓ Config loaded: {type(config).__name__}")
print(f" Model type: {config.model_type}")
print(f" Hidden size: {config.hidden_size}")
print(f" Num layers: {config.num_hidden_layers}")
print(f" Intermediate size: {config.intermediate_size}")
print(f" Num attention heads: {config.num_attention_heads}")

# ---------------------------------------------------------------------------
# Test 2: the model class can be built from the configuration alone.
# ---------------------------------------------------------------------------
_banner("Test 2: Initializing model from config")
model = AutoModelForCausalLM.from_config(
    config,
    trust_remote_code=True,
)
print(f"✓ Model initialized: {type(model).__name__}")

# ---------------------------------------------------------------------------
# Test 3: the MLP of the first layer is dense rather than mixture-of-experts.
# ---------------------------------------------------------------------------
_banner("Test 3: Verifying MLP structure (Dense, not MoE)")
mlp = model.model.layers[0].mlp
has_router = hasattr(mlp, 'router')
has_experts = hasattr(mlp, 'experts')
has_gate_up_proj = hasattr(mlp, 'gate_up_proj')
has_down_proj = hasattr(mlp, 'down_proj')

# Print the full diagnostic picture first so it is visible even when one of
# the checks below fails.
print(f"MLP type: {type(mlp).__name__}")
print(f" Has router: {has_router}")
print(f" Has experts: {has_experts}")
print(f" Has gate_up_proj: {has_gate_up_proj}")
print(f" Has down_proj: {has_down_proj}")
print(f" Alpha (GLU): {mlp.alpha}")
print(f" Limit (clamping): {mlp.limit}")

_require(not has_router, "MLP unexpectedly has a 'router' attribute (MoE layout)")
_require(not has_experts, "MLP unexpectedly has an 'experts' attribute (MoE layout)")
_require(has_gate_up_proj, "MLP is missing the 'gate_up_proj' attribute")
_require(has_down_proj, "MLP is missing the 'down_proj' attribute")

# ---------------------------------------------------------------------------
# Test 4: a forward pass runs and yields one logit vector per input token.
# ---------------------------------------------------------------------------
_banner("Test 4: Running forward pass")
input_ids = torch.randint(0, config.vocab_size, (BATCH_SIZE, SEQ_LEN))
model.eval()
with torch.no_grad():  # inference only; no autograd graph is needed
    outputs = model(input_ids)

expected_shape = (BATCH_SIZE, SEQ_LEN, config.vocab_size)
actual_shape = tuple(outputs.logits.shape)
_require(
    actual_shape == expected_shape,
    f"Unexpected logits shape {actual_shape}; expected {expected_shape}",
)
print("✓ Forward pass successful")
print(f" Input shape: {input_ids.shape}")
print(f" Output shape: {outputs.logits.shape}")

# ---------------------------------------------------------------------------
# Test 5: report the model size.
# ---------------------------------------------------------------------------
_banner("Test 5: Model parameters")
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params:,}")

# Reaching this point means no check above raised.
_banner("✅ All tests passed!")
print("\nTo save the model weights:")
print(" model.save_pretrained('/path/to/save')")
print(f" # Then upload to Hub with: huggingface-cli upload {REPO_ID} /path/to/save")