# File size: 2,241 Bytes
# 7a92993
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Test script for GptOssDense model with trust_remote_code=True
"""
from transformers import AutoConfig, AutoModelForCausalLM
import torch

# Test 1: download the model config from the Hugging Face Hub and show
# the key architecture hyperparameters it declares.
banner = "=" * 60
print(banner)
print("Test 1: Loading config from Hub")
print(banner)
config = AutoConfig.from_pretrained('marksverdhei/gpt-oss-dense', trust_remote_code=True)
print(f"✓ Config loaded: {type(config).__name__}")
# Report each hyperparameter read straight off the loaded config object.
for label, value in (
    ("Model type", config.model_type),
    ("Hidden size", config.hidden_size),
    ("Num layers", config.num_hidden_layers),
    ("Intermediate size", config.intermediate_size),
    ("Num attention heads", config.num_attention_heads),
):
    print(f"  {label}: {value}")

# Test 2: instantiate the model architecture from the config alone
# (randomly initialized weights — nothing is downloaded here).
print("\n" + "=" * 60)
print("Test 2: Initializing model from config")
print("=" * 60)
model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
print(f"✓ Model initialized: {type(model).__name__}")

# Test 3: inspect layer 0's MLP — a dense block should lack the MoE
# attributes (router/experts) and carry plain gate_up/down projections.
print("\n" + "=" * 60)
print("Test 3: Verifying MLP structure (Dense, not MoE)")
print("=" * 60)
mlp = model.model.layers[0].mlp
print(f"MLP type: {type(mlp).__name__}")
# Probe each attribute by name; MoE leftovers should report False.
for attr_name in ("router", "experts", "gate_up_proj", "down_proj"):
    print(f"  Has {attr_name}: {hasattr(mlp, attr_name)}")
print(f"  Alpha (GLU): {mlp.alpha}")
print(f"  Limit (clamping): {mlp.limit}")

# Test 4: smoke-test a forward pass on random token ids.
print("\n" + "=" * 60)
print("Test 4: Running forward pass")
print("=" * 60)
# Batch of 2 sequences, 16 tokens each, sampled uniformly from the vocab.
input_ids = torch.randint(0, config.vocab_size, (2, 16))
model.eval()  # inference mode: disables dropout and similar training-only layers
with torch.no_grad():  # no autograd bookkeeping needed for a smoke test
    outputs = model(input_ids)
# Fixed: was an f-string with no placeholders (ruff F541).
print("✓ Forward pass successful")
print(f"  Input shape: {input_ids.shape}")
print(f"  Output shape: {outputs.logits.shape}")

# Test 5: report the model's total parameter count.
print("\n" + "=" * 60)
print("Test 5: Model parameters")
print("=" * 60)
# Lazily count elements of every parameter tensor, then total them.
param_sizes = (weight.numel() for weight in model.parameters())
total_params = sum(param_sizes)
print(f"Total parameters: {total_params:,}")

# Closing banner plus follow-up instructions for publishing the weights.
rule = "=" * 60
print("\n" + rule)
print("✅ All tests passed!")
print(rule)
print("\nTo save the model weights:")
print("  model.save_pretrained('/path/to/save')")
print("  # Then upload to Hub with: huggingface-cli upload marksverdhei/gpt-oss-dense /path/to/save")