import pytest |
|
from nemo.utils.flops_formulas import FLOPSConfig, bert, gpt3, llama2, llama3, mixtral, nemotron, transformer |
from nemo.utils.hyena_flops_formulas import hyena |
|
|
@pytest.fixture
def flops_config():
    """A small GPT-2-sized reference config (12 layers, hidden size 768) shared by all tests."""
    return FLOPSConfig(
        gbs=1,
        enc_seq_len=128,
        hs=768,
        layers=12,
        ffn_hs=3072,
        attention_heads=12,
        moe_router_topk=2,
        query_groups=12,
        vocab_size=50257,
        # Operator-layout pattern consumed by the Hyena formula
        # (see nemo.utils.hyena_flops_formulas for its semantics).
        model_pattern="SDH*",
    )
|
|
def test_gpt3(flops_config):
    expected_flops = 97240743936
    assert gpt3(flops_config) == pytest.approx(expected_flops)
|
|
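def test_gpt3_expected_value_breakdown(flops_config):
    """Sanity sketch of where the pinned gpt3 constant could come from.

    This re-derivation is an assumption, not a copy of the library formula:
    per layer, 24*B*s*h^2 (QKV/projection/MLP matmuls) plus 4*B*s^2*h
    (attention scores and context), times 3 for forward + backward, plus a
    6*B*s*h*V logit term with V = 51200 (the GPT-3 tokenizer size, assumed
    here in place of the fixture's vocab_size).
    """
    cfg = flops_config
    per_layer = 24 * cfg.gbs * cfg.enc_seq_len * cfg.hs**2 + 4 * cfg.gbs * cfg.enc_seq_len**2 * cfg.hs
    logits = 6 * cfg.gbs * cfg.enc_seq_len * cfg.hs * 51200
    assert 3 * cfg.layers * per_layer + logits == 97240743936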
|
|
def test_llama2(flops_config):
    expected_flops = 107659395072.0
    assert llama2(flops_config) == pytest.approx(expected_flops)
|
|
def test_llama3(flops_config):
    expected_flops = 164433494016.0
    assert llama3(flops_config) == pytest.approx(expected_flops)
|
|
def test_nemotron(flops_config):
    expected_flops = 218036699136.0
    assert nemotron(flops_config) == pytest.approx(expected_flops)
|
|
def test_mixtral(flops_config):
    expected_flops = 172889210880.0
    assert mixtral(flops_config) == pytest.approx(expected_flops)
|
|
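def test_llama_family_expected_value_breakdown(flops_config):
    """Sanity sketch: the llama2/llama3/nemotron/mixtral constants above are
    consistent with one shared shape (an assumed decomposition, not the
    library's code):

        B*s*L*h^2 * (12 + 12*g/a + k*f/h + 12*s/h + 6*V/(L*h))

    with k = 18 for the gated-FFN families, k = 12 for nemotron, k scaled by
    top-k routing for mixtral, and per-family assumed vocabulary sizes V.
    """
    cfg = flops_config
    base = cfg.gbs * cfg.enc_seq_len * cfg.layers * cfg.hs**2

    def shape(ffn_factor, vocab):
        return base * (
            12
            + 12 * cfg.query_groups / cfg.attention_heads
            + ffn_factor
            + 12 * cfg.enc_seq_len / cfg.hs
            + 6 * vocab / (cfg.layers * cfg.hs)
        )

    assert shape(18 * cfg.ffn_hs / cfg.hs, 32000) == pytest.approx(107659395072.0)  # llama2
    assert shape(18 * cfg.ffn_hs / cfg.hs, 128256) == pytest.approx(164433494016.0)  # llama3
    assert shape(12 * cfg.ffn_hs / cfg.hs, 256000) == pytest.approx(218036699136.0)  # nemotron
    assert shape(18 * cfg.moe_router_topk * cfg.ffn_hs / cfg.hs, 32000) == pytest.approx(172889210880.0)  # mixtral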
|
|
def test_bert(flops_config):
    # The reference value carries float rounding dust, hence approx.
    expected_flops = 84146651135.99998
    assert bert(flops_config) == pytest.approx(expected_flops)
|
|
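def test_bert_expected_value_breakdown(flops_config):
    """Sanity sketch (an assumed decomposition, not the library's code): the
    pinned BERT constant matches 72*B*L*s*h^2 * (1 + s/(6h) + V/(12*h*L))
    with an assumed V = 29000; exact integer arithmetic gives 84146651136,
    so the stored reference differs only by float rounding dust.
    """
    cfg = flops_config
    derived = (
        72 * cfg.gbs * cfg.layers * cfg.enc_seq_len * cfg.hs**2
        * (1 + cfg.enc_seq_len / (6 * cfg.hs) + 29000 / (12 * cfg.hs * cfg.layers))
    )
    assert derived == pytest.approx(84146651135.99998)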
|
|
def test_hyena(flops_config):
    expected_flops = 116883062784.0
    assert hyena(flops_config) == pytest.approx(expected_flops)
|
|
def test_transformer(flops_config):
    expected_flops = 118427811840.0
    assert transformer(flops_config) == pytest.approx(expected_flops)
|
|
def test_transformer_no_moe(flops_config):
    flops_config.moe_router_topk = 0
    expected_flops = 96684539904.0
    assert transformer(flops_config) == pytest.approx(expected_flops)
|
|
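def test_transformer_moe_vs_dense_consistency(flops_config):
    """Sanity sketch tying the two pinned transformer constants together (an
    observed consistency for this config, not a documented contract): the
    dense value matches the shared shape above with the fixture's own
    vocab_size and a 12*f/h FFN term, and top-2 routing adds exactly
    24*B*s*L*h^2 on top of it.
    """
    cfg = flops_config
    base = cfg.gbs * cfg.enc_seq_len * cfg.layers * cfg.hs**2
    dense = base * (
        12
        + 12 * cfg.query_groups / cfg.attention_heads
        + 12 * cfg.ffn_hs / cfg.hs
        + 12 * cfg.enc_seq_len / cfg.hs
        + 6 * cfg.vocab_size / (cfg.layers * cfg.hs)
    )
    assert dense == pytest.approx(96684539904.0)
    assert dense + 24 * base == pytest.approx(118427811840.0)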