SymbolicLight-V1 / artifacts /same_scale_reference_benchmark_200.json
symboliclight-ai's picture
Upload 2 files
f8d54aa verified
{
"benchmark_type": "reference_only_same_scale_base_lm",
"important_caveat": "Dense public models use different corpora, tokenizers, and training schedules. These numbers are not controlled training-budget comparisons.",
"settings": {
"max_wikitext_chars": 50000,
"max_lambada_docs": 200,
"max_mc_docs": 200
},
"results": [
{
"model": "SymbolicLight-V1-0.8B",
"params": 873668135,
"device": "cuda",
"dtype": "fp16",
"metrics": {
"wikitext2": {
"nll": 40508.8044,
"tokens": 12388,
"ppl": 26.3114,
"bits_per_byte": 1.165,
"chars": 50000
},
"lambada_ppl": {
"nll": 64891.4496,
"tokens": 18218,
"ppl": 35.2315,
"bits_per_byte": 1.4056,
"docs": 200
},
"sciq": {
"accuracy": 0.52,
"correct": 104,
"total": 200
},
"arc_easy": {
"accuracy": 0.415,
"correct": 83,
"total": 200
},
"hellaswag": {
"accuracy": 0.36,
"correct": 72,
"total": 200
},
"elapsed_sec": 210.3
}
},
{
"model": "gpt2-large",
"params": 774030080,
"device": "cuda",
"dtype": "fp16",
"metrics": {
"wikitext2": {
"nll": 36412.3386,
"tokens": 11308,
"ppl": 25.0294,
"bits_per_byte": 1.0472,
"chars": 50000
},
"lambada_ppl": {
"nll": 59116.0558,
"tokens": 17378,
"ppl": 30.0174,
"bits_per_byte": 1.2805,
"docs": 200
},
"sciq": {
"accuracy": 0.525,
"correct": 105,
"total": 200
},
"arc_easy": {
"accuracy": 0.4,
"correct": 80,
"total": 200
},
"hellaswag": {
"accuracy": 0.395,
"correct": 79,
"total": 200
},
"elapsed_sec": 92.05
}
},
{
"model": "EleutherAI/pythia-1b",
"params": 1011781632,
"device": "cuda",
"dtype": "fp16",
"metrics": {
"wikitext2": {
"nll": 33414.1723,
"tokens": 11354,
"ppl": 18.9716,
"bits_per_byte": 0.9609,
"chars": 50000
},
"lambada_ppl": {
"nll": 52602.2452,
"tokens": 16843,
"ppl": 22.7165,
"bits_per_byte": 1.1394,
"docs": 200
},
"sciq": {
"accuracy": 0.565,
"correct": 113,
"total": 200
},
"arc_easy": {
"accuracy": 0.435,
"correct": 87,
"total": 200
},
"hellaswag": {
"accuracy": 0.395,
"correct": 79,
"total": 200
},
"elapsed_sec": 58.8
}
}
]
}