"""Smoke tests for the SparseML and DeepSparse model backends."""

import pytest

from lm_eval import evaluator
from lm_eval.api.registry import get_model

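# (model_id, task) pairs for the SparseML backend: a small dense model, a
# tiny random GPT-2, and a quantized Llama variant (judging by the stub names).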
SPARSEML_MODELS_TASKS = [
    ("facebook/opt-125m", "lambada_openai"),
    ("hf-internal-testing/tiny-random-gpt2", "wikitext"),
    ("mgoin/tiny-random-llama-2-quant", "gsm8k"),
]

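# (model_id, task) pairs for the DeepSparse backend; the "hf:" prefix appears
# to point DeepSparse at a Hugging Face Hub stub rather than a local path.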
DEEPSPARSE_MODELS_TASKS = [
    ("hf:mgoin/llama2.c-stories15M-quant-ds", "lambada_openai"),
    ("hf:mgoin/llama2.c-stories15M-quant-ds", "gsm8k"),
]

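# End-to-end smoke test for the SparseML backend: load each model and run a
# tiny zero-shot evaluation. Currently skipped because the test is failing.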
@pytest.mark.skip(reason="test failing")
@pytest.mark.parametrize("model_id,task", SPARSEML_MODELS_TASKS)
def test_sparseml_eval(model_id, task):
    # Build the LM through the registered "sparseml" backend.
    lm = get_model("sparseml").create_from_arg_string(
        f"pretrained={model_id}",
        {
            "batch_size": 1,
            "device": "cpu",
            "dtype": "float32",
        },
    )

    # Keep the run cheap: zero-shot, capped at 5 documents per task.
    limit = 5
    evaluator.simple_evaluate(
        model=lm,
        tasks=[task],
        num_fewshot=0,
        limit=limit,
    )

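# Same smoke test against the DeepSparse backend.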
@pytest.mark.parametrize("model_id,task", DEEPSPARSE_MODELS_TASKS)
def test_deepsparse_eval(model_id, task):
    # DeepSparse is a CPU inference engine, so no device/dtype args are needed.
    lm = get_model("deepsparse").create_from_arg_string(
        f"pretrained={model_id}",
        {
            "batch_size": 1,
        },
    )

    # Same cheap smoke-eval settings as test_sparseml_eval above.
    limit = 5
    evaluator.simple_evaluate(
        model=lm,
        tasks=[task],
        num_fewshot=0,
        limit=limit,
    )
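
# A minimal sketch of running these tests locally (assumes the optional
# `sparseml` and `deepsparse` packages are installed; the file path below
# is a placeholder for wherever this module lives):
#
#   pip install sparseml deepsparse
#   python -m pytest -v path/to/this_test_file.py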