File size: 2,632 Bytes
ca97aa9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import { LlamaTokenizer, LlamaForCausalLM } from "../../../src/transformers.js";

import { MAX_MODEL_LOAD_TIME, MAX_TEST_EXECUTION_TIME, MAX_MODEL_DISPOSE_TIME, MAX_TEST_TIME, DEFAULT_MODEL_OPTIONS } from "../../init.js";

export default () => {
  describe("LlamaForCausalLM", () => {
    const model_id = "hf-internal-testing/tiny-random-LlamaForCausalLM";
    /** @type {LlamaForCausalLM} */
    let model;
    /** @type {LlamaTokenizer} */
    let tokenizer;
    beforeAll(async () => {
      model = await LlamaForCausalLM.from_pretrained(model_id, DEFAULT_MODEL_OPTIONS);
      tokenizer = await LlamaTokenizer.from_pretrained(model_id);
    }, MAX_MODEL_LOAD_TIME);

    it(
      "batch_size=1",
      async () => {
        const inputs = tokenizer("hello");
        const outputs = await model.generate({
          ...inputs,
          max_length: 10,
        });
        expect(outputs.tolist()).toEqual([[1n, 22172n, 18547n, 8143n, 22202n, 9456n, 17213n, 15330n, 26591n, 15721n]]);
      },
      MAX_TEST_EXECUTION_TIME,
    );

    it(
      "batch_size>1",
      async () => {
        const inputs = tokenizer(["hello", "hello world"], { padding: true });
        const outputs = await model.generate({
          ...inputs,
          max_length: 10,
        });
        expect(outputs.tolist()).toEqual([
          [0n, 1n, 22172n, 18547n, 8143n, 22202n, 9456n, 17213n, 15330n, 26591n],
          [1n, 22172n, 3186n, 24786n, 19169n, 20222n, 29993n, 27146n, 27426n, 24562n],
        ]);
      },
      MAX_TEST_EXECUTION_TIME,
    );

    afterAll(async () => {
      await model?.dispose();
    }, MAX_MODEL_DISPOSE_TIME);
  });

  describe("LlamaForCausalLM (onnxruntime-genai)", () => {
    const model_id = "onnx-community/tiny-random-LlamaForCausalLM-ONNX";
    /** @type {LlamaTokenizer} */
    let tokenizer;
    let inputs;
    beforeAll(async () => {
      tokenizer = await LlamaTokenizer.from_pretrained(model_id);
      inputs = tokenizer("hello");
    }, MAX_MODEL_LOAD_TIME);

    const dtypes = ["fp32", "fp16", "q4", "q4f16"];

    for (const dtype of dtypes) {
      it(
        `dtype=${dtype}`,
        async () => {
          /** @type {LlamaForCausalLM} */
          const model = await LlamaForCausalLM.from_pretrained(model_id, {
            ...DEFAULT_MODEL_OPTIONS,
            dtype,
          });

          const outputs = await model.generate({
            ...inputs,
            max_length: 5,
          });
          expect(outputs.tolist()).toEqual([[128000n, 15339n, 15339n, 15339n, 15339n]]);

          await model?.dispose();
        },
        MAX_TEST_TIME,
      );
    }
  });
};