File size: 806 Bytes
0498c37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import os
import sys
import torch
import torch.nn as nn

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from trellis2.quantization import quantize_model


def main():
    """Smoke-test ``quantize_model`` on a tiny two-layer MLP.

    Builds a 16 -> 8 -> 4 ``nn.Sequential``, hands it to ``quantize_model``
    with ``bits=4`` and ``dtype=torch.float16``, runs a single forward pass
    on a random ``(2, 16)`` batch, and prints progress markers followed by
    the output's shape, dtype and first row.
    """
    # Layers are constructed in the same order as before so parameter
    # initialisation consumes the RNG identically.
    layers = [
        nn.Linear(16, 8),
        nn.ReLU(),
        nn.Linear(8, 4),
    ]
    model = nn.Sequential(*layers)
    print('model loaded')

    # The return value is discarded and the same `model` object is used for
    # the forward pass below.
    # NOTE(review): presumably quantize_model modifies the module in place —
    # confirm against trellis2.quantization.
    quantize_model(model, bits=4, dtype=torch.float16)
    print('model quantized')

    # NOTE(review): torch.randn yields the default dtype here, while the
    # model was quantized with dtype=torch.float16 — confirm the quantized
    # layers accept this input.
    batch = torch.randn(2, 16)
    out = model(batch)
    print('forward ok')

    print('output shape:', out.shape)
    print('output dtype:', out.dtype)
    first_row = out[0].tolist()
    print('output sample:', first_row)


# Script entry point: run the smoke test only when executed directly.
if __name__ == '__main__':
    main()

# CUDA memory report. These statements sit outside the guard above, so they
# execute whenever the module is loaded, after main() when run as a script.
# NOTE(review): torch is already imported at the top of the file; this
# re-import is redundant but kept to leave module behaviour unchanged.
import torch

# Each query returns a byte count; dividing by 1024**3 scales it to the
# figure printed under the "GB" label.
for _label, _query in (
    ("allocated GB:", torch.cuda.memory_allocated),
    ("reserved GB:", torch.cuda.memory_reserved),
):
    print(_label, _query() / 1024**3)