# Multiply two small random bfloat16 matrices on the "hpu" device using the
# out= form of torch.matmul, then print the operands, the device-side result
# and a CPU copy of that result.
import torch
# NOTE(review): htcore is never referenced below; presumably it is imported
# for its side effect of making the "hpu" device available - confirm.
import habana_frameworks.torch.core as htcore

msize = 2

# A and B are the operands. C is only the destination buffer passed as out=;
# its random initial contents are overwritten by the matmul below.
A, B, C = (
    torch.randn(msize, msize, dtype=torch.bfloat16).to("hpu")
    for _ in range(3)
)

torch.matmul(A, B, out=C)

# Synchronize with the HPU before copying the result back to the host.
torch.hpu.synchronize()
R = C.to("cpu")

# Same output order as before: operands, device result, host copy.
for tensor in (A, B, C, R):
    print(tensor)