| """ |
| Used for debugging via tensor comparison. |
| Dump {name: tensor} records into "log_hf.jsonl" and "log_srt.jsonl". |
| Use the same name for two tensors that are supposed to be close. |
| Recommended name format: "layer 2 after mlp". |
| """ |
|
|
| import json |
| import sys |
|
|
| import torch |
|
|
# Select which pair of dump files to compare.  With no command-line argument
# the default logs are used; the only supported argument, "base", switches to
# the "base_"-prefixed logs.
if len(sys.argv) > 1:
    # Explicit check instead of `assert`: assertions are stripped under
    # `python -O`, which would let a mistyped argument silently select the
    # base logs.
    if sys.argv[1] != "base":
        sys.exit(
            f"usage: {sys.argv[0]} [base] (unknown argument: {sys.argv[1]!r})"
        )
    hf_log = "base_log_hf.jsonl"
    srt_log = "base_log_srt.jsonl"
else:
    hf_log = "log_hf.jsonl"
    srt_log = "log_srt.jsonl"
|
|
|
|
def load_data(filepath):
    """Load named tensors from a JSON-lines dump.

    Each non-blank line of *filepath* must be a JSON object mapping a name to
    a value accepted by ``torch.tensor`` (e.g. a nested list of numbers).

    Args:
        filepath: Path of the ``.jsonl`` file to read.

    Returns:
        A dict mapping each name to its tensor.  When a name occurs more than
        once in the file, the last occurrence wins.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    tensors = {}
    with open(filepath, "r", encoding="utf-8") as f:
        # Stream the file line by line rather than materialising every line
        # with readlines(); dumps of full activations can be large.
        for line in f:
            # Tolerate blank lines (e.g. a stray trailing newline) instead of
            # failing inside json.loads.
            if not line.strip():
                continue
            record = json.loads(line)
            for name, values in record.items():
                tensors[name] = torch.tensor(values)
    return tensors
|
|
|
|
# Read both dumps up front: the hf log is loaded first, then the srt log.
hf_tensors, srt_tensors = load_data(hf_log), load_data(srt_log)
|
|
|
|
def get_diff(t1, t2):
    """Measure how far apart two tensors are.

    ``t1`` is reshaped to the shape of ``t2`` before comparing, so the two
    only need to hold the same number of elements.

    Args:
        t1: First tensor; reshaped to ``t2.shape``.
        t2: Second tensor; its shape is the reference shape.

    Returns:
        A tuple ``(l2_dis, max_diff)`` of 0-dim tensors: the L2 distance
        between the tensors and the largest absolute element-wise
        difference.

    Raises:
        RuntimeError: If ``t1`` cannot be reshaped to ``t2.shape``.
    """
    t1 = t1.reshape(t2.shape)

    # Values parsed from JSON integers become integer tensors, which the
    # norm behind torch.dist does not accept; promote anything that is not
    # already floating-point or complex to the default float dtype.
    if not (t1.is_floating_point() or t1.is_complex()):
        t1 = t1.to(torch.get_default_dtype())
    if not (t2.is_floating_point() or t2.is_complex()):
        t2 = t2.to(torch.get_default_dtype())

    max_diff = (t1 - t2).abs().max()
    l2_dis = torch.dist(t1, t2, p=2)
    return l2_dis, max_diff
|
|
|
|
# Report the distance between every tensor in the srt log and the tensor of
# the same name in the hf log.
for k in srt_tensors:
    if k not in hf_tensors:
        # A name logged on only one side should not abort the whole report
        # with a KeyError; note it and keep comparing the rest.
        print(f"{k} missing in hf log, skipped")
        continue
    l2_dis, max_diff = get_diff(hf_tensors[k], srt_tensors[k])
    print(f"{k} {l2_dis=} {max_diff=}")
    # Extra dumps for specific named tensors: full values for one, shapes
    # for the other.
    if k == "layer 1 attn":
        print(hf_tensors[k])
        print(srt_tensors[k])
    if k == "layer 0 prefill k":
        print(srt_tensors[k].shape)
        print(hf_tensors[k].shape)
|
|