File size: 8,766 Bytes
9bbe9c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88

 -- model.embed_tokens                         rfn_err: 0.000000   max_diff/norm: 0.000000   sqnr: 79.869484   cos_err: 0.000000
 -- model.layers.0                             rfn_err: 0.003394   max_diff/norm: 0.000607   sqnr: 51.644513   cos_err: 0.000004
 -- model.layers.1                             rfn_err: 0.005808   max_diff/norm: 0.002926   sqnr: 46.651903   cos_err: 0.000013
 -- model.layers.2                             rfn_err: 0.011415   max_diff/norm: 0.002718   sqnr: 40.951312   cos_err: 0.000040
 -- model.layers.3                             rfn_err: 0.015681   max_diff/norm: 0.003334   sqnr: 37.879271   cos_err: 0.000062
 -- model.layers.4                             rfn_err: 0.018357   max_diff/norm: 0.004838   sqnr: 36.169828   cos_err: 0.000083
 -- model.layers.5                             rfn_err: 0.023889   max_diff/norm: 0.160353   sqnr: 34.804823   cos_err: 0.000118
 -- model.layers.6                             rfn_err: 0.024955   max_diff/norm: 0.159496   sqnr: 33.171246   cos_err: 0.000159
 -- model.layers.7                             rfn_err: 0.025661   max_diff/norm: 0.157699   sqnr: 32.518122   cos_err: 0.000202
 -- model.layers.8                             rfn_err: 0.026866   max_diff/norm: 0.156315   sqnr: 31.303216   cos_err: 0.000301
 -- model.layers.9                             rfn_err: 0.028113   max_diff/norm: 0.153931   sqnr: 30.224139   cos_err: 0.000407
 -- model.layers.10                            rfn_err: 0.029038   max_diff/norm: 0.152014   sqnr: 29.747104   cos_err: 0.000474
 -- model.layers.11                            rfn_err: 0.030416   max_diff/norm: 0.149952   sqnr: 29.029571   cos_err: 0.000603
 -- model.layers.12                            rfn_err: 0.032240   max_diff/norm: 0.148527   sqnr: 28.188072   cos_err: 0.000743
 -- model.layers.13                            rfn_err: 0.034151   max_diff/norm: 0.147561   sqnr: 27.505001   cos_err: 0.000882
 -- model.layers.14                            rfn_err: 0.036222   max_diff/norm: 0.146296   sqnr: 26.820057   cos_err: 0.001026
 -- model.layers.15                            rfn_err: 0.038649   max_diff/norm: 0.144103   sqnr: 26.183311   cos_err: 0.001212
 -- model.layers.16                            rfn_err: 0.040821   max_diff/norm: 0.141321   sqnr: 25.734528   cos_err: 0.001330
 -- model.layers.17                            rfn_err: 0.043610   max_diff/norm: 0.140069   sqnr: 25.100897   cos_err: 0.001547
 -- model.layers.18                            rfn_err: 0.046721   max_diff/norm: 0.138471   sqnr: 24.456386   cos_err: 0.001895
 -- model.layers.19                            rfn_err: 0.050462   max_diff/norm: 0.137175   sqnr: 23.788862   cos_err: 0.002255
 -- model.layers.20                            rfn_err: 0.054798   max_diff/norm: 0.134785   sqnr: 23.108732   cos_err: 0.002650
 -- model.layers.21                            rfn_err: 0.058880   max_diff/norm: 0.133462   sqnr: 22.572879   cos_err: 0.003073
 -- model.layers.22                            rfn_err: 0.064044   max_diff/norm: 0.131656   sqnr: 21.904474   cos_err: 0.003659
 -- model.layers.23                            rfn_err: 0.070224   max_diff/norm: 0.131168   sqnr: 21.062901   cos_err: 0.004548
 -- model.layers.24                            rfn_err: 0.077352   max_diff/norm: 0.127912   sqnr: 20.287736   cos_err: 0.005605
 -- model.layers.25                            rfn_err: 0.086052   max_diff/norm: 0.127358   sqnr: 19.350536   cos_err: 0.007216
 -- model.layers.26                            rfn_err: 0.097367   max_diff/norm: 0.127782   sqnr: 18.169144   cos_err: 0.009497
 -- model.layers.27                            rfn_err: 0.110078   max_diff/norm: 0.127504   sqnr: 17.233901   cos_err: 0.012311
 -- model.layers.28                            rfn_err: 0.141438   max_diff/norm: 0.124777   sqnr: 16.252857   cos_err: 0.016206
 -- model.layers.29                            rfn_err: 0.155649   max_diff/norm: 0.123986   sqnr: 15.576786   cos_err: 0.019012
 -- model.layers.30                            rfn_err: 0.171725   max_diff/norm: 0.122313   sqnr: 14.626882   cos_err: 0.023428
 -- model.layers.31                            rfn_err: 0.181566   max_diff/norm: 0.119922   sqnr: 14.148360   cos_err: 0.026096
 -- model.layers.32                            rfn_err: 0.198945   max_diff/norm: 0.117497   sqnr: 13.547689   cos_err: 0.029230
 -- model.layers.33                            rfn_err: 0.209506   max_diff/norm: 0.115155   sqnr: 13.038629   cos_err: 0.032930
 -- model.layers.34                            rfn_err: 0.216794   max_diff/norm: 0.112269   sqnr: 12.763568   cos_err: 0.034920
 -- model.layers.35                            rfn_err: 0.223848   max_diff/norm: 0.108764   sqnr: 12.565271   cos_err: 0.036753
 -- model.layers.36                            rfn_err: 0.237001   max_diff/norm: 0.107344   sqnr: 12.045963   cos_err: 0.041433
 -- model.layers.37                            rfn_err: 0.240391   max_diff/norm: 0.103187   sqnr: 12.052502   cos_err: 0.041365
 -- model.layers.38                            rfn_err: 0.249047   max_diff/norm: 0.099769   sqnr: 11.780027   cos_err: 0.045215
 -- model.layers.39                            rfn_err: 0.251593   max_diff/norm: 0.094690   sqnr: 11.874520   cos_err: 0.043901
 -- model.layers.40                            rfn_err: 0.256486   max_diff/norm: 0.092512   sqnr: 11.783108   cos_err: 0.044108
 -- model.layers.41                            rfn_err: 0.261185   max_diff/norm: 0.087722   sqnr: 11.704016   cos_err: 0.045681
 -- model.layers.42                            rfn_err: 0.269034   max_diff/norm: 0.084542   sqnr: 11.544741   cos_err: 0.048144
 -- model.layers.43                            rfn_err: 0.271123   max_diff/norm: 0.079522   sqnr: 11.648045   cos_err: 0.046260
 -- model.layers.44                            rfn_err: 0.281021   max_diff/norm: 0.076279   sqnr: 11.425829   cos_err: 0.049241
 -- model.layers.45                            rfn_err: 0.287677   max_diff/norm: 0.073810   sqnr: 11.279177   cos_err: 0.050887
 -- model.layers.46                            rfn_err: 0.288193   max_diff/norm: 0.069342   sqnr: 11.328149   cos_err: 0.050074
 -- model.layers.47                            rfn_err: 0.296293   max_diff/norm: 0.066391   sqnr: 11.184084   cos_err: 0.052388
 -- model.layers.48                            rfn_err: 0.299036   max_diff/norm: 0.063632   sqnr: 11.137132   cos_err: 0.052699
 -- model.layers.49                            rfn_err: 0.301785   max_diff/norm: 0.060063   sqnr: 11.114139   cos_err: 0.053790
 -- model.layers.50                            rfn_err: 0.304180   max_diff/norm: 0.057226   sqnr: 11.059243   cos_err: 0.054579
 -- model.layers.51                            rfn_err: 0.309586   max_diff/norm: 0.054422   sqnr: 10.955227   cos_err: 0.056505
 -- model.layers.52                            rfn_err: 0.311136   max_diff/norm: 0.050588   sqnr: 10.931107   cos_err: 0.057045
 -- model.layers.53                            rfn_err: 0.313254   max_diff/norm: 0.048196   sqnr: 10.851354   cos_err: 0.058378
 -- model.layers.54                            rfn_err: 0.315019   max_diff/norm: 0.045797   sqnr: 10.793363   cos_err: 0.058742
 -- model.layers.55                            rfn_err: 0.319939   max_diff/norm: 0.044467   sqnr: 10.657039   cos_err: 0.060996
 -- model.layers.56                            rfn_err: 0.322801   max_diff/norm: 0.041824   sqnr: 10.549990   cos_err: 0.061744
 -- model.layers.57                            rfn_err: 0.322995   max_diff/norm: 0.039723   sqnr: 10.481810   cos_err: 0.061341
 -- model.layers.58                            rfn_err: 0.318804   max_diff/norm: 0.038624   sqnr: 10.458463   cos_err: 0.059246
 -- model.layers.59                            rfn_err: 0.316041   max_diff/norm: 0.036968   sqnr: 10.455861   cos_err: 0.056453
 -- model.layers.60                            rfn_err: 0.305529   max_diff/norm: 0.032550   sqnr: 10.742234   cos_err: 0.051024
 -- model.layers.61                            rfn_err: 0.282084   max_diff/norm: 0.061164   sqnr: 11.445191   cos_err: 0.046361
 -- model.norm                                 rfn_err: 0.312897   max_diff/norm: 0.009568   sqnr: 11.155214   cos_err: 0.050396
 -- A perplexity:  8.74921130
 -- B perplexity:  8.34981264
 -- A label in top-K:
      K = 1: 0.5544
      K = 2: 0.6738
      K = 3: 0.7339
      K = 4: 0.7703
      K = 5: 0.7952
 -- B label in top-K:
      K = 1: 0.5598
      K = 2: 0.6804
      K = 3: 0.7390
      K = 4: 0.7749
      K = 5: 0.8005
 -- Top-K agreement, A vs B:
      K = 1: 0.8640
      K = 2: 0.6125
      K = 3: 0.3773
      K = 4: 0.2072
      K = 5: 0.1040
 -- KL divergence (A, B):  0.14842009
 -- KL divergence (B, A):  0.15566614