File size: 8,766 Bytes
9bbe9c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88

 -- model.embed_tokens                         rfn_err: 0.000000   max_diff/norm: 0.000000   sqnr: 79.869484   cos_err: 0.000000
 -- model.layers.0                             rfn_err: 0.007858   max_diff/norm: 0.001212   sqnr: 43.922771   cos_err: 0.000015
 -- model.layers.1                             rfn_err: 0.011489   max_diff/norm: 0.003147   sqnr: 39.937154   cos_err: 0.000051
 -- model.layers.2                             rfn_err: 0.024680   max_diff/norm: 0.005199   sqnr: 33.424866   cos_err: 0.000133
 -- model.layers.3                             rfn_err: 0.047014   max_diff/norm: 0.005335   sqnr: 27.375450   cos_err: 0.000201
 -- model.layers.4                             rfn_err: 0.053175   max_diff/norm: 0.010376   sqnr: 26.392067   cos_err: 0.000264
 -- model.layers.5                             rfn_err: 0.101607   max_diff/norm: 0.439889   sqnr: 25.275748   cos_err: 0.000366
 -- model.layers.6                             rfn_err: 0.104424   max_diff/norm: 0.433924   sqnr: 23.747089   cos_err: 0.000484
 -- model.layers.7                             rfn_err: 0.104557   max_diff/norm: 0.431919   sqnr: 24.505606   cos_err: 0.000609
 -- model.layers.8                             rfn_err: 0.105434   max_diff/norm: 0.429296   sqnr: 23.243343   cos_err: 0.000926
 -- model.layers.9                             rfn_err: 0.106367   max_diff/norm: 0.423224   sqnr: 22.128818   cos_err: 0.001310
 -- model.layers.10                            rfn_err: 0.104368   max_diff/norm: 0.418102   sqnr: 22.921680   cos_err: 0.001477
 -- model.layers.11                            rfn_err: 0.105033   max_diff/norm: 0.413182   sqnr: 22.373919   cos_err: 0.001895
 -- model.layers.12                            rfn_err: 0.106147   max_diff/norm: 0.409659   sqnr: 21.870182   cos_err: 0.002353
 -- model.layers.13                            rfn_err: 0.107418   max_diff/norm: 0.406745   sqnr: 21.460371   cos_err: 0.002797
 -- model.layers.14                            rfn_err: 0.108870   max_diff/norm: 0.403091   sqnr: 21.103061   cos_err: 0.003244
 -- model.layers.15                            rfn_err: 0.112406   max_diff/norm: 0.397691   sqnr: 20.229972   cos_err: 0.003911
 -- model.layers.16                            rfn_err: 0.113939   max_diff/norm: 0.391588   sqnr: 19.953552   cos_err: 0.004338
 -- model.layers.17                            rfn_err: 0.118850   max_diff/norm: 0.387389   sqnr: 19.038787   cos_err: 0.005121
 -- model.layers.18                            rfn_err: 0.122696   max_diff/norm: 0.382501   sqnr: 18.479936   cos_err: 0.006249
 -- model.layers.19                            rfn_err: 0.126383   max_diff/norm: 0.375111   sqnr: 18.069667   cos_err: 0.007387
 -- model.layers.20                            rfn_err: 0.133546   max_diff/norm: 0.371628   sqnr: 17.313982   cos_err: 0.008723
 -- model.layers.21                            rfn_err: 0.139186   max_diff/norm: 0.355202   sqnr: 16.785980   cos_err: 0.010051
 -- model.layers.22                            rfn_err: 0.146212   max_diff/norm: 0.354242   sqnr: 16.204125   cos_err: 0.011832
 -- model.layers.23                            rfn_err: 0.153624   max_diff/norm: 0.354606   sqnr: 15.559332   cos_err: 0.014324
 -- model.layers.24                            rfn_err: 0.167379   max_diff/norm: 0.376240   sqnr: 14.823083   cos_err: 0.017401
 -- model.layers.25                            rfn_err: 0.178808   max_diff/norm: 0.371875   sqnr: 14.015634   cos_err: 0.021692
 -- model.layers.26                            rfn_err: 0.191962   max_diff/norm: 0.371208   sqnr: 13.135830   cos_err: 0.027272
 -- model.layers.27                            rfn_err: 0.208002   max_diff/norm: 0.363041   sqnr: 12.251230   cos_err: 0.034009
 -- model.layers.28                            rfn_err: 0.237604   max_diff/norm: 0.358428   sqnr: 11.404306   cos_err: 0.042369
 -- model.layers.29                            rfn_err: 0.254413   max_diff/norm: 0.353930   sqnr: 10.800144   cos_err: 0.048954
 -- model.layers.30                            rfn_err: 0.276157   max_diff/norm: 0.345944   sqnr:  9.982429   cos_err: 0.058998
 -- model.layers.31                            rfn_err: 0.289523   max_diff/norm: 0.339725   sqnr:  9.545339   cos_err: 0.065212
 -- model.layers.32                            rfn_err: 0.310718   max_diff/norm: 0.328324   sqnr:  9.050561   cos_err: 0.072135
 -- model.layers.33                            rfn_err: 0.325859   max_diff/norm: 0.321673   sqnr:  8.594002   cos_err: 0.080117
 -- model.layers.34                            rfn_err: 0.336873   max_diff/norm: 0.311122   sqnr:  8.352301   cos_err: 0.084569
 -- model.layers.35                            rfn_err: 0.346817   max_diff/norm: 0.300775   sqnr:  8.178503   cos_err: 0.087901
 -- model.layers.36                            rfn_err: 0.364598   max_diff/norm: 0.298362   sqnr:  7.717051   cos_err: 0.097876
 -- model.layers.37                            rfn_err: 0.370055   max_diff/norm: 0.282943   sqnr:  7.747602   cos_err: 0.097190
 -- model.layers.38                            rfn_err: 0.383731   max_diff/norm: 0.271891   sqnr:  7.462389   cos_err: 0.105123
 -- model.layers.39                            rfn_err: 0.387421   max_diff/norm: 0.258139   sqnr:  7.546127   cos_err: 0.101936
 -- model.layers.40                            rfn_err: 0.394746   max_diff/norm: 0.238546   sqnr:  7.460694   cos_err: 0.103462
 -- model.layers.41                            rfn_err: 0.402364   max_diff/norm: 0.203700   sqnr:  7.381044   cos_err: 0.105978
 -- model.layers.42                            rfn_err: 0.412670   max_diff/norm: 0.168166   sqnr:  7.233969   cos_err: 0.110305
 -- model.layers.43                            rfn_err: 0.415765   max_diff/norm: 0.144052   sqnr:  7.316041   cos_err: 0.107760
 -- model.layers.44                            rfn_err: 0.429482   max_diff/norm: 0.141583   sqnr:  7.099236   cos_err: 0.113699
 -- model.layers.45                            rfn_err: 0.438810   max_diff/norm: 0.134049   sqnr:  6.963944   cos_err: 0.117313
 -- model.layers.46                            rfn_err: 0.439827   max_diff/norm: 0.127399   sqnr:  7.015340   cos_err: 0.115621
 -- model.layers.47                            rfn_err: 0.449468   max_diff/norm: 0.121084   sqnr:  6.893901   cos_err: 0.120122
 -- model.layers.48                            rfn_err: 0.454131   max_diff/norm: 0.114432   sqnr:  6.845831   cos_err: 0.120953
 -- model.layers.49                            rfn_err: 0.457335   max_diff/norm: 0.107741   sqnr:  6.835199   cos_err: 0.122351
 -- model.layers.50                            rfn_err: 0.460975   max_diff/norm: 0.101064   sqnr:  6.794710   cos_err: 0.123616
 -- model.layers.51                            rfn_err: 0.468401   max_diff/norm: 0.094384   sqnr:  6.697549   cos_err: 0.127102
 -- model.layers.52                            rfn_err: 0.471488   max_diff/norm: 0.086993   sqnr:  6.672907   cos_err: 0.128079
 -- model.layers.53                            rfn_err: 0.476036   max_diff/norm: 0.082251   sqnr:  6.596959   cos_err: 0.130608
 -- model.layers.54                            rfn_err: 0.478482   max_diff/norm: 0.077461   sqnr:  6.566252   cos_err: 0.131370
 -- model.layers.55                            rfn_err: 0.484951   max_diff/norm: 0.075136   sqnr:  6.461519   cos_err: 0.135141
 -- model.layers.56                            rfn_err: 0.490010   max_diff/norm: 0.072084   sqnr:  6.370898   cos_err: 0.137315
 -- model.layers.57                            rfn_err: 0.493214   max_diff/norm: 0.065704   sqnr:  6.307768   cos_err: 0.137680
 -- model.layers.58                            rfn_err: 0.492374   max_diff/norm: 0.062835   sqnr:  6.277678   cos_err: 0.136408
 -- model.layers.59                            rfn_err: 0.494073   max_diff/norm: 0.059431   sqnr:  6.242959   cos_err: 0.134536
 -- model.layers.60                            rfn_err: 0.480946   max_diff/norm: 0.051310   sqnr:  6.505622   cos_err: 0.124897
 -- model.layers.61                            rfn_err: 0.450072   max_diff/norm: 0.107757   sqnr:  7.071674   cos_err: 0.111895
 -- model.norm                                 rfn_err: 0.483829   max_diff/norm: 0.010220   sqnr:  6.910045   cos_err: 0.119958
 -- A perplexity:  9.46492433
 -- B perplexity:  8.34981264
 -- A label in top-K:
      K = 1: 0.5363
      K = 2: 0.6564
      K = 3: 0.7155
      K = 4: 0.7527
      K = 5: 0.7792
 -- B label in top-K:
      K = 1: 0.5598
      K = 2: 0.6804
      K = 3: 0.7390
      K = 4: 0.7749
      K = 5: 0.8005
 -- Top-K agreement, A vs B:
      K = 1: 0.7699
      K = 2: 0.4340
      K = 3: 0.2006
      K = 4: 0.0796
      K = 5: 0.0289
 -- KL divergence (A, B):  0.36735150
 -- KL divergence (B, A):  0.42469226