han-cai commited on
Commit
b84fa10
·
verified ·
1 Parent(s): 7ce7c82

Upload params.csv with huggingface_hub

Browse files
Files changed (1) hide show
  1. params.csv +157 -0
params.csv ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,optimizer,lr,wd,shape,mean,std,first_several_values
2
+ _fsdp_wrapped_module.blocks.0.attns.full_attention.qkv.weight,0,1e-06,0.0001,[1769472],4.49E-05,7.66E-02,"-0.0468, 0.0027, -0.0178, 0.0090, -0.0706, 0.0272, 0.0123, 0.0356, -0.0142, -0.0887"
3
+ _fsdp_wrapped_module.blocks.0.attns.full_attention.proj.weight,0,1e-06,0.0001,[589824],2.58E-05,2.88E-02,"-0.0187, -0.0622, 0.0098, -0.0118, 0.0009, -0.0168, 0.0077, -0.0037, -0.0219, 0.0027"
4
+ _fsdp_wrapped_module.blocks.0.attns.linear_attention_with_dynamic_dwc_on_v.qkv.weight,0,1e-06,0.0001,[1769472],4.49E-05,7.66E-02,"-0.0468, 0.0027, -0.0178, 0.0090, -0.0706, 0.0272, 0.0123, 0.0356, -0.0142, -0.0887"
5
+ _fsdp_wrapped_module.blocks.0.attns.linear_attention_with_dynamic_dwc_on_v.proj.weight,0,1e-06,0.0001,[589824],2.58E-05,2.88E-02,"-0.0187, -0.0622, 0.0098, -0.0118, 0.0009, -0.0168, 0.0077, -0.0037, -0.0219, 0.0027"
6
+ _fsdp_wrapped_module.blocks.0.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv1.weight,0,1e-06,0.0001,[1024],2.49E-03,1.58E-01,"-0.1403, -0.1711, -0.1295, 0.1673, 0.0430, -0.2659, 0.2373, -0.1201, -0.1575, 0.2264"
7
+ _fsdp_wrapped_module.blocks.0.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.weight,0,1e-06,0.0001,[9216],0.00E+00,0.00E+00,"0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000"
8
+ _fsdp_wrapped_module.blocks.1.attns.full_attention.qkv.weight,0,1e-06,0.0001,[1769472],9.03E-05,5.90E-02,"-0.0524, 0.0384, -0.0355, 0.0186, -0.0357, -0.0100, -0.0494, -0.0442, 0.0405, 0.0040"
9
+ _fsdp_wrapped_module.blocks.1.attns.full_attention.proj.weight,0,1e-06,0.0001,[589824],-1.42E-05,5.91E-02,"-0.0249, 0.0035, 0.0109, 0.0591, -0.0072, -0.0096, -0.0327, -0.0125, -0.0278, -0.0273"
10
+ _fsdp_wrapped_module.blocks.1.attns.linear_attention_with_dynamic_dwc_on_v.qkv.weight,0,1e-06,0.0001,[1769472],9.03E-05,5.90E-02,"-0.0524, 0.0384, -0.0355, 0.0186, -0.0357, -0.0100, -0.0494, -0.0442, 0.0405, 0.0040"
11
+ _fsdp_wrapped_module.blocks.1.attns.linear_attention_with_dynamic_dwc_on_v.proj.weight,0,1e-06,0.0001,[74080],-2.04E-04,5.89E-02,"-0.0249, 0.0035, 0.0109, 0.0591, -0.0072, -0.0096, -0.0327, -0.0125, -0.0278, -0.0273"
12
+ _fsdp_wrapped_module.blocks.1.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv1.weight,0,1e-06,0.0001,[0],NAN,NAN,
13
+ _fsdp_wrapped_module.blocks.1.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.weight,0,1e-06,0.0001,[0],NAN,NAN,
14
+ _fsdp_wrapped_module.blocks.2.attns.full_attention.qkv.weight,0,1e-06,0.0001,[0],NAN,NAN,
15
+ _fsdp_wrapped_module.blocks.2.attns.full_attention.proj.weight,0,1e-06,0.0001,[0],NAN,NAN,
16
+ _fsdp_wrapped_module.blocks.2.attns.linear_attention_with_dynamic_dwc_on_v.qkv.weight,0,1e-06,0.0001,[0],NAN,NAN,
17
+ _fsdp_wrapped_module.blocks.2.attns.linear_attention_with_dynamic_dwc_on_v.proj.weight,0,1e-06,0.0001,[0],NAN,NAN,
18
+ _fsdp_wrapped_module.blocks.2.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv1.weight,0,1e-06,0.0001,[0],NAN,NAN,
19
+ _fsdp_wrapped_module.blocks.2.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.weight,0,1e-06,0.0001,[0],NAN,NAN,
20
+ _fsdp_wrapped_module.blocks.3.attns.full_attention.qkv.weight,0,1e-06,0.0001,[0],NAN,NAN,
21
+ _fsdp_wrapped_module.blocks.3.attns.full_attention.proj.weight,0,1e-06,0.0001,[0],NAN,NAN,
22
+ _fsdp_wrapped_module.blocks.3.attns.linear_attention_with_dynamic_dwc_on_v.qkv.weight,0,1e-06,0.0001,[0],NAN,NAN,
23
+ _fsdp_wrapped_module.blocks.3.attns.linear_attention_with_dynamic_dwc_on_v.proj.weight,0,1e-06,0.0001,[0],NAN,NAN,
24
+ _fsdp_wrapped_module.blocks.3.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv1.weight,0,1e-06,0.0001,[0],NAN,NAN,
25
+ _fsdp_wrapped_module.blocks.3.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.weight,0,1e-06,0.0001,[0],NAN,NAN,
26
+ _fsdp_wrapped_module.blocks.4.attns.full_attention.qkv.weight,0,1e-06,0.0001,[0],NAN,NAN,
27
+ _fsdp_wrapped_module.blocks.4.attns.full_attention.proj.weight,0,1e-06,0.0001,[0],NAN,NAN,
28
+ _fsdp_wrapped_module.blocks.4.attns.linear_attention_with_dynamic_dwc_on_v.qkv.weight,0,1e-06,0.0001,[0],NAN,NAN,
29
+ _fsdp_wrapped_module.blocks.4.attns.linear_attention_with_dynamic_dwc_on_v.proj.weight,0,1e-06,0.0001,[0],NAN,NAN,
30
+ _fsdp_wrapped_module.blocks.4.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv1.weight,0,1e-06,0.0001,[0],NAN,NAN,
31
+ _fsdp_wrapped_module.blocks.4.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.weight,0,1e-06,0.0001,[0],NAN,NAN,
32
+ _fsdp_wrapped_module.blocks.5.attns.full_attention.qkv.weight,0,1e-06,0.0001,[0],NAN,NAN,
33
+ _fsdp_wrapped_module.blocks.5.attns.full_attention.proj.weight,0,1e-06,0.0001,[0],NAN,NAN,
34
+ _fsdp_wrapped_module.blocks.5.attns.linear_attention_with_dynamic_dwc_on_v.qkv.weight,0,1e-06,0.0001,[0],NAN,NAN,
35
+ _fsdp_wrapped_module.blocks.5.attns.linear_attention_with_dynamic_dwc_on_v.proj.weight,0,1e-06,0.0001,[0],NAN,NAN,
36
+ _fsdp_wrapped_module.blocks.5.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv1.weight,0,1e-06,0.0001,[0],NAN,NAN,
37
+ _fsdp_wrapped_module.blocks.5.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.weight,0,1e-06,0.0001,[0],NAN,NAN,
38
+ _fsdp_wrapped_module.blocks.6.attns.full_attention.qkv.weight,0,1e-06,0.0001,[0],NAN,NAN,
39
+ _fsdp_wrapped_module.blocks.6.attns.full_attention.proj.weight,0,1e-06,0.0001,[0],NAN,NAN,
40
+ _fsdp_wrapped_module.blocks.6.attns.linear_attention_with_dynamic_dwc_on_v.qkv.weight,0,1e-06,0.0001,[0],NAN,NAN,
41
+ _fsdp_wrapped_module.blocks.6.attns.linear_attention_with_dynamic_dwc_on_v.proj.weight,0,1e-06,0.0001,[0],NAN,NAN,
42
+ _fsdp_wrapped_module.blocks.6.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv1.weight,0,1e-06,0.0001,[0],NAN,NAN,
43
+ _fsdp_wrapped_module.blocks.6.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.weight,0,1e-06,0.0001,[0],NAN,NAN,
44
+ _fsdp_wrapped_module.blocks.7.attns.full_attention.qkv.weight,0,1e-06,0.0001,[0],NAN,NAN,
45
+ _fsdp_wrapped_module.blocks.7.attns.full_attention.proj.weight,0,1e-06,0.0001,[0],NAN,NAN,
46
+ _fsdp_wrapped_module.blocks.7.attns.linear_attention_with_dynamic_dwc_on_v.qkv.weight,0,1e-06,0.0001,[0],NAN,NAN,
47
+ _fsdp_wrapped_module.blocks.7.attns.linear_attention_with_dynamic_dwc_on_v.proj.weight,0,1e-06,0.0001,[0],NAN,NAN,
48
+ _fsdp_wrapped_module.blocks.7.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv1.weight,0,1e-06,0.0001,[0],NAN,NAN,
49
+ _fsdp_wrapped_module.blocks.7.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.weight,0,1e-06,0.0001,[0],NAN,NAN,
50
+ _fsdp_wrapped_module.blocks.8.attns.full_attention.qkv.weight,0,1e-06,0.0001,[0],NAN,NAN,
51
+ _fsdp_wrapped_module.blocks.8.attns.full_attention.proj.weight,0,1e-06,0.0001,[0],NAN,NAN,
52
+ _fsdp_wrapped_module.blocks.8.attns.linear_attention_with_dynamic_dwc_on_v.qkv.weight,0,1e-06,0.0001,[0],NAN,NAN,
53
+ _fsdp_wrapped_module.blocks.8.attns.linear_attention_with_dynamic_dwc_on_v.proj.weight,0,1e-06,0.0001,[0],NAN,NAN,
54
+ _fsdp_wrapped_module.blocks.8.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv1.weight,0,1e-06,0.0001,[0],NAN,NAN,
55
+ _fsdp_wrapped_module.blocks.8.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.weight,0,1e-06,0.0001,[0],NAN,NAN,
56
+ _fsdp_wrapped_module.blocks.9.attns.full_attention.qkv.weight,0,1e-06,0.0001,[0],NAN,NAN,
57
+ _fsdp_wrapped_module.blocks.9.attns.full_attention.proj.weight,0,1e-06,0.0001,[0],NAN,NAN,
58
+ _fsdp_wrapped_module.blocks.9.attns.linear_attention_with_dynamic_dwc_on_v.qkv.weight,0,1e-06,0.0001,[0],NAN,NAN,
59
+ _fsdp_wrapped_module.blocks.9.attns.linear_attention_with_dynamic_dwc_on_v.proj.weight,0,1e-06,0.0001,[0],NAN,NAN,
60
+ _fsdp_wrapped_module.blocks.9.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv1.weight,0,1e-06,0.0001,[0],NAN,NAN,
61
+ _fsdp_wrapped_module.blocks.9.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.weight,0,1e-06,0.0001,[0],NAN,NAN,
62
+ _fsdp_wrapped_module.blocks.10.attns.full_attention.qkv.weight,0,1e-06,0.0001,[0],NAN,NAN,
63
+ _fsdp_wrapped_module.blocks.10.attns.full_attention.proj.weight,0,1e-06,0.0001,[0],NAN,NAN,
64
+ _fsdp_wrapped_module.blocks.10.attns.linear_attention_with_dynamic_dwc_on_v.qkv.weight,0,1e-06,0.0001,[0],NAN,NAN,
65
+ _fsdp_wrapped_module.blocks.10.attns.linear_attention_with_dynamic_dwc_on_v.proj.weight,0,1e-06,0.0001,[0],NAN,NAN,
66
+ _fsdp_wrapped_module.blocks.10.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv1.weight,0,1e-06,0.0001,[0],NAN,NAN,
67
+ _fsdp_wrapped_module.blocks.10.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.weight,0,1e-06,0.0001,[0],NAN,NAN,
68
+ _fsdp_wrapped_module.blocks.11.attns.full_attention.qkv.weight,0,1e-06,0.0001,[0],NAN,NAN,
69
+ _fsdp_wrapped_module.blocks.11.attns.full_attention.proj.weight,0,1e-06,0.0001,[0],NAN,NAN,
70
+ _fsdp_wrapped_module.blocks.11.attns.linear_attention_with_dynamic_dwc_on_v.qkv.weight,0,1e-06,0.0001,[0],NAN,NAN,
71
+ _fsdp_wrapped_module.blocks.11.attns.linear_attention_with_dynamic_dwc_on_v.proj.weight,0,1e-06,0.0001,[0],NAN,NAN,
72
+ _fsdp_wrapped_module.blocks.11.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv1.weight,0,1e-06,0.0001,[0],NAN,NAN,
73
+ _fsdp_wrapped_module.blocks.11.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.weight,0,1e-06,0.0001,[0],NAN,NAN,
74
+ _fsdp_wrapped_module.blocks.0.attns.full_attention.qkv.bias,0,1e-06,0.0,[2304],0.00E+00,0.00E+00,"0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000"
75
+ _fsdp_wrapped_module.blocks.0.attns.full_attention.proj.bias,0,1e-06,0.0,[768],2.63E-02,6.70E-01,"-0.0497, 0.0112, -0.0262, 0.0088, -0.1807, 0.0368, 0.0250, 0.0253, -0.0265, -0.0110"
76
+ _fsdp_wrapped_module.blocks.0.attns.linear_attention_with_dynamic_dwc_on_v.qkv.bias,0,1e-06,0.0,[2304],0.00E+00,0.00E+00,"0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000"
77
+ _fsdp_wrapped_module.blocks.0.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.weight,0,1e-06,0.0,[64],1.00E+00,0.00E+00,"1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000"
78
+ _fsdp_wrapped_module.blocks.0.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.bias,0,1e-06,0.0,[64],0.00E+00,0.00E+00,"0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000"
79
+ _fsdp_wrapped_module.blocks.0.attns.linear_attention_with_dynamic_dwc_on_v.proj.bias,0,1e-06,0.0,[768],2.63E-02,6.70E-01,"-0.0497, 0.0112, -0.0262, 0.0088, -0.1807, 0.0368, 0.0250, 0.0253, -0.0265, -0.0110"
80
+ _fsdp_wrapped_module.blocks.0.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.bias,0,1e-06,0.0,[576],0.00E+00,0.00E+00,"0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000"
81
+ _fsdp_wrapped_module.blocks.1.attns.full_attention.qkv.bias,0,1e-06,0.0,[2304],0.00E+00,0.00E+00,"0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000"
82
+ _fsdp_wrapped_module.blocks.1.attns.full_attention.proj.bias,0,1e-06,0.0,[768],-9.08E-03,3.96E-01,"-0.0112, 0.0545, -0.0729, 0.4950, 0.0816, 0.0533, 0.0715, -0.3199, -0.0840, -0.1038"
83
+ _fsdp_wrapped_module.blocks.1.attns.linear_attention_with_dynamic_dwc_on_v.qkv.bias,0,1e-06,0.0,[2304],0.00E+00,0.00E+00,"0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000"
84
+ _fsdp_wrapped_module.blocks.1.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.weight,0,1e-06,0.0,[64],1.00E+00,0.00E+00,"1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000"
85
+ _fsdp_wrapped_module.blocks.1.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.bias,0,1e-06,0.0,[64],0.00E+00,0.00E+00,"0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000"
86
+ _fsdp_wrapped_module.blocks.1.attns.linear_attention_with_dynamic_dwc_on_v.proj.bias,0,1e-06,0.0,[0],NAN,NAN,
87
+ _fsdp_wrapped_module.blocks.1.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.bias,0,1e-06,0.0,[0],NAN,NAN,
88
+ _fsdp_wrapped_module.blocks.2.attns.full_attention.qkv.bias,0,1e-06,0.0,[0],NAN,NAN,
89
+ _fsdp_wrapped_module.blocks.2.attns.full_attention.proj.bias,0,1e-06,0.0,[0],NAN,NAN,
90
+ _fsdp_wrapped_module.blocks.2.attns.linear_attention_with_dynamic_dwc_on_v.qkv.bias,0,1e-06,0.0,[0],NAN,NAN,
91
+ _fsdp_wrapped_module.blocks.2.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.weight,0,1e-06,0.0,[0],NAN,NAN,
92
+ _fsdp_wrapped_module.blocks.2.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.bias,0,1e-06,0.0,[0],NAN,NAN,
93
+ _fsdp_wrapped_module.blocks.2.attns.linear_attention_with_dynamic_dwc_on_v.proj.bias,0,1e-06,0.0,[0],NAN,NAN,
94
+ _fsdp_wrapped_module.blocks.2.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.bias,0,1e-06,0.0,[0],NAN,NAN,
95
+ _fsdp_wrapped_module.blocks.3.attns.full_attention.qkv.bias,0,1e-06,0.0,[0],NAN,NAN,
96
+ _fsdp_wrapped_module.blocks.3.attns.full_attention.proj.bias,0,1e-06,0.0,[0],NAN,NAN,
97
+ _fsdp_wrapped_module.blocks.3.attns.linear_attention_with_dynamic_dwc_on_v.qkv.bias,0,1e-06,0.0,[0],NAN,NAN,
98
+ _fsdp_wrapped_module.blocks.3.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.weight,0,1e-06,0.0,[0],NAN,NAN,
99
+ _fsdp_wrapped_module.blocks.3.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.bias,0,1e-06,0.0,[0],NAN,NAN,
100
+ _fsdp_wrapped_module.blocks.3.attns.linear_attention_with_dynamic_dwc_on_v.proj.bias,0,1e-06,0.0,[0],NAN,NAN,
101
+ _fsdp_wrapped_module.blocks.3.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.bias,0,1e-06,0.0,[0],NAN,NAN,
102
+ _fsdp_wrapped_module.blocks.4.attns.full_attention.qkv.bias,0,1e-06,0.0,[0],NAN,NAN,
103
+ _fsdp_wrapped_module.blocks.4.attns.full_attention.proj.bias,0,1e-06,0.0,[0],NAN,NAN,
104
+ _fsdp_wrapped_module.blocks.4.attns.linear_attention_with_dynamic_dwc_on_v.qkv.bias,0,1e-06,0.0,[0],NAN,NAN,
105
+ _fsdp_wrapped_module.blocks.4.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.weight,0,1e-06,0.0,[0],NAN,NAN,
106
+ _fsdp_wrapped_module.blocks.4.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.bias,0,1e-06,0.0,[0],NAN,NAN,
107
+ _fsdp_wrapped_module.blocks.4.attns.linear_attention_with_dynamic_dwc_on_v.proj.bias,0,1e-06,0.0,[0],NAN,NAN,
108
+ _fsdp_wrapped_module.blocks.4.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.bias,0,1e-06,0.0,[0],NAN,NAN,
109
+ _fsdp_wrapped_module.blocks.5.attns.full_attention.qkv.bias,0,1e-06,0.0,[0],NAN,NAN,
110
+ _fsdp_wrapped_module.blocks.5.attns.full_attention.proj.bias,0,1e-06,0.0,[0],NAN,NAN,
111
+ _fsdp_wrapped_module.blocks.5.attns.linear_attention_with_dynamic_dwc_on_v.qkv.bias,0,1e-06,0.0,[0],NAN,NAN,
112
+ _fsdp_wrapped_module.blocks.5.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.weight,0,1e-06,0.0,[0],NAN,NAN,
113
+ _fsdp_wrapped_module.blocks.5.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.bias,0,1e-06,0.0,[0],NAN,NAN,
114
+ _fsdp_wrapped_module.blocks.5.attns.linear_attention_with_dynamic_dwc_on_v.proj.bias,0,1e-06,0.0,[0],NAN,NAN,
115
+ _fsdp_wrapped_module.blocks.5.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.bias,0,1e-06,0.0,[0],NAN,NAN,
116
+ _fsdp_wrapped_module.blocks.6.attns.full_attention.qkv.bias,0,1e-06,0.0,[0],NAN,NAN,
117
+ _fsdp_wrapped_module.blocks.6.attns.full_attention.proj.bias,0,1e-06,0.0,[0],NAN,NAN,
118
+ _fsdp_wrapped_module.blocks.6.attns.linear_attention_with_dynamic_dwc_on_v.qkv.bias,0,1e-06,0.0,[0],NAN,NAN,
119
+ _fsdp_wrapped_module.blocks.6.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.weight,0,1e-06,0.0,[0],NAN,NAN,
120
+ _fsdp_wrapped_module.blocks.6.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.bias,0,1e-06,0.0,[0],NAN,NAN,
121
+ _fsdp_wrapped_module.blocks.6.attns.linear_attention_with_dynamic_dwc_on_v.proj.bias,0,1e-06,0.0,[0],NAN,NAN,
122
+ _fsdp_wrapped_module.blocks.6.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.bias,0,1e-06,0.0,[0],NAN,NAN,
123
+ _fsdp_wrapped_module.blocks.7.attns.full_attention.qkv.bias,0,1e-06,0.0,[0],NAN,NAN,
124
+ _fsdp_wrapped_module.blocks.7.attns.full_attention.proj.bias,0,1e-06,0.0,[0],NAN,NAN,
125
+ _fsdp_wrapped_module.blocks.7.attns.linear_attention_with_dynamic_dwc_on_v.qkv.bias,0,1e-06,0.0,[0],NAN,NAN,
126
+ _fsdp_wrapped_module.blocks.7.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.weight,0,1e-06,0.0,[0],NAN,NAN,
127
+ _fsdp_wrapped_module.blocks.7.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.bias,0,1e-06,0.0,[0],NAN,NAN,
128
+ _fsdp_wrapped_module.blocks.7.attns.linear_attention_with_dynamic_dwc_on_v.proj.bias,0,1e-06,0.0,[0],NAN,NAN,
129
+ _fsdp_wrapped_module.blocks.7.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.bias,0,1e-06,0.0,[0],NAN,NAN,
130
+ _fsdp_wrapped_module.blocks.8.attns.full_attention.qkv.bias,0,1e-06,0.0,[0],NAN,NAN,
131
+ _fsdp_wrapped_module.blocks.8.attns.full_attention.proj.bias,0,1e-06,0.0,[0],NAN,NAN,
132
+ _fsdp_wrapped_module.blocks.8.attns.linear_attention_with_dynamic_dwc_on_v.qkv.bias,0,1e-06,0.0,[0],NAN,NAN,
133
+ _fsdp_wrapped_module.blocks.8.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.weight,0,1e-06,0.0,[0],NAN,NAN,
134
+ _fsdp_wrapped_module.blocks.8.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.bias,0,1e-06,0.0,[0],NAN,NAN,
135
+ _fsdp_wrapped_module.blocks.8.attns.linear_attention_with_dynamic_dwc_on_v.proj.bias,0,1e-06,0.0,[0],NAN,NAN,
136
+ _fsdp_wrapped_module.blocks.8.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.bias,0,1e-06,0.0,[0],NAN,NAN,
137
+ _fsdp_wrapped_module.blocks.9.attns.full_attention.qkv.bias,0,1e-06,0.0,[0],NAN,NAN,
138
+ _fsdp_wrapped_module.blocks.9.attns.full_attention.proj.bias,0,1e-06,0.0,[0],NAN,NAN,
139
+ _fsdp_wrapped_module.blocks.9.attns.linear_attention_with_dynamic_dwc_on_v.qkv.bias,0,1e-06,0.0,[0],NAN,NAN,
140
+ _fsdp_wrapped_module.blocks.9.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.weight,0,1e-06,0.0,[0],NAN,NAN,
141
+ _fsdp_wrapped_module.blocks.9.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.bias,0,1e-06,0.0,[0],NAN,NAN,
142
+ _fsdp_wrapped_module.blocks.9.attns.linear_attention_with_dynamic_dwc_on_v.proj.bias,0,1e-06,0.0,[0],NAN,NAN,
143
+ _fsdp_wrapped_module.blocks.9.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.bias,0,1e-06,0.0,[0],NAN,NAN,
144
+ _fsdp_wrapped_module.blocks.10.attns.full_attention.qkv.bias,0,1e-06,0.0,[0],NAN,NAN,
145
+ _fsdp_wrapped_module.blocks.10.attns.full_attention.proj.bias,0,1e-06,0.0,[0],NAN,NAN,
146
+ _fsdp_wrapped_module.blocks.10.attns.linear_attention_with_dynamic_dwc_on_v.qkv.bias,0,1e-06,0.0,[0],NAN,NAN,
147
+ _fsdp_wrapped_module.blocks.10.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.weight,0,1e-06,0.0,[0],NAN,NAN,
148
+ _fsdp_wrapped_module.blocks.10.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.bias,0,1e-06,0.0,[0],NAN,NAN,
149
+ _fsdp_wrapped_module.blocks.10.attns.linear_attention_with_dynamic_dwc_on_v.proj.bias,0,1e-06,0.0,[0],NAN,NAN,
150
+ _fsdp_wrapped_module.blocks.10.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.bias,0,1e-06,0.0,[0],NAN,NAN,
151
+ _fsdp_wrapped_module.blocks.11.attns.full_attention.qkv.bias,0,1e-06,0.0,[0],NAN,NAN,
152
+ _fsdp_wrapped_module.blocks.11.attns.full_attention.proj.bias,0,1e-06,0.0,[0],NAN,NAN,
153
+ _fsdp_wrapped_module.blocks.11.attns.linear_attention_with_dynamic_dwc_on_v.qkv.bias,0,1e-06,0.0,[0],NAN,NAN,
154
+ _fsdp_wrapped_module.blocks.11.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.weight,0,1e-06,0.0,[0],NAN,NAN,
155
+ _fsdp_wrapped_module.blocks.11.attns.linear_attention_with_dynamic_dwc_on_v.o_norm.bias,0,1e-06,0.0,[0],NAN,NAN,
156
+ _fsdp_wrapped_module.blocks.11.attns.linear_attention_with_dynamic_dwc_on_v.proj.bias,0,1e-06,0.0,[0],NAN,NAN,
157
+ _fsdp_wrapped_module.blocks.11.attns.linear_attention_with_dynamic_dwc_on_v.dwc.kernel_generator.conv2.bias,0,1e-06,0.0,[0],NAN,NAN,