yuccaaa commited on
Commit
d3931d8
·
verified ·
1 Parent(s): c72f6bb

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. BIO/ablation/metal_ion_binding_test.jsonl +718 -0
  2. ProtT3/all_checkpoints/stage1_06221723/wandb/run-20250622_192645-zmuhvn72/files/wandb-summary.json +1 -0
  3. ProtT3/all_checkpoints/stage1_06221723/wandb/run-20250622_192645-zmuhvn72/logs/debug-internal.log +15 -0
  4. ProtT3/all_checkpoints/stage1_06221723/wandb/run-20250622_192645-zmuhvn72/logs/debug.log +24 -0
  5. ProtT3/all_checkpoints/stage1_06261435/wandb/debug-internal.log +15 -0
  6. ProtT3/all_checkpoints/stage1_06261435/wandb/debug.log +23 -0
  7. ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_143705-0goy7nyw/logs/debug-internal.log +3 -0
  8. ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_143705-0goy7nyw/logs/debug.log +94 -0
  9. ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/files/output.log +19 -0
  10. ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/files/requirements.txt +225 -0
  11. ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/files/wandb-metadata.json +98 -0
  12. ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/files/wandb-summary.json +1 -0
  13. ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/logs/debug-internal.log +12 -0
  14. ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/logs/debug.log +24 -0
  15. ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/run-coknhy79.wandb +0 -0
  16. ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/files/output.log +28 -0
  17. ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/files/requirements.txt +225 -0
  18. ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/files/wandb-metadata.json +98 -0
  19. ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/logs/debug-internal.log +15 -0
  20. ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/logs/debug.log +23 -0
  21. ProtT3/all_checkpoints/stage1_06262112/wandb/debug-internal.log +73 -0
  22. ProtT3/all_checkpoints/stage1_06262112/wandb/debug.log +23 -0
  23. ProtT3/all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/files/output.log +28 -0
  24. ProtT3/all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/files/requirements.txt +225 -0
  25. ProtT3/all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/files/wandb-metadata.json +98 -0
  26. ProtT3/all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/logs/debug-internal.log +73 -0
  27. ProtT3/all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/logs/debug.log +23 -0
  28. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/debug-internal.log +17 -0
  29. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/debug.log +24 -0
  30. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/files/output.log +16 -0
  31. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/files/requirements.txt +225 -0
  32. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/files/wandb-metadata.json +103 -0
  33. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/logs/debug-internal.log +7 -0
  34. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/logs/debug.log +23 -0
  35. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/run-2bo0nfvt.wandb +0 -0
  36. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/files/output.log +4 -0
  37. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/files/requirements.txt +225 -0
  38. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/files/wandb-metadata.json +103 -0
  39. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/logs/debug-internal.log +7 -0
  40. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/logs/debug.log +22 -0
  41. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/run-ftp1v3gy.wandb +0 -0
  42. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/files/output.log +4 -0
  43. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/files/requirements.txt +225 -0
  44. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/files/wandb-metadata.json +103 -0
  45. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/logs/debug-internal.log +7 -0
  46. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/logs/debug.log +22 -0
  47. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/run-p1815hm9.wandb +0 -0
  48. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_190145-vu5mgolt/files/output.log +4 -0
  49. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_190145-vu5mgolt/files/requirements.txt +225 -0
  50. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_190145-vu5mgolt/run-vu5mgolt.wandb +0 -0
BIO/ablation/metal_ion_binding_test.jsonl ADDED
@@ -0,0 +1,718 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"reference_answer": "0", "generated_answer": "1"}
2
+ {"reference_answer": "0", "generated_answer": "1"}
3
+ {"reference_answer": "0", "generated_answer": "1"}
4
+ {"reference_answer": "1", "generated_answer": "1"}
5
+ {"reference_answer": "1", "generated_answer": "1"}
6
+ {"reference_answer": "0", "generated_answer": "1"}
7
+ {"reference_answer": "0", "generated_answer": "1"}
8
+ {"reference_answer": "0", "generated_answer": "1"}
9
+ {"reference_answer": "0", "generated_answer": "1"}
10
+ {"reference_answer": "0", "generated_answer": "1"}
11
+ {"reference_answer": "1", "generated_answer": "1"}
12
+ {"reference_answer": "0", "generated_answer": "1"}
13
+ {"reference_answer": "0", "generated_answer": "1"}
14
+ {"reference_answer": "0", "generated_answer": "1"}
15
+ {"reference_answer": "1", "generated_answer": "1"}
16
+ {"reference_answer": "0", "generated_answer": "1"}
17
+ {"reference_answer": "0", "generated_answer": "1"}
18
+ {"reference_answer": "1", "generated_answer": "1"}
19
+ {"reference_answer": "0", "generated_answer": "1"}
20
+ {"reference_answer": "1", "generated_answer": "1"}
21
+ {"reference_answer": "1", "generated_answer": "1"}
22
+ {"reference_answer": "1", "generated_answer": "1"}
23
+ {"reference_answer": "0", "generated_answer": "1"}
24
+ {"reference_answer": "0", "generated_answer": "1"}
25
+ {"reference_answer": "0", "generated_answer": "1"}
26
+ {"reference_answer": "1", "generated_answer": "1"}
27
+ {"reference_answer": "0", "generated_answer": "1"}
28
+ {"reference_answer": "0", "generated_answer": "1"}
29
+ {"reference_answer": "1", "generated_answer": "1"}
30
+ {"reference_answer": "1", "generated_answer": "1"}
31
+ {"reference_answer": "0", "generated_answer": "1"}
32
+ {"reference_answer": "1", "generated_answer": "1"}
33
+ {"reference_answer": "0", "generated_answer": "1"}
34
+ {"reference_answer": "0", "generated_answer": "1"}
35
+ {"reference_answer": "0", "generated_answer": "1"}
36
+ {"reference_answer": "0", "generated_answer": "1"}
37
+ {"reference_answer": "0", "generated_answer": "1"}
38
+ {"reference_answer": "0", "generated_answer": "1"}
39
+ {"reference_answer": "0", "generated_answer": "1"}
40
+ {"reference_answer": "0", "generated_answer": "1"}
41
+ {"reference_answer": "0", "generated_answer": "1"}
42
+ {"reference_answer": "0", "generated_answer": "1"}
43
+ {"reference_answer": "1", "generated_answer": "1"}
44
+ {"reference_answer": "0", "generated_answer": "1"}
45
+ {"reference_answer": "0", "generated_answer": "1"}
46
+ {"reference_answer": "0", "generated_answer": "1"}
47
+ {"reference_answer": "0", "generated_answer": "1"}
48
+ {"reference_answer": "1", "generated_answer": "1"}
49
+ {"reference_answer": "0", "generated_answer": "1"}
50
+ {"reference_answer": "0", "generated_answer": "1"}
51
+ {"reference_answer": "0", "generated_answer": "1"}
52
+ {"reference_answer": "1", "generated_answer": "1"}
53
+ {"reference_answer": "0", "generated_answer": "1"}
54
+ {"reference_answer": "0", "generated_answer": "1"}
55
+ {"reference_answer": "0", "generated_answer": "1"}
56
+ {"reference_answer": "0", "generated_answer": "1"}
57
+ {"reference_answer": "1", "generated_answer": "1"}
58
+ {"reference_answer": "0", "generated_answer": "1"}
59
+ {"reference_answer": "0", "generated_answer": "1"}
60
+ {"reference_answer": "0", "generated_answer": "1"}
61
+ {"reference_answer": "0", "generated_answer": "1"}
62
+ {"reference_answer": "1", "generated_answer": "1"}
63
+ {"reference_answer": "0", "generated_answer": "1"}
64
+ {"reference_answer": "0", "generated_answer": "1"}
65
+ {"reference_answer": "1", "generated_answer": "1"}
66
+ {"reference_answer": "1", "generated_answer": "1"}
67
+ {"reference_answer": "0", "generated_answer": "1"}
68
+ {"reference_answer": "0", "generated_answer": "1"}
69
+ {"reference_answer": "0", "generated_answer": "1"}
70
+ {"reference_answer": "1", "generated_answer": "1"}
71
+ {"reference_answer": "0", "generated_answer": "1"}
72
+ {"reference_answer": "0", "generated_answer": "1"}
73
+ {"reference_answer": "0", "generated_answer": "1"}
74
+ {"reference_answer": "0", "generated_answer": "1"}
75
+ {"reference_answer": "0", "generated_answer": "1"}
76
+ {"reference_answer": "0", "generated_answer": "1"}
77
+ {"reference_answer": "0", "generated_answer": "1"}
78
+ {"reference_answer": "0", "generated_answer": "1"}
79
+ {"reference_answer": "1", "generated_answer": "1"}
80
+ {"reference_answer": "0", "generated_answer": "1"}
81
+ {"reference_answer": "0", "generated_answer": "1"}
82
+ {"reference_answer": "1", "generated_answer": "1"}
83
+ {"reference_answer": "1", "generated_answer": "1"}
84
+ {"reference_answer": "0", "generated_answer": "1"}
85
+ {"reference_answer": "0", "generated_answer": "1"}
86
+ {"reference_answer": "0", "generated_answer": "1"}
87
+ {"reference_answer": "1", "generated_answer": "1"}
88
+ {"reference_answer": "0", "generated_answer": "1"}
89
+ {"reference_answer": "0", "generated_answer": "1"}
90
+ {"reference_answer": "0", "generated_answer": "1"}
91
+ {"reference_answer": "0", "generated_answer": "1"}
92
+ {"reference_answer": "0", "generated_answer": "1"}
93
+ {"reference_answer": "0", "generated_answer": "1"}
94
+ {"reference_answer": "0", "generated_answer": "1"}
95
+ {"reference_answer": "0", "generated_answer": "1"}
96
+ {"reference_answer": "1", "generated_answer": "1"}
97
+ {"reference_answer": "0", "generated_answer": "1"}
98
+ {"reference_answer": "0", "generated_answer": "1"}
99
+ {"reference_answer": "0", "generated_answer": "1"}
100
+ {"reference_answer": "1", "generated_answer": "1"}
101
+ {"reference_answer": "1", "generated_answer": "1"}
102
+ {"reference_answer": "1", "generated_answer": "1"}
103
+ {"reference_answer": "1", "generated_answer": "1"}
104
+ {"reference_answer": "0", "generated_answer": "1"}
105
+ {"reference_answer": "0", "generated_answer": "1"}
106
+ {"reference_answer": "0", "generated_answer": "1"}
107
+ {"reference_answer": "0", "generated_answer": "1"}
108
+ {"reference_answer": "0", "generated_answer": "1"}
109
+ {"reference_answer": "1", "generated_answer": "1"}
110
+ {"reference_answer": "1", "generated_answer": "1"}
111
+ {"reference_answer": "0", "generated_answer": "1"}
112
+ {"reference_answer": "0", "generated_answer": "1"}
113
+ {"reference_answer": "1", "generated_answer": "1"}
114
+ {"reference_answer": "1", "generated_answer": "1"}
115
+ {"reference_answer": "0", "generated_answer": "1"}
116
+ {"reference_answer": "0", "generated_answer": "1"}
117
+ {"reference_answer": "1", "generated_answer": "1"}
118
+ {"reference_answer": "0", "generated_answer": "1"}
119
+ {"reference_answer": "1", "generated_answer": "1"}
120
+ {"reference_answer": "0", "generated_answer": "1"}
121
+ {"reference_answer": "0", "generated_answer": "1"}
122
+ {"reference_answer": "1", "generated_answer": "1"}
123
+ {"reference_answer": "0", "generated_answer": "1"}
124
+ {"reference_answer": "0", "generated_answer": "1"}
125
+ {"reference_answer": "0", "generated_answer": "1"}
126
+ {"reference_answer": "0", "generated_answer": "1"}
127
+ {"reference_answer": "1", "generated_answer": "1"}
128
+ {"reference_answer": "0", "generated_answer": "1"}
129
+ {"reference_answer": "0", "generated_answer": "1"}
130
+ {"reference_answer": "1", "generated_answer": "1"}
131
+ {"reference_answer": "0", "generated_answer": "1"}
132
+ {"reference_answer": "1", "generated_answer": "1"}
133
+ {"reference_answer": "1", "generated_answer": "1"}
134
+ {"reference_answer": "0", "generated_answer": "1"}
135
+ {"reference_answer": "1", "generated_answer": "1"}
136
+ {"reference_answer": "1", "generated_answer": "1"}
137
+ {"reference_answer": "1", "generated_answer": "1"}
138
+ {"reference_answer": "1", "generated_answer": "1"}
139
+ {"reference_answer": "0", "generated_answer": "1"}
140
+ {"reference_answer": "0", "generated_answer": "1"}
141
+ {"reference_answer": "1", "generated_answer": "1"}
142
+ {"reference_answer": "0", "generated_answer": "1"}
143
+ {"reference_answer": "0", "generated_answer": "1"}
144
+ {"reference_answer": "0", "generated_answer": "1"}
145
+ {"reference_answer": "0", "generated_answer": "1"}
146
+ {"reference_answer": "0", "generated_answer": "1"}
147
+ {"reference_answer": "1", "generated_answer": "1"}
148
+ {"reference_answer": "0", "generated_answer": "1"}
149
+ {"reference_answer": "0", "generated_answer": "1"}
150
+ {"reference_answer": "0", "generated_answer": "1"}
151
+ {"reference_answer": "0", "generated_answer": "1"}
152
+ {"reference_answer": "1", "generated_answer": "1"}
153
+ {"reference_answer": "0", "generated_answer": "1"}
154
+ {"reference_answer": "0", "generated_answer": "1"}
155
+ {"reference_answer": "1", "generated_answer": "1"}
156
+ {"reference_answer": "0", "generated_answer": "1"}
157
+ {"reference_answer": "1", "generated_answer": "1"}
158
+ {"reference_answer": "0", "generated_answer": "1"}
159
+ {"reference_answer": "1", "generated_answer": "1"}
160
+ {"reference_answer": "0", "generated_answer": "1"}
161
+ {"reference_answer": "1", "generated_answer": "1"}
162
+ {"reference_answer": "1", "generated_answer": "1"}
163
+ {"reference_answer": "0", "generated_answer": "1"}
164
+ {"reference_answer": "1", "generated_answer": "1"}
165
+ {"reference_answer": "0", "generated_answer": "1"}
166
+ {"reference_answer": "0", "generated_answer": "1"}
167
+ {"reference_answer": "1", "generated_answer": "1"}
168
+ {"reference_answer": "0", "generated_answer": "1"}
169
+ {"reference_answer": "1", "generated_answer": "1"}
170
+ {"reference_answer": "1", "generated_answer": "1"}
171
+ {"reference_answer": "0", "generated_answer": "1"}
172
+ {"reference_answer": "1", "generated_answer": "1"}
173
+ {"reference_answer": "0", "generated_answer": "1"}
174
+ {"reference_answer": "0", "generated_answer": "1"}
175
+ {"reference_answer": "0", "generated_answer": "1"}
176
+ {"reference_answer": "1", "generated_answer": "1"}
177
+ {"reference_answer": "0", "generated_answer": "1"}
178
+ {"reference_answer": "0", "generated_answer": "1"}
179
+ {"reference_answer": "0", "generated_answer": "1"}
180
+ {"reference_answer": "1", "generated_answer": "1"}
181
+ {"reference_answer": "0", "generated_answer": "1"}
182
+ {"reference_answer": "0", "generated_answer": "1"}
183
+ {"reference_answer": "0", "generated_answer": "1"}
184
+ {"reference_answer": "0", "generated_answer": "1"}
185
+ {"reference_answer": "1", "generated_answer": "1"}
186
+ {"reference_answer": "0", "generated_answer": "1"}
187
+ {"reference_answer": "0", "generated_answer": "1"}
188
+ {"reference_answer": "0", "generated_answer": "1"}
189
+ {"reference_answer": "0", "generated_answer": "1"}
190
+ {"reference_answer": "0", "generated_answer": "1"}
191
+ {"reference_answer": "0", "generated_answer": "1"}
192
+ {"reference_answer": "1", "generated_answer": "1"}
193
+ {"reference_answer": "0", "generated_answer": "1"}
194
+ {"reference_answer": "1", "generated_answer": "1"}
195
+ {"reference_answer": "0", "generated_answer": "1"}
196
+ {"reference_answer": "0", "generated_answer": "1"}
197
+ {"reference_answer": "1", "generated_answer": "1"}
198
+ {"reference_answer": "0", "generated_answer": "1"}
199
+ {"reference_answer": "0", "generated_answer": "1"}
200
+ {"reference_answer": "0", "generated_answer": "1"}
201
+ {"reference_answer": "1", "generated_answer": "1"}
202
+ {"reference_answer": "1", "generated_answer": "1"}
203
+ {"reference_answer": "0", "generated_answer": "1"}
204
+ {"reference_answer": "1", "generated_answer": "1"}
205
+ {"reference_answer": "1", "generated_answer": "1"}
206
+ {"reference_answer": "0", "generated_answer": "1"}
207
+ {"reference_answer": "1", "generated_answer": "1"}
208
+ {"reference_answer": "0", "generated_answer": "1"}
209
+ {"reference_answer": "1", "generated_answer": "1"}
210
+ {"reference_answer": "0", "generated_answer": "1"}
211
+ {"reference_answer": "1", "generated_answer": "1"}
212
+ {"reference_answer": "1", "generated_answer": "1"}
213
+ {"reference_answer": "1", "generated_answer": "1"}
214
+ {"reference_answer": "0", "generated_answer": "1"}
215
+ {"reference_answer": "1", "generated_answer": "1"}
216
+ {"reference_answer": "1", "generated_answer": "1"}
217
+ {"reference_answer": "0", "generated_answer": "1"}
218
+ {"reference_answer": "0", "generated_answer": "1"}
219
+ {"reference_answer": "1", "generated_answer": "1"}
220
+ {"reference_answer": "1", "generated_answer": "1"}
221
+ {"reference_answer": "1", "generated_answer": "1"}
222
+ {"reference_answer": "0", "generated_answer": "1"}
223
+ {"reference_answer": "0", "generated_answer": "1"}
224
+ {"reference_answer": "0", "generated_answer": "1"}
225
+ {"reference_answer": "0", "generated_answer": "1"}
226
+ {"reference_answer": "1", "generated_answer": "1"}
227
+ {"reference_answer": "0", "generated_answer": "1"}
228
+ {"reference_answer": "0", "generated_answer": "1"}
229
+ {"reference_answer": "0", "generated_answer": "1"}
230
+ {"reference_answer": "0", "generated_answer": "1"}
231
+ {"reference_answer": "0", "generated_answer": "1"}
232
+ {"reference_answer": "0", "generated_answer": "1"}
233
+ {"reference_answer": "0", "generated_answer": "1"}
234
+ {"reference_answer": "0", "generated_answer": "1"}
235
+ {"reference_answer": "0", "generated_answer": "1"}
236
+ {"reference_answer": "0", "generated_answer": "1"}
237
+ {"reference_answer": "1", "generated_answer": "1"}
238
+ {"reference_answer": "0", "generated_answer": "1"}
239
+ {"reference_answer": "0", "generated_answer": "1"}
240
+ {"reference_answer": "0", "generated_answer": "1"}
241
+ {"reference_answer": "0", "generated_answer": "1"}
242
+ {"reference_answer": "0", "generated_answer": "1"}
243
+ {"reference_answer": "0", "generated_answer": "1"}
244
+ {"reference_answer": "1", "generated_answer": "1"}
245
+ {"reference_answer": "0", "generated_answer": "1"}
246
+ {"reference_answer": "0", "generated_answer": "1"}
247
+ {"reference_answer": "0", "generated_answer": "1"}
248
+ {"reference_answer": "0", "generated_answer": "1"}
249
+ {"reference_answer": "1", "generated_answer": "1"}
250
+ {"reference_answer": "0", "generated_answer": "1"}
251
+ {"reference_answer": "1", "generated_answer": "1"}
252
+ {"reference_answer": "0", "generated_answer": "1"}
253
+ {"reference_answer": "0", "generated_answer": "1"}
254
+ {"reference_answer": "0", "generated_answer": "1"}
255
+ {"reference_answer": "0", "generated_answer": "1"}
256
+ {"reference_answer": "0", "generated_answer": "1"}
257
+ {"reference_answer": "0", "generated_answer": "1"}
258
+ {"reference_answer": "1", "generated_answer": "1"}
259
+ {"reference_answer": "0", "generated_answer": "1"}
260
+ {"reference_answer": "1", "generated_answer": "1"}
261
+ {"reference_answer": "0", "generated_answer": "1"}
262
+ {"reference_answer": "0", "generated_answer": "1"}
263
+ {"reference_answer": "1", "generated_answer": "1"}
264
+ {"reference_answer": "0", "generated_answer": "1"}
265
+ {"reference_answer": "1", "generated_answer": "1"}
266
+ {"reference_answer": "1", "generated_answer": "1"}
267
+ {"reference_answer": "0", "generated_answer": "1"}
268
+ {"reference_answer": "0", "generated_answer": "1"}
269
+ {"reference_answer": "0", "generated_answer": "1"}
270
+ {"reference_answer": "0", "generated_answer": "1"}
271
+ {"reference_answer": "1", "generated_answer": "1"}
272
+ {"reference_answer": "1", "generated_answer": "1"}
273
+ {"reference_answer": "1", "generated_answer": "1"}
274
+ {"reference_answer": "0", "generated_answer": "1"}
275
+ {"reference_answer": "0", "generated_answer": "1"}
276
+ {"reference_answer": "1", "generated_answer": "1"}
277
+ {"reference_answer": "0", "generated_answer": "1"}
278
+ {"reference_answer": "0", "generated_answer": "1"}
279
+ {"reference_answer": "1", "generated_answer": "1"}
280
+ {"reference_answer": "0", "generated_answer": "1"}
281
+ {"reference_answer": "0", "generated_answer": "1"}
282
+ {"reference_answer": "0", "generated_answer": "1"}
283
+ {"reference_answer": "0", "generated_answer": "1"}
284
+ {"reference_answer": "0", "generated_answer": "1"}
285
+ {"reference_answer": "0", "generated_answer": "1"}
286
+ {"reference_answer": "0", "generated_answer": "1"}
287
+ {"reference_answer": "1", "generated_answer": "1"}
288
+ {"reference_answer": "1", "generated_answer": "1"}
289
+ {"reference_answer": "1", "generated_answer": "1"}
290
+ {"reference_answer": "0", "generated_answer": "1"}
291
+ {"reference_answer": "0", "generated_answer": "1"}
292
+ {"reference_answer": "1", "generated_answer": "1"}
293
+ {"reference_answer": "1", "generated_answer": "1"}
294
+ {"reference_answer": "0", "generated_answer": "1"}
295
+ {"reference_answer": "0", "generated_answer": "1"}
296
+ {"reference_answer": "1", "generated_answer": "1"}
297
+ {"reference_answer": "0", "generated_answer": "1"}
298
+ {"reference_answer": "0", "generated_answer": "1"}
299
+ {"reference_answer": "0", "generated_answer": "1"}
300
+ {"reference_answer": "0", "generated_answer": "1"}
301
+ {"reference_answer": "1", "generated_answer": "1"}
302
+ {"reference_answer": "0", "generated_answer": "1"}
303
+ {"reference_answer": "0", "generated_answer": "1"}
304
+ {"reference_answer": "0", "generated_answer": "1"}
305
+ {"reference_answer": "1", "generated_answer": "1"}
306
+ {"reference_answer": "1", "generated_answer": "1"}
307
+ {"reference_answer": "0", "generated_answer": "1"}
308
+ {"reference_answer": "0", "generated_answer": "1"}
309
+ {"reference_answer": "0", "generated_answer": "1"}
310
+ {"reference_answer": "0", "generated_answer": "1"}
311
+ {"reference_answer": "1", "generated_answer": "1"}
312
+ {"reference_answer": "0", "generated_answer": "1"}
313
+ {"reference_answer": "0", "generated_answer": "1"}
314
+ {"reference_answer": "1", "generated_answer": "1"}
315
+ {"reference_answer": "0", "generated_answer": "1"}
316
+ {"reference_answer": "1", "generated_answer": "1"}
317
+ {"reference_answer": "0", "generated_answer": "1"}
318
+ {"reference_answer": "1", "generated_answer": "1"}
319
+ {"reference_answer": "1", "generated_answer": "1"}
320
+ {"reference_answer": "0", "generated_answer": "1"}
321
+ {"reference_answer": "0", "generated_answer": "1"}
322
+ {"reference_answer": "0", "generated_answer": "1"}
323
+ {"reference_answer": "0", "generated_answer": "1"}
324
+ {"reference_answer": "0", "generated_answer": "1"}
325
+ {"reference_answer": "0", "generated_answer": "1"}
326
+ {"reference_answer": "0", "generated_answer": "1"}
327
+ {"reference_answer": "1", "generated_answer": "1"}
328
+ {"reference_answer": "0", "generated_answer": "1"}
329
+ {"reference_answer": "0", "generated_answer": "1"}
330
+ {"reference_answer": "0", "generated_answer": "1"}
331
+ {"reference_answer": "1", "generated_answer": "1"}
332
+ {"reference_answer": "0", "generated_answer": "1"}
333
+ {"reference_answer": "0", "generated_answer": "1"}
334
+ {"reference_answer": "0", "generated_answer": "1"}
335
+ {"reference_answer": "1", "generated_answer": "1"}
336
+ {"reference_answer": "0", "generated_answer": "1"}
337
+ {"reference_answer": "0", "generated_answer": "1"}
338
+ {"reference_answer": "0", "generated_answer": "1"}
339
+ {"reference_answer": "0", "generated_answer": "1"}
340
+ {"reference_answer": "1", "generated_answer": "1"}
341
+ {"reference_answer": "0", "generated_answer": "1"}
342
+ {"reference_answer": "0", "generated_answer": "1"}
343
+ {"reference_answer": "0", "generated_answer": "1"}
344
+ {"reference_answer": "0", "generated_answer": "1"}
345
+ {"reference_answer": "0", "generated_answer": "1"}
346
+ {"reference_answer": "0", "generated_answer": "1"}
347
+ {"reference_answer": "1", "generated_answer": "1"}
348
+ {"reference_answer": "1", "generated_answer": "1"}
349
+ {"reference_answer": "0", "generated_answer": "1"}
350
+ {"reference_answer": "0", "generated_answer": "1"}
351
+ {"reference_answer": "0", "generated_answer": "1"}
352
+ {"reference_answer": "0", "generated_answer": "1"}
353
+ {"reference_answer": "0", "generated_answer": "1"}
354
+ {"reference_answer": "1", "generated_answer": "1"}
355
+ {"reference_answer": "0", "generated_answer": "1"}
356
+ {"reference_answer": "1", "generated_answer": "1"}
357
+ {"reference_answer": "0", "generated_answer": "1"}
358
+ {"reference_answer": "1", "generated_answer": "1"}
359
+ {"reference_answer": "1", "generated_answer": "1"}
360
+ {"reference_answer": "1", "generated_answer": "1"}
361
+ {"reference_answer": "1", "generated_answer": "1"}
362
+ {"reference_answer": "0", "generated_answer": "1"}
363
+ {"reference_answer": "1", "generated_answer": "1"}
364
+ {"reference_answer": "0", "generated_answer": "1"}
365
+ {"reference_answer": "0", "generated_answer": "1"}
366
+ {"reference_answer": "0", "generated_answer": "1"}
367
+ {"reference_answer": "0", "generated_answer": "1"}
368
+ {"reference_answer": "0", "generated_answer": "1"}
369
+ {"reference_answer": "1", "generated_answer": "1"}
370
+ {"reference_answer": "0", "generated_answer": "1"}
371
+ {"reference_answer": "1", "generated_answer": "1"}
372
+ {"reference_answer": "0", "generated_answer": "1"}
373
+ {"reference_answer": "0", "generated_answer": "1"}
374
+ {"reference_answer": "1", "generated_answer": "1"}
375
+ {"reference_answer": "0", "generated_answer": "1"}
376
+ {"reference_answer": "1", "generated_answer": "1"}
377
+ {"reference_answer": "1", "generated_answer": "1"}
378
+ {"reference_answer": "0", "generated_answer": "1"}
379
+ {"reference_answer": "0", "generated_answer": "1"}
380
+ {"reference_answer": "0", "generated_answer": "1"}
381
+ {"reference_answer": "1", "generated_answer": "1"}
382
+ {"reference_answer": "1", "generated_answer": "1"}
383
+ {"reference_answer": "0", "generated_answer": "1"}
384
+ {"reference_answer": "1", "generated_answer": "1"}
385
+ {"reference_answer": "1", "generated_answer": "1"}
386
+ {"reference_answer": "0", "generated_answer": "1"}
387
+ {"reference_answer": "0", "generated_answer": "1"}
388
+ {"reference_answer": "1", "generated_answer": "1"}
389
+ {"reference_answer": "0", "generated_answer": "1"}
390
+ {"reference_answer": "1", "generated_answer": "1"}
391
+ {"reference_answer": "0", "generated_answer": "1"}
392
+ {"reference_answer": "1", "generated_answer": "1"}
393
+ {"reference_answer": "0", "generated_answer": "1"}
394
+ {"reference_answer": "0", "generated_answer": "1"}
395
+ {"reference_answer": "0", "generated_answer": "1"}
396
+ {"reference_answer": "1", "generated_answer": "1"}
397
+ {"reference_answer": "1", "generated_answer": "1"}
398
+ {"reference_answer": "0", "generated_answer": "1"}
399
+ {"reference_answer": "0", "generated_answer": "1"}
400
+ {"reference_answer": "0", "generated_answer": "1"}
401
+ {"reference_answer": "1", "generated_answer": "1"}
402
+ {"reference_answer": "0", "generated_answer": "1"}
403
+ {"reference_answer": "0", "generated_answer": "1"}
404
+ {"reference_answer": "0", "generated_answer": "1"}
405
+ {"reference_answer": "0", "generated_answer": "1"}
406
+ {"reference_answer": "0", "generated_answer": "1"}
407
+ {"reference_answer": "0", "generated_answer": "1"}
408
+ {"reference_answer": "0", "generated_answer": "1"}
409
+ {"reference_answer": "0", "generated_answer": "1"}
410
+ {"reference_answer": "0", "generated_answer": "1"}
411
+ {"reference_answer": "0", "generated_answer": "1"}
412
+ {"reference_answer": "0", "generated_answer": "1"}
413
+ {"reference_answer": "1", "generated_answer": "1"}
414
+ {"reference_answer": "1", "generated_answer": "1"}
415
+ {"reference_answer": "1", "generated_answer": "1"}
416
+ {"reference_answer": "0", "generated_answer": "1"}
417
+ {"reference_answer": "0", "generated_answer": "1"}
418
+ {"reference_answer": "0", "generated_answer": "1"}
419
+ {"reference_answer": "0", "generated_answer": "1"}
420
+ {"reference_answer": "1", "generated_answer": "1"}
421
+ {"reference_answer": "1", "generated_answer": "1"}
422
+ {"reference_answer": "0", "generated_answer": "1"}
423
+ {"reference_answer": "0", "generated_answer": "1"}
424
+ {"reference_answer": "1", "generated_answer": "1"}
425
+ {"reference_answer": "0", "generated_answer": "1"}
426
+ {"reference_answer": "0", "generated_answer": "1"}
427
+ {"reference_answer": "1", "generated_answer": "1"}
428
+ {"reference_answer": "0", "generated_answer": "1"}
429
+ {"reference_answer": "0", "generated_answer": "1"}
430
+ {"reference_answer": "0", "generated_answer": "1"}
431
+ {"reference_answer": "1", "generated_answer": "1"}
432
+ {"reference_answer": "0", "generated_answer": "1"}
433
+ {"reference_answer": "0", "generated_answer": "1"}
434
+ {"reference_answer": "1", "generated_answer": "1"}
435
+ {"reference_answer": "0", "generated_answer": "1"}
436
+ {"reference_answer": "1", "generated_answer": "1"}
437
+ {"reference_answer": "0", "generated_answer": "1"}
438
+ {"reference_answer": "1", "generated_answer": "1"}
439
+ {"reference_answer": "0", "generated_answer": "1"}
440
+ {"reference_answer": "0", "generated_answer": "1"}
441
+ {"reference_answer": "0", "generated_answer": "1"}
442
+ {"reference_answer": "0", "generated_answer": "1"}
443
+ {"reference_answer": "1", "generated_answer": "1"}
444
+ {"reference_answer": "0", "generated_answer": "1"}
445
+ {"reference_answer": "0", "generated_answer": "1"}
446
+ {"reference_answer": "1", "generated_answer": "1"}
447
+ {"reference_answer": "1", "generated_answer": "1"}
448
+ {"reference_answer": "0", "generated_answer": "1"}
449
+ {"reference_answer": "0", "generated_answer": "1"}
450
+ {"reference_answer": "0", "generated_answer": "1"}
451
+ {"reference_answer": "0", "generated_answer": "1"}
452
+ {"reference_answer": "0", "generated_answer": "1"}
453
+ {"reference_answer": "0", "generated_answer": "1"}
454
+ {"reference_answer": "1", "generated_answer": "1"}
455
+ {"reference_answer": "0", "generated_answer": "1"}
456
+ {"reference_answer": "0", "generated_answer": "1"}
457
+ {"reference_answer": "0", "generated_answer": "1"}
458
+ {"reference_answer": "1", "generated_answer": "1"}
459
+ {"reference_answer": "0", "generated_answer": "1"}
460
+ {"reference_answer": "1", "generated_answer": "1"}
461
+ {"reference_answer": "0", "generated_answer": "1"}
462
+ {"reference_answer": "1", "generated_answer": "1"}
463
+ {"reference_answer": "1", "generated_answer": "1"}
464
+ {"reference_answer": "0", "generated_answer": "1"}
465
+ {"reference_answer": "0", "generated_answer": "1"}
466
+ {"reference_answer": "0", "generated_answer": "1"}
467
+ {"reference_answer": "0", "generated_answer": "1"}
468
+ {"reference_answer": "1", "generated_answer": "1"}
469
+ {"reference_answer": "0", "generated_answer": "1"}
470
+ {"reference_answer": "0", "generated_answer": "1"}
471
+ {"reference_answer": "1", "generated_answer": "1"}
472
+ {"reference_answer": "1", "generated_answer": "1"}
473
+ {"reference_answer": "1", "generated_answer": "1"}
474
+ {"reference_answer": "0", "generated_answer": "1"}
475
+ {"reference_answer": "1", "generated_answer": "1"}
476
+ {"reference_answer": "0", "generated_answer": "1"}
477
+ {"reference_answer": "1", "generated_answer": "1"}
478
+ {"reference_answer": "0", "generated_answer": "1"}
479
+ {"reference_answer": "0", "generated_answer": "1"}
480
+ {"reference_answer": "0", "generated_answer": "1"}
481
+ {"reference_answer": "1", "generated_answer": "1"}
482
+ {"reference_answer": "1", "generated_answer": "1"}
483
+ {"reference_answer": "0", "generated_answer": "1"}
484
+ {"reference_answer": "0", "generated_answer": "1"}
485
+ {"reference_answer": "1", "generated_answer": "1"}
486
+ {"reference_answer": "0", "generated_answer": "1"}
487
+ {"reference_answer": "0", "generated_answer": "1"}
488
+ {"reference_answer": "0", "generated_answer": "1"}
489
+ {"reference_answer": "1", "generated_answer": "1"}
490
+ {"reference_answer": "0", "generated_answer": "1"}
491
+ {"reference_answer": "0", "generated_answer": "1"}
492
+ {"reference_answer": "1", "generated_answer": "1"}
493
+ {"reference_answer": "0", "generated_answer": "1"}
494
+ {"reference_answer": "0", "generated_answer": "1"}
495
+ {"reference_answer": "0", "generated_answer": "1"}
496
+ {"reference_answer": "1", "generated_answer": "1"}
497
+ {"reference_answer": "0", "generated_answer": "1"}
498
+ {"reference_answer": "0", "generated_answer": "1"}
499
+ {"reference_answer": "1", "generated_answer": "1"}
500
+ {"reference_answer": "0", "generated_answer": "1"}
501
+ {"reference_answer": "1", "generated_answer": "1"}
502
+ {"reference_answer": "0", "generated_answer": "1"}
503
+ {"reference_answer": "1", "generated_answer": "1"}
504
+ {"reference_answer": "1", "generated_answer": "1"}
505
+ {"reference_answer": "1", "generated_answer": "1"}
506
+ {"reference_answer": "1", "generated_answer": "1"}
507
+ {"reference_answer": "0", "generated_answer": "1"}
508
+ {"reference_answer": "0", "generated_answer": "1"}
509
+ {"reference_answer": "0", "generated_answer": "1"}
510
+ {"reference_answer": "0", "generated_answer": "1"}
511
+ {"reference_answer": "1", "generated_answer": "1"}
512
+ {"reference_answer": "0", "generated_answer": "1"}
513
+ {"reference_answer": "1", "generated_answer": "1"}
514
+ {"reference_answer": "0", "generated_answer": "1"}
515
+ {"reference_answer": "0", "generated_answer": "1"}
516
+ {"reference_answer": "1", "generated_answer": "1"}
517
+ {"reference_answer": "0", "generated_answer": "1"}
518
+ {"reference_answer": "0", "generated_answer": "1"}
519
+ {"reference_answer": "0", "generated_answer": "1"}
520
+ {"reference_answer": "1", "generated_answer": "1"}
521
+ {"reference_answer": "0", "generated_answer": "1"}
522
+ {"reference_answer": "0", "generated_answer": "1"}
523
+ {"reference_answer": "1", "generated_answer": "1"}
524
+ {"reference_answer": "1", "generated_answer": "1"}
525
+ {"reference_answer": "1", "generated_answer": "1"}
526
+ {"reference_answer": "0", "generated_answer": "1"}
527
+ {"reference_answer": "0", "generated_answer": "1"}
528
+ {"reference_answer": "0", "generated_answer": "1"}
529
+ {"reference_answer": "0", "generated_answer": "1"}
530
+ {"reference_answer": "0", "generated_answer": "1"}
531
+ {"reference_answer": "0", "generated_answer": "1"}
532
+ {"reference_answer": "0", "generated_answer": "1"}
533
+ {"reference_answer": "0", "generated_answer": "1"}
534
+ {"reference_answer": "0", "generated_answer": "1"}
535
+ {"reference_answer": "0", "generated_answer": "1"}
536
+ {"reference_answer": "0", "generated_answer": "1"}
537
+ {"reference_answer": "0", "generated_answer": "1"}
538
+ {"reference_answer": "0", "generated_answer": "1"}
539
+ {"reference_answer": "0", "generated_answer": "1"}
540
+ {"reference_answer": "0", "generated_answer": "1"}
541
+ {"reference_answer": "1", "generated_answer": "1"}
542
+ {"reference_answer": "0", "generated_answer": "1"}
543
+ {"reference_answer": "0", "generated_answer": "1"}
544
+ {"reference_answer": "1", "generated_answer": "1"}
545
+ {"reference_answer": "1", "generated_answer": "1"}
546
+ {"reference_answer": "0", "generated_answer": "1"}
547
+ {"reference_answer": "0", "generated_answer": "1"}
548
+ {"reference_answer": "0", "generated_answer": "1"}
549
+ {"reference_answer": "0", "generated_answer": "1"}
550
+ {"reference_answer": "0", "generated_answer": "1"}
551
+ {"reference_answer": "0", "generated_answer": "1"}
552
+ {"reference_answer": "0", "generated_answer": "1"}
553
+ {"reference_answer": "1", "generated_answer": "1"}
554
+ {"reference_answer": "1", "generated_answer": "1"}
555
+ {"reference_answer": "0", "generated_answer": "1"}
556
+ {"reference_answer": "0", "generated_answer": "1"}
557
+ {"reference_answer": "1", "generated_answer": "1"}
558
+ {"reference_answer": "0", "generated_answer": "1"}
559
+ {"reference_answer": "0", "generated_answer": "1"}
560
+ {"reference_answer": "0", "generated_answer": "1"}
561
+ {"reference_answer": "0", "generated_answer": "1"}
562
+ {"reference_answer": "0", "generated_answer": "1"}
563
+ {"reference_answer": "0", "generated_answer": "1"}
564
+ {"reference_answer": "1", "generated_answer": "1"}
565
+ {"reference_answer": "0", "generated_answer": "1"}
566
+ {"reference_answer": "0", "generated_answer": "1"}
567
+ {"reference_answer": "0", "generated_answer": "1"}
568
+ {"reference_answer": "1", "generated_answer": "1"}
569
+ {"reference_answer": "1", "generated_answer": "1"}
570
+ {"reference_answer": "0", "generated_answer": "1"}
571
+ {"reference_answer": "1", "generated_answer": "1"}
572
+ {"reference_answer": "1", "generated_answer": "1"}
573
+ {"reference_answer": "0", "generated_answer": "1"}
574
+ {"reference_answer": "0", "generated_answer": "1"}
575
+ {"reference_answer": "1", "generated_answer": "1"}
576
+ {"reference_answer": "1", "generated_answer": "1"}
577
+ {"reference_answer": "0", "generated_answer": "1"}
578
+ {"reference_answer": "1", "generated_answer": "1"}
579
+ {"reference_answer": "0", "generated_answer": "1"}
580
+ {"reference_answer": "0", "generated_answer": "1"}
581
+ {"reference_answer": "0", "generated_answer": "1"}
582
+ {"reference_answer": "0", "generated_answer": "1"}
583
+ {"reference_answer": "1", "generated_answer": "1"}
584
+ {"reference_answer": "0", "generated_answer": "1"}
585
+ {"reference_answer": "0", "generated_answer": "1"}
586
+ {"reference_answer": "0", "generated_answer": "1"}
587
+ {"reference_answer": "0", "generated_answer": "1"}
588
+ {"reference_answer": "1", "generated_answer": "1"}
589
+ {"reference_answer": "0", "generated_answer": "1"}
590
+ {"reference_answer": "0", "generated_answer": "1"}
591
+ {"reference_answer": "0", "generated_answer": "1"}
592
+ {"reference_answer": "0", "generated_answer": "1"}
593
+ {"reference_answer": "1", "generated_answer": "1"}
594
+ {"reference_answer": "1", "generated_answer": "1"}
595
+ {"reference_answer": "0", "generated_answer": "1"}
596
+ {"reference_answer": "0", "generated_answer": "1"}
597
+ {"reference_answer": "0", "generated_answer": "1"}
598
+ {"reference_answer": "1", "generated_answer": "1"}
599
+ {"reference_answer": "1", "generated_answer": "1"}
600
+ {"reference_answer": "0", "generated_answer": "1"}
601
+ {"reference_answer": "0", "generated_answer": "1"}
602
+ {"reference_answer": "1", "generated_answer": "1"}
603
+ {"reference_answer": "0", "generated_answer": "1"}
604
+ {"reference_answer": "1", "generated_answer": "1"}
605
+ {"reference_answer": "1", "generated_answer": "1"}
606
+ {"reference_answer": "0", "generated_answer": "1"}
607
+ {"reference_answer": "0", "generated_answer": "1"}
608
+ {"reference_answer": "0", "generated_answer": "1"}
609
+ {"reference_answer": "0", "generated_answer": "1"}
610
+ {"reference_answer": "1", "generated_answer": "1"}
611
+ {"reference_answer": "0", "generated_answer": "1"}
612
+ {"reference_answer": "0", "generated_answer": "1"}
613
+ {"reference_answer": "0", "generated_answer": "1"}
614
+ {"reference_answer": "1", "generated_answer": "1"}
615
+ {"reference_answer": "0", "generated_answer": "1"}
616
+ {"reference_answer": "1", "generated_answer": "1"}
617
+ {"reference_answer": "1", "generated_answer": "1"}
618
+ {"reference_answer": "1", "generated_answer": "1"}
619
+ {"reference_answer": "0", "generated_answer": "1"}
620
+ {"reference_answer": "1", "generated_answer": "1"}
621
+ {"reference_answer": "1", "generated_answer": "1"}
622
+ {"reference_answer": "0", "generated_answer": "1"}
623
+ {"reference_answer": "0", "generated_answer": "1"}
624
+ {"reference_answer": "0", "generated_answer": "1"}
625
+ {"reference_answer": "0", "generated_answer": "1"}
626
+ {"reference_answer": "0", "generated_answer": "1"}
627
+ {"reference_answer": "0", "generated_answer": "1"}
628
+ {"reference_answer": "0", "generated_answer": "1"}
629
+ {"reference_answer": "0", "generated_answer": "1"}
630
+ {"reference_answer": "0", "generated_answer": "1"}
631
+ {"reference_answer": "0", "generated_answer": "1"}
632
+ {"reference_answer": "0", "generated_answer": "1"}
633
+ {"reference_answer": "0", "generated_answer": "1"}
634
+ {"reference_answer": "0", "generated_answer": "1"}
635
+ {"reference_answer": "0", "generated_answer": "1"}
636
+ {"reference_answer": "0", "generated_answer": "1"}
637
+ {"reference_answer": "0", "generated_answer": "1"}
638
+ {"reference_answer": "0", "generated_answer": "1"}
639
+ {"reference_answer": "0", "generated_answer": "1"}
640
+ {"reference_answer": "1", "generated_answer": "1"}
641
+ {"reference_answer": "0", "generated_answer": "1"}
642
+ {"reference_answer": "0", "generated_answer": "1"}
643
+ {"reference_answer": "1", "generated_answer": "1"}
644
+ {"reference_answer": "1", "generated_answer": "1"}
645
+ {"reference_answer": "0", "generated_answer": "1"}
646
+ {"reference_answer": "0", "generated_answer": "1"}
647
+ {"reference_answer": "0", "generated_answer": "1"}
648
+ {"reference_answer": "0", "generated_answer": "1"}
649
+ {"reference_answer": "0", "generated_answer": "1"}
650
+ {"reference_answer": "0", "generated_answer": "1"}
651
+ {"reference_answer": "0", "generated_answer": "1"}
652
+ {"reference_answer": "1", "generated_answer": "1"}
653
+ {"reference_answer": "0", "generated_answer": "1"}
654
+ {"reference_answer": "1", "generated_answer": "1"}
655
+ {"reference_answer": "0", "generated_answer": "1"}
656
+ {"reference_answer": "0", "generated_answer": "1"}
657
+ {"reference_answer": "0", "generated_answer": "1"}
658
+ {"reference_answer": "1", "generated_answer": "1"}
659
+ {"reference_answer": "0", "generated_answer": "1"}
660
+ {"reference_answer": "0", "generated_answer": "1"}
661
+ {"reference_answer": "0", "generated_answer": "1"}
662
+ {"reference_answer": "1", "generated_answer": "1"}
663
+ {"reference_answer": "0", "generated_answer": "1"}
664
+ {"reference_answer": "0", "generated_answer": "1"}
665
+ {"reference_answer": "0", "generated_answer": "1"}
666
+ {"reference_answer": "0", "generated_answer": "1"}
667
+ {"reference_answer": "1", "generated_answer": "1"}
668
+ {"reference_answer": "0", "generated_answer": "1"}
669
+ {"reference_answer": "0", "generated_answer": "1"}
670
+ {"reference_answer": "0", "generated_answer": "1"}
671
+ {"reference_answer": "1", "generated_answer": "1"}
672
+ {"reference_answer": "0", "generated_answer": "1"}
673
+ {"reference_answer": "1", "generated_answer": "1"}
674
+ {"reference_answer": "0", "generated_answer": "1"}
675
+ {"reference_answer": "0", "generated_answer": "1"}
676
+ {"reference_answer": "0", "generated_answer": "1"}
677
+ {"reference_answer": "0", "generated_answer": "1"}
678
+ {"reference_answer": "0", "generated_answer": "1"}
679
+ {"reference_answer": "0", "generated_answer": "1"}
680
+ {"reference_answer": "1", "generated_answer": "1"}
681
+ {"reference_answer": "0", "generated_answer": "1"}
682
+ {"reference_answer": "0", "generated_answer": "1"}
683
+ {"reference_answer": "0", "generated_answer": "1"}
684
+ {"reference_answer": "0", "generated_answer": "1"}
685
+ {"reference_answer": "1", "generated_answer": "1"}
686
+ {"reference_answer": "1", "generated_answer": "1"}
687
+ {"reference_answer": "1", "generated_answer": "1"}
688
+ {"reference_answer": "0", "generated_answer": "1"}
689
+ {"reference_answer": "0", "generated_answer": "1"}
690
+ {"reference_answer": "0", "generated_answer": "1"}
691
+ {"reference_answer": "1", "generated_answer": "1"}
692
+ {"reference_answer": "1", "generated_answer": "1"}
693
+ {"reference_answer": "0", "generated_answer": "1"}
694
+ {"reference_answer": "0", "generated_answer": "1"}
695
+ {"reference_answer": "0", "generated_answer": "1"}
696
+ {"reference_answer": "1", "generated_answer": "1"}
697
+ {"reference_answer": "1", "generated_answer": "1"}
698
+ {"reference_answer": "0", "generated_answer": "1"}
699
+ {"reference_answer": "0", "generated_answer": "1"}
700
+ {"reference_answer": "1", "generated_answer": "1"}
701
+ {"reference_answer": "0", "generated_answer": "1"}
702
+ {"reference_answer": "0", "generated_answer": "1"}
703
+ {"reference_answer": "1", "generated_answer": "1"}
704
+ {"reference_answer": "1", "generated_answer": "1"}
705
+ {"reference_answer": "0", "generated_answer": "1"}
706
+ {"reference_answer": "0", "generated_answer": "1"}
707
+ {"reference_answer": "1", "generated_answer": "1"}
708
+ {"reference_answer": "1", "generated_answer": "1"}
709
+ {"reference_answer": "0", "generated_answer": "1"}
710
+ {"reference_answer": "1", "generated_answer": "1"}
711
+ {"reference_answer": "0", "generated_answer": "1"}
712
+ {"reference_answer": "0", "generated_answer": "1"}
713
+ {"reference_answer": "0", "generated_answer": "1"}
714
+ {"reference_answer": "1", "generated_answer": "1"}
715
+ {"reference_answer": "1", "generated_answer": "1"}
716
+ {"reference_answer": "0", "generated_answer": "1"}
717
+ {"reference_answer": "1", "generated_answer": "1"}
718
+ {"reference_answer": "1", "generated_answer": "1"}
ProtT3/all_checkpoints/stage1_06221723/wandb/run-20250622_192645-zmuhvn72/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":42}}
ProtT3/all_checkpoints/stage1_06221723/wandb/run-20250622_192645-zmuhvn72/logs/debug-internal.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-06-22T19:26:45.459589355+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage1_06221723/wandb/run-20250622_192645-zmuhvn72/logs/debug-core.log"}
2
+ {"time":"2025-06-22T19:26:46.458621861+08:00","level":"INFO","msg":"created new stream","id":"zmuhvn72"}
3
+ {"time":"2025-06-22T19:26:46.458658451+08:00","level":"INFO","msg":"stream: started","id":"zmuhvn72"}
4
+ {"time":"2025-06-22T19:26:46.458694604+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"zmuhvn72"}
5
+ {"time":"2025-06-22T19:26:46.458731318+08:00","level":"INFO","msg":"sender: started","stream_id":"zmuhvn72"}
6
+ {"time":"2025-06-22T19:26:46.45883181+08:00","level":"INFO","msg":"handler: started","stream_id":"zmuhvn72"}
7
+ {"time":"2025-06-22T19:26:47.693527925+08:00","level":"INFO","msg":"Starting system monitor"}
8
+ {"time":"2025-06-22T19:27:27.828788085+08:00","level":"INFO","msg":"stream: closing","id":"zmuhvn72"}
9
+ {"time":"2025-06-22T19:27:27.828880453+08:00","level":"INFO","msg":"Stopping system monitor"}
10
+ {"time":"2025-06-22T19:27:27.829610297+08:00","level":"INFO","msg":"Stopped system monitor"}
11
+ {"time":"2025-06-22T19:27:29.428896311+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
12
+ {"time":"2025-06-22T19:27:30.389945053+08:00","level":"INFO","msg":"handler: closed","stream_id":"zmuhvn72"}
13
+ {"time":"2025-06-22T19:27:30.38999824+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"zmuhvn72"}
14
+ {"time":"2025-06-22T19:27:30.390015358+08:00","level":"INFO","msg":"sender: closed","stream_id":"zmuhvn72"}
15
+ {"time":"2025-06-22T19:27:30.395367992+08:00","level":"INFO","msg":"stream: closed","id":"zmuhvn72"}
ProtT3/all_checkpoints/stage1_06221723/wandb/run-20250622_192645-zmuhvn72/logs/debug.log ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-06-22 19:26:45,451 INFO MainThread:104393 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
2
+ 2025-06-22 19:26:45,451 INFO MainThread:104393 [wandb_setup.py:_flush():70] Configure stats pid to 104393
3
+ 2025-06-22 19:26:45,451 INFO MainThread:104393 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
4
+ 2025-06-22 19:26:45,451 INFO MainThread:104393 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
5
+ 2025-06-22 19:26:45,451 INFO MainThread:104393 [wandb_setup.py:_flush():70] Loading settings from environment variables
6
+ 2025-06-22 19:26:45,451 INFO MainThread:104393 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage1_06221723/wandb/run-20250622_192645-zmuhvn72/logs/debug.log
7
+ 2025-06-22 19:26:45,451 INFO MainThread:104393 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage1_06221723/wandb/run-20250622_192645-zmuhvn72/logs/debug-internal.log
8
+ 2025-06-22 19:26:45,451 INFO MainThread:104393 [wandb_init.py:init():852] calling init triggers
9
+ 2025-06-22 19:26:45,451 INFO MainThread:104393 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-06-22 19:26:45,451 INFO MainThread:104393 [wandb_init.py:init():893] starting backend
12
+ 2025-06-22 19:26:45,451 INFO MainThread:104393 [wandb_init.py:init():897] sending inform_init request
13
+ 2025-06-22 19:26:45,453 INFO MainThread:104393 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-06-22 19:26:45,454 INFO MainThread:104393 [wandb_init.py:init():907] backend started and connected
15
+ 2025-06-22 19:26:45,458 INFO MainThread:104393 [wandb_init.py:init():1005] updated telemetry
16
+ 2025-06-22 19:26:45,461 INFO MainThread:104393 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
17
+ 2025-06-22 19:26:47,645 INFO MainThread:104393 [wandb_init.py:init():1104] starting run threads in backend
18
+ 2025-06-22 19:26:47,826 INFO MainThread:104393 [wandb_run.py:_console_start():2573] atexit reg
19
+ 2025-06-22 19:26:47,826 INFO MainThread:104393 [wandb_run.py:_redirect():2421] redirect: wrap_raw
20
+ 2025-06-22 19:26:47,829 INFO MainThread:104393 [wandb_run.py:_redirect():2490] Wrapping output streams.
21
+ 2025-06-22 19:26:47,830 INFO MainThread:104393 [wandb_run.py:_redirect():2513] Redirects installed.
22
+ 2025-06-22 19:26:47,831 INFO MainThread:104393 [wandb_init.py:init():1150] run started, returning control to user process
23
+ 2025-06-22 19:26:54,211 INFO MainThread:104393 [wandb_run.py:_config_callback():1436] config_cb None None {'filename': 'stage1_06221723', 'seed': 42, 'mode': 'train', 'strategy': 'deepspeed', 'accelerator': 'gpu', 'devices': '0,1,2,3,4,5,6,7', 'precision': 'bf16-mixed', 'max_epochs': 20, 'check_val_every_n_epoch': 1, 'use_wandb_logger': True, 'mix_dataset': True, 'temperature': 0.1, 'save_every_n_epochs': 5, 'ptm': True, 'lm': True, 'rerank_cand_num': 128, 'plm_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m', 'plm_tune': 'freeze', 'load_4bit': False, 'pool_size': 0, 'bert_hidden_dim': 768, 'bert_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft', 'projection_dim': 256, 'cross_attention_freq': 2, 'num_query_token': 8, 'weight_decay': 0.05, 'init_lr': 0.0001, 'min_lr': 1e-05, 'warmup_lr': 1e-06, 'warmup_steps': 1000, 'lr_decay_rate': 0.9, 'scheduler': 'linear_warmup_cosine_lr', 'init_checkpoint': '', 'retrieval_eval_epoch': 10, 'num_workers': 8, 'batch_size': 32, 'match_batch_size': 64, 'root': 'data', 'text_max_len': 128, 'prot_max_len': 1024, 'prot_aug': 'None'}
24
+ 2025-06-22 19:27:27,827 INFO MsgRouterThr:104393 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
ProtT3/all_checkpoints/stage1_06261435/wandb/debug-internal.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-06-26T14:47:31.30118788+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/logs/debug-core.log"}
2
+ {"time":"2025-06-26T14:47:33.763139479+08:00","level":"INFO","msg":"created new stream","id":"1bz1vyyf"}
3
+ {"time":"2025-06-26T14:47:33.763180996+08:00","level":"INFO","msg":"stream: started","id":"1bz1vyyf"}
4
+ {"time":"2025-06-26T14:47:33.76320552+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"1bz1vyyf"}
5
+ {"time":"2025-06-26T14:47:33.763263895+08:00","level":"INFO","msg":"handler: started","stream_id":"1bz1vyyf"}
6
+ {"time":"2025-06-26T14:47:33.763302435+08:00","level":"INFO","msg":"sender: started","stream_id":"1bz1vyyf"}
7
+ {"time":"2025-06-26T14:47:35.049823143+08:00","level":"INFO","msg":"Starting system monitor"}
8
+ {"time":"2025-06-26T14:59:24.18296941+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06261435/1bz1vyyf/file_stream\": EOF"}
9
+ {"time":"2025-06-26T15:47:20.217171547+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
10
+ {"time":"2025-06-26T15:47:52.39492821+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
11
+ {"time":"2025-06-26T15:48:27.272812301+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
12
+ {"time":"2025-06-26T15:49:05.914803533+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
13
+ {"time":"2025-06-26T15:49:51.452193247+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06261435/1bz1vyyf/file_stream\": net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)"}
14
+ {"time":"2025-06-26T15:49:52.900302871+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
15
+ {"time":"2025-06-26T15:50:55.486728897+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
ProtT3/all_checkpoints/stage1_06261435/wandb/debug.log ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
2
+ 2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_setup.py:_flush():70] Configure stats pid to 13641
3
+ 2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
4
+ 2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
5
+ 2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_setup.py:_flush():70] Loading settings from environment variables
6
+ 2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/logs/debug.log
7
+ 2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/logs/debug-internal.log
8
+ 2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:init():852] calling init triggers
9
+ 2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:init():893] starting backend
12
+ 2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:init():897] sending inform_init request
13
+ 2025-06-26 14:47:31,294 INFO MainThread:13641 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-06-26 14:47:31,296 INFO MainThread:13641 [wandb_init.py:init():907] backend started and connected
15
+ 2025-06-26 14:47:31,300 INFO MainThread:13641 [wandb_init.py:init():1005] updated telemetry
16
+ 2025-06-26 14:47:31,303 INFO MainThread:13641 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
17
+ 2025-06-26 14:47:35,041 INFO MainThread:13641 [wandb_init.py:init():1104] starting run threads in backend
18
+ 2025-06-26 14:47:35,187 INFO MainThread:13641 [wandb_run.py:_console_start():2573] atexit reg
19
+ 2025-06-26 14:47:35,187 INFO MainThread:13641 [wandb_run.py:_redirect():2421] redirect: wrap_raw
20
+ 2025-06-26 14:47:35,192 INFO MainThread:13641 [wandb_run.py:_redirect():2490] Wrapping output streams.
21
+ 2025-06-26 14:47:35,192 INFO MainThread:13641 [wandb_run.py:_redirect():2513] Redirects installed.
22
+ 2025-06-26 14:47:35,197 INFO MainThread:13641 [wandb_init.py:init():1150] run started, returning control to user process
23
+ 2025-06-26 14:47:41,914 INFO MainThread:13641 [wandb_run.py:_config_callback():1436] config_cb None None {'filename': 'stage1_06261435', 'seed': 42, 'mode': 'train', 'strategy': 'deepspeed', 'accelerator': 'gpu', 'devices': '0,1,2,3,4,5,6,7', 'precision': 'bf16-mixed', 'max_epochs': 30, 'check_val_every_n_epoch': 1, 'use_wandb_logger': True, 'mix_dataset': True, 'temperature': 0.1, 'save_every_n_epochs': 5, 'ptm': True, 'lm': True, 'rerank_cand_num': 128, 'plm_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m', 'plm_tune': 'freeze', 'load_4bit': False, 'pool_size': 0, 'bert_hidden_dim': 768, 'bert_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft', 'projection_dim': 256, 'cross_attention_freq': 2, 'num_query_token': 8, 'weight_decay': 0.05, 'init_lr': 0.0001, 'min_lr': 1e-05, 'warmup_lr': 1e-06, 'warmup_steps': 1000, 'lr_decay_rate': 0.9, 'scheduler': 'linear_warmup_cosine_lr', 'init_checkpoint': '', 'retrieval_eval_epoch': 10, 'num_workers': 8, 'batch_size': 160, 'match_batch_size': 64, 'root': 'data', 'text_max_len': 128, 'prot_max_len': 1024, 'prot_aug': 'None'}
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_143705-0goy7nyw/logs/debug-internal.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {"time":"2025-06-26T14:37:05.188561402+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage1_06261435/wandb/run-20250626_143705-0goy7nyw/logs/debug-core.log"}
2
+ {"time":"2025-06-26T14:37:35.294367385+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)"}
3
+ {"time":"2025-06-26T14:38:07.797500016+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_143705-0goy7nyw/logs/debug.log ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-06-26 14:37:05,174 INFO MainThread:2555 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
2
+ 2025-06-26 14:37:05,174 INFO MainThread:2555 [wandb_setup.py:_flush():70] Configure stats pid to 2555
3
+ 2025-06-26 14:37:05,174 INFO MainThread:2555 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
4
+ 2025-06-26 14:37:05,174 INFO MainThread:2555 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
5
+ 2025-06-26 14:37:05,174 INFO MainThread:2555 [wandb_setup.py:_flush():70] Loading settings from environment variables
6
+ 2025-06-26 14:37:05,174 INFO MainThread:2555 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage1_06261435/wandb/run-20250626_143705-0goy7nyw/logs/debug.log
7
+ 2025-06-26 14:37:05,174 INFO MainThread:2555 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage1_06261435/wandb/run-20250626_143705-0goy7nyw/logs/debug-internal.log
8
+ 2025-06-26 14:37:05,174 INFO MainThread:2555 [wandb_init.py:init():852] calling init triggers
9
+ 2025-06-26 14:37:05,174 INFO MainThread:2555 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-06-26 14:37:05,174 INFO MainThread:2555 [wandb_init.py:init():893] starting backend
12
+ 2025-06-26 14:37:05,174 INFO MainThread:2555 [wandb_init.py:init():897] sending inform_init request
13
+ 2025-06-26 14:37:05,178 INFO MainThread:2555 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-06-26 14:37:05,178 INFO MainThread:2555 [wandb_init.py:init():907] backend started and connected
15
+ 2025-06-26 14:37:05,180 INFO MainThread:2555 [wandb_init.py:init():1005] updated telemetry
16
+ 2025-06-26 14:37:05,182 INFO MainThread:2555 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
17
+ 2025-06-26 14:37:37,418 INFO Thread-3 (wrapped_target):2555 [retry.py:__call__():175] [no run ID] Retry attempt failed:
18
+ Traceback (most recent call last):
19
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/urllib3/connection.py", line 198, in _new_conn
20
+ sock = connection.create_connection(
21
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/urllib3/util/connection.py", line 85, in create_connection
22
+ raise err
23
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/urllib3/util/connection.py", line 73, in create_connection
24
+ sock.connect(sa)
25
+ TimeoutError: timed out
26
+
27
+ The above exception was the direct cause of the following exception:
28
+
29
+ Traceback (most recent call last):
30
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/urllib3/connectionpool.py", line 787, in urlopen
31
+ response = self._make_request(
32
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/urllib3/connectionpool.py", line 488, in _make_request
33
+ raise new_e
34
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/urllib3/connectionpool.py", line 464, in _make_request
35
+ self._validate_conn(conn)
36
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
37
+ conn.connect()
38
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/urllib3/connection.py", line 704, in connect
39
+ self.sock = sock = self._new_conn()
40
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/urllib3/connection.py", line 207, in _new_conn
41
+ raise ConnectTimeoutError(
42
+ urllib3.exceptions.ConnectTimeoutError: (<urllib3.connection.HTTPSConnection object at 0x7fa5a7f7b9a0>, 'Connection to api.wandb.ai timed out. (connect timeout=20)')
43
+
44
+ The above exception was the direct cause of the following exception:
45
+
46
+ Traceback (most recent call last):
47
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/requests/adapters.py", line 667, in send
48
+ resp = conn.urlopen(
49
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/urllib3/connectionpool.py", line 841, in urlopen
50
+ retries = retries.increment(
51
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/urllib3/util/retry.py", line 519, in increment
52
+ raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
53
+ urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='api.wandb.ai', port=443): Max retries exceeded with url: /graphql (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fa5a7f7b9a0>, 'Connection to api.wandb.ai timed out. (connect timeout=20)'))
54
+
55
+ During handling of the above exception, another exception occurred:
56
+
57
+ Traceback (most recent call last):
58
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/wandb/sdk/lib/retry.py", line 134, in __call__
59
+ result = self._call_fn(*args, **kwargs)
60
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/wandb/sdk/internal/internal_api.py", line 398, in execute
61
+ return self.client.execute(*args, **kwargs) # type: ignore
62
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/wandb/vendor/gql-0.2.0/wandb_gql/client.py", line 52, in execute
63
+ result = self._get_result(document, *args, **kwargs)
64
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/wandb/vendor/gql-0.2.0/wandb_gql/client.py", line 60, in _get_result
65
+ return self.transport.execute(document, *args, **kwargs)
66
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/wandb/sdk/lib/gql_request.py", line 58, in execute
67
+ request = self.session.post(self.url, **post_args)
68
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/requests/sessions.py", line 637, in post
69
+ return self.request("POST", url, data=data, json=json, **kwargs)
70
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/requests/sessions.py", line 589, in request
71
+ resp = self.send(prep, **send_kwargs)
72
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/requests/sessions.py", line 703, in send
73
+ r = adapter.send(request, **kwargs)
74
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/requests/adapters.py", line 688, in send
75
+ raise ConnectTimeout(e, request=request)
76
+ requests.exceptions.ConnectTimeout: HTTPSConnectionPool(host='api.wandb.ai', port=443): Max retries exceeded with url: /graphql (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fa5a7f7b9a0>, 'Connection to api.wandb.ai timed out. (connect timeout=20)'))
77
+ 2025-06-26 14:38:28,356 WARNING MainThread:2555 [wandb_init.py:init():1681] [no run ID] interrupted
78
+ Traceback (most recent call last):
79
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/wandb/sdk/wandb_init.py", line 1677, in init
80
+ return wi.init(run_settings, run_config, run_printer)
81
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/wandb/sdk/wandb_init.py", line 1055, in init
82
+ result = wait_with_progress(
83
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/wandb/sdk/mailbox/wait_with_progress.py", line 24, in wait_with_progress
84
+ return wait_all_with_progress(
85
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/wandb/sdk/mailbox/wait_with_progress.py", line 87, in wait_all_with_progress
86
+ return asyncio_compat.run(progress_loop_with_timeout)
87
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/wandb/sdk/lib/asyncio_compat.py", line 30, in run
88
+ return future.result()
89
+ File "/root/miniconda3/envs/protT3/lib/python3.10/concurrent/futures/_base.py", line 440, in result
90
+ self._condition.wait(timeout)
91
+ File "/root/miniconda3/envs/protT3/lib/python3.10/threading.py", line 320, in wait
92
+ waiter.acquire()
93
+ KeyboardInterrupt
94
+ 2025-06-26 14:38:29,216 INFO MsgRouterThr:2555 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 2 handles.
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/files/output.log ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06261435 exists and is not empty.
2
+ Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.
3
+ LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
4
+
5
+ | Name | Type | Params | Mode
6
+ ------------------------------------------------------
7
+ 0 | blip2qformer | Blip2Qformer | 327 M | train
8
+ ------------------------------------------------------
9
+ 179 M Trainable params
10
+ 147 M Non-trainable params
11
+ 327 M Total params
12
+ 1,309.467 Total estimated model params size (MB)
13
+ 5 Modules in train mode
14
+ 926 Modules in eval mode
15
+ Epoch 0: 3%|██████▉ | 48/1665 [00:58<32:34, 0.83it/s, v_num=hy79]
16
+ /nas/shared/kilab/wangyujia/ProtT3/model/blip2qformer.py:220: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
17
+ with torch.cuda.amp.autocast(enable_autocast, dtype=torch.float32):
18
+
19
+ Detected KeyboardInterrupt, attempting graceful shutdown ...
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/files/requirements.txt ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ attrs==25.3.0
2
+ tqdm==4.67.1
3
+ langcodes==3.5.0
4
+ nvidia-cublas-cu12==12.4.5.8
5
+ tifffile==2025.5.10
6
+ nvidia-cufile-cu12==1.11.1.6
7
+ nltk==3.9.1
8
+ salesforce-lavis==1.0.2
9
+ tzdata==2025.2
10
+ pyparsing==3.2.3
11
+ six==1.17.0
12
+ python-dateutil==2.9.0.post0
13
+ pandas==2.2.3
14
+ pytorch-lightning==2.5.1.post0
15
+ blinker==1.9.0
16
+ opencv-python-headless==4.5.5.64
17
+ nvidia-cusparse-cu12==12.3.1.170
18
+ pytz==2025.2
19
+ async-timeout==5.0.1
20
+ pillow==11.2.1
21
+ parso==0.8.4
22
+ joblib==1.5.1
23
+ contourpy==1.3.2
24
+ triton==3.2.0
25
+ marisa-trie==1.2.1
26
+ PyYAML==6.0.2
27
+ regex==2024.11.6
28
+ idna==3.10
29
+ nvidia-curand-cu12==10.3.5.147
30
+ rpds-py==0.25.1
31
+ aiosignal==1.3.2
32
+ srsly==2.5.1
33
+ confection==0.1.5
34
+ typing-inspection==0.4.1
35
+ packaging==24.2
36
+ distlib==0.3.9
37
+ networkx==3.4.2
38
+ absl-py==2.2.2
39
+ yarl==1.20.0
40
+ lightning-utilities==0.14.3
41
+ executing==2.2.0
42
+ pycocoevalcap==1.2
43
+ wheel==0.45.1
44
+ nvidia-ml-py==12.575.51
45
+ cycler==0.12.1
46
+ wrapt==1.17.2
47
+ jsonschema-specifications==2025.4.1
48
+ protobuf==6.31.0
49
+ mpmath==1.3.0
50
+ certifi==2025.4.26
51
+ py-cpuinfo==9.0.0
52
+ contexttimer==0.3.3
53
+ watchdog==6.0.0
54
+ pexpect==4.9.0
55
+ webencodings==0.5.1
56
+ hf-xet==1.1.2
57
+ cymem==2.0.11
58
+ requests==2.32.3
59
+ timm==0.4.12
60
+ omegaconf==2.3.0
61
+ nvidia-nvjitlink-cu12==12.4.127
62
+ webdataset==0.2.111
63
+ nodeenv==1.9.1
64
+ frozenlist==1.6.0
65
+ annotated-types==0.7.0
66
+ matplotlib-inline==0.1.7
67
+ urllib3==2.4.0
68
+ rich==14.0.0
69
+ GitPython==3.1.44
70
+ lazy_loader==0.4
71
+ msgpack==1.1.0
72
+ prompt_toolkit==3.0.51
73
+ fonttools==4.58.0
74
+ multidict==6.4.4
75
+ blis==1.3.0
76
+ thinc==8.3.6
77
+ nvidia-nvtx-cu12==12.4.127
78
+ torchmetrics==1.7.1
79
+ weasel==0.4.1
80
+ numpy==2.2.6
81
+ cachetools==5.5.2
82
+ Jinja2==3.1.6
83
+ matplotlib==3.10.3
84
+ nvidia-cudnn-cu12==9.1.0.70
85
+ Pygments==2.19.1
86
+ tornado==6.5.1
87
+ scipy==1.15.3
88
+ rouge_score==0.1.2
89
+ cloudpathlib==0.21.1
90
+ jedi==0.19.2
91
+ referencing==0.36.2
92
+ decord==0.6.0
93
+ setuptools==78.1.1
94
+ mdurl==0.1.2
95
+ identify==2.6.12
96
+ python-slugify==8.0.4
97
+ portalocker==3.1.1
98
+ catalogue==2.0.10
99
+ platformdirs==4.3.8
100
+ antlr4-python3-runtime==4.9.3
101
+ nvidia-cusolver-cu12==11.6.1.9
102
+ kaggle==1.7.4.5
103
+ pydeck==0.9.1
104
+ pydantic==2.11.5
105
+ nvidia-cufft-cu12==11.2.1.3
106
+ pyarrow==20.0.0
107
+ nvidia-nccl-cu12==2.21.5
108
+ markdown-it-py==3.0.0
109
+ gitdb==4.0.12
110
+ altair==5.5.0
111
+ torchvision==0.21.0
112
+ python-magic==0.4.27
113
+ iopath==0.1.10
114
+ smart-open==7.1.0
115
+ torch==2.6.0
116
+ pycocotools==2.0.8
117
+ fairscale==0.4.4
118
+ traitlets==5.14.3
119
+ pure_eval==0.2.3
120
+ sympy==1.13.1
121
+ nvidia-cusparselt-cu12==0.6.2
122
+ imageio==2.37.0
123
+ stack-data==0.6.3
124
+ shellingham==1.5.4
125
+ nvidia-cuda-runtime-cu12==12.4.127
126
+ einops==0.8.1
127
+ tenacity==9.1.2
128
+ virtualenv==20.31.2
129
+ ptyprocess==0.7.0
130
+ cfgv==3.4.0
131
+ pre_commit==4.2.0
132
+ language_data==1.3.0
133
+ typing_extensions==4.13.2
134
+ propcache==0.3.1
135
+ nvidia-cuda-cupti-cu12==12.4.127
136
+ safetensors==0.5.3
137
+ text-unidecode==1.3
138
+ wcwidth==0.2.13
139
+ charset-normalizer==3.4.2
140
+ aiohappyeyeballs==2.6.1
141
+ ipython==8.36.0
142
+ streamlit==1.45.1
143
+ asttokens==3.0.0
144
+ psutil==7.0.0
145
+ smmap==5.0.2
146
+ exceptiongroup==1.3.0
147
+ murmurhash==1.0.13
148
+ filelock==3.18.0
149
+ plotly==6.1.1
150
+ hjson==3.1.0
151
+ pydantic_core==2.33.2
152
+ ninja==1.11.1.4
153
+ kiwisolver==1.4.8
154
+ spacy-legacy==3.0.12
155
+ opendatasets==0.1.22
156
+ decorator==5.2.1
157
+ spacy==3.8.7
158
+ wasabi==1.1.3
159
+ sentencepiece==0.2.0
160
+ toml==0.10.2
161
+ scikit-image==0.25.2
162
+ deepspeed==0.16.10+b666844f
163
+ ftfy==6.3.1
164
+ bleach==6.2.0
165
+ nvidia-cuda-nvrtc-cu12==12.4.127
166
+ spacy-loggers==1.0.5
167
+ MarkupSafe==3.0.2
168
+ braceexpand==0.1.7
169
+ oss2==2.15.0
170
+ preshed==3.0.10
171
+ transformers==4.52.3
172
+ aiohttp==3.12.2
173
+ web.py==0.62
174
+ threadpoolctl==3.6.0
175
+ jaraco.functools==4.1.0
176
+ wandb==0.19.11
177
+ sentry-sdk==2.29.1
178
+ tokenizers==0.21.1
179
+ fsspec==2025.3.0
180
+ flash-attn==2.7.1.post1
181
+ opendelta==0.3.2
182
+ opencv-python==4.11.0.86
183
+ click==8.2.1
184
+ docker-pycreds==0.4.0
185
+ typer==0.16.0
186
+ xxhash==3.5.0
187
+ pathlib==1.0.1
188
+ dill==0.3.8
189
+ crcmod==1.7
190
+ bigmodelvis==0.0.1
191
+ datasets==3.6.0
192
+ pycryptodome==3.23.0
193
+ jsonschema==4.24.0
194
+ aliyun-python-sdk-core==2.16.0
195
+ jmespath==0.10.0
196
+ more-itertools==10.7.0
197
+ scikit-learn==1.6.1
198
+ huggingface-hub==0.32.1
199
+ cryptography==45.0.3
200
+ pycparser==2.22
201
+ yacs==0.1.8
202
+ aliyun-python-sdk-kms==2.16.5
203
+ cffi==1.17.1
204
+ delta-center-client==0.0.4
205
+ multiprocess==0.70.16
206
+ setproctitle==1.3.6
207
+ narwhals==1.41.0
208
+ pip==25.1.1
209
+ cheroot==10.0.1
210
+ jaraco.context==5.3.0
211
+ more-itertools==10.3.0
212
+ jaraco.functools==4.0.1
213
+ jaraco.text==3.12.1
214
+ platformdirs==4.2.2
215
+ packaging==24.2
216
+ wheel==0.45.1
217
+ zipp==3.19.2
218
+ inflect==7.3.1
219
+ autocommand==2.2.2
220
+ typeguard==4.3.0
221
+ jaraco.collections==5.1.0
222
+ backports.tarfile==1.2.0
223
+ tomli==2.0.1
224
+ importlib_metadata==8.0.0
225
+ typing_extensions==4.12.2
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/files/wandb-metadata.json ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.0",
4
+ "startedAt": "2025-06-26T06:40:00.459849Z",
5
+ "args": [
6
+ "--devices",
7
+ "0,1,2,3,4,5,6,7",
8
+ "--mode",
9
+ "train",
10
+ "--filename",
11
+ "stage1_06261435",
12
+ "--num_query_token",
13
+ "8",
14
+ "--plm_name",
15
+ "/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m",
16
+ "--bert_name",
17
+ "/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft",
18
+ "--save_every_n_epochs",
19
+ "5",
20
+ "--max_epochs",
21
+ "30",
22
+ "--batch_size",
23
+ "64",
24
+ "--precision",
25
+ "bf16-mixed",
26
+ "--mix_dataset",
27
+ "--num_workers",
28
+ "8",
29
+ "--use_wandb_logger"
30
+ ],
31
+ "program": "/nas/shared/kilab/wangyujia/ProtT3/stage1.py",
32
+ "codePath": "stage1.py",
33
+ "email": "gia0603yucca@gmail.com",
34
+ "root": "./all_checkpoints/stage1_06261435/",
35
+ "host": "dsw-265304-cd576ddc5-gh74w",
36
+ "executable": "/root/miniconda3/envs/protT3/bin/python",
37
+ "codePathLocal": "stage1.py",
38
+ "cpu_count": 64,
39
+ "cpu_count_logical": 64,
40
+ "gpu": "NVIDIA A800-SXM4-80GB",
41
+ "gpu_count": 8,
42
+ "disk": {
43
+ "/": {
44
+ "total": "1623302262784",
45
+ "used": "1290833920"
46
+ }
47
+ },
48
+ "memory": {
49
+ "total": "549755813888"
50
+ },
51
+ "cpu": {
52
+ "count": 64,
53
+ "countLogical": 64
54
+ },
55
+ "gpu_nvidia": [
56
+ {
57
+ "name": "NVIDIA A800-SXM4-80GB",
58
+ "memoryTotal": "85198045184",
59
+ "architecture": "Ampere"
60
+ },
61
+ {
62
+ "name": "NVIDIA A800-SXM4-80GB",
63
+ "memoryTotal": "85198045184",
64
+ "architecture": "Ampere"
65
+ },
66
+ {
67
+ "name": "NVIDIA A800-SXM4-80GB",
68
+ "memoryTotal": "85198045184",
69
+ "architecture": "Ampere"
70
+ },
71
+ {
72
+ "name": "NVIDIA A800-SXM4-80GB",
73
+ "memoryTotal": "85198045184",
74
+ "architecture": "Ampere"
75
+ },
76
+ {
77
+ "name": "NVIDIA A800-SXM4-80GB",
78
+ "memoryTotal": "85198045184",
79
+ "architecture": "Ampere"
80
+ },
81
+ {
82
+ "name": "NVIDIA A800-SXM4-80GB",
83
+ "memoryTotal": "85198045184",
84
+ "architecture": "Ampere"
85
+ },
86
+ {
87
+ "name": "NVIDIA A800-SXM4-80GB",
88
+ "memoryTotal": "85198045184",
89
+ "architecture": "Ampere"
90
+ },
91
+ {
92
+ "name": "NVIDIA A800-SXM4-80GB",
93
+ "memoryTotal": "85198045184",
94
+ "architecture": "Ampere"
95
+ }
96
+ ],
97
+ "cudaVersion": "12.1"
98
+ }
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":87}}
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/logs/debug-internal.log ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-06-26T14:40:00.459807265+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/logs/debug-core.log"}
2
+ {"time":"2025-06-26T14:40:02.083580433+08:00","level":"INFO","msg":"created new stream","id":"coknhy79"}
3
+ {"time":"2025-06-26T14:40:02.083623333+08:00","level":"INFO","msg":"stream: started","id":"coknhy79"}
4
+ {"time":"2025-06-26T14:40:02.083652289+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"coknhy79"}
5
+ {"time":"2025-06-26T14:40:02.083685885+08:00","level":"INFO","msg":"sender: started","stream_id":"coknhy79"}
6
+ {"time":"2025-06-26T14:40:02.083706361+08:00","level":"INFO","msg":"handler: started","stream_id":"coknhy79"}
7
+ {"time":"2025-06-26T14:40:05.653872904+08:00","level":"INFO","msg":"Starting system monitor"}
8
+ {"time":"2025-06-26T14:40:21.95203173+08:00","level":"ERROR","msg":"request failed","error":"Put \"https://storage.googleapis.com/wandb-production.appspot.com/gia0603yucca/stage1_06261435/coknhy79/wandb-metadata.json?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gorilla-files-url-signer-man%40wandb-production.iam.gserviceaccount.com%2F20250626%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20250626T064005Z&X-Goog-Expires=86399&X-Goog-Signature=2ebb3b79b72a33f0dea008298dd64586dedd8e6bc4d1254ca4ee427dd9bc9031e9639976842b04c6f1e621bb4977fcba6d4c94c21e92008c662e898226494154965b113c395f94636cbca6cfd2249046ec06117750eebbc838bade9d9fa21ae04a11b6eb67dacc0319fb3bb297deca6c80a588a6a41a7b1f22b20c94a59d1f800926208ec84aa369a0a265f5cb9507e60248cf60d0ca950966ead524dec91e347bdb50ed95b2daa080e4e381b4e3dd1d85267b1297dc3c33d3bc683cac980ad150cf1033532af34d80aef288235efe863a3246dad1f2b656d6dbea37ff48485a03307ab8f7d65f6827ff79a95ebc8be7283ace09ee4bdd580361dd389452900a&X-Goog-SignedHeaders=host&X-User=gia0603yucca\": read tcp 10.1.8.160:47468->142.250.73.155:443: read: connection reset by peer","method":"PUT","url":"https://storage.googleapis.com/wandb-production.appspot.com/gia0603yucca/stage1_06261435/coknhy79/wandb-metadata.json?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gorilla-files-url-signer-man%40wandb-production.iam.gserviceaccount.com%2F20250626%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20250626T064005Z&X-Goog-Expires=86399&X-Goog-Signature=2ebb3b79b72a33f0dea008298dd64586dedd8e6bc4d1254ca4ee427dd9bc9031e9639976842b04c6f1e621bb4977fcba6d4c94c21e92008c662e898226494154965b113c395f94636cbca6cfd2249046ec06117750eebbc838bade9d9fa21ae04a11b6eb67dacc0319fb3bb297deca6c80a588a6a41a7b1f22b20c94a59d1f800926208ec84aa369a0a265f5cb9507e60248cf60d0ca950966ead524dec91e347bdb50ed95b2daa080e4e381b4e3dd1d85267b1297dc3c33d3bc683cac980ad150cf1033532af34d80aef288235efe863a3246dad1f2b656d6dbea37ff48485a03307ab8f7d65f6827ff79a95ebc8be7283ace09ee4bdd580361dd389452900a&X-Goog-SignedHeaders=host&X-User=gia0603yucca"}
9
+ {"time":"2025-06-26T14:40:21.952032953+08:00","level":"ERROR","msg":"request failed","error":"Put \"https://storage.googleapis.com/wandb-production.appspot.com/gia0603yucca/stage1_06261435/coknhy79/requirements.txt?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gorilla-files-url-signer-man%40wandb-production.iam.gserviceaccount.com%2F20250626%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20250626T064006Z&X-Goog-Expires=86399&X-Goog-Signature=78ae20776b7f86131b9272da30634723f339c53fbe35baa579e4d622bae104bf8b38c0c3cd5e64009eab167fa1132f4e5d02a2124a12f56f53a2fa10277fb6a7c582a81981d7372e6f2b2b33ff175bb4af1fc83ed04c00275ef4e90cec3133057f39515a0b0b4d55f5e5257bb5e28f9d142c819f9f4848bd11229d9c73dee989c0f69232f269e7efca958b28ec14404ae411af486b22b4b581a6f37ad7a8593025d9d20519451abe9ea912dac4130082c228f58687eb52f37b0911bf5fff35f7c2f75597dc0beebc355bb0a83b0f71cb06e8fadec0b9f798cf0da7e329a991260240180d651b91d4129bbd2f503247ad12cbb6e080076b2824d51383d713ed3b&X-Goog-SignedHeaders=host&X-User=gia0603yucca\": read tcp 10.1.8.160:47468->142.250.73.155:443: read: connection reset by peer","method":"PUT","url":"https://storage.googleapis.com/wandb-production.appspot.com/gia0603yucca/stage1_06261435/coknhy79/requirements.txt?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gorilla-files-url-signer-man%40wandb-production.iam.gserviceaccount.com%2F20250626%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20250626T064006Z&X-Goog-Expires=86399&X-Goog-Signature=78ae20776b7f86131b9272da30634723f339c53fbe35baa579e4d622bae104bf8b38c0c3cd5e64009eab167fa1132f4e5d02a2124a12f56f53a2fa10277fb6a7c582a81981d7372e6f2b2b33ff175bb4af1fc83ed04c00275ef4e90cec3133057f39515a0b0b4d55f5e5257bb5e28f9d142c819f9f4848bd11229d9c73dee989c0f69232f269e7efca958b28ec14404ae411af486b22b4b581a6f37ad7a8593025d9d20519451abe9ea912dac4130082c228f58687eb52f37b0911bf5fff35f7c2f75597dc0beebc355bb0a83b0f71cb06e8fadec0b9f798cf0da7e329a991260240180d651b91d4129bbd2f503247ad12cbb6e080076b2824d51383d713ed3b&X-Goog-SignedHeaders=host&X-User=gia0603yucca"}
10
+ {"time":"2025-06-26T14:41:27.871959935+08:00","level":"INFO","msg":"stream: closing","id":"coknhy79"}
11
+ {"time":"2025-06-26T14:41:27.872028784+08:00","level":"INFO","msg":"Stopping system monitor"}
12
+ {"time":"2025-06-26T14:41:27.940516252+08:00","level":"INFO","msg":"Stopped system monitor"}
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/logs/debug.log ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-06-26 14:40:00,443 INFO MainThread:5872 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
2
+ 2025-06-26 14:40:00,443 INFO MainThread:5872 [wandb_setup.py:_flush():70] Configure stats pid to 5872
3
+ 2025-06-26 14:40:00,443 INFO MainThread:5872 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
4
+ 2025-06-26 14:40:00,443 INFO MainThread:5872 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
5
+ 2025-06-26 14:40:00,443 INFO MainThread:5872 [wandb_setup.py:_flush():70] Loading settings from environment variables
6
+ 2025-06-26 14:40:00,444 INFO MainThread:5872 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/logs/debug.log
7
+ 2025-06-26 14:40:00,444 INFO MainThread:5872 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/logs/debug-internal.log
8
+ 2025-06-26 14:40:00,444 INFO MainThread:5872 [wandb_init.py:init():852] calling init triggers
9
+ 2025-06-26 14:40:00,444 INFO MainThread:5872 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-06-26 14:40:00,444 INFO MainThread:5872 [wandb_init.py:init():893] starting backend
12
+ 2025-06-26 14:40:00,444 INFO MainThread:5872 [wandb_init.py:init():897] sending inform_init request
13
+ 2025-06-26 14:40:00,446 INFO MainThread:5872 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-06-26 14:40:00,452 INFO MainThread:5872 [wandb_init.py:init():907] backend started and connected
15
+ 2025-06-26 14:40:00,460 INFO MainThread:5872 [wandb_init.py:init():1005] updated telemetry
16
+ 2025-06-26 14:40:00,464 INFO MainThread:5872 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
17
+ 2025-06-26 14:40:05,603 INFO MainThread:5872 [wandb_init.py:init():1104] starting run threads in backend
18
+ 2025-06-26 14:40:05,851 INFO MainThread:5872 [wandb_run.py:_console_start():2573] atexit reg
19
+ 2025-06-26 14:40:05,851 INFO MainThread:5872 [wandb_run.py:_redirect():2421] redirect: wrap_raw
20
+ 2025-06-26 14:40:05,855 INFO MainThread:5872 [wandb_run.py:_redirect():2490] Wrapping output streams.
21
+ 2025-06-26 14:40:05,855 INFO MainThread:5872 [wandb_run.py:_redirect():2513] Redirects installed.
22
+ 2025-06-26 14:40:05,857 INFO MainThread:5872 [wandb_init.py:init():1150] run started, returning control to user process
23
+ 2025-06-26 14:40:11,773 INFO MainThread:5872 [wandb_run.py:_config_callback():1436] config_cb None None {'filename': 'stage1_06261435', 'seed': 42, 'mode': 'train', 'strategy': 'deepspeed', 'accelerator': 'gpu', 'devices': '0,1,2,3,4,5,6,7', 'precision': 'bf16-mixed', 'max_epochs': 30, 'check_val_every_n_epoch': 1, 'use_wandb_logger': True, 'mix_dataset': True, 'temperature': 0.1, 'save_every_n_epochs': 5, 'ptm': True, 'lm': True, 'rerank_cand_num': 128, 'plm_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m', 'plm_tune': 'freeze', 'load_4bit': False, 'pool_size': 0, 'bert_hidden_dim': 768, 'bert_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft', 'projection_dim': 256, 'cross_attention_freq': 2, 'num_query_token': 8, 'weight_decay': 0.05, 'init_lr': 0.0001, 'min_lr': 1e-05, 'warmup_lr': 1e-06, 'warmup_steps': 1000, 'lr_decay_rate': 0.9, 'scheduler': 'linear_warmup_cosine_lr', 'init_checkpoint': '', 'retrieval_eval_epoch': 10, 'num_workers': 8, 'batch_size': 64, 'match_batch_size': 64, 'root': 'data', 'text_max_len': 128, 'prot_max_len': 1024, 'prot_aug': 'None'}
24
+ 2025-06-26 14:41:27,870 INFO MsgRouterThr:5872 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/run-coknhy79.wandb ADDED
Binary file (32.8 kB). View file
 
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/files/output.log ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06261435 exists and is not empty.
2
+ Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.
3
+ LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
4
+
5
+ | Name | Type | Params | Mode
6
+ ------------------------------------------------------
7
+ 0 | blip2qformer | Blip2Qformer | 327 M | train
8
+ ------------------------------------------------------
9
+ 179 M Trainable params
10
+ 147 M Non-trainable params
11
+ 327 M Total params
12
+ 1,309.467 Total estimated model params size (MB)
13
+ 5 Modules in train mode
14
+ 926 Modules in eval mode
15
+ Epoch 9: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 666/666 [31:47<00:00, 0.35it/s, v_num=vyyf]
16
+ /nas/shared/kilab/wangyujia/ProtT3/model/blip2qformer.py:220: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
17
+ with torch.cuda.amp.autocast(enable_autocast, dtype=torch.float32):
18
+ Validation DataLoader 2: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:18<00:00, 0.44it/s]
19
+ /nas/shared/kilab/wangyujia/ProtT3/model/dist_funs.py:18: FutureWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/main/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.
20
+ sd = self.module.state_dict(destination, prefix, keep_vars)
21
+ /nas/shared/kilab/wangyujia/ProtT3/model/blip2_stage1.py:42: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
22
+ return torch.cuda.amp.autocast(dtype=dtype)
23
+ 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 157/157 [08:51<00:00, 3.39s/it]
24
+ 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1250/1250 [00:01<00:00, 1063.25it/s]
25
+ re-ranking p2t: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████���███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2500/2500 [10:06<00:00, 4.12it/s]
26
+ 94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 1179/1250 [00:01<00:00, 1084.47it/s][rank: 1] Child process with PID 14146 terminated with code -6. Forcefully terminating all other processes to avoid zombies 🧟
27
+ re-ranking p2t: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2500/2500 [10:06<00:00, 4.13it/s]
28
+ re-ranking t2p: 58%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 1451/2500 [10:59<08:00, 2.18it/s]
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/files/requirements.txt ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ attrs==25.3.0
2
+ tqdm==4.67.1
3
+ langcodes==3.5.0
4
+ nvidia-cublas-cu12==12.4.5.8
5
+ tifffile==2025.5.10
6
+ nvidia-cufile-cu12==1.11.1.6
7
+ nltk==3.9.1
8
+ salesforce-lavis==1.0.2
9
+ tzdata==2025.2
10
+ pyparsing==3.2.3
11
+ six==1.17.0
12
+ python-dateutil==2.9.0.post0
13
+ pandas==2.2.3
14
+ pytorch-lightning==2.5.1.post0
15
+ blinker==1.9.0
16
+ opencv-python-headless==4.5.5.64
17
+ nvidia-cusparse-cu12==12.3.1.170
18
+ pytz==2025.2
19
+ async-timeout==5.0.1
20
+ pillow==11.2.1
21
+ parso==0.8.4
22
+ joblib==1.5.1
23
+ contourpy==1.3.2
24
+ triton==3.2.0
25
+ marisa-trie==1.2.1
26
+ PyYAML==6.0.2
27
+ regex==2024.11.6
28
+ idna==3.10
29
+ nvidia-curand-cu12==10.3.5.147
30
+ rpds-py==0.25.1
31
+ aiosignal==1.3.2
32
+ srsly==2.5.1
33
+ confection==0.1.5
34
+ typing-inspection==0.4.1
35
+ packaging==24.2
36
+ distlib==0.3.9
37
+ networkx==3.4.2
38
+ absl-py==2.2.2
39
+ yarl==1.20.0
40
+ lightning-utilities==0.14.3
41
+ executing==2.2.0
42
+ pycocoevalcap==1.2
43
+ wheel==0.45.1
44
+ nvidia-ml-py==12.575.51
45
+ cycler==0.12.1
46
+ wrapt==1.17.2
47
+ jsonschema-specifications==2025.4.1
48
+ protobuf==6.31.0
49
+ mpmath==1.3.0
50
+ certifi==2025.4.26
51
+ py-cpuinfo==9.0.0
52
+ contexttimer==0.3.3
53
+ watchdog==6.0.0
54
+ pexpect==4.9.0
55
+ webencodings==0.5.1
56
+ hf-xet==1.1.2
57
+ cymem==2.0.11
58
+ requests==2.32.3
59
+ timm==0.4.12
60
+ omegaconf==2.3.0
61
+ nvidia-nvjitlink-cu12==12.4.127
62
+ webdataset==0.2.111
63
+ nodeenv==1.9.1
64
+ frozenlist==1.6.0
65
+ annotated-types==0.7.0
66
+ matplotlib-inline==0.1.7
67
+ urllib3==2.4.0
68
+ rich==14.0.0
69
+ GitPython==3.1.44
70
+ lazy_loader==0.4
71
+ msgpack==1.1.0
72
+ prompt_toolkit==3.0.51
73
+ fonttools==4.58.0
74
+ multidict==6.4.4
75
+ blis==1.3.0
76
+ thinc==8.3.6
77
+ nvidia-nvtx-cu12==12.4.127
78
+ torchmetrics==1.7.1
79
+ weasel==0.4.1
80
+ numpy==2.2.6
81
+ cachetools==5.5.2
82
+ Jinja2==3.1.6
83
+ matplotlib==3.10.3
84
+ nvidia-cudnn-cu12==9.1.0.70
85
+ Pygments==2.19.1
86
+ tornado==6.5.1
87
+ scipy==1.15.3
88
+ rouge_score==0.1.2
89
+ cloudpathlib==0.21.1
90
+ jedi==0.19.2
91
+ referencing==0.36.2
92
+ decord==0.6.0
93
+ setuptools==78.1.1
94
+ mdurl==0.1.2
95
+ identify==2.6.12
96
+ python-slugify==8.0.4
97
+ portalocker==3.1.1
98
+ catalogue==2.0.10
99
+ platformdirs==4.3.8
100
+ antlr4-python3-runtime==4.9.3
101
+ nvidia-cusolver-cu12==11.6.1.9
102
+ kaggle==1.7.4.5
103
+ pydeck==0.9.1
104
+ pydantic==2.11.5
105
+ nvidia-cufft-cu12==11.2.1.3
106
+ pyarrow==20.0.0
107
+ nvidia-nccl-cu12==2.21.5
108
+ markdown-it-py==3.0.0
109
+ gitdb==4.0.12
110
+ altair==5.5.0
111
+ torchvision==0.21.0
112
+ python-magic==0.4.27
113
+ iopath==0.1.10
114
+ smart-open==7.1.0
115
+ torch==2.6.0
116
+ pycocotools==2.0.8
117
+ fairscale==0.4.4
118
+ traitlets==5.14.3
119
+ pure_eval==0.2.3
120
+ sympy==1.13.1
121
+ nvidia-cusparselt-cu12==0.6.2
122
+ imageio==2.37.0
123
+ stack-data==0.6.3
124
+ shellingham==1.5.4
125
+ nvidia-cuda-runtime-cu12==12.4.127
126
+ einops==0.8.1
127
+ tenacity==9.1.2
128
+ virtualenv==20.31.2
129
+ ptyprocess==0.7.0
130
+ cfgv==3.4.0
131
+ pre_commit==4.2.0
132
+ language_data==1.3.0
133
+ typing_extensions==4.13.2
134
+ propcache==0.3.1
135
+ nvidia-cuda-cupti-cu12==12.4.127
136
+ safetensors==0.5.3
137
+ text-unidecode==1.3
138
+ wcwidth==0.2.13
139
+ charset-normalizer==3.4.2
140
+ aiohappyeyeballs==2.6.1
141
+ ipython==8.36.0
142
+ streamlit==1.45.1
143
+ asttokens==3.0.0
144
+ psutil==7.0.0
145
+ smmap==5.0.2
146
+ exceptiongroup==1.3.0
147
+ murmurhash==1.0.13
148
+ filelock==3.18.0
149
+ plotly==6.1.1
150
+ hjson==3.1.0
151
+ pydantic_core==2.33.2
152
+ ninja==1.11.1.4
153
+ kiwisolver==1.4.8
154
+ spacy-legacy==3.0.12
155
+ opendatasets==0.1.22
156
+ decorator==5.2.1
157
+ spacy==3.8.7
158
+ wasabi==1.1.3
159
+ sentencepiece==0.2.0
160
+ toml==0.10.2
161
+ scikit-image==0.25.2
162
+ deepspeed==0.16.10+b666844f
163
+ ftfy==6.3.1
164
+ bleach==6.2.0
165
+ nvidia-cuda-nvrtc-cu12==12.4.127
166
+ spacy-loggers==1.0.5
167
+ MarkupSafe==3.0.2
168
+ braceexpand==0.1.7
169
+ oss2==2.15.0
170
+ preshed==3.0.10
171
+ transformers==4.52.3
172
+ aiohttp==3.12.2
173
+ web.py==0.62
174
+ threadpoolctl==3.6.0
175
+ jaraco.functools==4.1.0
176
+ wandb==0.19.11
177
+ sentry-sdk==2.29.1
178
+ tokenizers==0.21.1
179
+ fsspec==2025.3.0
180
+ flash-attn==2.7.1.post1
181
+ opendelta==0.3.2
182
+ opencv-python==4.11.0.86
183
+ click==8.2.1
184
+ docker-pycreds==0.4.0
185
+ typer==0.16.0
186
+ xxhash==3.5.0
187
+ pathlib==1.0.1
188
+ dill==0.3.8
189
+ crcmod==1.7
190
+ bigmodelvis==0.0.1
191
+ datasets==3.6.0
192
+ pycryptodome==3.23.0
193
+ jsonschema==4.24.0
194
+ aliyun-python-sdk-core==2.16.0
195
+ jmespath==0.10.0
196
+ more-itertools==10.7.0
197
+ scikit-learn==1.6.1
198
+ huggingface-hub==0.32.1
199
+ cryptography==45.0.3
200
+ pycparser==2.22
201
+ yacs==0.1.8
202
+ aliyun-python-sdk-kms==2.16.5
203
+ cffi==1.17.1
204
+ delta-center-client==0.0.4
205
+ multiprocess==0.70.16
206
+ setproctitle==1.3.6
207
+ narwhals==1.41.0
208
+ pip==25.1.1
209
+ cheroot==10.0.1
210
+ jaraco.context==5.3.0
211
+ more-itertools==10.3.0
212
+ jaraco.functools==4.0.1
213
+ jaraco.text==3.12.1
214
+ platformdirs==4.2.2
215
+ packaging==24.2
216
+ wheel==0.45.1
217
+ zipp==3.19.2
218
+ inflect==7.3.1
219
+ autocommand==2.2.2
220
+ typeguard==4.3.0
221
+ jaraco.collections==5.1.0
222
+ backports.tarfile==1.2.0
223
+ tomli==2.0.1
224
+ importlib_metadata==8.0.0
225
+ typing_extensions==4.12.2
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/files/wandb-metadata.json ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.0",
4
+ "startedAt": "2025-06-26T06:47:31.299461Z",
5
+ "args": [
6
+ "--devices",
7
+ "0,1,2,3,4,5,6,7",
8
+ "--mode",
9
+ "train",
10
+ "--filename",
11
+ "stage1_06261435",
12
+ "--num_query_token",
13
+ "8",
14
+ "--plm_name",
15
+ "/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m",
16
+ "--bert_name",
17
+ "/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft",
18
+ "--save_every_n_epochs",
19
+ "5",
20
+ "--max_epochs",
21
+ "30",
22
+ "--batch_size",
23
+ "160",
24
+ "--precision",
25
+ "bf16-mixed",
26
+ "--mix_dataset",
27
+ "--num_workers",
28
+ "8",
29
+ "--use_wandb_logger"
30
+ ],
31
+ "program": "/nas/shared/kilab/wangyujia/ProtT3/stage1.py",
32
+ "codePath": "stage1.py",
33
+ "email": "gia0603yucca@gmail.com",
34
+ "root": "./all_checkpoints/stage1_06261435/",
35
+ "host": "dsw-265304-cd576ddc5-gh74w",
36
+ "executable": "/root/miniconda3/envs/protT3/bin/python",
37
+ "codePathLocal": "stage1.py",
38
+ "cpu_count": 64,
39
+ "cpu_count_logical": 64,
40
+ "gpu": "NVIDIA A800-SXM4-80GB",
41
+ "gpu_count": 8,
42
+ "disk": {
43
+ "/": {
44
+ "total": "1623302262784",
45
+ "used": "1290838016"
46
+ }
47
+ },
48
+ "memory": {
49
+ "total": "549755813888"
50
+ },
51
+ "cpu": {
52
+ "count": 64,
53
+ "countLogical": 64
54
+ },
55
+ "gpu_nvidia": [
56
+ {
57
+ "name": "NVIDIA A800-SXM4-80GB",
58
+ "memoryTotal": "85198045184",
59
+ "architecture": "Ampere"
60
+ },
61
+ {
62
+ "name": "NVIDIA A800-SXM4-80GB",
63
+ "memoryTotal": "85198045184",
64
+ "architecture": "Ampere"
65
+ },
66
+ {
67
+ "name": "NVIDIA A800-SXM4-80GB",
68
+ "memoryTotal": "85198045184",
69
+ "architecture": "Ampere"
70
+ },
71
+ {
72
+ "name": "NVIDIA A800-SXM4-80GB",
73
+ "memoryTotal": "85198045184",
74
+ "architecture": "Ampere"
75
+ },
76
+ {
77
+ "name": "NVIDIA A800-SXM4-80GB",
78
+ "memoryTotal": "85198045184",
79
+ "architecture": "Ampere"
80
+ },
81
+ {
82
+ "name": "NVIDIA A800-SXM4-80GB",
83
+ "memoryTotal": "85198045184",
84
+ "architecture": "Ampere"
85
+ },
86
+ {
87
+ "name": "NVIDIA A800-SXM4-80GB",
88
+ "memoryTotal": "85198045184",
89
+ "architecture": "Ampere"
90
+ },
91
+ {
92
+ "name": "NVIDIA A800-SXM4-80GB",
93
+ "memoryTotal": "85198045184",
94
+ "architecture": "Ampere"
95
+ }
96
+ ],
97
+ "cudaVersion": "12.1"
98
+ }
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/logs/debug-internal.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-06-26T14:47:31.30118788+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/logs/debug-core.log"}
2
+ {"time":"2025-06-26T14:47:33.763139479+08:00","level":"INFO","msg":"created new stream","id":"1bz1vyyf"}
3
+ {"time":"2025-06-26T14:47:33.763180996+08:00","level":"INFO","msg":"stream: started","id":"1bz1vyyf"}
4
+ {"time":"2025-06-26T14:47:33.76320552+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"1bz1vyyf"}
5
+ {"time":"2025-06-26T14:47:33.763263895+08:00","level":"INFO","msg":"handler: started","stream_id":"1bz1vyyf"}
6
+ {"time":"2025-06-26T14:47:33.763302435+08:00","level":"INFO","msg":"sender: started","stream_id":"1bz1vyyf"}
7
+ {"time":"2025-06-26T14:47:35.049823143+08:00","level":"INFO","msg":"Starting system monitor"}
8
+ {"time":"2025-06-26T14:59:24.18296941+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06261435/1bz1vyyf/file_stream\": EOF"}
9
+ {"time":"2025-06-26T15:47:20.217171547+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
10
+ {"time":"2025-06-26T15:47:52.39492821+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
11
+ {"time":"2025-06-26T15:48:27.272812301+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
12
+ {"time":"2025-06-26T15:49:05.914803533+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
13
+ {"time":"2025-06-26T15:49:51.452193247+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06261435/1bz1vyyf/file_stream\": net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)"}
14
+ {"time":"2025-06-26T15:49:52.900302871+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
15
+ {"time":"2025-06-26T15:50:55.486728897+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/logs/debug.log ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
2
+ 2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_setup.py:_flush():70] Configure stats pid to 13641
3
+ 2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
4
+ 2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
5
+ 2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_setup.py:_flush():70] Loading settings from environment variables
6
+ 2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/logs/debug.log
7
+ 2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/logs/debug-internal.log
8
+ 2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:init():852] calling init triggers
9
+ 2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:init():893] starting backend
12
+ 2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:init():897] sending inform_init request
13
+ 2025-06-26 14:47:31,294 INFO MainThread:13641 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-06-26 14:47:31,296 INFO MainThread:13641 [wandb_init.py:init():907] backend started and connected
15
+ 2025-06-26 14:47:31,300 INFO MainThread:13641 [wandb_init.py:init():1005] updated telemetry
16
+ 2025-06-26 14:47:31,303 INFO MainThread:13641 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
17
+ 2025-06-26 14:47:35,041 INFO MainThread:13641 [wandb_init.py:init():1104] starting run threads in backend
18
+ 2025-06-26 14:47:35,187 INFO MainThread:13641 [wandb_run.py:_console_start():2573] atexit reg
19
+ 2025-06-26 14:47:35,187 INFO MainThread:13641 [wandb_run.py:_redirect():2421] redirect: wrap_raw
20
+ 2025-06-26 14:47:35,192 INFO MainThread:13641 [wandb_run.py:_redirect():2490] Wrapping output streams.
21
+ 2025-06-26 14:47:35,192 INFO MainThread:13641 [wandb_run.py:_redirect():2513] Redirects installed.
22
+ 2025-06-26 14:47:35,197 INFO MainThread:13641 [wandb_init.py:init():1150] run started, returning control to user process
23
+ 2025-06-26 14:47:41,914 INFO MainThread:13641 [wandb_run.py:_config_callback():1436] config_cb None None {'filename': 'stage1_06261435', 'seed': 42, 'mode': 'train', 'strategy': 'deepspeed', 'accelerator': 'gpu', 'devices': '0,1,2,3,4,5,6,7', 'precision': 'bf16-mixed', 'max_epochs': 30, 'check_val_every_n_epoch': 1, 'use_wandb_logger': True, 'mix_dataset': True, 'temperature': 0.1, 'save_every_n_epochs': 5, 'ptm': True, 'lm': True, 'rerank_cand_num': 128, 'plm_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m', 'plm_tune': 'freeze', 'load_4bit': False, 'pool_size': 0, 'bert_hidden_dim': 768, 'bert_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft', 'projection_dim': 256, 'cross_attention_freq': 2, 'num_query_token': 8, 'weight_decay': 0.05, 'init_lr': 0.0001, 'min_lr': 1e-05, 'warmup_lr': 1e-06, 'warmup_steps': 1000, 'lr_decay_rate': 0.9, 'scheduler': 'linear_warmup_cosine_lr', 'init_checkpoint': '', 'retrieval_eval_epoch': 10, 'num_workers': 8, 'batch_size': 160, 'match_batch_size': 64, 'root': 'data', 'text_max_len': 128, 'prot_max_len': 1024, 'prot_aug': 'None'}
ProtT3/all_checkpoints/stage1_06262112/wandb/debug-internal.log ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-06-26T21:13:59.919018005+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/logs/debug-core.log"}
2
+ {"time":"2025-06-26T21:14:01.751242877+08:00","level":"INFO","msg":"created new stream","id":"gp8ndo2v"}
3
+ {"time":"2025-06-26T21:14:01.751292945+08:00","level":"INFO","msg":"stream: started","id":"gp8ndo2v"}
4
+ {"time":"2025-06-26T21:14:01.751353982+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"gp8ndo2v"}
5
+ {"time":"2025-06-26T21:14:01.751376676+08:00","level":"INFO","msg":"handler: started","stream_id":"gp8ndo2v"}
6
+ {"time":"2025-06-26T21:14:01.751406784+08:00","level":"INFO","msg":"sender: started","stream_id":"gp8ndo2v"}
7
+ {"time":"2025-06-26T21:14:03.18201785+08:00","level":"INFO","msg":"Starting system monitor"}
8
+ {"time":"2025-06-26T21:39:10.805194559+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:48308->172.67.193.61:443: read: connection timed out"}
9
+ {"time":"2025-06-26T21:45:08.181153524+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:33626->104.21.20.172:443: read: connection timed out"}
10
+ {"time":"2025-06-26T21:50:43.748258238+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:39240->172.67.193.61:443: read: connection reset by peer"}
11
+ {"time":"2025-06-26T21:56:59.349224169+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:41628->104.21.20.172:443: read: connection timed out"}
12
+ {"time":"2025-06-26T22:01:29.173164681+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:41150->172.67.193.61:443: read: connection timed out"}
13
+ {"time":"2025-06-26T22:04:52.9491833+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:40328->104.21.20.172:443: read: connection timed out"}
14
+ {"time":"2025-06-26T22:05:33.372515641+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
15
+ {"time":"2025-06-26T22:10:24.214205918+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:39814->104.21.20.172:443: read: connection timed out"}
16
+ {"time":"2025-06-26T22:11:19.608808233+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": unexpected EOF"}
17
+ {"time":"2025-06-26T22:15:15.541207766+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:57490->104.21.20.172:443: read: connection timed out"}
18
+ {"time":"2025-06-26T22:17:53.749178371+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:34226->172.67.193.61:443: read: connection timed out"}
19
+ {"time":"2025-06-26T22:20:43.734188539+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:58158->104.21.20.172:443: read: connection timed out"}
20
+ {"time":"2025-06-26T22:26:37.244674658+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
21
+ {"time":"2025-06-26T22:27:27.97084057+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:42482->172.67.193.61:443: read: connection reset by peer"}
22
+ {"time":"2025-06-26T22:32:12.373221258+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:42718->104.21.20.172:443: read: connection timed out"}
23
+ {"time":"2025-06-26T22:33:45.749714178+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:36352->172.67.193.61:443: read: connection reset by peer"}
24
+ {"time":"2025-06-26T22:34:27.154183486+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": unexpected EOF"}
25
+ {"time":"2025-06-26T22:37:03.388715023+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
26
+ {"time":"2025-06-26T22:38:44.053145624+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:56120->104.21.20.172:443: read: connection timed out"}
27
+ {"time":"2025-06-26T22:39:26.21620593+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:37762->172.67.193.61:443: read: connection reset by peer"}
28
+ {"time":"2025-06-26T22:42:48.392517517+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
29
+ {"time":"2025-06-26T22:43:20.509939526+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
30
+ {"time":"2025-06-26T22:43:55.50812991+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
31
+ {"time":"2025-06-26T22:44:05.260626832+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": context deadline exceeded"}
32
+ {"time":"2025-06-26T22:44:33.704733361+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
33
+ {"time":"2025-06-26T22:47:28.34118454+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:46752->104.21.20.172:443: read: connection timed out"}
34
+ {"time":"2025-06-26T22:48:22.800067638+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:35900->104.21.20.172:443: read: connection reset by peer"}
35
+ {"time":"2025-06-26T22:49:03.396821287+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
36
+ {"time":"2025-06-26T22:49:35.881823651+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
37
+ {"time":"2025-06-26T22:50:10.244289946+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
38
+ {"time":"2025-06-26T22:50:48.344767175+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
39
+ {"time":"2025-06-26T22:51:21.302223032+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:40288->172.67.193.61:443: read: connection timed out"}
40
+ {"time":"2025-06-26T22:54:35.861164416+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:35106->104.21.20.172:443: read: connection timed out"}
41
+ {"time":"2025-06-26T22:56:18.401507947+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
42
+ {"time":"2025-06-26T22:56:50.726216671+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
43
+ {"time":"2025-06-26T22:57:25.218970516+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
44
+ {"time":"2025-06-26T22:58:04.180971507+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
45
+ {"time":"2025-06-26T22:58:09.302334148+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:50854->104.21.20.172:443: read: connection reset by peer"}
46
+ {"time":"2025-06-26T22:58:51.546103299+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
47
+ {"time":"2025-06-26T23:00:01.390576029+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
48
+ {"time":"2025-06-26T23:02:34.06917085+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:37644->104.21.20.172:443: read: connection timed out"}
49
+ {"time":"2025-06-26T23:05:03.407063467+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
50
+ {"time":"2025-06-26T23:06:15.25317264+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:42244->172.67.193.61:443: read: connection timed out"}
51
+ {"time":"2025-06-26T23:06:33.40769986+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
52
+ {"time":"2025-06-26T23:09:18.410102964+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
53
+ {"time":"2025-06-26T23:11:33.411287647+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
54
+ {"time":"2025-06-26T23:12:46.933205687+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:55392->104.21.20.172:443: read: connection timed out"}
55
+ {"time":"2025-06-26T23:16:18.41464022+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
56
+ {"time":"2025-06-26T23:16:40.753618008+08:00","level":"ERROR","msg":"filestream: json decode error: net/http: request canceled (Client.Timeout or context cancellation while reading body)"}
57
+ {"time":"2025-06-26T23:16:50.454824576+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
58
+ {"time":"2025-06-26T23:17:25.158378302+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
59
+ {"time":"2025-06-26T23:18:00.15504283+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": unexpected EOF"}
60
+ {"time":"2025-06-26T23:18:03.880963877+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
61
+ {"time":"2025-06-26T23:18:50.836175421+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
62
+ {"time":"2025-06-26T23:20:45.653178557+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:38816->104.21.20.172:443: read: connection timed out"}
63
+ {"time":"2025-06-26T23:21:51.085821178+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:43680->172.67.193.61:443: read: connection reset by peer"}
64
+ {"time":"2025-06-26T23:24:50.901213106+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:40734->104.21.20.172:443: read: connection timed out"}
65
+ {"time":"2025-06-26T23:30:06.293188303+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:46360->104.21.20.172:443: read: connection timed out"}
66
+ {"time":"2025-06-26T23:36:26.709172933+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:33352->172.67.193.61:443: read: connection timed out"}
67
+ {"time":"2025-06-26T23:39:53.889169333+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": unexpected EOF"}
68
+ {"time":"2025-06-26T23:42:48.781078984+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": read tcp 10.1.8.160:35500->104.21.20.172:443: read: connection reset by peer"}
69
+ {"time":"2025-06-26T23:44:39.969525919+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:51204->172.67.193.61:443: read: connection reset by peer"}
70
+ {"time":"2025-06-26T23:52:25.685198314+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:41504->172.67.193.61:443: read: connection timed out"}
71
+ {"time":"2025-06-27T00:14:48.690187795+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": read tcp 10.1.8.160:49576->172.67.193.61:443: read: connection reset by peer"}
72
+ {"time":"2025-06-27T00:15:11.63645902+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": unexpected EOF"}
73
+ {"time":"2025-06-27T00:18:18.739690809+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": read tcp 10.1.8.160:55462->172.67.193.61:443: read: connection reset by peer"}
ProtT3/all_checkpoints/stage1_06262112/wandb/debug.log ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
2
+ 2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_setup.py:_flush():70] Configure stats pid to 183028
3
+ 2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
4
+ 2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
5
+ 2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_setup.py:_flush():70] Loading settings from environment variables
6
+ 2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/logs/debug.log
7
+ 2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/logs/debug-internal.log
8
+ 2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:init():852] calling init triggers
9
+ 2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:init():893] starting backend
12
+ 2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:init():897] sending inform_init request
13
+ 2025-06-26 21:13:59,906 INFO MainThread:183028 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-06-26 21:13:59,908 INFO MainThread:183028 [wandb_init.py:init():907] backend started and connected
15
+ 2025-06-26 21:13:59,909 INFO MainThread:183028 [wandb_init.py:init():1005] updated telemetry
16
+ 2025-06-26 21:13:59,911 INFO MainThread:183028 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
17
+ 2025-06-26 21:14:03,171 INFO MainThread:183028 [wandb_init.py:init():1104] starting run threads in backend
18
+ 2025-06-26 21:14:03,332 INFO MainThread:183028 [wandb_run.py:_console_start():2573] atexit reg
19
+ 2025-06-26 21:14:03,333 INFO MainThread:183028 [wandb_run.py:_redirect():2421] redirect: wrap_raw
20
+ 2025-06-26 21:14:03,336 INFO MainThread:183028 [wandb_run.py:_redirect():2490] Wrapping output streams.
21
+ 2025-06-26 21:14:03,336 INFO MainThread:183028 [wandb_run.py:_redirect():2513] Redirects installed.
22
+ 2025-06-26 21:14:03,350 INFO MainThread:183028 [wandb_init.py:init():1150] run started, returning control to user process
23
+ 2025-06-26 21:14:09,516 INFO MainThread:183028 [wandb_run.py:_config_callback():1436] config_cb None None {'filename': 'stage1_06262112', 'seed': 42, 'mode': 'train', 'strategy': 'deepspeed', 'accelerator': 'gpu', 'devices': '0,1,2,3,4,5,6,7', 'precision': 'bf16-mixed', 'max_epochs': 30, 'check_val_every_n_epoch': 1, 'use_wandb_logger': True, 'mix_dataset': True, 'temperature': 0.1, 'save_every_n_epochs': 5, 'ptm': True, 'lm': True, 'rerank_cand_num': 128, 'plm_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m', 'plm_tune': 'freeze', 'load_4bit': False, 'pool_size': 0, 'bert_hidden_dim': 768, 'bert_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft', 'projection_dim': 256, 'cross_attention_freq': 2, 'num_query_token': 8, 'weight_decay': 0.05, 'init_lr': 0.0001, 'min_lr': 1e-05, 'warmup_lr': 1e-06, 'warmup_steps': 1000, 'lr_decay_rate': 0.9, 'scheduler': 'linear_warmup_cosine_lr', 'init_checkpoint': '', 'retrieval_eval_epoch': 10, 'num_workers': 8, 'batch_size': 160, 'match_batch_size': 64, 'root': 'data', 'text_max_len': 128, 'prot_max_len': 1024, 'prot_aug': 'None'}
ProtT3/all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/files/output.log ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06262112 exists and is not empty.
2
+ Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.
3
+ LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
4
+
5
+ | Name | Type | Params | Mode
6
+ ------------------------------------------------------
7
+ 0 | blip2qformer | Blip2Qformer | 327 M | train
8
+ ------------------------------------------------------
9
+ 179 M Trainable params
10
+ 147 M Non-trainable params
11
+ 327 M Total params
12
+ 1,309.467 Total estimated model params size (MB)
13
+ 5 Modules in train mode
14
+ 926 Modules in eval mode
15
+ Epoch 9: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 336/336 [16:02<00:00, 0.35it/s, v_num=do2v]
16
+ /nas/shared/kilab/wangyujia/ProtT3/model/blip2qformer.py:220: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
17
+ with torch.cuda.amp.autocast(enable_autocast, dtype=torch.float32):
18
+ Validation DataLoader 2: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:18<00:00, 0.44it/s]
19
+ /nas/shared/kilab/wangyujia/ProtT3/model/dist_funs.py:18: FutureWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/main/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.
20
+ sd = self.module.state_dict(destination, prefix, keep_vars)
21
+ /nas/shared/kilab/wangyujia/ProtT3/model/blip2_stage1.py:42: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
22
+ return torch.cuda.amp.autocast(dtype=dtype)
23
+ 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 157/157 [08:52<00:00, 3.39s/it]
24
+ 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1250/1250 [00:01<00:00, 1024.41it/s]
25
+ re-ranking p2t: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████���███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2500/2500 [10:06<00:00, 4.12it/s]
26
+ 94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 1177/1250 [00:01<00:00, 1045.73it/s][rank: 1] Child process with PID 183538 terminated with code -6. Forcefully terminating all other processes to avoid zombies 🧟
27
+ re-ranking p2t: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2500/2500 [10:06<00:00, 4.13it/s]
28
+ re-ranking t2p: 57%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 1424/2500 [11:01<08:20, 2.15it/s]
ProtT3/all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/files/requirements.txt ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ attrs==25.3.0
2
+ tqdm==4.67.1
3
+ langcodes==3.5.0
4
+ nvidia-cublas-cu12==12.4.5.8
5
+ tifffile==2025.5.10
6
+ nvidia-cufile-cu12==1.11.1.6
7
+ nltk==3.9.1
8
+ salesforce-lavis==1.0.2
9
+ tzdata==2025.2
10
+ pyparsing==3.2.3
11
+ six==1.17.0
12
+ python-dateutil==2.9.0.post0
13
+ pandas==2.2.3
14
+ pytorch-lightning==2.5.1.post0
15
+ blinker==1.9.0
16
+ opencv-python-headless==4.5.5.64
17
+ nvidia-cusparse-cu12==12.3.1.170
18
+ pytz==2025.2
19
+ async-timeout==5.0.1
20
+ pillow==11.2.1
21
+ parso==0.8.4
22
+ joblib==1.5.1
23
+ contourpy==1.3.2
24
+ triton==3.2.0
25
+ marisa-trie==1.2.1
26
+ PyYAML==6.0.2
27
+ regex==2024.11.6
28
+ idna==3.10
29
+ nvidia-curand-cu12==10.3.5.147
30
+ rpds-py==0.25.1
31
+ aiosignal==1.3.2
32
+ srsly==2.5.1
33
+ confection==0.1.5
34
+ typing-inspection==0.4.1
35
+ packaging==24.2
36
+ distlib==0.3.9
37
+ networkx==3.4.2
38
+ absl-py==2.2.2
39
+ yarl==1.20.0
40
+ lightning-utilities==0.14.3
41
+ executing==2.2.0
42
+ pycocoevalcap==1.2
43
+ wheel==0.45.1
44
+ nvidia-ml-py==12.575.51
45
+ cycler==0.12.1
46
+ wrapt==1.17.2
47
+ jsonschema-specifications==2025.4.1
48
+ protobuf==6.31.0
49
+ mpmath==1.3.0
50
+ certifi==2025.4.26
51
+ py-cpuinfo==9.0.0
52
+ contexttimer==0.3.3
53
+ watchdog==6.0.0
54
+ pexpect==4.9.0
55
+ webencodings==0.5.1
56
+ hf-xet==1.1.2
57
+ cymem==2.0.11
58
+ requests==2.32.3
59
+ timm==0.4.12
60
+ omegaconf==2.3.0
61
+ nvidia-nvjitlink-cu12==12.4.127
62
+ webdataset==0.2.111
63
+ nodeenv==1.9.1
64
+ frozenlist==1.6.0
65
+ annotated-types==0.7.0
66
+ matplotlib-inline==0.1.7
67
+ urllib3==2.4.0
68
+ rich==14.0.0
69
+ GitPython==3.1.44
70
+ lazy_loader==0.4
71
+ msgpack==1.1.0
72
+ prompt_toolkit==3.0.51
73
+ fonttools==4.58.0
74
+ multidict==6.4.4
75
+ blis==1.3.0
76
+ thinc==8.3.6
77
+ nvidia-nvtx-cu12==12.4.127
78
+ torchmetrics==1.7.1
79
+ weasel==0.4.1
80
+ numpy==2.2.6
81
+ cachetools==5.5.2
82
+ Jinja2==3.1.6
83
+ matplotlib==3.10.3
84
+ nvidia-cudnn-cu12==9.1.0.70
85
+ Pygments==2.19.1
86
+ tornado==6.5.1
87
+ scipy==1.15.3
88
+ rouge_score==0.1.2
89
+ cloudpathlib==0.21.1
90
+ jedi==0.19.2
91
+ referencing==0.36.2
92
+ decord==0.6.0
93
+ setuptools==78.1.1
94
+ mdurl==0.1.2
95
+ identify==2.6.12
96
+ python-slugify==8.0.4
97
+ portalocker==3.1.1
98
+ catalogue==2.0.10
99
+ platformdirs==4.3.8
100
+ antlr4-python3-runtime==4.9.3
101
+ nvidia-cusolver-cu12==11.6.1.9
102
+ kaggle==1.7.4.5
103
+ pydeck==0.9.1
104
+ pydantic==2.11.5
105
+ nvidia-cufft-cu12==11.2.1.3
106
+ pyarrow==20.0.0
107
+ nvidia-nccl-cu12==2.21.5
108
+ markdown-it-py==3.0.0
109
+ gitdb==4.0.12
110
+ altair==5.5.0
111
+ torchvision==0.21.0
112
+ python-magic==0.4.27
113
+ iopath==0.1.10
114
+ smart-open==7.1.0
115
+ torch==2.6.0
116
+ pycocotools==2.0.8
117
+ fairscale==0.4.4
118
+ traitlets==5.14.3
119
+ pure_eval==0.2.3
120
+ sympy==1.13.1
121
+ nvidia-cusparselt-cu12==0.6.2
122
+ imageio==2.37.0
123
+ stack-data==0.6.3
124
+ shellingham==1.5.4
125
+ nvidia-cuda-runtime-cu12==12.4.127
126
+ einops==0.8.1
127
+ tenacity==9.1.2
128
+ virtualenv==20.31.2
129
+ ptyprocess==0.7.0
130
+ cfgv==3.4.0
131
+ pre_commit==4.2.0
132
+ language_data==1.3.0
133
+ typing_extensions==4.13.2
134
+ propcache==0.3.1
135
+ nvidia-cuda-cupti-cu12==12.4.127
136
+ safetensors==0.5.3
137
+ text-unidecode==1.3
138
+ wcwidth==0.2.13
139
+ charset-normalizer==3.4.2
140
+ aiohappyeyeballs==2.6.1
141
+ ipython==8.36.0
142
+ streamlit==1.45.1
143
+ asttokens==3.0.0
144
+ psutil==7.0.0
145
+ smmap==5.0.2
146
+ exceptiongroup==1.3.0
147
+ murmurhash==1.0.13
148
+ filelock==3.18.0
149
+ plotly==6.1.1
150
+ hjson==3.1.0
151
+ pydantic_core==2.33.2
152
+ ninja==1.11.1.4
153
+ kiwisolver==1.4.8
154
+ spacy-legacy==3.0.12
155
+ opendatasets==0.1.22
156
+ decorator==5.2.1
157
+ spacy==3.8.7
158
+ wasabi==1.1.3
159
+ sentencepiece==0.2.0
160
+ toml==0.10.2
161
+ scikit-image==0.25.2
162
+ deepspeed==0.16.10+b666844f
163
+ ftfy==6.3.1
164
+ bleach==6.2.0
165
+ nvidia-cuda-nvrtc-cu12==12.4.127
166
+ spacy-loggers==1.0.5
167
+ MarkupSafe==3.0.2
168
+ braceexpand==0.1.7
169
+ oss2==2.15.0
170
+ preshed==3.0.10
171
+ transformers==4.52.3
172
+ aiohttp==3.12.2
173
+ web.py==0.62
174
+ threadpoolctl==3.6.0
175
+ jaraco.functools==4.1.0
176
+ wandb==0.19.11
177
+ sentry-sdk==2.29.1
178
+ tokenizers==0.21.1
179
+ fsspec==2025.3.0
180
+ flash-attn==2.7.1.post1
181
+ opendelta==0.3.2
182
+ opencv-python==4.11.0.86
183
+ click==8.2.1
184
+ docker-pycreds==0.4.0
185
+ typer==0.16.0
186
+ xxhash==3.5.0
187
+ pathlib==1.0.1
188
+ dill==0.3.8
189
+ crcmod==1.7
190
+ bigmodelvis==0.0.1
191
+ datasets==3.6.0
192
+ pycryptodome==3.23.0
193
+ jsonschema==4.24.0
194
+ aliyun-python-sdk-core==2.16.0
195
+ jmespath==0.10.0
196
+ more-itertools==10.7.0
197
+ scikit-learn==1.6.1
198
+ huggingface-hub==0.32.1
199
+ cryptography==45.0.3
200
+ pycparser==2.22
201
+ yacs==0.1.8
202
+ aliyun-python-sdk-kms==2.16.5
203
+ cffi==1.17.1
204
+ delta-center-client==0.0.4
205
+ multiprocess==0.70.16
206
+ setproctitle==1.3.6
207
+ narwhals==1.41.0
208
+ pip==25.1.1
209
+ cheroot==10.0.1
210
+ jaraco.context==5.3.0
211
+ more-itertools==10.3.0
212
+ jaraco.functools==4.0.1
213
+ jaraco.text==3.12.1
214
+ platformdirs==4.2.2
215
+ packaging==24.2
216
+ wheel==0.45.1
217
+ zipp==3.19.2
218
+ inflect==7.3.1
219
+ autocommand==2.2.2
220
+ typeguard==4.3.0
221
+ jaraco.collections==5.1.0
222
+ backports.tarfile==1.2.0
223
+ tomli==2.0.1
224
+ importlib_metadata==8.0.0
225
+ typing_extensions==4.12.2
ProtT3/all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/files/wandb-metadata.json ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.0",
4
+ "startedAt": "2025-06-26T13:13:59.908975Z",
5
+ "args": [
6
+ "--devices",
7
+ "0,1,2,3,4,5,6,7",
8
+ "--mode",
9
+ "train",
10
+ "--filename",
11
+ "stage1_06262112",
12
+ "--num_query_token",
13
+ "8",
14
+ "--plm_name",
15
+ "/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m",
16
+ "--bert_name",
17
+ "/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft",
18
+ "--save_every_n_epochs",
19
+ "5",
20
+ "--max_epochs",
21
+ "30",
22
+ "--batch_size",
23
+ "160",
24
+ "--precision",
25
+ "bf16-mixed",
26
+ "--mix_dataset",
27
+ "--num_workers",
28
+ "8",
29
+ "--use_wandb_logger"
30
+ ],
31
+ "program": "/nas/shared/kilab/wangyujia/ProtT3/stage1.py",
32
+ "codePath": "stage1.py",
33
+ "email": "gia0603yucca@gmail.com",
34
+ "root": "./all_checkpoints/stage1_06262112/",
35
+ "host": "dsw-265304-cd576ddc5-gh74w",
36
+ "executable": "/root/miniconda3/envs/protT3/bin/python",
37
+ "codePathLocal": "stage1.py",
38
+ "cpu_count": 64,
39
+ "cpu_count_logical": 64,
40
+ "gpu": "NVIDIA A800-SXM4-80GB",
41
+ "gpu_count": 8,
42
+ "disk": {
43
+ "/": {
44
+ "total": "1623302262784",
45
+ "used": "1290924032"
46
+ }
47
+ },
48
+ "memory": {
49
+ "total": "549755813888"
50
+ },
51
+ "cpu": {
52
+ "count": 64,
53
+ "countLogical": 64
54
+ },
55
+ "gpu_nvidia": [
56
+ {
57
+ "name": "NVIDIA A800-SXM4-80GB",
58
+ "memoryTotal": "85198045184",
59
+ "architecture": "Ampere"
60
+ },
61
+ {
62
+ "name": "NVIDIA A800-SXM4-80GB",
63
+ "memoryTotal": "85198045184",
64
+ "architecture": "Ampere"
65
+ },
66
+ {
67
+ "name": "NVIDIA A800-SXM4-80GB",
68
+ "memoryTotal": "85198045184",
69
+ "architecture": "Ampere"
70
+ },
71
+ {
72
+ "name": "NVIDIA A800-SXM4-80GB",
73
+ "memoryTotal": "85198045184",
74
+ "architecture": "Ampere"
75
+ },
76
+ {
77
+ "name": "NVIDIA A800-SXM4-80GB",
78
+ "memoryTotal": "85198045184",
79
+ "architecture": "Ampere"
80
+ },
81
+ {
82
+ "name": "NVIDIA A800-SXM4-80GB",
83
+ "memoryTotal": "85198045184",
84
+ "architecture": "Ampere"
85
+ },
86
+ {
87
+ "name": "NVIDIA A800-SXM4-80GB",
88
+ "memoryTotal": "85198045184",
89
+ "architecture": "Ampere"
90
+ },
91
+ {
92
+ "name": "NVIDIA A800-SXM4-80GB",
93
+ "memoryTotal": "85198045184",
94
+ "architecture": "Ampere"
95
+ }
96
+ ],
97
+ "cudaVersion": "12.1"
98
+ }
ProtT3/all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/logs/debug-internal.log ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-06-26T21:13:59.919018005+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/logs/debug-core.log"}
2
+ {"time":"2025-06-26T21:14:01.751242877+08:00","level":"INFO","msg":"created new stream","id":"gp8ndo2v"}
3
+ {"time":"2025-06-26T21:14:01.751292945+08:00","level":"INFO","msg":"stream: started","id":"gp8ndo2v"}
4
+ {"time":"2025-06-26T21:14:01.751353982+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"gp8ndo2v"}
5
+ {"time":"2025-06-26T21:14:01.751376676+08:00","level":"INFO","msg":"handler: started","stream_id":"gp8ndo2v"}
6
+ {"time":"2025-06-26T21:14:01.751406784+08:00","level":"INFO","msg":"sender: started","stream_id":"gp8ndo2v"}
7
+ {"time":"2025-06-26T21:14:03.18201785+08:00","level":"INFO","msg":"Starting system monitor"}
8
+ {"time":"2025-06-26T21:39:10.805194559+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:48308->172.67.193.61:443: read: connection timed out"}
9
+ {"time":"2025-06-26T21:45:08.181153524+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:33626->104.21.20.172:443: read: connection timed out"}
10
+ {"time":"2025-06-26T21:50:43.748258238+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:39240->172.67.193.61:443: read: connection reset by peer"}
11
+ {"time":"2025-06-26T21:56:59.349224169+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:41628->104.21.20.172:443: read: connection timed out"}
12
+ {"time":"2025-06-26T22:01:29.173164681+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:41150->172.67.193.61:443: read: connection timed out"}
13
+ {"time":"2025-06-26T22:04:52.9491833+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:40328->104.21.20.172:443: read: connection timed out"}
14
+ {"time":"2025-06-26T22:05:33.372515641+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
15
+ {"time":"2025-06-26T22:10:24.214205918+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:39814->104.21.20.172:443: read: connection timed out"}
16
+ {"time":"2025-06-26T22:11:19.608808233+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": unexpected EOF"}
17
+ {"time":"2025-06-26T22:15:15.541207766+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:57490->104.21.20.172:443: read: connection timed out"}
18
+ {"time":"2025-06-26T22:17:53.749178371+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:34226->172.67.193.61:443: read: connection timed out"}
19
+ {"time":"2025-06-26T22:20:43.734188539+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:58158->104.21.20.172:443: read: connection timed out"}
20
+ {"time":"2025-06-26T22:26:37.244674658+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
21
+ {"time":"2025-06-26T22:27:27.97084057+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:42482->172.67.193.61:443: read: connection reset by peer"}
22
+ {"time":"2025-06-26T22:32:12.373221258+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:42718->104.21.20.172:443: read: connection timed out"}
23
+ {"time":"2025-06-26T22:33:45.749714178+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:36352->172.67.193.61:443: read: connection reset by peer"}
24
+ {"time":"2025-06-26T22:34:27.154183486+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": unexpected EOF"}
25
+ {"time":"2025-06-26T22:37:03.388715023+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
26
+ {"time":"2025-06-26T22:38:44.053145624+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:56120->104.21.20.172:443: read: connection timed out"}
27
+ {"time":"2025-06-26T22:39:26.21620593+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:37762->172.67.193.61:443: read: connection reset by peer"}
28
+ {"time":"2025-06-26T22:42:48.392517517+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
29
+ {"time":"2025-06-26T22:43:20.509939526+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
30
+ {"time":"2025-06-26T22:43:55.50812991+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
31
+ {"time":"2025-06-26T22:44:05.260626832+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": context deadline exceeded"}
32
+ {"time":"2025-06-26T22:44:33.704733361+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
33
+ {"time":"2025-06-26T22:47:28.34118454+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:46752->104.21.20.172:443: read: connection timed out"}
34
+ {"time":"2025-06-26T22:48:22.800067638+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:35900->104.21.20.172:443: read: connection reset by peer"}
35
+ {"time":"2025-06-26T22:49:03.396821287+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
36
+ {"time":"2025-06-26T22:49:35.881823651+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
37
+ {"time":"2025-06-26T22:50:10.244289946+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
38
+ {"time":"2025-06-26T22:50:48.344767175+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
39
+ {"time":"2025-06-26T22:51:21.302223032+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:40288->172.67.193.61:443: read: connection timed out"}
40
+ {"time":"2025-06-26T22:54:35.861164416+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:35106->104.21.20.172:443: read: connection timed out"}
41
+ {"time":"2025-06-26T22:56:18.401507947+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
42
+ {"time":"2025-06-26T22:56:50.726216671+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
43
+ {"time":"2025-06-26T22:57:25.218970516+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
44
+ {"time":"2025-06-26T22:58:04.180971507+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
45
+ {"time":"2025-06-26T22:58:09.302334148+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:50854->104.21.20.172:443: read: connection reset by peer"}
46
+ {"time":"2025-06-26T22:58:51.546103299+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
47
+ {"time":"2025-06-26T23:00:01.390576029+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
48
+ {"time":"2025-06-26T23:02:34.06917085+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:37644->104.21.20.172:443: read: connection timed out"}
49
+ {"time":"2025-06-26T23:05:03.407063467+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
50
+ {"time":"2025-06-26T23:06:15.25317264+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:42244->172.67.193.61:443: read: connection timed out"}
51
+ {"time":"2025-06-26T23:06:33.40769986+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
52
+ {"time":"2025-06-26T23:09:18.410102964+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
53
+ {"time":"2025-06-26T23:11:33.411287647+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
54
+ {"time":"2025-06-26T23:12:46.933205687+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:55392->104.21.20.172:443: read: connection timed out"}
55
+ {"time":"2025-06-26T23:16:18.41464022+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
56
+ {"time":"2025-06-26T23:16:40.753618008+08:00","level":"ERROR","msg":"filestream: json decode error: net/http: request canceled (Client.Timeout or context cancellation while reading body)"}
57
+ {"time":"2025-06-26T23:16:50.454824576+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
58
+ {"time":"2025-06-26T23:17:25.158378302+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
59
+ {"time":"2025-06-26T23:18:00.15504283+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": unexpected EOF"}
60
+ {"time":"2025-06-26T23:18:03.880963877+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
61
+ {"time":"2025-06-26T23:18:50.836175421+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
62
+ {"time":"2025-06-26T23:20:45.653178557+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:38816->104.21.20.172:443: read: connection timed out"}
63
+ {"time":"2025-06-26T23:21:51.085821178+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:43680->172.67.193.61:443: read: connection reset by peer"}
64
+ {"time":"2025-06-26T23:24:50.901213106+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:40734->104.21.20.172:443: read: connection timed out"}
65
+ {"time":"2025-06-26T23:30:06.293188303+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:46360->104.21.20.172:443: read: connection timed out"}
66
+ {"time":"2025-06-26T23:36:26.709172933+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:33352->172.67.193.61:443: read: connection timed out"}
67
+ {"time":"2025-06-26T23:39:53.889169333+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": unexpected EOF"}
68
+ {"time":"2025-06-26T23:42:48.781078984+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": read tcp 10.1.8.160:35500->104.21.20.172:443: read: connection reset by peer"}
69
+ {"time":"2025-06-26T23:44:39.969525919+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:51204->172.67.193.61:443: read: connection reset by peer"}
70
+ {"time":"2025-06-26T23:52:25.685198314+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:41504->172.67.193.61:443: read: connection timed out"}
71
+ {"time":"2025-06-27T00:14:48.690187795+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": read tcp 10.1.8.160:49576->172.67.193.61:443: read: connection reset by peer"}
72
+ {"time":"2025-06-27T00:15:11.63645902+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": unexpected EOF"}
73
+ {"time":"2025-06-27T00:18:18.739690809+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": read tcp 10.1.8.160:55462->172.67.193.61:443: read: connection reset by peer"}
ProtT3/all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/logs/debug.log ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
2
+ 2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_setup.py:_flush():70] Configure stats pid to 183028
3
+ 2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
4
+ 2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
5
+ 2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_setup.py:_flush():70] Loading settings from environment variables
6
+ 2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/logs/debug.log
7
+ 2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/logs/debug-internal.log
8
+ 2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:init():852] calling init triggers
9
+ 2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:init():893] starting backend
12
+ 2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:init():897] sending inform_init request
13
+ 2025-06-26 21:13:59,906 INFO MainThread:183028 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-06-26 21:13:59,908 INFO MainThread:183028 [wandb_init.py:init():907] backend started and connected
15
+ 2025-06-26 21:13:59,909 INFO MainThread:183028 [wandb_init.py:init():1005] updated telemetry
16
+ 2025-06-26 21:13:59,911 INFO MainThread:183028 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
17
+ 2025-06-26 21:14:03,171 INFO MainThread:183028 [wandb_init.py:init():1104] starting run threads in backend
18
+ 2025-06-26 21:14:03,332 INFO MainThread:183028 [wandb_run.py:_console_start():2573] atexit reg
19
+ 2025-06-26 21:14:03,333 INFO MainThread:183028 [wandb_run.py:_redirect():2421] redirect: wrap_raw
20
+ 2025-06-26 21:14:03,336 INFO MainThread:183028 [wandb_run.py:_redirect():2490] Wrapping output streams.
21
+ 2025-06-26 21:14:03,336 INFO MainThread:183028 [wandb_run.py:_redirect():2513] Redirects installed.
22
+ 2025-06-26 21:14:03,350 INFO MainThread:183028 [wandb_init.py:init():1150] run started, returning control to user process
23
+ 2025-06-26 21:14:09,516 INFO MainThread:183028 [wandb_run.py:_config_callback():1436] config_cb None None {'filename': 'stage1_06262112', 'seed': 42, 'mode': 'train', 'strategy': 'deepspeed', 'accelerator': 'gpu', 'devices': '0,1,2,3,4,5,6,7', 'precision': 'bf16-mixed', 'max_epochs': 30, 'check_val_every_n_epoch': 1, 'use_wandb_logger': True, 'mix_dataset': True, 'temperature': 0.1, 'save_every_n_epochs': 5, 'ptm': True, 'lm': True, 'rerank_cand_num': 128, 'plm_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m', 'plm_tune': 'freeze', 'load_4bit': False, 'pool_size': 0, 'bert_hidden_dim': 768, 'bert_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft', 'projection_dim': 256, 'cross_attention_freq': 2, 'num_query_token': 8, 'weight_decay': 0.05, 'init_lr': 0.0001, 'min_lr': 1e-05, 'warmup_lr': 1e-06, 'warmup_steps': 1000, 'lr_decay_rate': 0.9, 'scheduler': 'linear_warmup_cosine_lr', 'init_checkpoint': '', 'retrieval_eval_epoch': 10, 'num_workers': 8, 'batch_size': 160, 'match_batch_size': 64, 'root': 'data', 'text_max_len': 128, 'prot_max_len': 1024, 'prot_aug': 'None'}
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/debug-internal.log ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-07-07T20:02:29.75666986+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_200229-yex1pcwt/logs/debug-core.log"}
2
+ {"time":"2025-07-07T20:02:30.922635932+08:00","level":"INFO","msg":"created new stream","id":"yex1pcwt"}
3
+ {"time":"2025-07-07T20:02:30.922678667+08:00","level":"INFO","msg":"stream: started","id":"yex1pcwt"}
4
+ {"time":"2025-07-07T20:02:30.922713833+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"yex1pcwt"}
5
+ {"time":"2025-07-07T20:02:30.922757284+08:00","level":"INFO","msg":"sender: started","stream_id":"yex1pcwt"}
6
+ {"time":"2025-07-07T20:02:30.92278615+08:00","level":"INFO","msg":"handler: started","stream_id":"yex1pcwt"}
7
+ {"time":"2025-07-07T20:02:32.296458789+08:00","level":"INFO","msg":"Starting system monitor"}
8
+ {"time":"2025-07-08T01:01:50.39071972+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": read tcp 10.1.4.159:39416->104.21.20.172:443: read: connection reset by peer"}
9
+ {"time":"2025-07-08T01:07:45.887474022+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2.5_mol_instruction/yex1pcwt/file_stream\": read tcp 10.1.4.159:48838->172.67.193.61:443: read: connection timed out"}
10
+ {"time":"2025-07-08T05:20:12.207426797+08:00","level":"INFO","msg":"stream: closing","id":"yex1pcwt"}
11
+ {"time":"2025-07-08T05:20:12.207468139+08:00","level":"INFO","msg":"Stopping system monitor"}
12
+ {"time":"2025-07-08T05:20:12.208684636+08:00","level":"INFO","msg":"Stopped system monitor"}
13
+ {"time":"2025-07-08T05:20:13.938647534+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
14
+ {"time":"2025-07-08T05:20:14.977621841+08:00","level":"INFO","msg":"handler: closed","stream_id":"yex1pcwt"}
15
+ {"time":"2025-07-08T05:20:14.977653692+08:00","level":"INFO","msg":"sender: closed","stream_id":"yex1pcwt"}
16
+ {"time":"2025-07-08T05:20:14.977651902+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"yex1pcwt"}
17
+ {"time":"2025-07-08T05:20:14.982274952+08:00","level":"INFO","msg":"stream: closed","id":"yex1pcwt"}
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/debug.log ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
2
+ 2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_setup.py:_flush():70] Configure stats pid to 129761
3
+ 2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
4
+ 2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
5
+ 2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_setup.py:_flush():70] Loading settings from environment variables
6
+ 2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_200229-yex1pcwt/logs/debug.log
7
+ 2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_200229-yex1pcwt/logs/debug-internal.log
8
+ 2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_init.py:init():852] calling init triggers
9
+ 2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_init.py:init():893] starting backend
12
+ 2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_init.py:init():897] sending inform_init request
13
+ 2025-07-07 20:02:29,745 INFO MainThread:129761 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-07-07 20:02:29,748 INFO MainThread:129761 [wandb_init.py:init():907] backend started and connected
15
+ 2025-07-07 20:02:29,754 INFO MainThread:129761 [wandb_init.py:init():1005] updated telemetry
16
+ 2025-07-07 20:02:29,762 INFO MainThread:129761 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
17
+ 2025-07-07 20:02:32,258 INFO MainThread:129761 [wandb_init.py:init():1104] starting run threads in backend
18
+ 2025-07-07 20:02:32,427 INFO MainThread:129761 [wandb_run.py:_console_start():2573] atexit reg
19
+ 2025-07-07 20:02:32,427 INFO MainThread:129761 [wandb_run.py:_redirect():2421] redirect: wrap_raw
20
+ 2025-07-07 20:02:32,457 INFO MainThread:129761 [wandb_run.py:_redirect():2490] Wrapping output streams.
21
+ 2025-07-07 20:02:32,462 INFO MainThread:129761 [wandb_run.py:_redirect():2513] Redirects installed.
22
+ 2025-07-07 20:02:32,463 INFO MainThread:129761 [wandb_init.py:init():1150] run started, returning control to user process
23
+ 2025-07-07 20:02:40,689 INFO MainThread:129761 [wandb_run.py:_config_callback():1436] config_cb None None {'filename': 'stage2.5_mol_instruction', 'seed': 42, 'mode': 'train', 'strategy': 'deepspeed', 'accelerator': 'gpu', 'devices': '0,1,2,3,4,5,6,7', 'precision': 'bf16-mixed', 'max_epochs': 10, 'accumulate_grad_batches': 1, 'check_val_every_n_epoch': 1, 'enable_flash': False, 'use_wandb_logger': True, 'mix_dataset': False, 'save_every_n_epochs': 1, 'bert_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft', 'cross_attention_freq': 2, 'num_query_token': 8, 'llm_name': '/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300', 'num_beams': 5, 'do_sample': False, 'max_inference_len': 128, 'min_inference_len': 1, 'llm_tune': 'mid_lora', 'peft_config': '', 'peft_dir': '', 'plm_model': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m', 'plm_tune': 'freeze', 'lora_r': 8, 'lora_alpha': 16, 'lora_dropout': 0.1, 'enbale_gradient_checkpointing': False, 'weight_decay': 0.05, 'init_lr': 0.0001, 'min_lr': 1e-05, 'warmup_lr': 1e-06, 'warmup_steps': 1000, 'lr_decay_rate': 0.9, 'scheduler': 'linear_warmup_cosine_lr', 'stage1_path': '/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt', 'stage2_path': '', 'init_checkpoint': '', 'caption_eval_epoch': 10, 'num_workers': 8, 'batch_size': 2, 'inference_batch_size': 4, 'root': 'data', 'text_max_len': 1024, 'q_max_len': 29, 'a_max_len': 36, 'prot_max_len': 1024, 'prompt': 'The protein has the following properties: ', 'filter_side_qa': False}
24
+ 2025-07-08 05:20:12,205 INFO MsgRouterThr:129761 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/files/output.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2.5_mol_instruction exists and is not empty.
2
+ Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.
3
+ LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
4
+
5
+ | Name | Type | Params | Mode
6
+ -------------------------------------------
7
+ 0 | blip2 | Blip2OPT | 7.9 B | train
8
+ -------------------------------------------
9
+ 104 M Trainable params
10
+ 7.8 B Non-trainable params
11
+ 7.9 B Total params
12
+ 31,459.025Total estimated model params size (MB)
13
+ 174 Modules in train mode
14
+ 1203 Modules in eval mode
15
+ Epoch 0: 0%| | 0/410 [00:00<?, ?it/s]
16
+ [rank: 1] Child process with PID 111788 terminated with code 1. Forcefully terminating all other processes to avoid zombies 🧟
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/files/requirements.txt ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pydantic_core==2.33.2
2
+ psutil==7.0.0
3
+ nvidia-cuda-nvrtc-cu12==12.4.127
4
+ mpmath==1.3.0
5
+ tzdata==2025.2
6
+ contexttimer==0.3.3
7
+ cycler==0.12.1
8
+ python-magic==0.4.27
9
+ pexpect==4.9.0
10
+ sympy==1.13.1
11
+ wrapt==1.17.2
12
+ marisa-trie==1.2.1
13
+ langcodes==3.5.0
14
+ nvidia-nvtx-cu12==12.4.127
15
+ ipython==8.36.0
16
+ opencv-python-headless==4.5.5.64
17
+ MarkupSafe==3.0.2
18
+ jsonschema-specifications==2025.4.1
19
+ wasabi==1.1.3
20
+ blinker==1.9.0
21
+ cfgv==3.4.0
22
+ numpy==2.2.6
23
+ idna==3.10
24
+ nvidia-cufile-cu12==1.11.1.6
25
+ ninja==1.11.1.4
26
+ nvidia-nccl-cu12==2.21.5
27
+ networkx==3.4.2
28
+ certifi==2025.4.26
29
+ deepspeed==0.16.10+b666844f
30
+ pure_eval==0.2.3
31
+ packaging==24.2
32
+ nltk==3.9.1
33
+ contourpy==1.3.2
34
+ pre_commit==4.2.0
35
+ nodeenv==1.9.1
36
+ setuptools==78.1.1
37
+ annotated-types==0.7.0
38
+ multidict==6.4.4
39
+ requests==2.32.3
40
+ tornado==6.5.1
41
+ triton==3.2.0
42
+ pillow==11.2.1
43
+ decord==0.6.0
44
+ shellingham==1.5.4
45
+ streamlit==1.45.1
46
+ pydeck==0.9.1
47
+ confection==0.1.5
48
+ exceptiongroup==1.3.0
49
+ prompt_toolkit==3.0.51
50
+ text-unidecode==1.3
51
+ nvidia-cufft-cu12==11.2.1.3
52
+ antlr4-python3-runtime==4.9.3
53
+ fairscale==0.4.4
54
+ rouge_score==0.1.2
55
+ nvidia-cudnn-cu12==9.1.0.70
56
+ tqdm==4.67.1
57
+ rich==14.0.0
58
+ frozenlist==1.6.0
59
+ webencodings==0.5.1
60
+ altair==5.5.0
61
+ opendatasets==0.1.22
62
+ nvidia-curand-cu12==10.3.5.147
63
+ protobuf==6.31.0
64
+ asttokens==3.0.0
65
+ wheel==0.45.1
66
+ hf-xet==1.1.2
67
+ weasel==0.4.1
68
+ aiosignal==1.3.2
69
+ absl-py==2.2.2
70
+ thinc==8.3.6
71
+ torchvision==0.21.0
72
+ pandas==2.2.3
73
+ fonttools==4.58.0
74
+ bleach==6.2.0
75
+ typing-inspection==0.4.1
76
+ ftfy==6.3.1
77
+ typing_extensions==4.13.2
78
+ nvidia-ml-py==12.575.51
79
+ python-slugify==8.0.4
80
+ lightning-utilities==0.14.3
81
+ py-cpuinfo==9.0.0
82
+ smmap==5.0.2
83
+ regex==2024.11.6
84
+ scikit-image==0.25.2
85
+ iopath==0.1.10
86
+ spacy-legacy==3.0.12
87
+ hjson==3.1.0
88
+ executing==2.2.0
89
+ kiwisolver==1.4.8
90
+ scipy==1.15.3
91
+ aiohappyeyeballs==2.6.1
92
+ toml==0.10.2
93
+ jedi==0.19.2
94
+ GitPython==3.1.44
95
+ ptyprocess==0.7.0
96
+ kaggle==1.7.4.5
97
+ braceexpand==0.1.7
98
+ wcwidth==0.2.13
99
+ nvidia-cuda-runtime-cu12==12.4.127
100
+ pytorch-lightning==2.5.1.post0
101
+ Jinja2==3.1.6
102
+ urllib3==2.4.0
103
+ watchdog==6.0.0
104
+ filelock==3.18.0
105
+ propcache==0.3.1
106
+ torch==2.6.0
107
+ nvidia-cusparse-cu12==12.3.1.170
108
+ cymem==2.0.11
109
+ nvidia-cusolver-cu12==11.6.1.9
110
+ murmurhash==1.0.13
111
+ catalogue==2.0.10
112
+ yarl==1.20.0
113
+ charset-normalizer==3.4.2
114
+ gitdb==4.0.12
115
+ matplotlib==3.10.3
116
+ portalocker==3.1.1
117
+ platformdirs==4.3.8
118
+ async-timeout==5.0.1
119
+ parso==0.8.4
120
+ markdown-it-py==3.0.0
121
+ omegaconf==2.3.0
122
+ cloudpathlib==0.21.1
123
+ nvidia-cusparselt-cu12==0.6.2
124
+ spacy-loggers==1.0.5
125
+ srsly==2.5.1
126
+ identify==2.6.12
127
+ rpds-py==0.25.1
128
+ spacy==3.8.7
129
+ matplotlib-inline==0.1.7
130
+ smart-open==7.1.0
131
+ pydantic==2.11.5
132
+ mdurl==0.1.2
133
+ virtualenv==20.31.2
134
+ pytz==2025.2
135
+ pycocotools==2.0.8
136
+ six==1.17.0
137
+ decorator==5.2.1
138
+ referencing==0.36.2
139
+ sentencepiece==0.2.0
140
+ PyYAML==6.0.2
141
+ pycocoevalcap==1.2
142
+ imageio==2.37.0
143
+ distlib==0.3.9
144
+ pyarrow==20.0.0
145
+ tenacity==9.1.2
146
+ language_data==1.3.0
147
+ nvidia-cuda-cupti-cu12==12.4.127
148
+ blis==1.3.0
149
+ Pygments==2.19.1
150
+ tifffile==2025.5.10
151
+ pyparsing==3.2.3
152
+ cachetools==5.5.2
153
+ safetensors==0.5.3
154
+ attrs==25.3.0
155
+ webdataset==0.2.111
156
+ plotly==6.1.1
157
+ nvidia-cublas-cu12==12.4.5.8
158
+ timm==0.4.12
159
+ torchmetrics==1.7.1
160
+ nvidia-nvjitlink-cu12==12.4.127
161
+ stack-data==0.6.3
162
+ python-dateutil==2.9.0.post0
163
+ lazy_loader==0.4
164
+ traitlets==5.14.3
165
+ einops==0.8.1
166
+ salesforce-lavis==1.0.2
167
+ joblib==1.5.1
168
+ msgpack==1.1.0
169
+ tokenizers==0.21.1
170
+ sentry-sdk==2.29.1
171
+ oss2==2.15.0
172
+ setproctitle==1.3.6
173
+ pip==25.1.1
174
+ cffi==1.17.1
175
+ transformers==4.52.3
176
+ narwhals==1.41.0
177
+ aliyun-python-sdk-core==2.16.0
178
+ jsonschema==4.24.0
179
+ flash-attn==2.7.1.post1
180
+ preshed==3.0.10
181
+ multiprocess==0.70.16
182
+ cryptography==45.0.3
183
+ aliyun-python-sdk-kms==2.16.5
184
+ scikit-learn==1.6.1
185
+ huggingface-hub==0.32.1
186
+ crcmod==1.7
187
+ typer==0.16.0
188
+ web.py==0.62
189
+ docker-pycreds==0.4.0
190
+ xxhash==3.5.0
191
+ bigmodelvis==0.0.1
192
+ datasets==3.6.0
193
+ more-itertools==10.7.0
194
+ yacs==0.1.8
195
+ jmespath==0.10.0
196
+ aiohttp==3.12.2
197
+ opencv-python==4.11.0.86
198
+ pycparser==2.22
199
+ threadpoolctl==3.6.0
200
+ jaraco.functools==4.1.0
201
+ click==8.2.1
202
+ wandb==0.19.11
203
+ opendelta==0.3.2
204
+ pycryptodome==3.23.0
205
+ pathlib==1.0.1
206
+ dill==0.3.8
207
+ fsspec==2025.3.0
208
+ delta-center-client==0.0.4
209
+ cheroot==10.0.1
210
+ typing_extensions==4.12.2
211
+ platformdirs==4.2.2
212
+ jaraco.text==3.12.1
213
+ packaging==24.2
214
+ inflect==7.3.1
215
+ jaraco.context==5.3.0
216
+ wheel==0.45.1
217
+ typeguard==4.3.0
218
+ more-itertools==10.3.0
219
+ tomli==2.0.1
220
+ importlib_metadata==8.0.0
221
+ backports.tarfile==1.2.0
222
+ zipp==3.19.2
223
+ jaraco.collections==5.1.0
224
+ autocommand==2.2.2
225
+ jaraco.functools==4.0.1
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/files/wandb-metadata.json ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.0",
4
+ "startedAt": "2025-07-07T10:49:42.891959Z",
5
+ "args": [
6
+ "--devices",
7
+ "0,1,2,3,4,5,6,7",
8
+ "--mode",
9
+ "train",
10
+ "--filename",
11
+ "stage2.5_mol_instruction",
12
+ "--num_query_token",
13
+ "8",
14
+ "--save_every_n_epochs",
15
+ "1",
16
+ "--max_epochs",
17
+ "10",
18
+ "--batch_size",
19
+ "32",
20
+ "--precision",
21
+ "bf16-mixed",
22
+ "--num_workers",
23
+ "8",
24
+ "--plm_model",
25
+ "/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m",
26
+ "--bert_name",
27
+ "/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft",
28
+ "--llm_name",
29
+ "/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300",
30
+ "--llm_tune",
31
+ "mid_lora",
32
+ "--stage1_path",
33
+ "/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt",
34
+ "--use_wandb_logger"
35
+ ],
36
+ "program": "/nas/shared/kilab/wangyujia/ProtT3/stage2.py",
37
+ "codePath": "stage2.py",
38
+ "email": "gia0603yucca@gmail.com",
39
+ "root": "./all_checkpoints/stage2.5_mol_instruction/",
40
+ "host": "dsw-265304-7f6db6b4bb-g4b9r",
41
+ "executable": "/root/miniconda3/envs/protT3/bin/python",
42
+ "codePathLocal": "stage2.py",
43
+ "cpu_count": 64,
44
+ "cpu_count_logical": 64,
45
+ "gpu": "NVIDIA A800-SXM4-80GB",
46
+ "gpu_count": 8,
47
+ "disk": {
48
+ "/": {
49
+ "total": "1623302262784",
50
+ "used": "1260912640"
51
+ }
52
+ },
53
+ "memory": {
54
+ "total": "549755813888"
55
+ },
56
+ "cpu": {
57
+ "count": 64,
58
+ "countLogical": 64
59
+ },
60
+ "gpu_nvidia": [
61
+ {
62
+ "name": "NVIDIA A800-SXM4-80GB",
63
+ "memoryTotal": "85198045184",
64
+ "architecture": "Ampere"
65
+ },
66
+ {
67
+ "name": "NVIDIA A800-SXM4-80GB",
68
+ "memoryTotal": "85198045184",
69
+ "architecture": "Ampere"
70
+ },
71
+ {
72
+ "name": "NVIDIA A800-SXM4-80GB",
73
+ "memoryTotal": "85198045184",
74
+ "architecture": "Ampere"
75
+ },
76
+ {
77
+ "name": "NVIDIA A800-SXM4-80GB",
78
+ "memoryTotal": "85198045184",
79
+ "architecture": "Ampere"
80
+ },
81
+ {
82
+ "name": "NVIDIA A800-SXM4-80GB",
83
+ "memoryTotal": "85198045184",
84
+ "architecture": "Ampere"
85
+ },
86
+ {
87
+ "name": "NVIDIA A800-SXM4-80GB",
88
+ "memoryTotal": "85198045184",
89
+ "architecture": "Ampere"
90
+ },
91
+ {
92
+ "name": "NVIDIA A800-SXM4-80GB",
93
+ "memoryTotal": "85198045184",
94
+ "architecture": "Ampere"
95
+ },
96
+ {
97
+ "name": "NVIDIA A800-SXM4-80GB",
98
+ "memoryTotal": "85198045184",
99
+ "architecture": "Ampere"
100
+ }
101
+ ],
102
+ "cudaVersion": "12.1"
103
+ }
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/logs/debug-internal.log ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {"time":"2025-07-07T18:49:42.893783985+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/logs/debug-core.log"}
2
+ {"time":"2025-07-07T18:49:43.936695317+08:00","level":"INFO","msg":"created new stream","id":"2bo0nfvt"}
3
+ {"time":"2025-07-07T18:49:43.936731645+08:00","level":"INFO","msg":"stream: started","id":"2bo0nfvt"}
4
+ {"time":"2025-07-07T18:49:43.936758154+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"2bo0nfvt"}
5
+ {"time":"2025-07-07T18:49:43.936774344+08:00","level":"INFO","msg":"sender: started","stream_id":"2bo0nfvt"}
6
+ {"time":"2025-07-07T18:49:43.936811419+08:00","level":"INFO","msg":"handler: started","stream_id":"2bo0nfvt"}
7
+ {"time":"2025-07-07T18:49:45.07418554+08:00","level":"INFO","msg":"Starting system monitor"}
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/logs/debug.log ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-07-07 18:49:42,884 INFO MainThread:111335 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
2
+ 2025-07-07 18:49:42,884 INFO MainThread:111335 [wandb_setup.py:_flush():70] Configure stats pid to 111335
3
+ 2025-07-07 18:49:42,884 INFO MainThread:111335 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
4
+ 2025-07-07 18:49:42,884 INFO MainThread:111335 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
5
+ 2025-07-07 18:49:42,884 INFO MainThread:111335 [wandb_setup.py:_flush():70] Loading settings from environment variables
6
+ 2025-07-07 18:49:42,884 INFO MainThread:111335 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/logs/debug.log
7
+ 2025-07-07 18:49:42,884 INFO MainThread:111335 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/logs/debug-internal.log
8
+ 2025-07-07 18:49:42,884 INFO MainThread:111335 [wandb_init.py:init():852] calling init triggers
9
+ 2025-07-07 18:49:42,884 INFO MainThread:111335 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-07-07 18:49:42,884 INFO MainThread:111335 [wandb_init.py:init():893] starting backend
12
+ 2025-07-07 18:49:42,884 INFO MainThread:111335 [wandb_init.py:init():897] sending inform_init request
13
+ 2025-07-07 18:49:42,886 INFO MainThread:111335 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-07-07 18:49:42,888 INFO MainThread:111335 [wandb_init.py:init():907] backend started and connected
15
+ 2025-07-07 18:49:42,893 INFO MainThread:111335 [wandb_init.py:init():1005] updated telemetry
16
+ 2025-07-07 18:49:42,896 INFO MainThread:111335 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
17
+ 2025-07-07 18:49:45,025 INFO MainThread:111335 [wandb_init.py:init():1104] starting run threads in backend
18
+ 2025-07-07 18:49:45,210 INFO MainThread:111335 [wandb_run.py:_console_start():2573] atexit reg
19
+ 2025-07-07 18:49:45,210 INFO MainThread:111335 [wandb_run.py:_redirect():2421] redirect: wrap_raw
20
+ 2025-07-07 18:49:45,214 INFO MainThread:111335 [wandb_run.py:_redirect():2490] Wrapping output streams.
21
+ 2025-07-07 18:49:45,214 INFO MainThread:111335 [wandb_run.py:_redirect():2513] Redirects installed.
22
+ 2025-07-07 18:49:45,216 INFO MainThread:111335 [wandb_init.py:init():1150] run started, returning control to user process
23
+ 2025-07-07 18:49:53,747 INFO MainThread:111335 [wandb_run.py:_config_callback():1436] config_cb None None {'filename': 'stage2.5_mol_instruction', 'seed': 42, 'mode': 'train', 'strategy': 'deepspeed', 'accelerator': 'gpu', 'devices': '0,1,2,3,4,5,6,7', 'precision': 'bf16-mixed', 'max_epochs': 10, 'accumulate_grad_batches': 1, 'check_val_every_n_epoch': 1, 'enable_flash': False, 'use_wandb_logger': True, 'mix_dataset': False, 'save_every_n_epochs': 1, 'bert_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft', 'cross_attention_freq': 2, 'num_query_token': 8, 'llm_name': '/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300', 'num_beams': 5, 'do_sample': False, 'max_inference_len': 128, 'min_inference_len': 1, 'llm_tune': 'mid_lora', 'peft_config': '', 'peft_dir': '', 'plm_model': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m', 'plm_tune': 'freeze', 'lora_r': 8, 'lora_alpha': 16, 'lora_dropout': 0.1, 'enbale_gradient_checkpointing': False, 'weight_decay': 0.05, 'init_lr': 0.0001, 'min_lr': 1e-05, 'warmup_lr': 1e-06, 'warmup_steps': 1000, 'lr_decay_rate': 0.9, 'scheduler': 'linear_warmup_cosine_lr', 'stage1_path': '/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt', 'stage2_path': '', 'init_checkpoint': '', 'caption_eval_epoch': 10, 'num_workers': 8, 'batch_size': 32, 'inference_batch_size': 4, 'root': 'data', 'text_max_len': 1024, 'q_max_len': 29, 'a_max_len': 36, 'prot_max_len': 1024, 'prompt': 'The protein has the following properties: ', 'filter_side_qa': False}
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/run-2bo0nfvt.wandb ADDED
File without changes
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/files/output.log ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2.5_mol_instruction exists and is not empty.
2
+ Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.
3
+ LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
4
+ [rank: 5] Child process with PID 116132 terminated with code 1. Forcefully terminating all other processes to avoid zombies 🧟
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/files/requirements.txt ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pydantic_core==2.33.2
2
+ psutil==7.0.0
3
+ nvidia-cuda-nvrtc-cu12==12.4.127
4
+ mpmath==1.3.0
5
+ tzdata==2025.2
6
+ contexttimer==0.3.3
7
+ cycler==0.12.1
8
+ python-magic==0.4.27
9
+ pexpect==4.9.0
10
+ sympy==1.13.1
11
+ wrapt==1.17.2
12
+ marisa-trie==1.2.1
13
+ langcodes==3.5.0
14
+ nvidia-nvtx-cu12==12.4.127
15
+ ipython==8.36.0
16
+ opencv-python-headless==4.5.5.64
17
+ MarkupSafe==3.0.2
18
+ jsonschema-specifications==2025.4.1
19
+ wasabi==1.1.3
20
+ blinker==1.9.0
21
+ cfgv==3.4.0
22
+ numpy==2.2.6
23
+ idna==3.10
24
+ nvidia-cufile-cu12==1.11.1.6
25
+ ninja==1.11.1.4
26
+ nvidia-nccl-cu12==2.21.5
27
+ networkx==3.4.2
28
+ certifi==2025.4.26
29
+ deepspeed==0.16.10+b666844f
30
+ pure_eval==0.2.3
31
+ packaging==24.2
32
+ nltk==3.9.1
33
+ contourpy==1.3.2
34
+ pre_commit==4.2.0
35
+ nodeenv==1.9.1
36
+ setuptools==78.1.1
37
+ annotated-types==0.7.0
38
+ multidict==6.4.4
39
+ requests==2.32.3
40
+ tornado==6.5.1
41
+ triton==3.2.0
42
+ pillow==11.2.1
43
+ decord==0.6.0
44
+ shellingham==1.5.4
45
+ streamlit==1.45.1
46
+ pydeck==0.9.1
47
+ confection==0.1.5
48
+ exceptiongroup==1.3.0
49
+ prompt_toolkit==3.0.51
50
+ text-unidecode==1.3
51
+ nvidia-cufft-cu12==11.2.1.3
52
+ antlr4-python3-runtime==4.9.3
53
+ fairscale==0.4.4
54
+ rouge_score==0.1.2
55
+ nvidia-cudnn-cu12==9.1.0.70
56
+ tqdm==4.67.1
57
+ rich==14.0.0
58
+ frozenlist==1.6.0
59
+ webencodings==0.5.1
60
+ altair==5.5.0
61
+ opendatasets==0.1.22
62
+ nvidia-curand-cu12==10.3.5.147
63
+ protobuf==6.31.0
64
+ asttokens==3.0.0
65
+ wheel==0.45.1
66
+ hf-xet==1.1.2
67
+ weasel==0.4.1
68
+ aiosignal==1.3.2
69
+ absl-py==2.2.2
70
+ thinc==8.3.6
71
+ torchvision==0.21.0
72
+ pandas==2.2.3
73
+ fonttools==4.58.0
74
+ bleach==6.2.0
75
+ typing-inspection==0.4.1
76
+ ftfy==6.3.1
77
+ typing_extensions==4.13.2
78
+ nvidia-ml-py==12.575.51
79
+ python-slugify==8.0.4
80
+ lightning-utilities==0.14.3
81
+ py-cpuinfo==9.0.0
82
+ smmap==5.0.2
83
+ regex==2024.11.6
84
+ scikit-image==0.25.2
85
+ iopath==0.1.10
86
+ spacy-legacy==3.0.12
87
+ hjson==3.1.0
88
+ executing==2.2.0
89
+ kiwisolver==1.4.8
90
+ scipy==1.15.3
91
+ aiohappyeyeballs==2.6.1
92
+ toml==0.10.2
93
+ jedi==0.19.2
94
+ GitPython==3.1.44
95
+ ptyprocess==0.7.0
96
+ kaggle==1.7.4.5
97
+ braceexpand==0.1.7
98
+ wcwidth==0.2.13
99
+ nvidia-cuda-runtime-cu12==12.4.127
100
+ pytorch-lightning==2.5.1.post0
101
+ Jinja2==3.1.6
102
+ urllib3==2.4.0
103
+ watchdog==6.0.0
104
+ filelock==3.18.0
105
+ propcache==0.3.1
106
+ torch==2.6.0
107
+ nvidia-cusparse-cu12==12.3.1.170
108
+ cymem==2.0.11
109
+ nvidia-cusolver-cu12==11.6.1.9
110
+ murmurhash==1.0.13
111
+ catalogue==2.0.10
112
+ yarl==1.20.0
113
+ charset-normalizer==3.4.2
114
+ gitdb==4.0.12
115
+ matplotlib==3.10.3
116
+ portalocker==3.1.1
117
+ platformdirs==4.3.8
118
+ async-timeout==5.0.1
119
+ parso==0.8.4
120
+ markdown-it-py==3.0.0
121
+ omegaconf==2.3.0
122
+ cloudpathlib==0.21.1
123
+ nvidia-cusparselt-cu12==0.6.2
124
+ spacy-loggers==1.0.5
125
+ srsly==2.5.1
126
+ identify==2.6.12
127
+ rpds-py==0.25.1
128
+ spacy==3.8.7
129
+ matplotlib-inline==0.1.7
130
+ smart-open==7.1.0
131
+ pydantic==2.11.5
132
+ mdurl==0.1.2
133
+ virtualenv==20.31.2
134
+ pytz==2025.2
135
+ pycocotools==2.0.8
136
+ six==1.17.0
137
+ decorator==5.2.1
138
+ referencing==0.36.2
139
+ sentencepiece==0.2.0
140
+ PyYAML==6.0.2
141
+ pycocoevalcap==1.2
142
+ imageio==2.37.0
143
+ distlib==0.3.9
144
+ pyarrow==20.0.0
145
+ tenacity==9.1.2
146
+ language_data==1.3.0
147
+ nvidia-cuda-cupti-cu12==12.4.127
148
+ blis==1.3.0
149
+ Pygments==2.19.1
150
+ tifffile==2025.5.10
151
+ pyparsing==3.2.3
152
+ cachetools==5.5.2
153
+ safetensors==0.5.3
154
+ attrs==25.3.0
155
+ webdataset==0.2.111
156
+ plotly==6.1.1
157
+ nvidia-cublas-cu12==12.4.5.8
158
+ timm==0.4.12
159
+ torchmetrics==1.7.1
160
+ nvidia-nvjitlink-cu12==12.4.127
161
+ stack-data==0.6.3
162
+ python-dateutil==2.9.0.post0
163
+ lazy_loader==0.4
164
+ traitlets==5.14.3
165
+ einops==0.8.1
166
+ salesforce-lavis==1.0.2
167
+ joblib==1.5.1
168
+ msgpack==1.1.0
169
+ tokenizers==0.21.1
170
+ sentry-sdk==2.29.1
171
+ oss2==2.15.0
172
+ setproctitle==1.3.6
173
+ pip==25.1.1
174
+ cffi==1.17.1
175
+ transformers==4.52.3
176
+ narwhals==1.41.0
177
+ aliyun-python-sdk-core==2.16.0
178
+ jsonschema==4.24.0
179
+ flash-attn==2.7.1.post1
180
+ preshed==3.0.10
181
+ multiprocess==0.70.16
182
+ cryptography==45.0.3
183
+ aliyun-python-sdk-kms==2.16.5
184
+ scikit-learn==1.6.1
185
+ huggingface-hub==0.32.1
186
+ crcmod==1.7
187
+ typer==0.16.0
188
+ web.py==0.62
189
+ docker-pycreds==0.4.0
190
+ xxhash==3.5.0
191
+ bigmodelvis==0.0.1
192
+ datasets==3.6.0
193
+ more-itertools==10.7.0
194
+ yacs==0.1.8
195
+ jmespath==0.10.0
196
+ aiohttp==3.12.2
197
+ opencv-python==4.11.0.86
198
+ pycparser==2.22
199
+ threadpoolctl==3.6.0
200
+ jaraco.functools==4.1.0
201
+ click==8.2.1
202
+ wandb==0.19.11
203
+ opendelta==0.3.2
204
+ pycryptodome==3.23.0
205
+ pathlib==1.0.1
206
+ dill==0.3.8
207
+ fsspec==2025.3.0
208
+ delta-center-client==0.0.4
209
+ cheroot==10.0.1
210
+ typing_extensions==4.12.2
211
+ platformdirs==4.2.2
212
+ jaraco.text==3.12.1
213
+ packaging==24.2
214
+ inflect==7.3.1
215
+ jaraco.context==5.3.0
216
+ wheel==0.45.1
217
+ typeguard==4.3.0
218
+ more-itertools==10.3.0
219
+ tomli==2.0.1
220
+ importlib_metadata==8.0.0
221
+ backports.tarfile==1.2.0
222
+ zipp==3.19.2
223
+ jaraco.collections==5.1.0
224
+ autocommand==2.2.2
225
+ jaraco.functools==4.0.1
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/files/wandb-metadata.json ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.0",
4
+ "startedAt": "2025-07-07T10:52:39.364663Z",
5
+ "args": [
6
+ "--devices",
7
+ "0,1,2,3,4,5,6,7",
8
+ "--mode",
9
+ "train",
10
+ "--filename",
11
+ "stage2.5_mol_instruction",
12
+ "--num_query_token",
13
+ "8",
14
+ "--save_every_n_epochs",
15
+ "1",
16
+ "--max_epochs",
17
+ "10",
18
+ "--batch_size",
19
+ "2",
20
+ "--precision",
21
+ "bf16-mixed",
22
+ "--num_workers",
23
+ "8",
24
+ "--plm_model",
25
+ "/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m",
26
+ "--bert_name",
27
+ "/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft",
28
+ "--llm_name",
29
+ "/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300",
30
+ "--llm_tune",
31
+ "mid_lora",
32
+ "--stage1_path",
33
+ "/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt",
34
+ "--use_wandb_logger"
35
+ ],
36
+ "program": "/nas/shared/kilab/wangyujia/ProtT3/stage2.py",
37
+ "codePath": "stage2.py",
38
+ "email": "gia0603yucca@gmail.com",
39
+ "root": "./all_checkpoints/stage2.5_mol_instruction/",
40
+ "host": "dsw-265304-7f6db6b4bb-g4b9r",
41
+ "executable": "/root/miniconda3/envs/protT3/bin/python",
42
+ "codePathLocal": "stage2.py",
43
+ "cpu_count": 64,
44
+ "cpu_count_logical": 64,
45
+ "gpu": "NVIDIA A800-SXM4-80GB",
46
+ "gpu_count": 8,
47
+ "disk": {
48
+ "/": {
49
+ "total": "1623302262784",
50
+ "used": "1260916736"
51
+ }
52
+ },
53
+ "memory": {
54
+ "total": "549755813888"
55
+ },
56
+ "cpu": {
57
+ "count": 64,
58
+ "countLogical": 64
59
+ },
60
+ "gpu_nvidia": [
61
+ {
62
+ "name": "NVIDIA A800-SXM4-80GB",
63
+ "memoryTotal": "85198045184",
64
+ "architecture": "Ampere"
65
+ },
66
+ {
67
+ "name": "NVIDIA A800-SXM4-80GB",
68
+ "memoryTotal": "85198045184",
69
+ "architecture": "Ampere"
70
+ },
71
+ {
72
+ "name": "NVIDIA A800-SXM4-80GB",
73
+ "memoryTotal": "85198045184",
74
+ "architecture": "Ampere"
75
+ },
76
+ {
77
+ "name": "NVIDIA A800-SXM4-80GB",
78
+ "memoryTotal": "85198045184",
79
+ "architecture": "Ampere"
80
+ },
81
+ {
82
+ "name": "NVIDIA A800-SXM4-80GB",
83
+ "memoryTotal": "85198045184",
84
+ "architecture": "Ampere"
85
+ },
86
+ {
87
+ "name": "NVIDIA A800-SXM4-80GB",
88
+ "memoryTotal": "85198045184",
89
+ "architecture": "Ampere"
90
+ },
91
+ {
92
+ "name": "NVIDIA A800-SXM4-80GB",
93
+ "memoryTotal": "85198045184",
94
+ "architecture": "Ampere"
95
+ },
96
+ {
97
+ "name": "NVIDIA A800-SXM4-80GB",
98
+ "memoryTotal": "85198045184",
99
+ "architecture": "Ampere"
100
+ }
101
+ ],
102
+ "cudaVersion": "12.1"
103
+ }
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/logs/debug-internal.log ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {"time":"2025-07-07T18:52:39.366061072+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/logs/debug-core.log"}
2
+ {"time":"2025-07-07T18:52:40.447272944+08:00","level":"INFO","msg":"created new stream","id":"ftp1v3gy"}
3
+ {"time":"2025-07-07T18:52:40.447330666+08:00","level":"INFO","msg":"stream: started","id":"ftp1v3gy"}
4
+ {"time":"2025-07-07T18:52:40.447402585+08:00","level":"INFO","msg":"handler: started","stream_id":"ftp1v3gy"}
5
+ {"time":"2025-07-07T18:52:40.447405198+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"ftp1v3gy"}
6
+ {"time":"2025-07-07T18:52:40.447445082+08:00","level":"INFO","msg":"sender: started","stream_id":"ftp1v3gy"}
7
+ {"time":"2025-07-07T18:52:41.658224222+08:00","level":"INFO","msg":"Starting system monitor"}
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/logs/debug.log ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-07-07 18:52:39,355 INFO MainThread:115698 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
2
+ 2025-07-07 18:52:39,356 INFO MainThread:115698 [wandb_setup.py:_flush():70] Configure stats pid to 115698
3
+ 2025-07-07 18:52:39,356 INFO MainThread:115698 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
4
+ 2025-07-07 18:52:39,356 INFO MainThread:115698 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
5
+ 2025-07-07 18:52:39,356 INFO MainThread:115698 [wandb_setup.py:_flush():70] Loading settings from environment variables
6
+ 2025-07-07 18:52:39,356 INFO MainThread:115698 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/logs/debug.log
7
+ 2025-07-07 18:52:39,356 INFO MainThread:115698 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/logs/debug-internal.log
8
+ 2025-07-07 18:52:39,356 INFO MainThread:115698 [wandb_init.py:init():852] calling init triggers
9
+ 2025-07-07 18:52:39,356 INFO MainThread:115698 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-07-07 18:52:39,356 INFO MainThread:115698 [wandb_init.py:init():893] starting backend
12
+ 2025-07-07 18:52:39,356 INFO MainThread:115698 [wandb_init.py:init():897] sending inform_init request
13
+ 2025-07-07 18:52:39,357 INFO MainThread:115698 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-07-07 18:52:39,361 INFO MainThread:115698 [wandb_init.py:init():907] backend started and connected
15
+ 2025-07-07 18:52:39,365 INFO MainThread:115698 [wandb_init.py:init():1005] updated telemetry
16
+ 2025-07-07 18:52:39,368 INFO MainThread:115698 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
17
+ 2025-07-07 18:52:41,647 INFO MainThread:115698 [wandb_init.py:init():1104] starting run threads in backend
18
+ 2025-07-07 18:52:41,785 INFO MainThread:115698 [wandb_run.py:_console_start():2573] atexit reg
19
+ 2025-07-07 18:52:41,785 INFO MainThread:115698 [wandb_run.py:_redirect():2421] redirect: wrap_raw
20
+ 2025-07-07 18:52:41,788 INFO MainThread:115698 [wandb_run.py:_redirect():2490] Wrapping output streams.
21
+ 2025-07-07 18:52:41,788 INFO MainThread:115698 [wandb_run.py:_redirect():2513] Redirects installed.
22
+ 2025-07-07 18:52:41,789 INFO MainThread:115698 [wandb_init.py:init():1150] run started, returning control to user process
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/run-ftp1v3gy.wandb ADDED
File without changes
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/files/output.log ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2.5_mol_instruction exists and is not empty.
2
+ Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.
3
+ LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
4
+ [rank: 5] Child process with PID 118872 terminated with code 1. Forcefully terminating all other processes to avoid zombies 🧟
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/files/requirements.txt ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pydantic_core==2.33.2
2
+ psutil==7.0.0
3
+ nvidia-cuda-nvrtc-cu12==12.4.127
4
+ mpmath==1.3.0
5
+ tzdata==2025.2
6
+ contexttimer==0.3.3
7
+ cycler==0.12.1
8
+ python-magic==0.4.27
9
+ pexpect==4.9.0
10
+ sympy==1.13.1
11
+ wrapt==1.17.2
12
+ marisa-trie==1.2.1
13
+ langcodes==3.5.0
14
+ nvidia-nvtx-cu12==12.4.127
15
+ ipython==8.36.0
16
+ opencv-python-headless==4.5.5.64
17
+ MarkupSafe==3.0.2
18
+ jsonschema-specifications==2025.4.1
19
+ wasabi==1.1.3
20
+ blinker==1.9.0
21
+ cfgv==3.4.0
22
+ numpy==2.2.6
23
+ idna==3.10
24
+ nvidia-cufile-cu12==1.11.1.6
25
+ ninja==1.11.1.4
26
+ nvidia-nccl-cu12==2.21.5
27
+ networkx==3.4.2
28
+ certifi==2025.4.26
29
+ deepspeed==0.16.10+b666844f
30
+ pure_eval==0.2.3
31
+ packaging==24.2
32
+ nltk==3.9.1
33
+ contourpy==1.3.2
34
+ pre_commit==4.2.0
35
+ nodeenv==1.9.1
36
+ setuptools==78.1.1
37
+ annotated-types==0.7.0
38
+ multidict==6.4.4
39
+ requests==2.32.3
40
+ tornado==6.5.1
41
+ triton==3.2.0
42
+ pillow==11.2.1
43
+ decord==0.6.0
44
+ shellingham==1.5.4
45
+ streamlit==1.45.1
46
+ pydeck==0.9.1
47
+ confection==0.1.5
48
+ exceptiongroup==1.3.0
49
+ prompt_toolkit==3.0.51
50
+ text-unidecode==1.3
51
+ nvidia-cufft-cu12==11.2.1.3
52
+ antlr4-python3-runtime==4.9.3
53
+ fairscale==0.4.4
54
+ rouge_score==0.1.2
55
+ nvidia-cudnn-cu12==9.1.0.70
56
+ tqdm==4.67.1
57
+ rich==14.0.0
58
+ frozenlist==1.6.0
59
+ webencodings==0.5.1
60
+ altair==5.5.0
61
+ opendatasets==0.1.22
62
+ nvidia-curand-cu12==10.3.5.147
63
+ protobuf==6.31.0
64
+ asttokens==3.0.0
65
+ wheel==0.45.1
66
+ hf-xet==1.1.2
67
+ weasel==0.4.1
68
+ aiosignal==1.3.2
69
+ absl-py==2.2.2
70
+ thinc==8.3.6
71
+ torchvision==0.21.0
72
+ pandas==2.2.3
73
+ fonttools==4.58.0
74
+ bleach==6.2.0
75
+ typing-inspection==0.4.1
76
+ ftfy==6.3.1
77
+ typing_extensions==4.13.2
78
+ nvidia-ml-py==12.575.51
79
+ python-slugify==8.0.4
80
+ lightning-utilities==0.14.3
81
+ py-cpuinfo==9.0.0
82
+ smmap==5.0.2
83
+ regex==2024.11.6
84
+ scikit-image==0.25.2
85
+ iopath==0.1.10
86
+ spacy-legacy==3.0.12
87
+ hjson==3.1.0
88
+ executing==2.2.0
89
+ kiwisolver==1.4.8
90
+ scipy==1.15.3
91
+ aiohappyeyeballs==2.6.1
92
+ toml==0.10.2
93
+ jedi==0.19.2
94
+ GitPython==3.1.44
95
+ ptyprocess==0.7.0
96
+ kaggle==1.7.4.5
97
+ braceexpand==0.1.7
98
+ wcwidth==0.2.13
99
+ nvidia-cuda-runtime-cu12==12.4.127
100
+ pytorch-lightning==2.5.1.post0
101
+ Jinja2==3.1.6
102
+ urllib3==2.4.0
103
+ watchdog==6.0.0
104
+ filelock==3.18.0
105
+ propcache==0.3.1
106
+ torch==2.6.0
107
+ nvidia-cusparse-cu12==12.3.1.170
108
+ cymem==2.0.11
109
+ nvidia-cusolver-cu12==11.6.1.9
110
+ murmurhash==1.0.13
111
+ catalogue==2.0.10
112
+ yarl==1.20.0
113
+ charset-normalizer==3.4.2
114
+ gitdb==4.0.12
115
+ matplotlib==3.10.3
116
+ portalocker==3.1.1
117
+ platformdirs==4.3.8
118
+ async-timeout==5.0.1
119
+ parso==0.8.4
120
+ markdown-it-py==3.0.0
121
+ omegaconf==2.3.0
122
+ cloudpathlib==0.21.1
123
+ nvidia-cusparselt-cu12==0.6.2
124
+ spacy-loggers==1.0.5
125
+ srsly==2.5.1
126
+ identify==2.6.12
127
+ rpds-py==0.25.1
128
+ spacy==3.8.7
129
+ matplotlib-inline==0.1.7
130
+ smart-open==7.1.0
131
+ pydantic==2.11.5
132
+ mdurl==0.1.2
133
+ virtualenv==20.31.2
134
+ pytz==2025.2
135
+ pycocotools==2.0.8
136
+ six==1.17.0
137
+ decorator==5.2.1
138
+ referencing==0.36.2
139
+ sentencepiece==0.2.0
140
+ PyYAML==6.0.2
141
+ pycocoevalcap==1.2
142
+ imageio==2.37.0
143
+ distlib==0.3.9
144
+ pyarrow==20.0.0
145
+ tenacity==9.1.2
146
+ language_data==1.3.0
147
+ nvidia-cuda-cupti-cu12==12.4.127
148
+ blis==1.3.0
149
+ Pygments==2.19.1
150
+ tifffile==2025.5.10
151
+ pyparsing==3.2.3
152
+ cachetools==5.5.2
153
+ safetensors==0.5.3
154
+ attrs==25.3.0
155
+ webdataset==0.2.111
156
+ plotly==6.1.1
157
+ nvidia-cublas-cu12==12.4.5.8
158
+ timm==0.4.12
159
+ torchmetrics==1.7.1
160
+ nvidia-nvjitlink-cu12==12.4.127
161
+ stack-data==0.6.3
162
+ python-dateutil==2.9.0.post0
163
+ lazy_loader==0.4
164
+ traitlets==5.14.3
165
+ einops==0.8.1
166
+ salesforce-lavis==1.0.2
167
+ joblib==1.5.1
168
+ msgpack==1.1.0
169
+ tokenizers==0.21.1
170
+ sentry-sdk==2.29.1
171
+ oss2==2.15.0
172
+ setproctitle==1.3.6
173
+ pip==25.1.1
174
+ cffi==1.17.1
175
+ transformers==4.52.3
176
+ narwhals==1.41.0
177
+ aliyun-python-sdk-core==2.16.0
178
+ jsonschema==4.24.0
179
+ flash-attn==2.7.1.post1
180
+ preshed==3.0.10
181
+ multiprocess==0.70.16
182
+ cryptography==45.0.3
183
+ aliyun-python-sdk-kms==2.16.5
184
+ scikit-learn==1.6.1
185
+ huggingface-hub==0.32.1
186
+ crcmod==1.7
187
+ typer==0.16.0
188
+ web.py==0.62
189
+ docker-pycreds==0.4.0
190
+ xxhash==3.5.0
191
+ bigmodelvis==0.0.1
192
+ datasets==3.6.0
193
+ more-itertools==10.7.0
194
+ yacs==0.1.8
195
+ jmespath==0.10.0
196
+ aiohttp==3.12.2
197
+ opencv-python==4.11.0.86
198
+ pycparser==2.22
199
+ threadpoolctl==3.6.0
200
+ jaraco.functools==4.1.0
201
+ click==8.2.1
202
+ wandb==0.19.11
203
+ opendelta==0.3.2
204
+ pycryptodome==3.23.0
205
+ pathlib==1.0.1
206
+ dill==0.3.8
207
+ fsspec==2025.3.0
208
+ delta-center-client==0.0.4
209
+ cheroot==10.0.1
210
+ typing_extensions==4.12.2
211
+ platformdirs==4.2.2
212
+ jaraco.text==3.12.1
213
+ packaging==24.2
214
+ inflect==7.3.1
215
+ jaraco.context==5.3.0
216
+ wheel==0.45.1
217
+ typeguard==4.3.0
218
+ more-itertools==10.3.0
219
+ tomli==2.0.1
220
+ importlib_metadata==8.0.0
221
+ backports.tarfile==1.2.0
222
+ zipp==3.19.2
223
+ jaraco.collections==5.1.0
224
+ autocommand==2.2.2
225
+ jaraco.functools==4.0.1
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/files/wandb-metadata.json ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.0",
4
+ "startedAt": "2025-07-07T10:54:40.521235Z",
5
+ "args": [
6
+ "--devices",
7
+ "0,1,2,3,4,5,6,7",
8
+ "--mode",
9
+ "train",
10
+ "--filename",
11
+ "stage2.5_mol_instruction",
12
+ "--num_query_token",
13
+ "8",
14
+ "--save_every_n_epochs",
15
+ "1",
16
+ "--max_epochs",
17
+ "10",
18
+ "--batch_size",
19
+ "1",
20
+ "--precision",
21
+ "bf16-mixed",
22
+ "--num_workers",
23
+ "8",
24
+ "--plm_model",
25
+ "/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m",
26
+ "--bert_name",
27
+ "/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft",
28
+ "--llm_name",
29
+ "/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300",
30
+ "--llm_tune",
31
+ "mid_lora",
32
+ "--stage1_path",
33
+ "/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt",
34
+ "--use_wandb_logger"
35
+ ],
36
+ "program": "/nas/shared/kilab/wangyujia/ProtT3/stage2.py",
37
+ "codePath": "stage2.py",
38
+ "email": "gia0603yucca@gmail.com",
39
+ "root": "./all_checkpoints/stage2.5_mol_instruction/",
40
+ "host": "dsw-265304-7f6db6b4bb-g4b9r",
41
+ "executable": "/root/miniconda3/envs/protT3/bin/python",
42
+ "codePathLocal": "stage2.py",
43
+ "cpu_count": 64,
44
+ "cpu_count_logical": 64,
45
+ "gpu": "NVIDIA A800-SXM4-80GB",
46
+ "gpu_count": 8,
47
+ "disk": {
48
+ "/": {
49
+ "total": "1623302262784",
50
+ "used": "1260920832"
51
+ }
52
+ },
53
+ "memory": {
54
+ "total": "549755813888"
55
+ },
56
+ "cpu": {
57
+ "count": 64,
58
+ "countLogical": 64
59
+ },
60
+ "gpu_nvidia": [
61
+ {
62
+ "name": "NVIDIA A800-SXM4-80GB",
63
+ "memoryTotal": "85198045184",
64
+ "architecture": "Ampere"
65
+ },
66
+ {
67
+ "name": "NVIDIA A800-SXM4-80GB",
68
+ "memoryTotal": "85198045184",
69
+ "architecture": "Ampere"
70
+ },
71
+ {
72
+ "name": "NVIDIA A800-SXM4-80GB",
73
+ "memoryTotal": "85198045184",
74
+ "architecture": "Ampere"
75
+ },
76
+ {
77
+ "name": "NVIDIA A800-SXM4-80GB",
78
+ "memoryTotal": "85198045184",
79
+ "architecture": "Ampere"
80
+ },
81
+ {
82
+ "name": "NVIDIA A800-SXM4-80GB",
83
+ "memoryTotal": "85198045184",
84
+ "architecture": "Ampere"
85
+ },
86
+ {
87
+ "name": "NVIDIA A800-SXM4-80GB",
88
+ "memoryTotal": "85198045184",
89
+ "architecture": "Ampere"
90
+ },
91
+ {
92
+ "name": "NVIDIA A800-SXM4-80GB",
93
+ "memoryTotal": "85198045184",
94
+ "architecture": "Ampere"
95
+ },
96
+ {
97
+ "name": "NVIDIA A800-SXM4-80GB",
98
+ "memoryTotal": "85198045184",
99
+ "architecture": "Ampere"
100
+ }
101
+ ],
102
+ "cudaVersion": "12.1"
103
+ }
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/logs/debug-internal.log ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {"time":"2025-07-07T18:54:40.526100632+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/logs/debug-core.log"}
2
+ {"time":"2025-07-07T18:54:41.559415905+08:00","level":"INFO","msg":"created new stream","id":"p1815hm9"}
3
+ {"time":"2025-07-07T18:54:41.559467213+08:00","level":"INFO","msg":"stream: started","id":"p1815hm9"}
4
+ {"time":"2025-07-07T18:54:41.559497666+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"p1815hm9"}
5
+ {"time":"2025-07-07T18:54:41.559545046+08:00","level":"INFO","msg":"handler: started","stream_id":"p1815hm9"}
6
+ {"time":"2025-07-07T18:54:41.559594522+08:00","level":"INFO","msg":"sender: started","stream_id":"p1815hm9"}
7
+ {"time":"2025-07-07T18:54:42.859018211+08:00","level":"INFO","msg":"Starting system monitor"}
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/logs/debug.log ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-07-07 18:54:40,510 INFO MainThread:118453 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
2
+ 2025-07-07 18:54:40,510 INFO MainThread:118453 [wandb_setup.py:_flush():70] Configure stats pid to 118453
3
+ 2025-07-07 18:54:40,511 INFO MainThread:118453 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
4
+ 2025-07-07 18:54:40,511 INFO MainThread:118453 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
5
+ 2025-07-07 18:54:40,511 INFO MainThread:118453 [wandb_setup.py:_flush():70] Loading settings from environment variables
6
+ 2025-07-07 18:54:40,511 INFO MainThread:118453 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/logs/debug.log
7
+ 2025-07-07 18:54:40,511 INFO MainThread:118453 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/logs/debug-internal.log
8
+ 2025-07-07 18:54:40,511 INFO MainThread:118453 [wandb_init.py:init():852] calling init triggers
9
+ 2025-07-07 18:54:40,511 INFO MainThread:118453 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-07-07 18:54:40,511 INFO MainThread:118453 [wandb_init.py:init():893] starting backend
12
+ 2025-07-07 18:54:40,511 INFO MainThread:118453 [wandb_init.py:init():897] sending inform_init request
13
+ 2025-07-07 18:54:40,512 INFO MainThread:118453 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-07-07 18:54:40,521 INFO MainThread:118453 [wandb_init.py:init():907] backend started and connected
15
+ 2025-07-07 18:54:40,522 INFO MainThread:118453 [wandb_init.py:init():1005] updated telemetry
16
+ 2025-07-07 18:54:40,523 INFO MainThread:118453 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
17
+ 2025-07-07 18:54:42,809 INFO MainThread:118453 [wandb_init.py:init():1104] starting run threads in backend
18
+ 2025-07-07 18:54:42,991 INFO MainThread:118453 [wandb_run.py:_console_start():2573] atexit reg
19
+ 2025-07-07 18:54:42,991 INFO MainThread:118453 [wandb_run.py:_redirect():2421] redirect: wrap_raw
20
+ 2025-07-07 18:54:42,995 INFO MainThread:118453 [wandb_run.py:_redirect():2490] Wrapping output streams.
21
+ 2025-07-07 18:54:42,999 INFO MainThread:118453 [wandb_run.py:_redirect():2513] Redirects installed.
22
+ 2025-07-07 18:54:43,000 INFO MainThread:118453 [wandb_init.py:init():1150] run started, returning control to user process
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/run-p1815hm9.wandb ADDED
File without changes
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_190145-vu5mgolt/files/output.log ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2.5_mol_instruction exists and is not empty.
2
+ Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.
3
+ LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
4
+ [rank: 5] Child process with PID 122036 terminated with code 1. Forcefully terminating all other processes to avoid zombies 🧟
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_190145-vu5mgolt/files/requirements.txt ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pydantic_core==2.33.2
2
+ psutil==7.0.0
3
+ nvidia-cuda-nvrtc-cu12==12.4.127
4
+ mpmath==1.3.0
5
+ tzdata==2025.2
6
+ contexttimer==0.3.3
7
+ cycler==0.12.1
8
+ python-magic==0.4.27
9
+ pexpect==4.9.0
10
+ sympy==1.13.1
11
+ wrapt==1.17.2
12
+ marisa-trie==1.2.1
13
+ langcodes==3.5.0
14
+ nvidia-nvtx-cu12==12.4.127
15
+ ipython==8.36.0
16
+ opencv-python-headless==4.5.5.64
17
+ MarkupSafe==3.0.2
18
+ jsonschema-specifications==2025.4.1
19
+ wasabi==1.1.3
20
+ blinker==1.9.0
21
+ cfgv==3.4.0
22
+ numpy==2.2.6
23
+ idna==3.10
24
+ nvidia-cufile-cu12==1.11.1.6
25
+ ninja==1.11.1.4
26
+ nvidia-nccl-cu12==2.21.5
27
+ networkx==3.4.2
28
+ certifi==2025.4.26
29
+ deepspeed==0.16.10+b666844f
30
+ pure_eval==0.2.3
31
+ packaging==24.2
32
+ nltk==3.9.1
33
+ contourpy==1.3.2
34
+ pre_commit==4.2.0
35
+ nodeenv==1.9.1
36
+ setuptools==78.1.1
37
+ annotated-types==0.7.0
38
+ multidict==6.4.4
39
+ requests==2.32.3
40
+ tornado==6.5.1
41
+ triton==3.2.0
42
+ pillow==11.2.1
43
+ decord==0.6.0
44
+ shellingham==1.5.4
45
+ streamlit==1.45.1
46
+ pydeck==0.9.1
47
+ confection==0.1.5
48
+ exceptiongroup==1.3.0
49
+ prompt_toolkit==3.0.51
50
+ text-unidecode==1.3
51
+ nvidia-cufft-cu12==11.2.1.3
52
+ antlr4-python3-runtime==4.9.3
53
+ fairscale==0.4.4
54
+ rouge_score==0.1.2
55
+ nvidia-cudnn-cu12==9.1.0.70
56
+ tqdm==4.67.1
57
+ rich==14.0.0
58
+ frozenlist==1.6.0
59
+ webencodings==0.5.1
60
+ altair==5.5.0
61
+ opendatasets==0.1.22
62
+ nvidia-curand-cu12==10.3.5.147
63
+ protobuf==6.31.0
64
+ asttokens==3.0.0
65
+ wheel==0.45.1
66
+ hf-xet==1.1.2
67
+ weasel==0.4.1
68
+ aiosignal==1.3.2
69
+ absl-py==2.2.2
70
+ thinc==8.3.6
71
+ torchvision==0.21.0
72
+ pandas==2.2.3
73
+ fonttools==4.58.0
74
+ bleach==6.2.0
75
+ typing-inspection==0.4.1
76
+ ftfy==6.3.1
77
+ typing_extensions==4.13.2
78
+ nvidia-ml-py==12.575.51
79
+ python-slugify==8.0.4
80
+ lightning-utilities==0.14.3
81
+ py-cpuinfo==9.0.0
82
+ smmap==5.0.2
83
+ regex==2024.11.6
84
+ scikit-image==0.25.2
85
+ iopath==0.1.10
86
+ spacy-legacy==3.0.12
87
+ hjson==3.1.0
88
+ executing==2.2.0
89
+ kiwisolver==1.4.8
90
+ scipy==1.15.3
91
+ aiohappyeyeballs==2.6.1
92
+ toml==0.10.2
93
+ jedi==0.19.2
94
+ GitPython==3.1.44
95
+ ptyprocess==0.7.0
96
+ kaggle==1.7.4.5
97
+ braceexpand==0.1.7
98
+ wcwidth==0.2.13
99
+ nvidia-cuda-runtime-cu12==12.4.127
100
+ pytorch-lightning==2.5.1.post0
101
+ Jinja2==3.1.6
102
+ urllib3==2.4.0
103
+ watchdog==6.0.0
104
+ filelock==3.18.0
105
+ propcache==0.3.1
106
+ torch==2.6.0
107
+ nvidia-cusparse-cu12==12.3.1.170
108
+ cymem==2.0.11
109
+ nvidia-cusolver-cu12==11.6.1.9
110
+ murmurhash==1.0.13
111
+ catalogue==2.0.10
112
+ yarl==1.20.0
113
+ charset-normalizer==3.4.2
114
+ gitdb==4.0.12
115
+ matplotlib==3.10.3
116
+ portalocker==3.1.1
117
+ platformdirs==4.3.8
118
+ async-timeout==5.0.1
119
+ parso==0.8.4
120
+ markdown-it-py==3.0.0
121
+ omegaconf==2.3.0
122
+ cloudpathlib==0.21.1
123
+ nvidia-cusparselt-cu12==0.6.2
124
+ spacy-loggers==1.0.5
125
+ srsly==2.5.1
126
+ identify==2.6.12
127
+ rpds-py==0.25.1
128
+ spacy==3.8.7
129
+ matplotlib-inline==0.1.7
130
+ smart-open==7.1.0
131
+ pydantic==2.11.5
132
+ mdurl==0.1.2
133
+ virtualenv==20.31.2
134
+ pytz==2025.2
135
+ pycocotools==2.0.8
136
+ six==1.17.0
137
+ decorator==5.2.1
138
+ referencing==0.36.2
139
+ sentencepiece==0.2.0
140
+ PyYAML==6.0.2
141
+ pycocoevalcap==1.2
142
+ imageio==2.37.0
143
+ distlib==0.3.9
144
+ pyarrow==20.0.0
145
+ tenacity==9.1.2
146
+ language_data==1.3.0
147
+ nvidia-cuda-cupti-cu12==12.4.127
148
+ blis==1.3.0
149
+ Pygments==2.19.1
150
+ tifffile==2025.5.10
151
+ pyparsing==3.2.3
152
+ cachetools==5.5.2
153
+ safetensors==0.5.3
154
+ attrs==25.3.0
155
+ webdataset==0.2.111
156
+ plotly==6.1.1
157
+ nvidia-cublas-cu12==12.4.5.8
158
+ timm==0.4.12
159
+ torchmetrics==1.7.1
160
+ nvidia-nvjitlink-cu12==12.4.127
161
+ stack-data==0.6.3
162
+ python-dateutil==2.9.0.post0
163
+ lazy_loader==0.4
164
+ traitlets==5.14.3
165
+ einops==0.8.1
166
+ salesforce-lavis==1.0.2
167
+ joblib==1.5.1
168
+ msgpack==1.1.0
169
+ tokenizers==0.21.1
170
+ sentry-sdk==2.29.1
171
+ oss2==2.15.0
172
+ setproctitle==1.3.6
173
+ pip==25.1.1
174
+ cffi==1.17.1
175
+ transformers==4.52.3
176
+ narwhals==1.41.0
177
+ aliyun-python-sdk-core==2.16.0
178
+ jsonschema==4.24.0
179
+ flash-attn==2.7.1.post1
180
+ preshed==3.0.10
181
+ multiprocess==0.70.16
182
+ cryptography==45.0.3
183
+ aliyun-python-sdk-kms==2.16.5
184
+ scikit-learn==1.6.1
185
+ huggingface-hub==0.32.1
186
+ crcmod==1.7
187
+ typer==0.16.0
188
+ web.py==0.62
189
+ docker-pycreds==0.4.0
190
+ xxhash==3.5.0
191
+ bigmodelvis==0.0.1
192
+ datasets==3.6.0
193
+ more-itertools==10.7.0
194
+ yacs==0.1.8
195
+ jmespath==0.10.0
196
+ aiohttp==3.12.2
197
+ opencv-python==4.11.0.86
198
+ pycparser==2.22
199
+ threadpoolctl==3.6.0
200
+ jaraco.functools==4.1.0
201
+ click==8.2.1
202
+ wandb==0.19.11
203
+ opendelta==0.3.2
204
+ pycryptodome==3.23.0
205
+ pathlib==1.0.1
206
+ dill==0.3.8
207
+ fsspec==2025.3.0
208
+ delta-center-client==0.0.4
209
+ cheroot==10.0.1
210
+ typing_extensions==4.12.2
211
+ platformdirs==4.2.2
212
+ jaraco.text==3.12.1
213
+ packaging==24.2
214
+ inflect==7.3.1
215
+ jaraco.context==5.3.0
216
+ wheel==0.45.1
217
+ typeguard==4.3.0
218
+ more-itertools==10.3.0
219
+ tomli==2.0.1
220
+ importlib_metadata==8.0.0
221
+ backports.tarfile==1.2.0
222
+ zipp==3.19.2
223
+ jaraco.collections==5.1.0
224
+ autocommand==2.2.2
225
+ jaraco.functools==4.0.1
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_190145-vu5mgolt/run-vu5mgolt.wandb ADDED
File without changes