yuccaaa commited on
Commit
0ecd035
·
verified ·
1 Parent(s): d3931d8

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. BIO/ablation/FLIP_GB1_sampled.jsonl +0 -0
  2. BIO/ablation/ProtSolM.jsonl +0 -0
  3. BIO/ablation/TAPE_Stability.jsonl +0 -0
  4. BIO/ablation/antibiotic_resistance.jsonl +339 -0
  5. BIO/ablation/cloning_clf.jsonl +0 -0
  6. BIO/ablation/enzyme_commission_number.jsonl +0 -0
  7. BIO/ablation/fluorescence_prediction_test.jsonl +0 -0
  8. BIO/ablation/material_production.jsonl +0 -0
  9. BIO/ablation/material_production_test.jsonl +0 -0
  10. BIO/ablation/metal_ion_binding.jsonl +718 -0
  11. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_200229-yex1pcwt/files/output.log +35 -0
  12. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_200229-yex1pcwt/files/requirements.txt +225 -0
  13. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_200229-yex1pcwt/files/wandb-metadata.json +103 -0
  14. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_200229-yex1pcwt/files/wandb-summary.json +1 -0
  15. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_200229-yex1pcwt/logs/debug-internal.log +17 -0
  16. ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_200229-yex1pcwt/logs/debug.log +24 -0
  17. ProtT3/all_checkpoints/stage2_07021249/wandb/debug-internal.log +17 -0
  18. ProtT3/all_checkpoints/stage2_07021249/wandb/debug.log +24 -0
  19. ProtT3/all_checkpoints/stage2_07021249/wandb/run-20250702_130101-ttc1macp/files/config.yaml +222 -0
  20. ProtT3/all_checkpoints/stage2_07021249/wandb/run-20250702_130101-ttc1macp/files/output.log +35 -0
  21. ProtT3/all_checkpoints/stage2_07021249/wandb/run-20250702_130101-ttc1macp/files/requirements.txt +225 -0
  22. ProtT3/all_checkpoints/stage2_07021249/wandb/run-20250702_130101-ttc1macp/files/wandb-metadata.json +104 -0
  23. ProtT3/all_checkpoints/stage2_07021249/wandb/run-20250702_130101-ttc1macp/files/wandb-summary.json +1 -0
  24. ProtT3/all_checkpoints/stage2_07021249/wandb/run-20250702_130101-ttc1macp/logs/debug-internal.log +17 -0
  25. ProtT3/all_checkpoints/stage2_07021249/wandb/run-20250702_130101-ttc1macp/logs/debug.log +24 -0
  26. ProtT3/all_checkpoints/stage2_07041521/wandb/debug-internal.log +95 -0
  27. ProtT3/all_checkpoints/stage2_07041521/wandb/debug.log +24 -0
  28. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_152253-wgyb9m42/files/config.yaml +150 -0
  29. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_152253-wgyb9m42/files/output.log +131 -0
  30. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_152253-wgyb9m42/files/requirements.txt +225 -0
  31. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_152253-wgyb9m42/files/wandb-metadata.json +104 -0
  32. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_152253-wgyb9m42/files/wandb-summary.json +1 -0
  33. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_152253-wgyb9m42/logs/debug-internal.log +15 -0
  34. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_152253-wgyb9m42/logs/debug.log +24 -0
  35. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_152253-wgyb9m42/run-wgyb9m42.wandb +0 -0
  36. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153023-ggclx68a/files/config.yaml +150 -0
  37. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153023-ggclx68a/files/output.log +116 -0
  38. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153023-ggclx68a/files/requirements.txt +225 -0
  39. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153023-ggclx68a/files/wandb-metadata.json +104 -0
  40. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153023-ggclx68a/files/wandb-summary.json +1 -0
  41. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153023-ggclx68a/logs/debug-internal.log +15 -0
  42. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153023-ggclx68a/logs/debug.log +24 -0
  43. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153023-ggclx68a/run-ggclx68a.wandb +0 -0
  44. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153250-690krh73/files/config.yaml +150 -0
  45. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153250-690krh73/files/output.log +191 -0
  46. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153250-690krh73/files/requirements.txt +225 -0
  47. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153250-690krh73/files/wandb-metadata.json +104 -0
  48. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153250-690krh73/files/wandb-summary.json +1 -0
  49. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153250-690krh73/logs/debug-internal.log +15 -0
  50. ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153250-690krh73/run-690krh73.wandb +0 -0
BIO/ablation/FLIP_GB1_sampled.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
BIO/ablation/ProtSolM.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
BIO/ablation/TAPE_Stability.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
BIO/ablation/antibiotic_resistance.jsonl ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
2
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
3
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
4
+ {"reference_answer": "7", "generated_answer": "<answer>15</answer>"}
5
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
6
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
7
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
8
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
9
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
10
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
11
+ {"reference_answer": "4", "generated_answer": "<answer>15</answer>"}
12
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
13
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
14
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
15
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
16
+ {"reference_answer": "8", "generated_answer": "<answer>15</answer>"}
17
+ {"reference_answer": "9", "generated_answer": "<answer>15</answer>"}
18
+ {"reference_answer": "8", "generated_answer": "<answer>15</answer>"}
19
+ {"reference_answer": "14", "generated_answer": "<answer>15</answer>"}
20
+ {"reference_answer": "5", "generated_answer": "<answer>15</answer>"}
21
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
22
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
23
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
24
+ {"reference_answer": "5", "generated_answer": "<answer>15</answer>"}
25
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
26
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
27
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
28
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
29
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
30
+ {"reference_answer": "12", "generated_answer": "<answer>15</answer>"}
31
+ {"reference_answer": "12", "generated_answer": "<answer>15</answer>"}
32
+ {"reference_answer": "12", "generated_answer": "<answer>15</answer>"}
33
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
34
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
35
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
36
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
37
+ {"reference_answer": "14", "generated_answer": "<answer>15</answer>"}
38
+ {"reference_answer": "17", "generated_answer": "<answer>15</answer>"}
39
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
40
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
41
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
42
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
43
+ {"reference_answer": "10", "generated_answer": "<answer>15</answer>"}
44
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
45
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
46
+ {"reference_answer": "12", "generated_answer": "<answer>15</answer>"}
47
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
48
+ {"reference_answer": "14", "generated_answer": "<answer>15</answer>"}
49
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
50
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
51
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
52
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
53
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
54
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
55
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
56
+ {"reference_answer": "16", "generated_answer": "<answer>15</answer>"}
57
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
58
+ {"reference_answer": "12", "generated_answer": "<answer>15</answer>"}
59
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
60
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
61
+ {"reference_answer": "5", "generated_answer": "<answer>15</answer>"}
62
+ {"reference_answer": "9", "generated_answer": "<answer>15</answer>"}
63
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
64
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
65
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
66
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
67
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
68
+ {"reference_answer": "9", "generated_answer": "<answer>15</answer>"}
69
+ {"reference_answer": "1", "generated_answer": "<answer>15</answer>"}
70
+ {"reference_answer": "9", "generated_answer": "<answer>15</answer>"}
71
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
72
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
73
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
74
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
75
+ {"reference_answer": "5", "generated_answer": "<answer>15</answer>"}
76
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
77
+ {"reference_answer": "4", "generated_answer": "<answer>15</answer>"}
78
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
79
+ {"reference_answer": "10", "generated_answer": "<answer>15</answer>"}
80
+ {"reference_answer": "10", "generated_answer": "<answer>15</answer>"}
81
+ {"reference_answer": "14", "generated_answer": "<answer>15</answer>"}
82
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
83
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
84
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
85
+ {"reference_answer": "10", "generated_answer": "<answer>15</answer>"}
86
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
87
+ {"reference_answer": "12", "generated_answer": "<answer>15</answer>"}
88
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
89
+ {"reference_answer": "12", "generated_answer": "<answer>1</answer>"}
90
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
91
+ {"reference_answer": "10", "generated_answer": "<answer>15</answer>"}
92
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
93
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
94
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
95
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
96
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
97
+ {"reference_answer": "4", "generated_answer": "<answer>15</answer>"}
98
+ {"reference_answer": "15", "generated_answer": "<answer>14</answer>"}
99
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
100
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
101
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
102
+ {"reference_answer": "10", "generated_answer": "<answer>15</answer>"}
103
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
104
+ {"reference_answer": "14", "generated_answer": "<answer>15</answer>"}
105
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
106
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
107
+ {"reference_answer": "10", "generated_answer": "<answer>15</answer>"}
108
+ {"reference_answer": "17", "generated_answer": "<answer>14</answer>"}
109
+ {"reference_answer": "7", "generated_answer": "<answer>15</answer>"}
110
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
111
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
112
+ {"reference_answer": "10", "generated_answer": "<answer>15</answer>"}
113
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
114
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
115
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
116
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
117
+ {"reference_answer": "12", "generated_answer": "<answer>15</answer>"}
118
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
119
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
120
+ {"reference_answer": "5", "generated_answer": "<answer>15</answer>"}
121
+ {"reference_answer": "14", "generated_answer": "<answer>15</answer>"}
122
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
123
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
124
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
125
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
126
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
127
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
128
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
129
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
130
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
131
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
132
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
133
+ {"reference_answer": "17", "generated_answer": "<answer>15</answer>"}
134
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
135
+ {"reference_answer": "17", "generated_answer": "<answer>15</answer>"}
136
+ {"reference_answer": "5", "generated_answer": "<answer>15</answer>"}
137
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
138
+ {"reference_answer": "7", "generated_answer": "<answer>15</answer>"}
139
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
140
+ {"reference_answer": "10", "generated_answer": "<answer>15</answer>"}
141
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
142
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
143
+ {"reference_answer": "7", "generated_answer": "<answer>15</answer>"}
144
+ {"reference_answer": "16", "generated_answer": "<answer>15</answer>"}
145
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
146
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
147
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
148
+ {"reference_answer": "14", "generated_answer": "<answer>15</answer>"}
149
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
150
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
151
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
152
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
153
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
154
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
155
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
156
+ {"reference_answer": "10", "generated_answer": "<answer>15</answer>"}
157
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
158
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
159
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
160
+ {"reference_answer": "5", "generated_answer": "<answer>15</answer>"}
161
+ {"reference_answer": "14", "generated_answer": "<answer>15</answer>"}
162
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
163
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
164
+ {"reference_answer": "4", "generated_answer": "<answer>15</answer>"}
165
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
166
+ {"reference_answer": "14", "generated_answer": "<answer>15</answer>"}
167
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
168
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
169
+ {"reference_answer": "10", "generated_answer": "<answer>15</answer>"}
170
+ {"reference_answer": "10", "generated_answer": "<answer>15</answer>"}
171
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
172
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
173
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
174
+ {"reference_answer": "16", "generated_answer": "<answer>15</answer>"}
175
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
176
+ {"reference_answer": "12", "generated_answer": "<answer>15</answer>"}
177
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
178
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
179
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
180
+ {"reference_answer": "14", "generated_answer": "<answer>15</answer>"}
181
+ {"reference_answer": "5", "generated_answer": "<answer>15</answer>"}
182
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
183
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
184
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
185
+ {"reference_answer": "1", "generated_answer": "<answer>15</answer>"}
186
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
187
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
188
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
189
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
190
+ {"reference_answer": "5", "generated_answer": "<answer>15</answer>"}
191
+ {"reference_answer": "12", "generated_answer": "<answer>15</answer>"}
192
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
193
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
194
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
195
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
196
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
197
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
198
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
199
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
200
+ {"reference_answer": "7", "generated_answer": "<answer>15</answer>"}
201
+ {"reference_answer": "14", "generated_answer": "<answer>15</answer>"}
202
+ {"reference_answer": "4", "generated_answer": "<answer>15</answer>"}
203
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
204
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
205
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
206
+ {"reference_answer": "12", "generated_answer": "<answer>15</answer>"}
207
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
208
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
209
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
210
+ {"reference_answer": "5", "generated_answer": "<answer>15</answer>"}
211
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
212
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
213
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
214
+ {"reference_answer": "10", "generated_answer": "<answer>15</answer>"}
215
+ {"reference_answer": "5", "generated_answer": "<answer>15</answer>"}
216
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
217
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
218
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
219
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
220
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
221
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
222
+ {"reference_answer": "4", "generated_answer": "<answer>15</answer>"}
223
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
224
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
225
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
226
+ {"reference_answer": "5", "generated_answer": "<answer>15</answer>"}
227
+ {"reference_answer": "1", "generated_answer": "<answer>15</answer>"}
228
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
229
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
230
+ {"reference_answer": "12", "generated_answer": "<answer>15</answer>"}
231
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
232
+ {"reference_answer": "16", "generated_answer": "<answer>15</answer>"}
233
+ {"reference_answer": "10", "generated_answer": "<answer>15</answer>"}
234
+ {"reference_answer": "12", "generated_answer": "<answer>15</answer>"}
235
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
236
+ {"reference_answer": "9", "generated_answer": "<answer>15</answer>"}
237
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
238
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
239
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
240
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
241
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
242
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
243
+ {"reference_answer": "10", "generated_answer": "<answer>15</answer>"}
244
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
245
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
246
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
247
+ {"reference_answer": "12", "generated_answer": "<answer>15</answer>"}
248
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
249
+ {"reference_answer": "12", "generated_answer": "<answer>15</answer>"}
250
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
251
+ {"reference_answer": "7", "generated_answer": "<answer>15</answer>"}
252
+ {"reference_answer": "1", "generated_answer": "<answer>15</answer>"}
253
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
254
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
255
+ {"reference_answer": "5", "generated_answer": "<answer>14</answer>"}
256
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
257
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
258
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
259
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
260
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
261
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
262
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
263
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
264
+ {"reference_answer": "12", "generated_answer": "<answer>15</answer>"}
265
+ {"reference_answer": "12", "generated_answer": "<answer>15</answer>"}
266
+ {"reference_answer": "17", "generated_answer": "<answer>15</answer>"}
267
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
268
+ {"reference_answer": "14", "generated_answer": "<answer>15</answer>"}
269
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
270
+ {"reference_answer": "9", "generated_answer": "<answer>15</answer>"}
271
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
272
+ {"reference_answer": "5", "generated_answer": "<answer>15</answer>"}
273
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
274
+ {"reference_answer": "12", "generated_answer": "<answer>15</answer>"}
275
+ {"reference_answer": "9", "generated_answer": "<answer>15</answer>"}
276
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
277
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
278
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
279
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
280
+ {"reference_answer": "12", "generated_answer": "<answer>15</answer>"}
281
+ {"reference_answer": "9", "generated_answer": "<answer>15</answer>"}
282
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
283
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
284
+ {"reference_answer": "5", "generated_answer": "<answer>15</answer>"}
285
+ {"reference_answer": "1", "generated_answer": "<answer>15</answer>"}
286
+ {"reference_answer": "16", "generated_answer": "<answer>15</answer>"}
287
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
288
+ {"reference_answer": "12", "generated_answer": "<answer>15</answer>"}
289
+ {"reference_answer": "14", "generated_answer": "<answer>15</answer>"}
290
+ {"reference_answer": "14", "generated_answer": "<answer>15</answer>"}
291
+ {"reference_answer": "5", "generated_answer": "<answer>15</answer>"}
292
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
293
+ {"reference_answer": "14", "generated_answer": "<answer>14</answer>"}
294
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
295
+ {"reference_answer": "14", "generated_answer": "<answer>15</answer>"}
296
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
297
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
298
+ {"reference_answer": "5", "generated_answer": "<answer>15</answer>"}
299
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
300
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
301
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
302
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
303
+ {"reference_answer": "4", "generated_answer": "<answer>15</answer>"}
304
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
305
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
306
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
307
+ {"reference_answer": "15", "generated_answer": "<answer>14</answer>"}
308
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
309
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
310
+ {"reference_answer": "12", "generated_answer": "<answer>15</answer>"}
311
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
312
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
313
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
314
+ {"reference_answer": "10", "generated_answer": "<answer>15</answer>"}
315
+ {"reference_answer": "12", "generated_answer": "<answer>15</answer>"}
316
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
317
+ {"reference_answer": "9", "generated_answer": "<answer>15</answer>"}
318
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
319
+ {"reference_answer": "7", "generated_answer": "<answer>15</answer>"}
320
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
321
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
322
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
323
+ {"reference_answer": "5", "generated_answer": "<answer>15</answer>"}
324
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
325
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
326
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
327
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
328
+ {"reference_answer": "16", "generated_answer": "<answer>15</answer>"}
329
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
330
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
331
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
332
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
333
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
334
+ {"reference_answer": "4", "generated_answer": "<answer>15</answer>"}
335
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
336
+ {"reference_answer": "15", "generated_answer": "<answer>15</answer>"}
337
+ {"reference_answer": "11", "generated_answer": "<answer>15</answer>"}
338
+ {"reference_answer": "5", "generated_answer": "<answer>15</answer>"}
339
+ {"reference_answer": "2", "generated_answer": "<answer>15</answer>"}
BIO/ablation/cloning_clf.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
BIO/ablation/enzyme_commission_number.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
BIO/ablation/fluorescence_prediction_test.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
BIO/ablation/material_production.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
BIO/ablation/material_production_test.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
BIO/ablation/metal_ion_binding.jsonl ADDED
@@ -0,0 +1,718 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
2
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
3
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
4
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
5
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
6
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
7
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
8
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
9
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
10
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
11
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
12
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
13
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
14
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
15
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
16
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
17
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
18
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
19
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
20
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
21
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
22
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
23
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
24
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
25
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
26
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
27
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
28
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
29
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
30
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
31
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
32
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
33
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
34
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
35
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
36
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
37
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
38
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
39
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
40
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
41
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
42
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
43
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
44
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
45
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
46
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
47
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
48
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
49
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
50
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
51
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
52
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
53
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
54
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
55
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
56
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
57
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
58
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
59
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
60
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
61
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
62
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
63
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
64
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
65
+ {"reference_answer": "0", "generated_answer": "<answer>0</answer>"}
66
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
67
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
68
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
69
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
70
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
71
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
72
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
73
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
74
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
75
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
76
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
77
+ {"reference_answer": "0", "generated_answer": "<answer>0</answer>"}
78
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
79
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
80
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
81
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
82
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
83
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
84
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
85
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
86
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
87
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
88
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
89
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
90
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
91
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
92
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
93
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
94
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
95
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
96
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
97
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
98
+ {"reference_answer": "1", "generated_answer": "<answer>0</answer>"}
99
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
100
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
101
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
102
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
103
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
104
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
105
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
106
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
107
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
108
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
109
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
110
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
111
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
112
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
113
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
114
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
115
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
116
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
117
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
118
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
119
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
120
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
121
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
122
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
123
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
124
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
125
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
126
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
127
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
128
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
129
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
130
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
131
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
132
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
133
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
134
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
135
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
136
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
137
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
138
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
139
+ {"reference_answer": "0", "generated_answer": "<answer>0</answer>"}
140
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
141
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
142
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
143
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
144
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
145
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
146
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
147
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
148
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
149
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
150
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
151
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
152
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
153
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
154
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
155
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
156
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
157
+ {"reference_answer": "0", "generated_answer": "<answer>0</answer>"}
158
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
159
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
160
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
161
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
162
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
163
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
164
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
165
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
166
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
167
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
168
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
169
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
170
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
171
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
172
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
173
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
174
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
175
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
176
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
177
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
178
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
179
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
180
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
181
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
182
+ {"reference_answer": "0", "generated_answer": "<answer>0</answer>"}
183
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
184
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
185
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
186
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
187
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
188
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
189
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
190
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
191
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
192
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
193
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
194
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
195
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
196
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
197
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
198
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
199
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
200
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
201
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
202
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
203
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
204
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
205
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
206
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
207
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
208
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
209
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
210
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
211
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
212
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
213
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
214
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
215
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
216
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
217
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
218
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
219
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
220
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
221
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
222
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
223
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
224
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
225
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
226
+ {"reference_answer": "0", "generated_answer": "<answer>0</answer>"}
227
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
228
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
229
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
230
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
231
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
232
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
233
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
234
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
235
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
236
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
237
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
238
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
239
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
240
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
241
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
242
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
243
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
244
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
245
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
246
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
247
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
248
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
249
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
250
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
251
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
252
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
253
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
254
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
255
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
256
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
257
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
258
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
259
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
260
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
261
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
262
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
263
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
264
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
265
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
266
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
267
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
268
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
269
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
270
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
271
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
272
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
273
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
274
+ {"reference_answer": "0", "generated_answer": "<answer>0</answer>"}
275
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
276
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
277
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
278
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
279
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
280
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
281
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
282
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
283
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
284
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
285
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
286
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
287
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
288
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
289
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
290
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
291
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
292
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
293
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
294
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
295
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
296
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
297
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
298
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
299
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
300
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
301
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
302
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
303
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
304
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
305
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
306
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
307
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
308
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
309
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
310
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
311
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
312
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
313
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
314
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
315
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
316
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
317
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
318
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
319
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
320
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
321
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
322
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
323
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
324
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
325
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
326
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
327
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
328
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
329
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
330
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
331
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
332
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
333
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
334
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
335
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
336
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
337
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
338
+ {"reference_answer": "0", "generated_answer": "<answer>0</answer>"}
339
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
340
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
341
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
342
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
343
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
344
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
345
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
346
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
347
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
348
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
349
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
350
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
351
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
352
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
353
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
354
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
355
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
356
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
357
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
358
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
359
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
360
+ {"reference_answer": "0", "generated_answer": "<answer>0</answer>"}
361
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
362
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
363
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
364
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
365
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
366
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
367
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
368
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
369
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
370
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
371
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
372
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
373
+ {"reference_answer": "1", "generated_answer": "<answer>0</answer>"}
374
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
375
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
376
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
377
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
378
+ {"reference_answer": "0", "generated_answer": "<answer>0</answer>"}
379
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
380
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
381
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
382
+ {"reference_answer": "0", "generated_answer": "<answer>0</answer>"}
383
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
384
+ {"reference_answer": "0", "generated_answer": "<answer>0</answer>"}
385
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
386
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
387
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
388
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
389
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
390
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
391
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
392
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
393
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
394
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
395
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
396
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
397
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
398
+ {"reference_answer": "0", "generated_answer": "<answer>0</answer>"}
399
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
400
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
401
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
402
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
403
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
404
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
405
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
406
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
407
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
408
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
409
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
410
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
411
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
412
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
413
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
414
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
415
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
416
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
417
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
418
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
419
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
420
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
421
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
422
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
423
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
424
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
425
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
426
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
427
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
428
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
429
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
430
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
431
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
432
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
433
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
434
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
435
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
436
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
437
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
438
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
439
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
440
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
441
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
442
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
443
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
444
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
445
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
446
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
447
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
448
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
449
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
450
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
451
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
452
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
453
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
454
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
455
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
456
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
457
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
458
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
459
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
460
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
461
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
462
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
463
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
464
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
465
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
466
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
467
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
468
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
469
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
470
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
471
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
472
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
473
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
474
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
475
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
476
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
477
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
478
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
479
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
480
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
481
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
482
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
483
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
484
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
485
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
486
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
487
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
488
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
489
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
490
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
491
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
492
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
493
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
494
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
495
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
496
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
497
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
498
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
499
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
500
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
501
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
502
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
503
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
504
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
505
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
506
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
507
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
508
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
509
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
510
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
511
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
512
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
513
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
514
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
515
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
516
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
517
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
518
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
519
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
520
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
521
+ {"reference_answer": "0", "generated_answer": "<answer>0</answer>"}
522
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
523
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
524
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
525
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
526
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
527
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
528
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
529
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
530
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
531
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
532
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
533
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
534
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
535
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
536
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
537
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
538
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
539
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
540
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
541
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
542
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
543
+ {"reference_answer": "0", "generated_answer": "<answer>0</answer>"}
544
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
545
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
546
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
547
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
548
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
549
+ {"reference_answer": "0", "generated_answer": "<answer>0</answer>"}
550
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
551
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
552
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
553
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
554
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
555
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
556
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
557
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
558
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
559
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
560
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
561
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
562
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
563
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
564
+ {"reference_answer": "1", "generated_answer": "<answer>0</answer>"}
565
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
566
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
567
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
568
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
569
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
570
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
571
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
572
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
573
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
574
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
575
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
576
+ {"reference_answer": "1", "generated_answer": "<answer>0</answer>"}
577
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
578
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
579
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
580
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
581
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
582
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
583
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
584
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
585
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
586
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
587
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
588
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
589
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
590
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
591
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
592
+ {"reference_answer": "1", "generated_answer": "<answer>0</answer>"}
593
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
594
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
595
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
596
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
597
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
598
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
599
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
600
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
601
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
602
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
603
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
604
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
605
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
606
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
607
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
608
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
609
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
610
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
611
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
612
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
613
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
614
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
615
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
616
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
617
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
618
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
619
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
620
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
621
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
622
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
623
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
624
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
625
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
626
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
627
+ {"reference_answer": "0", "generated_answer": "<answer>0</answer>"}
628
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
629
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
630
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
631
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
632
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
633
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
634
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
635
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
636
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
637
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
638
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
639
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
640
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
641
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
642
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
643
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
644
+ {"reference_answer": "1", "generated_answer": "<answer>0</answer>"}
645
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
646
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
647
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
648
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
649
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
650
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
651
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
652
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
653
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
654
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
655
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
656
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
657
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
658
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
659
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
660
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
661
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
662
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
663
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
664
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
665
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
666
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
667
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
668
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
669
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
670
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
671
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
672
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
673
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
674
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
675
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
676
+ {"reference_answer": "0", "generated_answer": "<answer>0</answer>"}
677
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
678
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
679
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
680
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
681
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
682
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
683
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
684
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
685
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
686
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
687
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
688
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
689
+ {"reference_answer": "0", "generated_answer": "<answer>0</answer>"}
690
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
691
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
692
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
693
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
694
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
695
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
696
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
697
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
698
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
699
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
700
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
701
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
702
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
703
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
704
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
705
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
706
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
707
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
708
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
709
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
710
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
711
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
712
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
713
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
714
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
715
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
716
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
717
+ {"reference_answer": "1", "generated_answer": "<answer>1</answer>"}
718
+ {"reference_answer": "0", "generated_answer": "<answer>1</answer>"}
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_200229-yex1pcwt/files/output.log ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2.5_mol_instruction exists and is not empty.
2
+ Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.
3
+ LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
4
+
5
+ | Name | Type | Params | Mode
6
+ -------------------------------------------
7
+ 0 | blip2 | Blip2OPT | 7.9 B | train
8
+ -------------------------------------------
9
+ 104 M Trainable params
10
+ 7.8 B Non-trainable params
11
+ 7.9 B Total params
12
+ 31,459.025Total estimated model params size (MB)
13
+ 174 Modules in train mode
14
+ 1203 Modules in eval mode
15
+ Epoch 9: 100%|████████████████████████████████████████████████████| 6572/6572 [22:30<00:00, 4.87it/s, v_num=pcwt]BLEU-2 score: 34.121767513883164
16
+ BLEU-4 score: 29.415650694730406████████████████████████████████████████████| 3287/3287 [4:02:24<00:00, 0.23it/s]
17
+ /nas/shared/kilab/wangyujia/ProtT3/model/dist_funs.py:18: FutureWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/main/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.
18
+ sd = self.module.state_dict(destination, prefix, keep_vars)
19
+ 105160it [04:48, 364.98it/s]
20
+ 105160it [01:55, 912.27it/s]
21
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:434: It is recommended to use `self.log('dataset0/acc', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
22
+ Average Meteor score: 58.413905959274196
23
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:434: It is recommended to use `self.log('dataset0/bleu2', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
24
+ ROUGE score:
25
+ rouge1: 46.26000494549008
26
+ rouge2: 35.3808428047301
27
+ rougeL: 39.73314434393358
28
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:434: It is recommended to use `self.log('dataset0/bleu4', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
29
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:434: It is recommended to use `self.log('dataset0/rouge_1', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
30
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:434: It is recommended to use `self.log('dataset0/rouge_2', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
31
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:434: It is recommended to use `self.log('dataset0/rouge_l', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
32
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:434: It is recommended to use `self.log('dataset0/meteor_score', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
33
+ Epoch 9: 100%|██████████████████████████████████████████████████| 6572/6572 [4:42:04<00:00, 0.39it/s, v_num=pcwt]
34
+
35
+ `Trainer.fit` stopped: `max_epochs=10` reached.
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_200229-yex1pcwt/files/requirements.txt ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pydantic_core==2.33.2
2
+ psutil==7.0.0
3
+ nvidia-cuda-nvrtc-cu12==12.4.127
4
+ mpmath==1.3.0
5
+ tzdata==2025.2
6
+ contexttimer==0.3.3
7
+ cycler==0.12.1
8
+ python-magic==0.4.27
9
+ pexpect==4.9.0
10
+ sympy==1.13.1
11
+ wrapt==1.17.2
12
+ marisa-trie==1.2.1
13
+ langcodes==3.5.0
14
+ nvidia-nvtx-cu12==12.4.127
15
+ ipython==8.36.0
16
+ opencv-python-headless==4.5.5.64
17
+ MarkupSafe==3.0.2
18
+ jsonschema-specifications==2025.4.1
19
+ wasabi==1.1.3
20
+ blinker==1.9.0
21
+ cfgv==3.4.0
22
+ numpy==2.2.6
23
+ idna==3.10
24
+ nvidia-cufile-cu12==1.11.1.6
25
+ ninja==1.11.1.4
26
+ nvidia-nccl-cu12==2.21.5
27
+ networkx==3.4.2
28
+ certifi==2025.4.26
29
+ deepspeed==0.16.10+b666844f
30
+ pure_eval==0.2.3
31
+ packaging==24.2
32
+ nltk==3.9.1
33
+ contourpy==1.3.2
34
+ pre_commit==4.2.0
35
+ nodeenv==1.9.1
36
+ setuptools==78.1.1
37
+ annotated-types==0.7.0
38
+ multidict==6.4.4
39
+ requests==2.32.3
40
+ tornado==6.5.1
41
+ triton==3.2.0
42
+ pillow==11.2.1
43
+ decord==0.6.0
44
+ shellingham==1.5.4
45
+ streamlit==1.45.1
46
+ pydeck==0.9.1
47
+ confection==0.1.5
48
+ exceptiongroup==1.3.0
49
+ prompt_toolkit==3.0.51
50
+ text-unidecode==1.3
51
+ nvidia-cufft-cu12==11.2.1.3
52
+ antlr4-python3-runtime==4.9.3
53
+ fairscale==0.4.4
54
+ rouge_score==0.1.2
55
+ nvidia-cudnn-cu12==9.1.0.70
56
+ tqdm==4.67.1
57
+ rich==14.0.0
58
+ frozenlist==1.6.0
59
+ webencodings==0.5.1
60
+ altair==5.5.0
61
+ opendatasets==0.1.22
62
+ nvidia-curand-cu12==10.3.5.147
63
+ protobuf==6.31.0
64
+ asttokens==3.0.0
65
+ wheel==0.45.1
66
+ hf-xet==1.1.2
67
+ weasel==0.4.1
68
+ aiosignal==1.3.2
69
+ absl-py==2.2.2
70
+ thinc==8.3.6
71
+ torchvision==0.21.0
72
+ pandas==2.2.3
73
+ fonttools==4.58.0
74
+ bleach==6.2.0
75
+ typing-inspection==0.4.1
76
+ ftfy==6.3.1
77
+ typing_extensions==4.13.2
78
+ nvidia-ml-py==12.575.51
79
+ python-slugify==8.0.4
80
+ lightning-utilities==0.14.3
81
+ py-cpuinfo==9.0.0
82
+ smmap==5.0.2
83
+ regex==2024.11.6
84
+ scikit-image==0.25.2
85
+ iopath==0.1.10
86
+ spacy-legacy==3.0.12
87
+ hjson==3.1.0
88
+ executing==2.2.0
89
+ kiwisolver==1.4.8
90
+ scipy==1.15.3
91
+ aiohappyeyeballs==2.6.1
92
+ toml==0.10.2
93
+ jedi==0.19.2
94
+ GitPython==3.1.44
95
+ ptyprocess==0.7.0
96
+ kaggle==1.7.4.5
97
+ braceexpand==0.1.7
98
+ wcwidth==0.2.13
99
+ nvidia-cuda-runtime-cu12==12.4.127
100
+ pytorch-lightning==2.5.1.post0
101
+ Jinja2==3.1.6
102
+ urllib3==2.4.0
103
+ watchdog==6.0.0
104
+ filelock==3.18.0
105
+ propcache==0.3.1
106
+ torch==2.6.0
107
+ nvidia-cusparse-cu12==12.3.1.170
108
+ cymem==2.0.11
109
+ nvidia-cusolver-cu12==11.6.1.9
110
+ murmurhash==1.0.13
111
+ catalogue==2.0.10
112
+ yarl==1.20.0
113
+ charset-normalizer==3.4.2
114
+ gitdb==4.0.12
115
+ matplotlib==3.10.3
116
+ portalocker==3.1.1
117
+ platformdirs==4.3.8
118
+ async-timeout==5.0.1
119
+ parso==0.8.4
120
+ markdown-it-py==3.0.0
121
+ omegaconf==2.3.0
122
+ cloudpathlib==0.21.1
123
+ nvidia-cusparselt-cu12==0.6.2
124
+ spacy-loggers==1.0.5
125
+ srsly==2.5.1
126
+ identify==2.6.12
127
+ rpds-py==0.25.1
128
+ spacy==3.8.7
129
+ matplotlib-inline==0.1.7
130
+ smart-open==7.1.0
131
+ pydantic==2.11.5
132
+ mdurl==0.1.2
133
+ virtualenv==20.31.2
134
+ pytz==2025.2
135
+ pycocotools==2.0.8
136
+ six==1.17.0
137
+ decorator==5.2.1
138
+ referencing==0.36.2
139
+ sentencepiece==0.2.0
140
+ PyYAML==6.0.2
141
+ pycocoevalcap==1.2
142
+ imageio==2.37.0
143
+ distlib==0.3.9
144
+ pyarrow==20.0.0
145
+ tenacity==9.1.2
146
+ language_data==1.3.0
147
+ nvidia-cuda-cupti-cu12==12.4.127
148
+ blis==1.3.0
149
+ Pygments==2.19.1
150
+ tifffile==2025.5.10
151
+ pyparsing==3.2.3
152
+ cachetools==5.5.2
153
+ safetensors==0.5.3
154
+ attrs==25.3.0
155
+ webdataset==0.2.111
156
+ plotly==6.1.1
157
+ nvidia-cublas-cu12==12.4.5.8
158
+ timm==0.4.12
159
+ torchmetrics==1.7.1
160
+ nvidia-nvjitlink-cu12==12.4.127
161
+ stack-data==0.6.3
162
+ python-dateutil==2.9.0.post0
163
+ lazy_loader==0.4
164
+ traitlets==5.14.3
165
+ einops==0.8.1
166
+ salesforce-lavis==1.0.2
167
+ joblib==1.5.1
168
+ msgpack==1.1.0
169
+ tokenizers==0.21.1
170
+ sentry-sdk==2.29.1
171
+ oss2==2.15.0
172
+ setproctitle==1.3.6
173
+ pip==25.1.1
174
+ cffi==1.17.1
175
+ transformers==4.52.3
176
+ narwhals==1.41.0
177
+ aliyun-python-sdk-core==2.16.0
178
+ jsonschema==4.24.0
179
+ flash-attn==2.7.1.post1
180
+ preshed==3.0.10
181
+ multiprocess==0.70.16
182
+ cryptography==45.0.3
183
+ aliyun-python-sdk-kms==2.16.5
184
+ scikit-learn==1.6.1
185
+ huggingface-hub==0.32.1
186
+ crcmod==1.7
187
+ typer==0.16.0
188
+ web.py==0.62
189
+ docker-pycreds==0.4.0
190
+ xxhash==3.5.0
191
+ bigmodelvis==0.0.1
192
+ datasets==3.6.0
193
+ more-itertools==10.7.0
194
+ yacs==0.1.8
195
+ jmespath==0.10.0
196
+ aiohttp==3.12.2
197
+ opencv-python==4.11.0.86
198
+ pycparser==2.22
199
+ threadpoolctl==3.6.0
200
+ jaraco.functools==4.1.0
201
+ click==8.2.1
202
+ wandb==0.19.11
203
+ opendelta==0.3.2
204
+ pycryptodome==3.23.0
205
+ pathlib==1.0.1
206
+ dill==0.3.8
207
+ fsspec==2025.3.0
208
+ delta-center-client==0.0.4
209
+ cheroot==10.0.1
210
+ typing_extensions==4.12.2
211
+ platformdirs==4.2.2
212
+ jaraco.text==3.12.1
213
+ packaging==24.2
214
+ inflect==7.3.1
215
+ jaraco.context==5.3.0
216
+ wheel==0.45.1
217
+ typeguard==4.3.0
218
+ more-itertools==10.3.0
219
+ tomli==2.0.1
220
+ importlib_metadata==8.0.0
221
+ backports.tarfile==1.2.0
222
+ zipp==3.19.2
223
+ jaraco.collections==5.1.0
224
+ autocommand==2.2.2
225
+ jaraco.functools==4.0.1
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_200229-yex1pcwt/files/wandb-metadata.json ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.0",
4
+ "startedAt": "2025-07-07T12:02:29.753102Z",
5
+ "args": [
6
+ "--devices",
7
+ "0,1,2,3,4,5,6,7",
8
+ "--mode",
9
+ "train",
10
+ "--filename",
11
+ "stage2.5_mol_instruction",
12
+ "--num_query_token",
13
+ "8",
14
+ "--save_every_n_epochs",
15
+ "1",
16
+ "--max_epochs",
17
+ "10",
18
+ "--batch_size",
19
+ "2",
20
+ "--precision",
21
+ "bf16-mixed",
22
+ "--num_workers",
23
+ "8",
24
+ "--plm_model",
25
+ "/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m",
26
+ "--bert_name",
27
+ "/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft",
28
+ "--llm_name",
29
+ "/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300",
30
+ "--llm_tune",
31
+ "mid_lora",
32
+ "--stage1_path",
33
+ "/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt",
34
+ "--use_wandb_logger"
35
+ ],
36
+ "program": "/nas/shared/kilab/wangyujia/ProtT3/stage2.py",
37
+ "codePath": "stage2.py",
38
+ "email": "gia0603yucca@gmail.com",
39
+ "root": "./all_checkpoints/stage2.5_mol_instruction/",
40
+ "host": "dsw-265304-7f6db6b4bb-g4b9r",
41
+ "executable": "/root/miniconda3/envs/protT3/bin/python",
42
+ "codePathLocal": "stage2.py",
43
+ "cpu_count": 64,
44
+ "cpu_count_logical": 64,
45
+ "gpu": "NVIDIA A800-SXM4-80GB",
46
+ "gpu_count": 8,
47
+ "disk": {
48
+ "/": {
49
+ "total": "1623302262784",
50
+ "used": "1260978176"
51
+ }
52
+ },
53
+ "memory": {
54
+ "total": "549755813888"
55
+ },
56
+ "cpu": {
57
+ "count": 64,
58
+ "countLogical": 64
59
+ },
60
+ "gpu_nvidia": [
61
+ {
62
+ "name": "NVIDIA A800-SXM4-80GB",
63
+ "memoryTotal": "85198045184",
64
+ "architecture": "Ampere"
65
+ },
66
+ {
67
+ "name": "NVIDIA A800-SXM4-80GB",
68
+ "memoryTotal": "85198045184",
69
+ "architecture": "Ampere"
70
+ },
71
+ {
72
+ "name": "NVIDIA A800-SXM4-80GB",
73
+ "memoryTotal": "85198045184",
74
+ "architecture": "Ampere"
75
+ },
76
+ {
77
+ "name": "NVIDIA A800-SXM4-80GB",
78
+ "memoryTotal": "85198045184",
79
+ "architecture": "Ampere"
80
+ },
81
+ {
82
+ "name": "NVIDIA A800-SXM4-80GB",
83
+ "memoryTotal": "85198045184",
84
+ "architecture": "Ampere"
85
+ },
86
+ {
87
+ "name": "NVIDIA A800-SXM4-80GB",
88
+ "memoryTotal": "85198045184",
89
+ "architecture": "Ampere"
90
+ },
91
+ {
92
+ "name": "NVIDIA A800-SXM4-80GB",
93
+ "memoryTotal": "85198045184",
94
+ "architecture": "Ampere"
95
+ },
96
+ {
97
+ "name": "NVIDIA A800-SXM4-80GB",
98
+ "memoryTotal": "85198045184",
99
+ "architecture": "Ampere"
100
+ }
101
+ ],
102
+ "cudaVersion": "12.1"
103
+ }
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_200229-yex1pcwt/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"trainer/global_step":65719,"dataset0/rouge_l":39.7331428527832,"_runtime":33448.966741938,"_wandb":{"runtime":33462},"epoch":9,"dataset0/rouge_2":35.38084411621094,"loss":0.07072551548480988,"dataset0/bleu2":34.121768951416016,"dataset0/rouge_1":46.260005950927734,"dataloader0/val loss/dataloader_idx_0":0.18945328891277313,"dataset0/meteor_score":58.41390609741211,"lr":1.2202456673549023e-05,"_step":1323,"dataset0/acc":0,"_timestamp":1.7519231987190707e+09,"dataset0/bleu4":29.415651321411133}
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_200229-yex1pcwt/logs/debug-internal.log ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-07-07T20:02:29.75666986+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_200229-yex1pcwt/logs/debug-core.log"}
2
+ {"time":"2025-07-07T20:02:30.922635932+08:00","level":"INFO","msg":"created new stream","id":"yex1pcwt"}
3
+ {"time":"2025-07-07T20:02:30.922678667+08:00","level":"INFO","msg":"stream: started","id":"yex1pcwt"}
4
+ {"time":"2025-07-07T20:02:30.922713833+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"yex1pcwt"}
5
+ {"time":"2025-07-07T20:02:30.922757284+08:00","level":"INFO","msg":"sender: started","stream_id":"yex1pcwt"}
6
+ {"time":"2025-07-07T20:02:30.92278615+08:00","level":"INFO","msg":"handler: started","stream_id":"yex1pcwt"}
7
+ {"time":"2025-07-07T20:02:32.296458789+08:00","level":"INFO","msg":"Starting system monitor"}
8
+ {"time":"2025-07-08T01:01:50.39071972+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": read tcp 10.1.4.159:39416->104.21.20.172:443: read: connection reset by peer"}
9
+ {"time":"2025-07-08T01:07:45.887474022+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2.5_mol_instruction/yex1pcwt/file_stream\": read tcp 10.1.4.159:48838->172.67.193.61:443: read: connection timed out"}
10
+ {"time":"2025-07-08T05:20:12.207426797+08:00","level":"INFO","msg":"stream: closing","id":"yex1pcwt"}
11
+ {"time":"2025-07-08T05:20:12.207468139+08:00","level":"INFO","msg":"Stopping system monitor"}
12
+ {"time":"2025-07-08T05:20:12.208684636+08:00","level":"INFO","msg":"Stopped system monitor"}
13
+ {"time":"2025-07-08T05:20:13.938647534+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
14
+ {"time":"2025-07-08T05:20:14.977621841+08:00","level":"INFO","msg":"handler: closed","stream_id":"yex1pcwt"}
15
+ {"time":"2025-07-08T05:20:14.977653692+08:00","level":"INFO","msg":"sender: closed","stream_id":"yex1pcwt"}
16
+ {"time":"2025-07-08T05:20:14.977651902+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"yex1pcwt"}
17
+ {"time":"2025-07-08T05:20:14.982274952+08:00","level":"INFO","msg":"stream: closed","id":"yex1pcwt"}
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_200229-yex1pcwt/logs/debug.log ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
2
+ 2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_setup.py:_flush():70] Configure stats pid to 129761
3
+ 2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
4
+ 2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
5
+ 2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_setup.py:_flush():70] Loading settings from environment variables
6
+ 2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_200229-yex1pcwt/logs/debug.log
7
+ 2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_200229-yex1pcwt/logs/debug-internal.log
8
+ 2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_init.py:init():852] calling init triggers
9
+ 2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_init.py:init():893] starting backend
12
+ 2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_init.py:init():897] sending inform_init request
13
+ 2025-07-07 20:02:29,745 INFO MainThread:129761 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-07-07 20:02:29,748 INFO MainThread:129761 [wandb_init.py:init():907] backend started and connected
15
+ 2025-07-07 20:02:29,754 INFO MainThread:129761 [wandb_init.py:init():1005] updated telemetry
16
+ 2025-07-07 20:02:29,762 INFO MainThread:129761 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
17
+ 2025-07-07 20:02:32,258 INFO MainThread:129761 [wandb_init.py:init():1104] starting run threads in backend
18
+ 2025-07-07 20:02:32,427 INFO MainThread:129761 [wandb_run.py:_console_start():2573] atexit reg
19
+ 2025-07-07 20:02:32,427 INFO MainThread:129761 [wandb_run.py:_redirect():2421] redirect: wrap_raw
20
+ 2025-07-07 20:02:32,457 INFO MainThread:129761 [wandb_run.py:_redirect():2490] Wrapping output streams.
21
+ 2025-07-07 20:02:32,462 INFO MainThread:129761 [wandb_run.py:_redirect():2513] Redirects installed.
22
+ 2025-07-07 20:02:32,463 INFO MainThread:129761 [wandb_init.py:init():1150] run started, returning control to user process
23
+ 2025-07-07 20:02:40,689 INFO MainThread:129761 [wandb_run.py:_config_callback():1436] config_cb None None {'filename': 'stage2.5_mol_instruction', 'seed': 42, 'mode': 'train', 'strategy': 'deepspeed', 'accelerator': 'gpu', 'devices': '0,1,2,3,4,5,6,7', 'precision': 'bf16-mixed', 'max_epochs': 10, 'accumulate_grad_batches': 1, 'check_val_every_n_epoch': 1, 'enable_flash': False, 'use_wandb_logger': True, 'mix_dataset': False, 'save_every_n_epochs': 1, 'bert_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft', 'cross_attention_freq': 2, 'num_query_token': 8, 'llm_name': '/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300', 'num_beams': 5, 'do_sample': False, 'max_inference_len': 128, 'min_inference_len': 1, 'llm_tune': 'mid_lora', 'peft_config': '', 'peft_dir': '', 'plm_model': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m', 'plm_tune': 'freeze', 'lora_r': 8, 'lora_alpha': 16, 'lora_dropout': 0.1, 'enbale_gradient_checkpointing': False, 'weight_decay': 0.05, 'init_lr': 0.0001, 'min_lr': 1e-05, 'warmup_lr': 1e-06, 'warmup_steps': 1000, 'lr_decay_rate': 0.9, 'scheduler': 'linear_warmup_cosine_lr', 'stage1_path': '/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt', 'stage2_path': '', 'init_checkpoint': '', 'caption_eval_epoch': 10, 'num_workers': 8, 'batch_size': 2, 'inference_batch_size': 4, 'root': 'data', 'text_max_len': 1024, 'q_max_len': 29, 'a_max_len': 36, 'prot_max_len': 1024, 'prompt': 'The protein has the following properties: ', 'filter_side_qa': False}
24
+ 2025-07-08 05:20:12,205 INFO MsgRouterThr:129761 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
ProtT3/all_checkpoints/stage2_07021249/wandb/debug-internal.log ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-07-02T13:01:02.517398459+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage2_07021249/wandb/run-20250702_130101-ttc1macp/logs/debug-core.log"}
2
+ {"time":"2025-07-02T13:01:03.639276653+08:00","level":"INFO","msg":"created new stream","id":"ttc1macp"}
3
+ {"time":"2025-07-02T13:01:03.63932174+08:00","level":"INFO","msg":"stream: started","id":"ttc1macp"}
4
+ {"time":"2025-07-02T13:01:03.639331986+08:00","level":"INFO","msg":"handler: started","stream_id":"ttc1macp"}
5
+ {"time":"2025-07-02T13:01:03.639357726+08:00","level":"INFO","msg":"sender: started","stream_id":"ttc1macp"}
6
+ {"time":"2025-07-02T13:01:03.639363515+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"ttc1macp"}
7
+ {"time":"2025-07-02T13:01:05.044613784+08:00","level":"INFO","msg":"Starting system monitor"}
8
+ {"time":"2025-07-02T14:25:40.366902458+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07021249/ttc1macp/file_stream\": unexpected EOF"}
9
+ {"time":"2025-07-02T15:45:50.370249421+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
10
+ {"time":"2025-07-02T17:37:30.121166513+08:00","level":"INFO","msg":"stream: closing","id":"ttc1macp"}
11
+ {"time":"2025-07-02T17:37:30.121209819+08:00","level":"INFO","msg":"Stopping system monitor"}
12
+ {"time":"2025-07-02T17:37:30.137879266+08:00","level":"INFO","msg":"Stopped system monitor"}
13
+ {"time":"2025-07-02T17:39:43.997948982+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
14
+ {"time":"2025-07-02T17:39:45.216379261+08:00","level":"INFO","msg":"handler: closed","stream_id":"ttc1macp"}
15
+ {"time":"2025-07-02T17:39:45.216437206+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"ttc1macp"}
16
+ {"time":"2025-07-02T17:39:45.216450748+08:00","level":"INFO","msg":"sender: closed","stream_id":"ttc1macp"}
17
+ {"time":"2025-07-02T17:39:45.269415307+08:00","level":"INFO","msg":"stream: closed","id":"ttc1macp"}
ProtT3/all_checkpoints/stage2_07021249/wandb/debug.log ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-07-02 13:01:02,446 INFO MainThread:2341 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
2
+ 2025-07-02 13:01:02,446 INFO MainThread:2341 [wandb_setup.py:_flush():70] Configure stats pid to 2341
3
+ 2025-07-02 13:01:02,446 INFO MainThread:2341 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
4
+ 2025-07-02 13:01:02,446 INFO MainThread:2341 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
5
+ 2025-07-02 13:01:02,446 INFO MainThread:2341 [wandb_setup.py:_flush():70] Loading settings from environment variables
6
+ 2025-07-02 13:01:02,446 INFO MainThread:2341 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage2_07021249/wandb/run-20250702_130101-ttc1macp/logs/debug.log
7
+ 2025-07-02 13:01:02,446 INFO MainThread:2341 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage2_07021249/wandb/run-20250702_130101-ttc1macp/logs/debug-internal.log
8
+ 2025-07-02 13:01:02,446 INFO MainThread:2341 [wandb_init.py:init():852] calling init triggers
9
+ 2025-07-02 13:01:02,446 INFO MainThread:2341 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-07-02 13:01:02,446 INFO MainThread:2341 [wandb_init.py:init():893] starting backend
12
+ 2025-07-02 13:01:02,446 INFO MainThread:2341 [wandb_init.py:init():897] sending inform_init request
13
+ 2025-07-02 13:01:02,449 INFO MainThread:2341 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-07-02 13:01:02,449 INFO MainThread:2341 [wandb_init.py:init():907] backend started and connected
15
+ 2025-07-02 13:01:02,468 INFO MainThread:2341 [wandb_init.py:init():1005] updated telemetry
16
+ 2025-07-02 13:01:02,498 INFO MainThread:2341 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
17
+ 2025-07-02 13:01:04,915 INFO MainThread:2341 [wandb_init.py:init():1104] starting run threads in backend
18
+ 2025-07-02 13:01:05,214 INFO MainThread:2341 [wandb_run.py:_console_start():2573] atexit reg
19
+ 2025-07-02 13:01:05,215 INFO MainThread:2341 [wandb_run.py:_redirect():2421] redirect: wrap_raw
20
+ 2025-07-02 13:01:05,215 INFO MainThread:2341 [wandb_run.py:_redirect():2490] Wrapping output streams.
21
+ 2025-07-02 13:01:05,215 INFO MainThread:2341 [wandb_run.py:_redirect():2513] Redirects installed.
22
+ 2025-07-02 13:01:05,218 INFO MainThread:2341 [wandb_init.py:init():1150] run started, returning control to user process
23
+ 2025-07-02 13:01:12,114 INFO MainThread:2341 [wandb_run.py:_config_callback():1436] config_cb None None {'filename': 'stage2_07021249', 'seed': 42, 'mode': 'train', 'strategy': 'deepspeed', 'accelerator': 'gpu', 'devices': '0,1,2,3,4,5,6,7', 'precision': 'bf16-mixed', 'max_epochs': 10, 'accumulate_grad_batches': 1, 'check_val_every_n_epoch': 1, 'enable_flash': False, 'use_wandb_logger': True, 'mix_dataset': True, 'save_every_n_epochs': 5, 'bert_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft', 'cross_attention_freq': 2, 'num_query_token': 8, 'llm_name': '/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300', 'num_beams': 5, 'do_sample': False, 'max_inference_len': 128, 'min_inference_len': 1, 'llm_tune': 'mid_lora', 'peft_config': '', 'peft_dir': '', 'plm_model': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m', 'plm_tune': 'freeze', 'lora_r': 8, 'lora_alpha': 16, 'lora_dropout': 0.1, 'enbale_gradient_checkpointing': False, 'weight_decay': 0.05, 'init_lr': 0.0001, 'min_lr': 1e-05, 'warmup_lr': 1e-06, 'warmup_steps': 1000, 'lr_decay_rate': 0.9, 'scheduler': 'linear_warmup_cosine_lr', 'stage1_path': '/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt', 'stage2_path': '', 'init_checkpoint': '', 'caption_eval_epoch': 10, 'num_workers': 8, 'batch_size': 32, 'inference_batch_size': 4, 'root': 'data', 'text_max_len': 128, 'q_max_len': 29, 'a_max_len': 36, 'prot_max_len': 1024, 'prompt': 'The protein has the following properties: ', 'filter_side_qa': False}
24
+ 2025-07-02 17:37:30,072 INFO MsgRouterThr:2341 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
ProtT3/all_checkpoints/stage2_07021249/wandb/run-20250702_130101-ttc1macp/files/config.yaml ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.19.11
4
+ m:
5
+ - "1": dataset0/rouge_l
6
+ "5": 2
7
+ "6":
8
+ - 1
9
+ - 3
10
+ "7": []
11
+ - "1": trainer/global_step
12
+ "6":
13
+ - 3
14
+ "7": []
15
+ - "1": lr
16
+ "5": 2
17
+ "6":
18
+ - 1
19
+ - 3
20
+ "7": []
21
+ - "1": epoch
22
+ "5": 2
23
+ "6":
24
+ - 1
25
+ - 3
26
+ "7": []
27
+ - "1": dataset0/rouge_1
28
+ "5": 2
29
+ "6":
30
+ - 1
31
+ - 3
32
+ "7": []
33
+ - "1": dataset0/rouge_2
34
+ "5": 2
35
+ "6":
36
+ - 1
37
+ - 3
38
+ "7": []
39
+ - "1": dataset0/meteor_score
40
+ "5": 2
41
+ "6":
42
+ - 1
43
+ - 3
44
+ "7": []
45
+ - "1": dataset0/acc
46
+ "5": 2
47
+ "6":
48
+ - 1
49
+ - 3
50
+ "7": []
51
+ - "1": dataset0/bleu4
52
+ "5": 2
53
+ "6":
54
+ - 1
55
+ - 3
56
+ "7": []
57
+ - "1": loss
58
+ "5": 2
59
+ "6":
60
+ - 1
61
+ - 3
62
+ "7": []
63
+ - "1": dataloader2/val loss/dataloader_idx_2
64
+ "5": 2
65
+ "6":
66
+ - 1
67
+ - 3
68
+ "7": []
69
+ - "1": dataloader0/val loss/dataloader_idx_0
70
+ "5": 2
71
+ "6":
72
+ - 1
73
+ - 3
74
+ "7": []
75
+ - "1": dataset0/bleu2
76
+ "5": 2
77
+ "6":
78
+ - 1
79
+ - 3
80
+ "7": []
81
+ python_version: 3.10.0
82
+ t:
83
+ "1":
84
+ - 1
85
+ - 5
86
+ - 9
87
+ - 11
88
+ - 33
89
+ - 41
90
+ - 49
91
+ - 53
92
+ - 55
93
+ - 63
94
+ - 103
95
+ "2":
96
+ - 1
97
+ - 5
98
+ - 9
99
+ - 11
100
+ - 33
101
+ - 41
102
+ - 49
103
+ - 53
104
+ - 55
105
+ - 63
106
+ - 103
107
+ "3":
108
+ - 7
109
+ - 23
110
+ - 55
111
+ - 66
112
+ "4": 3.10.0
113
+ "5": 0.19.11
114
+ "6": 4.52.3
115
+ "8":
116
+ - 5
117
+ "12": 0.19.11
118
+ "13": linux-x86_64
119
+ a_max_len:
120
+ value: 36
121
+ accelerator:
122
+ value: gpu
123
+ accumulate_grad_batches:
124
+ value: 1
125
+ batch_size:
126
+ value: 32
127
+ bert_name:
128
+ value: /nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft
129
+ caption_eval_epoch:
130
+ value: 10
131
+ check_val_every_n_epoch:
132
+ value: 1
133
+ cross_attention_freq:
134
+ value: 2
135
+ devices:
136
+ value: 0,1,2,3,4,5,6,7
137
+ do_sample:
138
+ value: false
139
+ enable_flash:
140
+ value: false
141
+ enbale_gradient_checkpointing:
142
+ value: false
143
+ filename:
144
+ value: stage2_07021249
145
+ filter_side_qa:
146
+ value: false
147
+ inference_batch_size:
148
+ value: 4
149
+ init_checkpoint:
150
+ value: ""
151
+ init_lr:
152
+ value: 0.0001
153
+ llm_name:
154
+ value: /oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300
155
+ llm_tune:
156
+ value: mid_lora
157
+ lora_alpha:
158
+ value: 16
159
+ lora_dropout:
160
+ value: 0.1
161
+ lora_r:
162
+ value: 8
163
+ lr_decay_rate:
164
+ value: 0.9
165
+ max_epochs:
166
+ value: 10
167
+ max_inference_len:
168
+ value: 128
169
+ min_inference_len:
170
+ value: 1
171
+ min_lr:
172
+ value: 1e-05
173
+ mix_dataset:
174
+ value: true
175
+ mode:
176
+ value: train
177
+ num_beams:
178
+ value: 5
179
+ num_query_token:
180
+ value: 8
181
+ num_workers:
182
+ value: 8
183
+ peft_config:
184
+ value: ""
185
+ peft_dir:
186
+ value: ""
187
+ plm_model:
188
+ value: /nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m
189
+ plm_tune:
190
+ value: freeze
191
+ precision:
192
+ value: bf16-mixed
193
+ prompt:
194
+ value: 'The protein has the following properties: '
195
+ prot_max_len:
196
+ value: 1024
197
+ q_max_len:
198
+ value: 29
199
+ root:
200
+ value: data
201
+ save_every_n_epochs:
202
+ value: 5
203
+ scheduler:
204
+ value: linear_warmup_cosine_lr
205
+ seed:
206
+ value: 42
207
+ stage1_path:
208
+ value: /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt
209
+ stage2_path:
210
+ value: ""
211
+ strategy:
212
+ value: deepspeed
213
+ text_max_len:
214
+ value: 128
215
+ use_wandb_logger:
216
+ value: true
217
+ warmup_lr:
218
+ value: 1e-06
219
+ warmup_steps:
220
+ value: 1000
221
+ weight_decay:
222
+ value: 0.05
ProtT3/all_checkpoints/stage2_07021249/wandb/run-20250702_130101-ttc1macp/files/output.log ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2_07021249 exists and is not empty.
2
+ Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.
3
+ LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
4
+
5
+ | Name | Type | Params | Mode
6
+ -------------------------------------------
7
+ 0 | blip2 | Blip2OPT | 3.5 B | train
8
+ -------------------------------------------
9
+ 110 M Trainable params
10
+ 3.4 B Non-trainable params
11
+ 3.5 B Total params
12
+ 14,075.005Total estimated model params size (MB)
13
+ 342 Modules in train mode
14
+ 1148 Modules in eval mode
15
+ Epoch 9: 100%|██████████████████████████████████████████| 1682/1682 [22:58<00:00, 1.22it/s, v_num=macp]BLEU-2 score: 15.341177980749976
16
+ BLEU-4 score: 11.860699475707873██████████████████████████████████████| 313/313 [17:47<00:00, 0.29it/s]
17
+ /nas/shared/kilab/wangyujia/ProtT3/model/dist_funs.py:18: FutureWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/main/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.
18
+ sd = self.module.state_dict(destination, prefix, keep_vars)
19
+ 20000it [01:41, 197.92it/s]
20
+ 20000it [00:24, 802.60it/s]
21
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:434: It is recommended to use `self.log('dataset0/acc', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
22
+ Average Meteor score: 26.00935336031241
23
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:434: It is recommended to use `self.log('dataset0/bleu2', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
24
+ ROUGE score:
25
+ rouge1: 21.952246333483618
26
+ rouge2: 13.41357541493452
27
+ rougeL: 19.765010605045184
28
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:434: It is recommended to use `self.log('dataset0/bleu4', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
29
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:434: It is recommended to use `self.log('dataset0/rouge_1', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
30
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:434: It is recommended to use `self.log('dataset0/rouge_2', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
31
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:434: It is recommended to use `self.log('dataset0/rouge_l', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
32
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:434: It is recommended to use `self.log('dataset0/meteor_score', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
33
+ Epoch 9: 100%|████████████████████████████████████████| 1682/1682 [1:01:47<00:00, 0.45it/s, v_num=macp]
34
+
35
+ `Trainer.fit` stopped: `max_epochs=10` reached.
ProtT3/all_checkpoints/stage2_07021249/wandb/run-20250702_130101-ttc1macp/files/requirements.txt ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ stack-data==0.6.3
2
+ yarl==1.20.0
3
+ setuptools==78.1.1
4
+ cloudpathlib==0.21.1
5
+ pytz==2025.2
6
+ nvidia-cufile-cu12==1.11.1.6
7
+ shellingham==1.5.4
8
+ nvidia-cusolver-cu12==11.6.1.9
9
+ Jinja2==3.1.6
10
+ pycocotools==2.0.8
11
+ pandas==2.2.3
12
+ scipy==1.15.3
13
+ tenacity==9.1.2
14
+ lightning-utilities==0.14.3
15
+ cfgv==3.4.0
16
+ hf-xet==1.1.2
17
+ platformdirs==4.3.8
18
+ smart-open==7.1.0
19
+ text-unidecode==1.3
20
+ nvidia-cublas-cu12==12.4.5.8
21
+ omegaconf==2.3.0
22
+ referencing==0.36.2
23
+ mdurl==0.1.2
24
+ gitdb==4.0.12
25
+ identify==2.6.12
26
+ ipython==8.36.0
27
+ spacy-loggers==1.0.5
28
+ distlib==0.3.9
29
+ typing-inspection==0.4.1
30
+ antlr4-python3-runtime==4.9.3
31
+ multidict==6.4.4
32
+ nvidia-curand-cu12==10.3.5.147
33
+ prompt_toolkit==3.0.51
34
+ Pygments==2.19.1
35
+ numpy==2.2.6
36
+ decord==0.6.0
37
+ srsly==2.5.1
38
+ watchdog==6.0.0
39
+ pure_eval==0.2.3
40
+ virtualenv==20.31.2
41
+ altair==5.5.0
42
+ matplotlib-inline==0.1.7
43
+ bleach==6.2.0
44
+ exceptiongroup==1.3.0
45
+ fairscale==0.4.4
46
+ confection==0.1.5
47
+ fonttools==4.58.0
48
+ nvidia-cuda-nvrtc-cu12==12.4.127
49
+ ptyprocess==0.7.0
50
+ pytorch-lightning==2.5.1.post0
51
+ nodeenv==1.9.1
52
+ nvidia-cudnn-cu12==9.1.0.70
53
+ requests==2.32.3
54
+ marisa-trie==1.2.1
55
+ cachetools==5.5.2
56
+ matplotlib==3.10.3
57
+ typing_extensions==4.13.2
58
+ asttokens==3.0.0
59
+ torch==2.6.0
60
+ PyYAML==6.0.2
61
+ tifffile==2025.5.10
62
+ spacy==3.8.7
63
+ braceexpand==0.1.7
64
+ plotly==6.1.1
65
+ attrs==25.3.0
66
+ py-cpuinfo==9.0.0
67
+ frozenlist==1.6.0
68
+ catalogue==2.0.10
69
+ nvidia-cusparselt-cu12==0.6.2
70
+ traitlets==5.14.3
71
+ annotated-types==0.7.0
72
+ language_data==1.3.0
73
+ thinc==8.3.6
74
+ imageio==2.37.0
75
+ nvidia-cuda-runtime-cu12==12.4.127
76
+ certifi==2025.4.26
77
+ smmap==5.0.2
78
+ python-magic==0.4.27
79
+ triton==3.2.0
80
+ weasel==0.4.1
81
+ async-timeout==5.0.1
82
+ wcwidth==0.2.13
83
+ pillow==11.2.1
84
+ torchmetrics==1.7.1
85
+ kaggle==1.7.4.5
86
+ regex==2024.11.6
87
+ aiosignal==1.3.2
88
+ nvidia-cusparse-cu12==12.3.1.170
89
+ scikit-image==0.25.2
90
+ nvidia-nvtx-cu12==12.4.127
91
+ opendatasets==0.1.22
92
+ iopath==0.1.10
93
+ pyparsing==3.2.3
94
+ portalocker==3.1.1
95
+ executing==2.2.0
96
+ contexttimer==0.3.3
97
+ lazy_loader==0.4
98
+ wrapt==1.17.2
99
+ webdataset==0.2.111
100
+ blis==1.3.0
101
+ idna==3.10
102
+ timm==0.4.12
103
+ einops==0.8.1
104
+ packaging==24.2
105
+ decorator==5.2.1
106
+ filelock==3.18.0
107
+ python-slugify==8.0.4
108
+ cycler==0.12.1
109
+ charset-normalizer==3.4.2
110
+ pydantic==2.11.5
111
+ pydeck==0.9.1
112
+ tzdata==2025.2
113
+ jedi==0.19.2
114
+ aiohappyeyeballs==2.6.1
115
+ nvidia-nvjitlink-cu12==12.4.127
116
+ salesforce-lavis==1.0.2
117
+ parso==0.8.4
118
+ nvidia-nccl-cu12==2.21.5
119
+ toml==0.10.2
120
+ python-dateutil==2.9.0.post0
121
+ rich==14.0.0
122
+ tqdm==4.67.1
123
+ rpds-py==0.25.1
124
+ opencv-python-headless==4.5.5.64
125
+ tornado==6.5.1
126
+ propcache==0.3.1
127
+ webencodings==0.5.1
128
+ murmurhash==1.0.13
129
+ contourpy==1.3.2
130
+ joblib==1.5.1
131
+ networkx==3.4.2
132
+ six==1.17.0
133
+ markdown-it-py==3.0.0
134
+ nvidia-cuda-cupti-cu12==12.4.127
135
+ msgpack==1.1.0
136
+ sentencepiece==0.2.0
137
+ cymem==2.0.11
138
+ nvidia-cufft-cu12==11.2.1.3
139
+ absl-py==2.2.2
140
+ hjson==3.1.0
141
+ mpmath==1.3.0
142
+ pydantic_core==2.33.2
143
+ psutil==7.0.0
144
+ nvidia-ml-py==12.575.51
145
+ pyarrow==20.0.0
146
+ kiwisolver==1.4.8
147
+ sympy==1.13.1
148
+ ninja==1.11.1.4
149
+ rouge_score==0.1.2
150
+ deepspeed==0.16.10+b666844f
151
+ spacy-legacy==3.0.12
152
+ pycocoevalcap==1.2
153
+ pexpect==4.9.0
154
+ ftfy==6.3.1
155
+ protobuf==6.31.0
156
+ urllib3==2.4.0
157
+ wheel==0.45.1
158
+ nltk==3.9.1
159
+ streamlit==1.45.1
160
+ wasabi==1.1.3
161
+ pre_commit==4.2.0
162
+ safetensors==0.5.3
163
+ jsonschema-specifications==2025.4.1
164
+ langcodes==3.5.0
165
+ GitPython==3.1.44
166
+ blinker==1.9.0
167
+ torchvision==0.21.0
168
+ MarkupSafe==3.0.2
169
+ dill==0.3.8
170
+ yacs==0.1.8
171
+ pathlib==1.0.1
172
+ scikit-learn==1.6.1
173
+ cffi==1.17.1
174
+ pycparser==2.22
175
+ flash-attn==2.7.1.post1
176
+ cryptography==45.0.3
177
+ pycryptodome==3.23.0
178
+ cheroot==10.0.1
179
+ more-itertools==10.7.0
180
+ setproctitle==1.3.6
181
+ delta-center-client==0.0.4
182
+ jmespath==0.10.0
183
+ xxhash==3.5.0
184
+ pip==25.1.1
185
+ aliyun-python-sdk-core==2.16.0
186
+ jaraco.functools==4.1.0
187
+ bigmodelvis==0.0.1
188
+ aiohttp==3.12.2
189
+ multiprocess==0.70.16
190
+ opendelta==0.3.2
191
+ docker-pycreds==0.4.0
192
+ threadpoolctl==3.6.0
193
+ click==8.2.1
194
+ oss2==2.15.0
195
+ crcmod==1.7
196
+ transformers==4.52.3
197
+ datasets==3.6.0
198
+ jsonschema==4.24.0
199
+ opencv-python==4.11.0.86
200
+ wandb==0.19.11
201
+ fsspec==2025.3.0
202
+ tokenizers==0.21.1
203
+ sentry-sdk==2.29.1
204
+ preshed==3.0.10
205
+ aliyun-python-sdk-kms==2.16.5
206
+ huggingface-hub==0.32.1
207
+ typer==0.16.0
208
+ narwhals==1.41.0
209
+ web.py==0.62
210
+ autocommand==2.2.2
211
+ importlib_metadata==8.0.0
212
+ zipp==3.19.2
213
+ jaraco.context==5.3.0
214
+ typeguard==4.3.0
215
+ jaraco.collections==5.1.0
216
+ typing_extensions==4.12.2
217
+ backports.tarfile==1.2.0
218
+ jaraco.functools==4.0.1
219
+ more-itertools==10.3.0
220
+ platformdirs==4.2.2
221
+ packaging==24.2
222
+ tomli==2.0.1
223
+ jaraco.text==3.12.1
224
+ wheel==0.45.1
225
+ inflect==7.3.1
ProtT3/all_checkpoints/stage2_07021249/wandb/run-20250702_130101-ttc1macp/files/wandb-metadata.json ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.0",
4
+ "startedAt": "2025-07-02T05:01:02.468047Z",
5
+ "args": [
6
+ "--devices",
7
+ "0,1,2,3,4,5,6,7",
8
+ "--mode",
9
+ "train",
10
+ "--filename",
11
+ "stage2_07021249",
12
+ "--num_query_token",
13
+ "8",
14
+ "--save_every_n_epochs",
15
+ "5",
16
+ "--max_epochs",
17
+ "10",
18
+ "--batch_size",
19
+ "32",
20
+ "--precision",
21
+ "bf16-mixed",
22
+ "--num_workers",
23
+ "8",
24
+ "--plm_model",
25
+ "/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m",
26
+ "--bert_name",
27
+ "/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft",
28
+ "--llm_name",
29
+ "/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300",
30
+ "--llm_tune",
31
+ "mid_lora",
32
+ "--mix_dataset",
33
+ "--stage1_path",
34
+ "/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt",
35
+ "--use_wandb_logger"
36
+ ],
37
+ "program": "/nas/shared/kilab/wangyujia/ProtT3/stage2.py",
38
+ "codePath": "stage2.py",
39
+ "email": "gia0603yucca@gmail.com",
40
+ "root": "./all_checkpoints/stage2_07021249/",
41
+ "host": "dsw-265304-6d4dbc55d6-l29sk",
42
+ "executable": "/root/miniconda3/envs/protT3/bin/python",
43
+ "codePathLocal": "stage2.py",
44
+ "cpu_count": 64,
45
+ "cpu_count_logical": 64,
46
+ "gpu": "NVIDIA A800-SXM4-80GB",
47
+ "gpu_count": 8,
48
+ "disk": {
49
+ "/": {
50
+ "total": "1623302262784",
51
+ "used": "987885568"
52
+ }
53
+ },
54
+ "memory": {
55
+ "total": "549755813888"
56
+ },
57
+ "cpu": {
58
+ "count": 64,
59
+ "countLogical": 64
60
+ },
61
+ "gpu_nvidia": [
62
+ {
63
+ "name": "NVIDIA A800-SXM4-80GB",
64
+ "memoryTotal": "85198045184",
65
+ "architecture": "Ampere"
66
+ },
67
+ {
68
+ "name": "NVIDIA A800-SXM4-80GB",
69
+ "memoryTotal": "85198045184",
70
+ "architecture": "Ampere"
71
+ },
72
+ {
73
+ "name": "NVIDIA A800-SXM4-80GB",
74
+ "memoryTotal": "85198045184",
75
+ "architecture": "Ampere"
76
+ },
77
+ {
78
+ "name": "NVIDIA A800-SXM4-80GB",
79
+ "memoryTotal": "85198045184",
80
+ "architecture": "Ampere"
81
+ },
82
+ {
83
+ "name": "NVIDIA A800-SXM4-80GB",
84
+ "memoryTotal": "85198045184",
85
+ "architecture": "Ampere"
86
+ },
87
+ {
88
+ "name": "NVIDIA A800-SXM4-80GB",
89
+ "memoryTotal": "85198045184",
90
+ "architecture": "Ampere"
91
+ },
92
+ {
93
+ "name": "NVIDIA A800-SXM4-80GB",
94
+ "memoryTotal": "85198045184",
95
+ "architecture": "Ampere"
96
+ },
97
+ {
98
+ "name": "NVIDIA A800-SXM4-80GB",
99
+ "memoryTotal": "85198045184",
100
+ "architecture": "Ampere"
101
+ }
102
+ ],
103
+ "cudaVersion": "12.1"
104
+ }
ProtT3/all_checkpoints/stage2_07021249/wandb/run-20250702_130101-ttc1macp/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset0/bleu4":11.860699653625488,"dataset0/rouge_2":13.413575172424316,"trainer/global_step":16819,"dataloader2/val loss/dataloader_idx_2":5.575827121734619,"_wandb":{"runtime":16587},"dataloader0/val loss/dataloader_idx_0":2.2659671306610107,"_timestamp":1.7514490380975404e+09,"_runtime":16575.629722537,"_step":345,"dataset0/bleu2":15.341177940368652,"loss":1.30044424533844,"dataset0/meteor_score":26.009353637695312,"dataset0/rouge_1":21.952245712280273,"epoch":9,"lr":1.2202456673549023e-05,"dataset0/acc":0.009999999776482582,"dataset0/rouge_l":19.765010833740234}
ProtT3/all_checkpoints/stage2_07021249/wandb/run-20250702_130101-ttc1macp/logs/debug-internal.log ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-07-02T13:01:02.517398459+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage2_07021249/wandb/run-20250702_130101-ttc1macp/logs/debug-core.log"}
2
+ {"time":"2025-07-02T13:01:03.639276653+08:00","level":"INFO","msg":"created new stream","id":"ttc1macp"}
3
+ {"time":"2025-07-02T13:01:03.63932174+08:00","level":"INFO","msg":"stream: started","id":"ttc1macp"}
4
+ {"time":"2025-07-02T13:01:03.639331986+08:00","level":"INFO","msg":"handler: started","stream_id":"ttc1macp"}
5
+ {"time":"2025-07-02T13:01:03.639357726+08:00","level":"INFO","msg":"sender: started","stream_id":"ttc1macp"}
6
+ {"time":"2025-07-02T13:01:03.639363515+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"ttc1macp"}
7
+ {"time":"2025-07-02T13:01:05.044613784+08:00","level":"INFO","msg":"Starting system monitor"}
8
+ {"time":"2025-07-02T14:25:40.366902458+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07021249/ttc1macp/file_stream\": unexpected EOF"}
9
+ {"time":"2025-07-02T15:45:50.370249421+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
10
+ {"time":"2025-07-02T17:37:30.121166513+08:00","level":"INFO","msg":"stream: closing","id":"ttc1macp"}
11
+ {"time":"2025-07-02T17:37:30.121209819+08:00","level":"INFO","msg":"Stopping system monitor"}
12
+ {"time":"2025-07-02T17:37:30.137879266+08:00","level":"INFO","msg":"Stopped system monitor"}
13
+ {"time":"2025-07-02T17:39:43.997948982+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
14
+ {"time":"2025-07-02T17:39:45.216379261+08:00","level":"INFO","msg":"handler: closed","stream_id":"ttc1macp"}
15
+ {"time":"2025-07-02T17:39:45.216437206+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"ttc1macp"}
16
+ {"time":"2025-07-02T17:39:45.216450748+08:00","level":"INFO","msg":"sender: closed","stream_id":"ttc1macp"}
17
+ {"time":"2025-07-02T17:39:45.269415307+08:00","level":"INFO","msg":"stream: closed","id":"ttc1macp"}
ProtT3/all_checkpoints/stage2_07021249/wandb/run-20250702_130101-ttc1macp/logs/debug.log ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-07-02 13:01:02,446 INFO MainThread:2341 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
2
+ 2025-07-02 13:01:02,446 INFO MainThread:2341 [wandb_setup.py:_flush():70] Configure stats pid to 2341
3
+ 2025-07-02 13:01:02,446 INFO MainThread:2341 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
4
+ 2025-07-02 13:01:02,446 INFO MainThread:2341 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
5
+ 2025-07-02 13:01:02,446 INFO MainThread:2341 [wandb_setup.py:_flush():70] Loading settings from environment variables
6
+ 2025-07-02 13:01:02,446 INFO MainThread:2341 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage2_07021249/wandb/run-20250702_130101-ttc1macp/logs/debug.log
7
+ 2025-07-02 13:01:02,446 INFO MainThread:2341 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage2_07021249/wandb/run-20250702_130101-ttc1macp/logs/debug-internal.log
8
+ 2025-07-02 13:01:02,446 INFO MainThread:2341 [wandb_init.py:init():852] calling init triggers
9
+ 2025-07-02 13:01:02,446 INFO MainThread:2341 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-07-02 13:01:02,446 INFO MainThread:2341 [wandb_init.py:init():893] starting backend
12
+ 2025-07-02 13:01:02,446 INFO MainThread:2341 [wandb_init.py:init():897] sending inform_init request
13
+ 2025-07-02 13:01:02,449 INFO MainThread:2341 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-07-02 13:01:02,449 INFO MainThread:2341 [wandb_init.py:init():907] backend started and connected
15
+ 2025-07-02 13:01:02,468 INFO MainThread:2341 [wandb_init.py:init():1005] updated telemetry
16
+ 2025-07-02 13:01:02,498 INFO MainThread:2341 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
17
+ 2025-07-02 13:01:04,915 INFO MainThread:2341 [wandb_init.py:init():1104] starting run threads in backend
18
+ 2025-07-02 13:01:05,214 INFO MainThread:2341 [wandb_run.py:_console_start():2573] atexit reg
19
+ 2025-07-02 13:01:05,215 INFO MainThread:2341 [wandb_run.py:_redirect():2421] redirect: wrap_raw
20
+ 2025-07-02 13:01:05,215 INFO MainThread:2341 [wandb_run.py:_redirect():2490] Wrapping output streams.
21
+ 2025-07-02 13:01:05,215 INFO MainThread:2341 [wandb_run.py:_redirect():2513] Redirects installed.
22
+ 2025-07-02 13:01:05,218 INFO MainThread:2341 [wandb_init.py:init():1150] run started, returning control to user process
23
+ 2025-07-02 13:01:12,114 INFO MainThread:2341 [wandb_run.py:_config_callback():1436] config_cb None None {'filename': 'stage2_07021249', 'seed': 42, 'mode': 'train', 'strategy': 'deepspeed', 'accelerator': 'gpu', 'devices': '0,1,2,3,4,5,6,7', 'precision': 'bf16-mixed', 'max_epochs': 10, 'accumulate_grad_batches': 1, 'check_val_every_n_epoch': 1, 'enable_flash': False, 'use_wandb_logger': True, 'mix_dataset': True, 'save_every_n_epochs': 5, 'bert_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft', 'cross_attention_freq': 2, 'num_query_token': 8, 'llm_name': '/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300', 'num_beams': 5, 'do_sample': False, 'max_inference_len': 128, 'min_inference_len': 1, 'llm_tune': 'mid_lora', 'peft_config': '', 'peft_dir': '', 'plm_model': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m', 'plm_tune': 'freeze', 'lora_r': 8, 'lora_alpha': 16, 'lora_dropout': 0.1, 'enbale_gradient_checkpointing': False, 'weight_decay': 0.05, 'init_lr': 0.0001, 'min_lr': 1e-05, 'warmup_lr': 1e-06, 'warmup_steps': 1000, 'lr_decay_rate': 0.9, 'scheduler': 'linear_warmup_cosine_lr', 'stage1_path': '/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt', 'stage2_path': '', 'init_checkpoint': '', 'caption_eval_epoch': 10, 'num_workers': 8, 'batch_size': 32, 'inference_batch_size': 4, 'root': 'data', 'text_max_len': 128, 'q_max_len': 29, 'a_max_len': 36, 'prot_max_len': 1024, 'prompt': 'The protein has the following properties: ', 'filter_side_qa': False}
24
+ 2025-07-02 17:37:30,072 INFO MsgRouterThr:2341 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
ProtT3/all_checkpoints/stage2_07041521/wandb/debug-internal.log ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-07-04T15:46:08.966654664+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage2_07041521/wandb/run-20250704_154608-ds7lrt6r/logs/debug-core.log"}
2
+ {"time":"2025-07-04T15:46:10.62729617+08:00","level":"INFO","msg":"created new stream","id":"ds7lrt6r"}
3
+ {"time":"2025-07-04T15:46:10.627339189+08:00","level":"INFO","msg":"stream: started","id":"ds7lrt6r"}
4
+ {"time":"2025-07-04T15:46:10.627374947+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"ds7lrt6r"}
5
+ {"time":"2025-07-04T15:46:10.627404904+08:00","level":"INFO","msg":"handler: started","stream_id":"ds7lrt6r"}
6
+ {"time":"2025-07-04T15:46:10.627398441+08:00","level":"INFO","msg":"sender: started","stream_id":"ds7lrt6r"}
7
+ {"time":"2025-07-04T15:46:12.482815718+08:00","level":"INFO","msg":"Starting system monitor"}
8
+ {"time":"2025-07-04T21:58:22.739226816+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:36512->172.67.193.61:443: read: connection timed out"}
9
+ {"time":"2025-07-04T22:01:34.73927758+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:56674->172.67.193.61:443: read: connection timed out"}
10
+ {"time":"2025-07-04T22:01:43.328655381+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
11
+ {"time":"2025-07-04T22:02:15.686026868+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
12
+ {"time":"2025-07-04T22:02:50.214808897+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
13
+ {"time":"2025-07-04T22:03:28.950188225+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
14
+ {"time":"2025-07-04T22:04:14.982810813+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
15
+ {"time":"2025-07-04T22:05:22.734102314+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
16
+ {"time":"2025-07-04T22:06:52.750047504+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
17
+ {"time":"2025-07-04T22:08:22.775414283+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
18
+ {"time":"2025-07-04T22:08:43.307492899+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": unexpected EOF"}
19
+ {"time":"2025-07-04T22:09:52.781192276+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
20
+ {"time":"2025-07-04T22:11:13.328660156+08:00","level":"WARN","msg":"sender: taking a long time","seconds":600.000386663,"work":"WorkRecord(*service_go_proto.Request_StopStatus); Control(local:true mailbox_slot:\"2nedcn0bl5yp\" connection_id:\"127.0.0.1:57318\")"}
21
+ {"time":"2025-07-04T22:11:22.78819562+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
22
+ {"time":"2025-07-04T22:12:52.789417986+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
23
+ {"time":"2025-07-04T22:13:05.234783312+08:00","level":"WARN","msg":"runwork: taking a long time","seconds":600.000850585,"work":"WorkRecord(*service_go_proto.Record_OutputRaw); Control(connection_id:\"127.0.0.1:57318\")"}
24
+ {"time":"2025-07-04T22:13:12.48514539+08:00","level":"WARN","msg":"runwork: taking a long time","seconds":600.000329899,"work":"WorkRecord(*service_go_proto.Record_Stats); Control(always_send:true)"}
25
+ {"time":"2025-07-04T22:13:12.516285255+08:00","level":"WARN","msg":"runwork: taking a long time","seconds":600.00042574,"work":"WorkRecord(*service_go_proto.Record_Stats); Control(always_send:true)"}
26
+ {"time":"2025-07-04T22:14:22.815202117+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
27
+ {"time":"2025-07-04T22:15:52.82570124+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
28
+ {"time":"2025-07-04T22:17:22.890129793+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
29
+ {"time":"2025-07-04T22:18:29.532924447+08:00","level":"INFO","msg":"sender: succeeded after taking longer than expected","seconds":1036.204664967,"work":"WorkRecord(*service_go_proto.Request_StopStatus); Control(local:true mailbox_slot:\"2nedcn0bl5yp\" connection_id:\"127.0.0.1:57318\")"}
30
+ {"time":"2025-07-04T22:18:29.532969506+08:00","level":"INFO","msg":"runwork: succeeded after taking longer than expected","seconds":924.299032896,"work":"WorkRecord(*service_go_proto.Record_OutputRaw); Control(connection_id:\"127.0.0.1:57318\")"}
31
+ {"time":"2025-07-04T22:18:29.532991216+08:00","level":"INFO","msg":"runwork: succeeded after taking longer than expected","seconds":917.048223115,"work":"WorkRecord(*service_go_proto.Record_Stats); Control(always_send:true)"}
32
+ {"time":"2025-07-04T22:18:29.532997449+08:00","level":"INFO","msg":"runwork: succeeded after taking longer than expected","seconds":917.017162756,"work":"WorkRecord(*service_go_proto.Record_Stats); Control(always_send:true)"}
33
+ {"time":"2025-07-04T22:22:02.242147225+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:53384->104.21.20.172:443: read: connection reset by peer"}
34
+ {"time":"2025-07-04T22:26:53.14669432+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": unexpected EOF"}
35
+ {"time":"2025-07-04T22:27:48.780316277+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": unexpected EOF"}
36
+ {"time":"2025-07-04T22:31:07.795211328+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:59586->104.21.20.172:443: read: connection timed out"}
37
+ {"time":"2025-07-04T22:34:24.403211244+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:36792->172.67.193.61:443: read: connection timed out"}
38
+ {"time":"2025-07-04T22:42:22.611231819+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:52098->172.67.193.61:443: read: connection timed out"}
39
+ {"time":"2025-07-04T22:43:30.389313147+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:48310->104.21.20.172:443: read: connection reset by peer"}
40
+ {"time":"2025-07-04T22:44:01.015059936+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": unexpected EOF"}
41
+ {"time":"2025-07-04T22:46:14.846692259+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": unexpected EOF"}
42
+ {"time":"2025-07-04T22:48:31.349968366+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:39256->104.21.20.172:443: read: connection reset by peer"}
43
+ {"time":"2025-07-04T22:49:38.015743829+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:38292->172.67.193.61:443: read: connection reset by peer"}
44
+ {"time":"2025-07-04T22:50:09.683679573+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": http2: client conn is closed"}
45
+ {"time":"2025-07-04T22:52:08.558045187+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:59988->104.21.20.172:443: read: connection reset by peer"}
46
+ {"time":"2025-07-04T22:54:22.824301514+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:38894->172.67.193.61:443: read: connection reset by peer"}
47
+ {"time":"2025-07-04T22:59:26.307679579+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:51684->104.21.20.172:443: read: connection reset by peer"}
48
+ {"time":"2025-07-04T23:02:32.979198883+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:58210->172.67.193.61:443: read: connection timed out"}
49
+ {"time":"2025-07-04T23:10:21.459215862+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:35172->172.67.193.61:443: read: connection timed out"}
50
+ {"time":"2025-07-04T23:11:15.167490198+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:54942->104.21.20.172:443: read: connection reset by peer"}
51
+ {"time":"2025-07-04T23:16:10.863292487+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:43428->104.21.20.172:443: read: connection reset by peer"}
52
+ {"time":"2025-07-04T23:20:22.264503678+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)"}
53
+ {"time":"2025-07-04T23:23:20.723212059+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:38048->104.21.20.172:443: read: connection timed out"}
54
+ {"time":"2025-07-04T23:25:26.885991394+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:42070->172.67.193.61:443: read: connection reset by peer"}
55
+ {"time":"2025-07-04T23:26:43.632088162+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
56
+ {"time":"2025-07-04T23:29:40.115213912+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:39740->104.21.20.172:443: read: connection timed out"}
57
+ {"time":"2025-07-04T23:30:43.633929839+08:00","level":"ERROR","msg":"sender: sendStopStatus: failed to get run stopped status: net/http: request canceled (Client.Timeout or context cancellation while reading body)"}
58
+ {"time":"2025-07-04T23:31:28.634946467+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
59
+ {"time":"2025-07-04T23:32:54.67521322+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:51016->172.67.193.61:443: read: connection timed out"}
60
+ {"time":"2025-07-04T23:39:42.739210995+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:58242->104.21.20.172:443: read: connection timed out"}
61
+ {"time":"2025-07-04T23:45:14.003206141+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:41730->172.67.193.61:443: read: connection timed out"}
62
+ {"time":"2025-07-04T23:48:41.876197491+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:39056->172.67.193.61:443: read: connection timed out"}
63
+ {"time":"2025-07-04T23:52:10.25916891+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:56968->172.67.193.61:443: read: connection timed out"}
64
+ {"time":"2025-07-04T23:55:53.491192209+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:52384->172.67.193.61:443: read: connection timed out"}
65
+ {"time":"2025-07-04T23:58:33.235181122+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:41192->172.67.193.61:443: read: connection timed out"}
66
+ {"time":"2025-07-05T00:01:39.09118175+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:54870->172.67.193.61:443: read: connection timed out"}
67
+ {"time":"2025-07-05T00:03:30.660362141+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": unexpected EOF"}
68
+ {"time":"2025-07-05T00:06:22.739197171+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:56300->104.21.20.172:443: read: connection timed out"}
69
+ {"time":"2025-07-05T00:07:01.140306954+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:33830->172.67.193.61:443: read: connection reset by peer"}
70
+ {"time":"2025-07-05T00:10:11.603177492+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:38990->104.21.20.172:443: read: connection timed out"}
71
+ {"time":"2025-07-05T00:17:09.907203145+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:58654->104.21.20.172:443: read: connection timed out"}
72
+ {"time":"2025-07-05T00:20:10.131225125+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:39570->172.67.193.61:443: read: connection timed out"}
73
+ {"time":"2025-07-05T00:23:22.643197817+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:57940->172.67.193.61:443: read: connection timed out"}
74
+ {"time":"2025-07-05T00:26:13.651201419+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:37792->172.67.193.61:443: read: connection timed out"}
75
+ {"time":"2025-07-05T00:28:56.467221564+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:38640->104.21.20.172:443: read: connection timed out"}
76
+ {"time":"2025-07-05T00:32:27.924195852+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:33266->104.21.20.172:443: read: connection timed out"}
77
+ {"time":"2025-07-05T00:33:00.356828932+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:56794->104.21.20.172:443: read: connection reset by peer"}
78
+ {"time":"2025-07-05T00:36:52.115188168+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:44060->104.21.20.172:443: read: connection timed out"}
79
+ {"time":"2025-07-05T00:40:28.83076072+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
80
+ {"time":"2025-07-05T00:41:16.304855216+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": unexpected EOF"}
81
+ {"time":"2025-07-05T00:44:59.539204741+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:54050->104.21.20.172:443: read: connection timed out"}
82
+ {"time":"2025-07-05T00:46:47.38055068+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:47426->172.67.193.61:443: read: connection reset by peer"}
83
+ {"time":"2025-07-05T00:48:09.874069624+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:60378->104.21.20.172:443: read: connection reset by peer"}
84
+ {"time":"2025-07-05T00:50:24.818784704+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": unexpected EOF"}
85
+ {"time":"2025-07-05T01:02:13.78019116+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:44352->104.21.20.172:443: read: connection timed out"}
86
+ {"time":"2025-07-05T01:10:57.377024443+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": context deadline exceeded"}
87
+ {"time":"2025-07-05T01:20:40.211190451+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2_07041521/ds7lrt6r/file_stream\": read tcp 10.1.8.17:50414->104.21.20.172:443: read: connection timed out"}
88
+ {"time":"2025-07-05T01:21:51.097875981+08:00","level":"INFO","msg":"stream: closing","id":"ds7lrt6r"}
89
+ {"time":"2025-07-05T01:21:51.097937445+08:00","level":"INFO","msg":"Stopping system monitor"}
90
+ {"time":"2025-07-05T01:21:51.099273597+08:00","level":"INFO","msg":"Stopped system monitor"}
91
+ {"time":"2025-07-05T01:21:58.380081154+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
92
+ {"time":"2025-07-05T01:22:06.296069471+08:00","level":"INFO","msg":"handler: closed","stream_id":"ds7lrt6r"}
93
+ {"time":"2025-07-05T01:22:06.296102451+08:00","level":"INFO","msg":"sender: closed","stream_id":"ds7lrt6r"}
94
+ {"time":"2025-07-05T01:22:06.296100202+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"ds7lrt6r"}
95
+ {"time":"2025-07-05T01:22:06.302257653+08:00","level":"INFO","msg":"stream: closed","id":"ds7lrt6r"}
ProtT3/all_checkpoints/stage2_07041521/wandb/debug.log ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-07-04 15:46:08,903 INFO MainThread:56865 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
2
+ 2025-07-04 15:46:08,903 INFO MainThread:56865 [wandb_setup.py:_flush():70] Configure stats pid to 56865
3
+ 2025-07-04 15:46:08,903 INFO MainThread:56865 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
4
+ 2025-07-04 15:46:08,903 INFO MainThread:56865 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
5
+ 2025-07-04 15:46:08,903 INFO MainThread:56865 [wandb_setup.py:_flush():70] Loading settings from environment variables
6
+ 2025-07-04 15:46:08,903 INFO MainThread:56865 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage2_07041521/wandb/run-20250704_154608-ds7lrt6r/logs/debug.log
7
+ 2025-07-04 15:46:08,903 INFO MainThread:56865 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage2_07041521/wandb/run-20250704_154608-ds7lrt6r/logs/debug-internal.log
8
+ 2025-07-04 15:46:08,903 INFO MainThread:56865 [wandb_init.py:init():852] calling init triggers
9
+ 2025-07-04 15:46:08,903 INFO MainThread:56865 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-07-04 15:46:08,903 INFO MainThread:56865 [wandb_init.py:init():893] starting backend
12
+ 2025-07-04 15:46:08,903 INFO MainThread:56865 [wandb_init.py:init():897] sending inform_init request
13
+ 2025-07-04 15:46:08,914 INFO MainThread:56865 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-07-04 15:46:08,916 INFO MainThread:56865 [wandb_init.py:init():907] backend started and connected
15
+ 2025-07-04 15:46:08,917 INFO MainThread:56865 [wandb_init.py:init():1005] updated telemetry
16
+ 2025-07-04 15:46:08,922 INFO MainThread:56865 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
17
+ 2025-07-04 15:46:12,402 INFO MainThread:56865 [wandb_init.py:init():1104] starting run threads in backend
18
+ 2025-07-04 15:46:12,679 INFO MainThread:56865 [wandb_run.py:_console_start():2573] atexit reg
19
+ 2025-07-04 15:46:12,680 INFO MainThread:56865 [wandb_run.py:_redirect():2421] redirect: wrap_raw
20
+ 2025-07-04 15:46:12,685 INFO MainThread:56865 [wandb_run.py:_redirect():2490] Wrapping output streams.
21
+ 2025-07-04 15:46:12,686 INFO MainThread:56865 [wandb_run.py:_redirect():2513] Redirects installed.
22
+ 2025-07-04 15:46:12,697 INFO MainThread:56865 [wandb_init.py:init():1150] run started, returning control to user process
23
+ 2025-07-04 15:46:21,744 INFO MainThread:56865 [wandb_run.py:_config_callback():1436] config_cb None None {'filename': 'stage2_07041521', 'seed': 42, 'mode': 'train', 'strategy': 'deepspeed', 'accelerator': 'gpu', 'devices': '0,1,2,3,4,5,6,7', 'precision': 'bf16-mixed', 'max_epochs': 15, 'accumulate_grad_batches': 1, 'check_val_every_n_epoch': 1, 'enable_flash': False, 'use_wandb_logger': True, 'mix_dataset': True, 'save_every_n_epochs': 5, 'bert_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft', 'cross_attention_freq': 2, 'num_query_token': 8, 'llm_name': '/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300', 'num_beams': 5, 'do_sample': False, 'max_inference_len': 128, 'min_inference_len': 1, 'llm_tune': 'mid_lora', 'peft_config': '', 'peft_dir': '', 'plm_model': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m', 'plm_tune': 'freeze', 'lora_r': 8, 'lora_alpha': 16, 'lora_dropout': 0.1, 'enbale_gradient_checkpointing': False, 'weight_decay': 0.05, 'init_lr': 0.0001, 'min_lr': 1e-05, 'warmup_lr': 1e-06, 'warmup_steps': 1000, 'lr_decay_rate': 0.9, 'scheduler': 'linear_warmup_cosine_lr', 'stage1_path': '/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt', 'stage2_path': '', 'init_checkpoint': '', 'caption_eval_epoch': 10, 'num_workers': 8, 'batch_size': 32, 'inference_batch_size': 4, 'root': 'data', 'text_max_len': 128, 'q_max_len': 29, 'a_max_len': 36, 'prot_max_len': 1024, 'prompt': 'The protein has the following properties: ', 'filter_side_qa': False}
24
+ 2025-07-05 01:21:51,095 INFO MsgRouterThr:56865 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_152253-wgyb9m42/files/config.yaml ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.19.11
4
+ m:
5
+ - "1": trainer/global_step
6
+ "6":
7
+ - 3
8
+ "7": []
9
+ python_version: 3.10.0
10
+ t:
11
+ "1":
12
+ - 1
13
+ - 5
14
+ - 9
15
+ - 11
16
+ - 33
17
+ - 41
18
+ - 49
19
+ - 53
20
+ - 55
21
+ - 63
22
+ - 103
23
+ "2":
24
+ - 1
25
+ - 5
26
+ - 9
27
+ - 11
28
+ - 33
29
+ - 41
30
+ - 49
31
+ - 53
32
+ - 55
33
+ - 63
34
+ - 103
35
+ "3":
36
+ - 7
37
+ - 23
38
+ - 55
39
+ - 66
40
+ "4": 3.10.0
41
+ "5": 0.19.11
42
+ "6": 4.52.3
43
+ "8":
44
+ - 5
45
+ "12": 0.19.11
46
+ "13": linux-x86_64
47
+ a_max_len:
48
+ value: 36
49
+ accelerator:
50
+ value: gpu
51
+ accumulate_grad_batches:
52
+ value: 1
53
+ batch_size:
54
+ value: 32
55
+ bert_name:
56
+ value: /nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft
57
+ caption_eval_epoch:
58
+ value: 10
59
+ check_val_every_n_epoch:
60
+ value: 1
61
+ cross_attention_freq:
62
+ value: 2
63
+ devices:
64
+ value: 0,1,2,3,4,5,6,7
65
+ do_sample:
66
+ value: false
67
+ enable_flash:
68
+ value: false
69
+ enbale_gradient_checkpointing:
70
+ value: false
71
+ filename:
72
+ value: stage2_07041521
73
+ filter_side_qa:
74
+ value: false
75
+ inference_batch_size:
76
+ value: 4
77
+ init_checkpoint:
78
+ value: ""
79
+ init_lr:
80
+ value: 0.0001
81
+ llm_name:
82
+ value: /oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300
83
+ llm_tune:
84
+ value: mid_lora
85
+ lora_alpha:
86
+ value: 16
87
+ lora_dropout:
88
+ value: 0.1
89
+ lora_r:
90
+ value: 8
91
+ lr_decay_rate:
92
+ value: 0.9
93
+ max_epochs:
94
+ value: 15
95
+ max_inference_len:
96
+ value: 128
97
+ min_inference_len:
98
+ value: 1
99
+ min_lr:
100
+ value: 1e-05
101
+ mix_dataset:
102
+ value: true
103
+ mode:
104
+ value: train
105
+ num_beams:
106
+ value: 5
107
+ num_query_token:
108
+ value: 8
109
+ num_workers:
110
+ value: 8
111
+ peft_config:
112
+ value: ""
113
+ peft_dir:
114
+ value: ""
115
+ plm_model:
116
+ value: /nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m
117
+ plm_tune:
118
+ value: freeze
119
+ precision:
120
+ value: bf16-mixed
121
+ prompt:
122
+ value: 'The protein has the following properties: '
123
+ prot_max_len:
124
+ value: 1024
125
+ q_max_len:
126
+ value: 29
127
+ root:
128
+ value: data
129
+ save_every_n_epochs:
130
+ value: 5
131
+ scheduler:
132
+ value: linear_warmup_cosine_lr
133
+ seed:
134
+ value: 42
135
+ stage1_path:
136
+ value: /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt
137
+ stage2_path:
138
+ value: ""
139
+ strategy:
140
+ value: deepspeed
141
+ text_max_len:
142
+ value: 128
143
+ use_wandb_logger:
144
+ value: true
145
+ warmup_lr:
146
+ value: 1e-06
147
+ warmup_steps:
148
+ value: 1000
149
+ weight_decay:
150
+ value: 0.05
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_152253-wgyb9m42/files/output.log ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2_07041521 exists and is not empty.
2
+ Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.
3
+ LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
4
+
5
+ | Name | Type | Params | Mode
6
+ -------------------------------------------
7
+ 0 | blip2 | Blip2OPT | 7.9 B | train
8
+ -------------------------------------------
9
+ 104 M Trainable params
10
+ 7.8 B Non-trainable params
11
+ 7.9 B Total params
12
+ 31,459.025Total estimated model params size (MB)
13
+ 174 Modules in train mode
14
+ 1203 Modules in eval mode
15
+ Sanity Checking DataLoader 0: 0%| | 0/2 [00:00<?, ?it/s]
16
+ Traceback (most recent call last):
17
+ File "/nas/shared/kilab/wangyujia/ProtT3/stage2.py", line 130, in <module>
18
+ main(get_args())
19
+ File "/nas/shared/kilab/wangyujia/ProtT3/stage2.py", line 92, in main
20
+ trainer.fit(model, datamodule=dm)#, ckpt_path=args.ckpt_path)
21
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
22
+ call._call_and_handle_interrupt(
23
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py", line 47, in _call_and_handle_interrupt
24
+ return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
25
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/launchers/subprocess_script.py", line 105, in launch
26
+ return function(*args, **kwargs)
27
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 599, in _fit_impl
28
+ self._run(model, ckpt_path=ckpt_path)
29
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1012, in _run
30
+ results = self._run_stage()
31
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1054, in _run_stage
32
+ self._run_sanity_check()
33
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1083, in _run_sanity_check
34
+ val_loop.run()
35
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/loops/utilities.py", line 179, in _decorator
36
+ return loop_run(self, *args, **kwargs)
37
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/loops/evaluation_loop.py", line 145, in run
38
+ self._evaluation_step(batch, batch_idx, dataloader_idx, dataloader_iter)
39
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/loops/evaluation_loop.py", line 437, in _evaluation_step
40
+ output = call._call_strategy_hook(trainer, hook_name, *step_args)
41
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py", line 328, in _call_strategy_hook
42
+ output = fn(*args, **kwargs)
43
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py", line 411, in validation_step
44
+ return self._forward_redirection(self.model, self.lightning_module, "validation_step", *args, **kwargs)
45
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py", line 641, in __call__
46
+ wrapper_output = wrapper_module(*args, **kwargs)
47
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
48
+ return self._call_impl(*args, **kwargs)
49
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
50
+ return forward_call(*args, **kwargs)
51
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/deepspeed/utils/nvtx.py", line 20, in wrapped_fn
52
+ ret_val = func(*args, **kwargs)
53
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/deepspeed/runtime/engine.py", line 2054, in forward
54
+ loss = self.module(*inputs, **kwargs)
55
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
56
+ return self._call_impl(*args, **kwargs)
57
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1845, in _call_impl
58
+ return inner()
59
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1793, in inner
60
+ result = forward_call(*args, **kwargs)
61
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py", line 634, in wrapped_forward
62
+ out = method(*_args, **_kwargs)
63
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
64
+ return func(*args, **kwargs)
65
+ File "/nas/shared/kilab/wangyujia/ProtT3/model/blip2_stage2.py", line 119, in validation_step
66
+ loss = self.blip2(batch)
67
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
68
+ return self._call_impl(*args, **kwargs)
69
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
70
+ return forward_call(*args, **kwargs)
71
+ File "/nas/shared/kilab/wangyujia/ProtT3/model/blip2_opt.py", line 222, in forward
72
+ prot_batch, prompt_batch, text_batch = batch
73
+ ValueError: not enough values to unpack (expected 3, got 2)
74
+ [rank0]: Traceback (most recent call last):
75
+ [rank0]: File "/nas/shared/kilab/wangyujia/ProtT3/stage2.py", line 130, in <module>
76
+ [rank0]: main(get_args())
77
+ [rank0]: File "/nas/shared/kilab/wangyujia/ProtT3/stage2.py", line 92, in main
78
+ [rank0]: trainer.fit(model, datamodule=dm)#, ckpt_path=args.ckpt_path)
79
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
80
+ [rank0]: call._call_and_handle_interrupt(
81
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py", line 47, in _call_and_handle_interrupt
82
+ [rank0]: return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
83
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/launchers/subprocess_script.py", line 105, in launch
84
+ [rank0]: return function(*args, **kwargs)
85
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 599, in _fit_impl
86
+ [rank0]: self._run(model, ckpt_path=ckpt_path)
87
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1012, in _run
88
+ [rank0]: results = self._run_stage()
89
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1054, in _run_stage
90
+ [rank0]: self._run_sanity_check()
91
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1083, in _run_sanity_check
92
+ [rank0]: val_loop.run()
93
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/loops/utilities.py", line 179, in _decorator
94
+ [rank0]: return loop_run(self, *args, **kwargs)
95
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/loops/evaluation_loop.py", line 145, in run
96
+ [rank0]: self._evaluation_step(batch, batch_idx, dataloader_idx, dataloader_iter)
97
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/loops/evaluation_loop.py", line 437, in _evaluation_step
98
+ [rank0]: output = call._call_strategy_hook(trainer, hook_name, *step_args)
99
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py", line 328, in _call_strategy_hook
100
+ [rank0]: output = fn(*args, **kwargs)
101
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py", line 411, in validation_step
102
+ [rank0]: return self._forward_redirection(self.model, self.lightning_module, "validation_step", *args, **kwargs)
103
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py", line 641, in __call__
104
+ [rank0]: wrapper_output = wrapper_module(*args, **kwargs)
105
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
106
+ [rank0]: return self._call_impl(*args, **kwargs)
107
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
108
+ [rank0]: return forward_call(*args, **kwargs)
109
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/deepspeed/utils/nvtx.py", line 20, in wrapped_fn
110
+ [rank0]: ret_val = func(*args, **kwargs)
111
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/deepspeed/runtime/engine.py", line 2054, in forward
112
+ [rank0]: loss = self.module(*inputs, **kwargs)
113
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
114
+ [rank0]: return self._call_impl(*args, **kwargs)
115
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1845, in _call_impl
116
+ [rank0]: return inner()
117
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1793, in inner
118
+ [rank0]: result = forward_call(*args, **kwargs)
119
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py", line 634, in wrapped_forward
120
+ [rank0]: out = method(*_args, **_kwargs)
121
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
122
+ [rank0]: return func(*args, **kwargs)
123
+ [rank0]: File "/nas/shared/kilab/wangyujia/ProtT3/model/blip2_stage2.py", line 119, in validation_step
124
+ [rank0]: loss = self.blip2(batch)
125
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
126
+ [rank0]: return self._call_impl(*args, **kwargs)
127
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
128
+ [rank0]: return forward_call(*args, **kwargs)
129
+ [rank0]: File "/nas/shared/kilab/wangyujia/ProtT3/model/blip2_opt.py", line 222, in forward
130
+ [rank0]: prot_batch, prompt_batch, text_batch = batch
131
+ [rank0]: ValueError: not enough values to unpack (expected 3, got 2)
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_152253-wgyb9m42/files/requirements.txt ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pydantic_core==2.33.2
2
+ psutil==7.0.0
3
+ nvidia-cuda-nvrtc-cu12==12.4.127
4
+ mpmath==1.3.0
5
+ tzdata==2025.2
6
+ contexttimer==0.3.3
7
+ cycler==0.12.1
8
+ python-magic==0.4.27
9
+ pexpect==4.9.0
10
+ sympy==1.13.1
11
+ wrapt==1.17.2
12
+ marisa-trie==1.2.1
13
+ langcodes==3.5.0
14
+ nvidia-nvtx-cu12==12.4.127
15
+ ipython==8.36.0
16
+ opencv-python-headless==4.5.5.64
17
+ MarkupSafe==3.0.2
18
+ jsonschema-specifications==2025.4.1
19
+ wasabi==1.1.3
20
+ blinker==1.9.0
21
+ cfgv==3.4.0
22
+ numpy==2.2.6
23
+ idna==3.10
24
+ nvidia-cufile-cu12==1.11.1.6
25
+ ninja==1.11.1.4
26
+ nvidia-nccl-cu12==2.21.5
27
+ networkx==3.4.2
28
+ certifi==2025.4.26
29
+ deepspeed==0.16.10+b666844f
30
+ pure_eval==0.2.3
31
+ packaging==24.2
32
+ nltk==3.9.1
33
+ contourpy==1.3.2
34
+ pre_commit==4.2.0
35
+ nodeenv==1.9.1
36
+ setuptools==78.1.1
37
+ annotated-types==0.7.0
38
+ multidict==6.4.4
39
+ requests==2.32.3
40
+ tornado==6.5.1
41
+ triton==3.2.0
42
+ pillow==11.2.1
43
+ decord==0.6.0
44
+ shellingham==1.5.4
45
+ streamlit==1.45.1
46
+ pydeck==0.9.1
47
+ confection==0.1.5
48
+ exceptiongroup==1.3.0
49
+ prompt_toolkit==3.0.51
50
+ text-unidecode==1.3
51
+ nvidia-cufft-cu12==11.2.1.3
52
+ antlr4-python3-runtime==4.9.3
53
+ fairscale==0.4.4
54
+ rouge_score==0.1.2
55
+ nvidia-cudnn-cu12==9.1.0.70
56
+ tqdm==4.67.1
57
+ rich==14.0.0
58
+ frozenlist==1.6.0
59
+ webencodings==0.5.1
60
+ altair==5.5.0
61
+ opendatasets==0.1.22
62
+ nvidia-curand-cu12==10.3.5.147
63
+ protobuf==6.31.0
64
+ asttokens==3.0.0
65
+ wheel==0.45.1
66
+ hf-xet==1.1.2
67
+ weasel==0.4.1
68
+ aiosignal==1.3.2
69
+ absl-py==2.2.2
70
+ thinc==8.3.6
71
+ torchvision==0.21.0
72
+ pandas==2.2.3
73
+ fonttools==4.58.0
74
+ bleach==6.2.0
75
+ typing-inspection==0.4.1
76
+ ftfy==6.3.1
77
+ typing_extensions==4.13.2
78
+ nvidia-ml-py==12.575.51
79
+ python-slugify==8.0.4
80
+ lightning-utilities==0.14.3
81
+ py-cpuinfo==9.0.0
82
+ smmap==5.0.2
83
+ regex==2024.11.6
84
+ scikit-image==0.25.2
85
+ iopath==0.1.10
86
+ spacy-legacy==3.0.12
87
+ hjson==3.1.0
88
+ executing==2.2.0
89
+ kiwisolver==1.4.8
90
+ scipy==1.15.3
91
+ aiohappyeyeballs==2.6.1
92
+ toml==0.10.2
93
+ jedi==0.19.2
94
+ GitPython==3.1.44
95
+ ptyprocess==0.7.0
96
+ kaggle==1.7.4.5
97
+ braceexpand==0.1.7
98
+ wcwidth==0.2.13
99
+ nvidia-cuda-runtime-cu12==12.4.127
100
+ pytorch-lightning==2.5.1.post0
101
+ Jinja2==3.1.6
102
+ urllib3==2.4.0
103
+ watchdog==6.0.0
104
+ filelock==3.18.0
105
+ propcache==0.3.1
106
+ torch==2.6.0
107
+ nvidia-cusparse-cu12==12.3.1.170
108
+ cymem==2.0.11
109
+ nvidia-cusolver-cu12==11.6.1.9
110
+ murmurhash==1.0.13
111
+ catalogue==2.0.10
112
+ yarl==1.20.0
113
+ charset-normalizer==3.4.2
114
+ gitdb==4.0.12
115
+ matplotlib==3.10.3
116
+ portalocker==3.1.1
117
+ platformdirs==4.3.8
118
+ async-timeout==5.0.1
119
+ parso==0.8.4
120
+ markdown-it-py==3.0.0
121
+ omegaconf==2.3.0
122
+ cloudpathlib==0.21.1
123
+ nvidia-cusparselt-cu12==0.6.2
124
+ spacy-loggers==1.0.5
125
+ srsly==2.5.1
126
+ identify==2.6.12
127
+ rpds-py==0.25.1
128
+ spacy==3.8.7
129
+ matplotlib-inline==0.1.7
130
+ smart-open==7.1.0
131
+ pydantic==2.11.5
132
+ mdurl==0.1.2
133
+ virtualenv==20.31.2
134
+ pytz==2025.2
135
+ pycocotools==2.0.8
136
+ six==1.17.0
137
+ decorator==5.2.1
138
+ referencing==0.36.2
139
+ sentencepiece==0.2.0
140
+ PyYAML==6.0.2
141
+ pycocoevalcap==1.2
142
+ imageio==2.37.0
143
+ distlib==0.3.9
144
+ pyarrow==20.0.0
145
+ tenacity==9.1.2
146
+ language_data==1.3.0
147
+ nvidia-cuda-cupti-cu12==12.4.127
148
+ blis==1.3.0
149
+ Pygments==2.19.1
150
+ tifffile==2025.5.10
151
+ pyparsing==3.2.3
152
+ cachetools==5.5.2
153
+ safetensors==0.5.3
154
+ attrs==25.3.0
155
+ webdataset==0.2.111
156
+ plotly==6.1.1
157
+ nvidia-cublas-cu12==12.4.5.8
158
+ timm==0.4.12
159
+ torchmetrics==1.7.1
160
+ nvidia-nvjitlink-cu12==12.4.127
161
+ stack-data==0.6.3
162
+ python-dateutil==2.9.0.post0
163
+ lazy_loader==0.4
164
+ traitlets==5.14.3
165
+ einops==0.8.1
166
+ salesforce-lavis==1.0.2
167
+ joblib==1.5.1
168
+ msgpack==1.1.0
169
+ tokenizers==0.21.1
170
+ sentry-sdk==2.29.1
171
+ oss2==2.15.0
172
+ setproctitle==1.3.6
173
+ pip==25.1.1
174
+ cffi==1.17.1
175
+ transformers==4.52.3
176
+ narwhals==1.41.0
177
+ aliyun-python-sdk-core==2.16.0
178
+ jsonschema==4.24.0
179
+ flash-attn==2.7.1.post1
180
+ preshed==3.0.10
181
+ multiprocess==0.70.16
182
+ cryptography==45.0.3
183
+ aliyun-python-sdk-kms==2.16.5
184
+ scikit-learn==1.6.1
185
+ huggingface-hub==0.32.1
186
+ crcmod==1.7
187
+ typer==0.16.0
188
+ web.py==0.62
189
+ docker-pycreds==0.4.0
190
+ xxhash==3.5.0
191
+ bigmodelvis==0.0.1
192
+ datasets==3.6.0
193
+ more-itertools==10.7.0
194
+ yacs==0.1.8
195
+ jmespath==0.10.0
196
+ aiohttp==3.12.2
197
+ opencv-python==4.11.0.86
198
+ pycparser==2.22
199
+ threadpoolctl==3.6.0
200
+ jaraco.functools==4.1.0
201
+ click==8.2.1
202
+ wandb==0.19.11
203
+ opendelta==0.3.2
204
+ pycryptodome==3.23.0
205
+ pathlib==1.0.1
206
+ dill==0.3.8
207
+ fsspec==2025.3.0
208
+ delta-center-client==0.0.4
209
+ cheroot==10.0.1
210
+ typing_extensions==4.12.2
211
+ platformdirs==4.2.2
212
+ jaraco.text==3.12.1
213
+ packaging==24.2
214
+ inflect==7.3.1
215
+ jaraco.context==5.3.0
216
+ wheel==0.45.1
217
+ typeguard==4.3.0
218
+ more-itertools==10.3.0
219
+ tomli==2.0.1
220
+ importlib_metadata==8.0.0
221
+ backports.tarfile==1.2.0
222
+ zipp==3.19.2
223
+ jaraco.collections==5.1.0
224
+ autocommand==2.2.2
225
+ jaraco.functools==4.0.1
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_152253-wgyb9m42/files/wandb-metadata.json ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.0",
4
+ "startedAt": "2025-07-04T07:22:53.352152Z",
5
+ "args": [
6
+ "--devices",
7
+ "0,1,2,3,4,5,6,7",
8
+ "--mode",
9
+ "train",
10
+ "--filename",
11
+ "stage2_07041521",
12
+ "--num_query_token",
13
+ "8",
14
+ "--save_every_n_epochs",
15
+ "5",
16
+ "--max_epochs",
17
+ "15",
18
+ "--batch_size",
19
+ "32",
20
+ "--precision",
21
+ "bf16-mixed",
22
+ "--num_workers",
23
+ "8",
24
+ "--plm_model",
25
+ "/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m",
26
+ "--bert_name",
27
+ "/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft",
28
+ "--llm_name",
29
+ "/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300",
30
+ "--llm_tune",
31
+ "mid_lora",
32
+ "--mix_dataset",
33
+ "--stage1_path",
34
+ "/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt",
35
+ "--use_wandb_logger"
36
+ ],
37
+ "program": "/nas/shared/kilab/wangyujia/ProtT3/stage2.py",
38
+ "codePath": "stage2.py",
39
+ "email": "gia0603yucca@gmail.com",
40
+ "root": "./all_checkpoints/stage2_07041521/",
41
+ "host": "dsw-265304-b8d7644bb-bs7r7",
42
+ "executable": "/root/miniconda3/envs/protT3/bin/python",
43
+ "codePathLocal": "stage2.py",
44
+ "cpu_count": 64,
45
+ "cpu_count_logical": 64,
46
+ "gpu": "NVIDIA A800-SXM4-80GB",
47
+ "gpu_count": 8,
48
+ "disk": {
49
+ "/": {
50
+ "total": "1623302262784",
51
+ "used": "1266487296"
52
+ }
53
+ },
54
+ "memory": {
55
+ "total": "549755813888"
56
+ },
57
+ "cpu": {
58
+ "count": 64,
59
+ "countLogical": 64
60
+ },
61
+ "gpu_nvidia": [
62
+ {
63
+ "name": "NVIDIA A800-SXM4-80GB",
64
+ "memoryTotal": "85198045184",
65
+ "architecture": "Ampere"
66
+ },
67
+ {
68
+ "name": "NVIDIA A800-SXM4-80GB",
69
+ "memoryTotal": "85198045184",
70
+ "architecture": "Ampere"
71
+ },
72
+ {
73
+ "name": "NVIDIA A800-SXM4-80GB",
74
+ "memoryTotal": "85198045184",
75
+ "architecture": "Ampere"
76
+ },
77
+ {
78
+ "name": "NVIDIA A800-SXM4-80GB",
79
+ "memoryTotal": "85198045184",
80
+ "architecture": "Ampere"
81
+ },
82
+ {
83
+ "name": "NVIDIA A800-SXM4-80GB",
84
+ "memoryTotal": "85198045184",
85
+ "architecture": "Ampere"
86
+ },
87
+ {
88
+ "name": "NVIDIA A800-SXM4-80GB",
89
+ "memoryTotal": "85198045184",
90
+ "architecture": "Ampere"
91
+ },
92
+ {
93
+ "name": "NVIDIA A800-SXM4-80GB",
94
+ "memoryTotal": "85198045184",
95
+ "architecture": "Ampere"
96
+ },
97
+ {
98
+ "name": "NVIDIA A800-SXM4-80GB",
99
+ "memoryTotal": "85198045184",
100
+ "architecture": "Ampere"
101
+ }
102
+ ],
103
+ "cudaVersion": "12.1"
104
+ }
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_152253-wgyb9m42/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":21}}
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_152253-wgyb9m42/logs/debug-internal.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-07-04T15:22:53.388896985+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage2_07041521/wandb/run-20250704_152253-wgyb9m42/logs/debug-core.log"}
2
+ {"time":"2025-07-04T15:22:56.842915105+08:00","level":"INFO","msg":"created new stream","id":"wgyb9m42"}
3
+ {"time":"2025-07-04T15:22:56.861110866+08:00","level":"INFO","msg":"stream: started","id":"wgyb9m42"}
4
+ {"time":"2025-07-04T15:22:56.861147295+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"wgyb9m42"}
5
+ {"time":"2025-07-04T15:22:56.861334961+08:00","level":"INFO","msg":"sender: started","stream_id":"wgyb9m42"}
6
+ {"time":"2025-07-04T15:22:56.86133724+08:00","level":"INFO","msg":"handler: started","stream_id":"wgyb9m42"}
7
+ {"time":"2025-07-04T15:22:58.128508153+08:00","level":"INFO","msg":"Starting system monitor"}
8
+ {"time":"2025-07-04T15:23:14.819973413+08:00","level":"INFO","msg":"stream: closing","id":"wgyb9m42"}
9
+ {"time":"2025-07-04T15:23:14.820051202+08:00","level":"INFO","msg":"Stopping system monitor"}
10
+ {"time":"2025-07-04T15:23:14.852874205+08:00","level":"INFO","msg":"Stopped system monitor"}
11
+ {"time":"2025-07-04T15:23:17.850226248+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
12
+ {"time":"2025-07-04T15:23:18.890479762+08:00","level":"INFO","msg":"handler: closed","stream_id":"wgyb9m42"}
13
+ {"time":"2025-07-04T15:23:18.890514395+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"wgyb9m42"}
14
+ {"time":"2025-07-04T15:23:18.890532968+08:00","level":"INFO","msg":"sender: closed","stream_id":"wgyb9m42"}
15
+ {"time":"2025-07-04T15:23:18.901418972+08:00","level":"INFO","msg":"stream: closed","id":"wgyb9m42"}
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_152253-wgyb9m42/logs/debug.log ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-07-04 15:22:53,343 INFO MainThread:41518 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
2
+ 2025-07-04 15:22:53,343 INFO MainThread:41518 [wandb_setup.py:_flush():70] Configure stats pid to 41518
3
+ 2025-07-04 15:22:53,343 INFO MainThread:41518 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
4
+ 2025-07-04 15:22:53,343 INFO MainThread:41518 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
5
+ 2025-07-04 15:22:53,343 INFO MainThread:41518 [wandb_setup.py:_flush():70] Loading settings from environment variables
6
+ 2025-07-04 15:22:53,343 INFO MainThread:41518 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage2_07041521/wandb/run-20250704_152253-wgyb9m42/logs/debug.log
7
+ 2025-07-04 15:22:53,343 INFO MainThread:41518 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage2_07041521/wandb/run-20250704_152253-wgyb9m42/logs/debug-internal.log
8
+ 2025-07-04 15:22:53,344 INFO MainThread:41518 [wandb_init.py:init():852] calling init triggers
9
+ 2025-07-04 15:22:53,344 INFO MainThread:41518 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-07-04 15:22:53,344 INFO MainThread:41518 [wandb_init.py:init():893] starting backend
12
+ 2025-07-04 15:22:53,344 INFO MainThread:41518 [wandb_init.py:init():897] sending inform_init request
13
+ 2025-07-04 15:22:53,346 INFO MainThread:41518 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-07-04 15:22:53,346 INFO MainThread:41518 [wandb_init.py:init():907] backend started and connected
15
+ 2025-07-04 15:22:53,353 INFO MainThread:41518 [wandb_init.py:init():1005] updated telemetry
16
+ 2025-07-04 15:22:53,357 INFO MainThread:41518 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
17
+ 2025-07-04 15:22:58,087 INFO MainThread:41518 [wandb_init.py:init():1104] starting run threads in backend
18
+ 2025-07-04 15:22:58,276 INFO MainThread:41518 [wandb_run.py:_console_start():2573] atexit reg
19
+ 2025-07-04 15:22:58,276 INFO MainThread:41518 [wandb_run.py:_redirect():2421] redirect: wrap_raw
20
+ 2025-07-04 15:22:58,282 INFO MainThread:41518 [wandb_run.py:_redirect():2490] Wrapping output streams.
21
+ 2025-07-04 15:22:58,282 INFO MainThread:41518 [wandb_run.py:_redirect():2513] Redirects installed.
22
+ 2025-07-04 15:22:58,284 INFO MainThread:41518 [wandb_init.py:init():1150] run started, returning control to user process
23
+ 2025-07-04 15:23:06,942 INFO MainThread:41518 [wandb_run.py:_config_callback():1436] config_cb None None {'filename': 'stage2_07041521', 'seed': 42, 'mode': 'train', 'strategy': 'deepspeed', 'accelerator': 'gpu', 'devices': '0,1,2,3,4,5,6,7', 'precision': 'bf16-mixed', 'max_epochs': 15, 'accumulate_grad_batches': 1, 'check_val_every_n_epoch': 1, 'enable_flash': False, 'use_wandb_logger': True, 'mix_dataset': True, 'save_every_n_epochs': 5, 'bert_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft', 'cross_attention_freq': 2, 'num_query_token': 8, 'llm_name': '/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300', 'num_beams': 5, 'do_sample': False, 'max_inference_len': 128, 'min_inference_len': 1, 'llm_tune': 'mid_lora', 'peft_config': '', 'peft_dir': '', 'plm_model': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m', 'plm_tune': 'freeze', 'lora_r': 8, 'lora_alpha': 16, 'lora_dropout': 0.1, 'enbale_gradient_checkpointing': False, 'weight_decay': 0.05, 'init_lr': 0.0001, 'min_lr': 1e-05, 'warmup_lr': 1e-06, 'warmup_steps': 1000, 'lr_decay_rate': 0.9, 'scheduler': 'linear_warmup_cosine_lr', 'stage1_path': '/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt', 'stage2_path': '', 'init_checkpoint': '', 'caption_eval_epoch': 10, 'num_workers': 8, 'batch_size': 32, 'inference_batch_size': 4, 'root': 'data', 'text_max_len': 128, 'q_max_len': 29, 'a_max_len': 36, 'prot_max_len': 1024, 'prompt': 'The protein has the following properties: ', 'filter_side_qa': False}
24
+ 2025-07-04 15:23:14,819 INFO MsgRouterThr:41518 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_152253-wgyb9m42/run-wgyb9m42.wandb ADDED
Binary file (21.3 kB). View file
 
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153023-ggclx68a/files/config.yaml ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.19.11
4
+ m:
5
+ - "1": trainer/global_step
6
+ "6":
7
+ - 3
8
+ "7": []
9
+ python_version: 3.10.0
10
+ t:
11
+ "1":
12
+ - 1
13
+ - 5
14
+ - 9
15
+ - 11
16
+ - 33
17
+ - 41
18
+ - 49
19
+ - 53
20
+ - 55
21
+ - 63
22
+ - 103
23
+ "2":
24
+ - 1
25
+ - 5
26
+ - 9
27
+ - 11
28
+ - 33
29
+ - 41
30
+ - 49
31
+ - 53
32
+ - 55
33
+ - 63
34
+ - 103
35
+ "3":
36
+ - 7
37
+ - 23
38
+ - 55
39
+ - 66
40
+ "4": 3.10.0
41
+ "5": 0.19.11
42
+ "6": 4.52.3
43
+ "8":
44
+ - 5
45
+ "12": 0.19.11
46
+ "13": linux-x86_64
47
+ a_max_len:
48
+ value: 36
49
+ accelerator:
50
+ value: gpu
51
+ accumulate_grad_batches:
52
+ value: 1
53
+ batch_size:
54
+ value: 32
55
+ bert_name:
56
+ value: /nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft
57
+ caption_eval_epoch:
58
+ value: 10
59
+ check_val_every_n_epoch:
60
+ value: 1
61
+ cross_attention_freq:
62
+ value: 2
63
+ devices:
64
+ value: 0,1,2,3,4,5,6,7
65
+ do_sample:
66
+ value: false
67
+ enable_flash:
68
+ value: false
69
+ enbale_gradient_checkpointing:
70
+ value: false
71
+ filename:
72
+ value: stage2_07041521
73
+ filter_side_qa:
74
+ value: false
75
+ inference_batch_size:
76
+ value: 4
77
+ init_checkpoint:
78
+ value: ""
79
+ init_lr:
80
+ value: 0.0001
81
+ llm_name:
82
+ value: /oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300
83
+ llm_tune:
84
+ value: mid_lora
85
+ lora_alpha:
86
+ value: 16
87
+ lora_dropout:
88
+ value: 0.1
89
+ lora_r:
90
+ value: 8
91
+ lr_decay_rate:
92
+ value: 0.9
93
+ max_epochs:
94
+ value: 15
95
+ max_inference_len:
96
+ value: 128
97
+ min_inference_len:
98
+ value: 1
99
+ min_lr:
100
+ value: 1e-05
101
+ mix_dataset:
102
+ value: true
103
+ mode:
104
+ value: train
105
+ num_beams:
106
+ value: 5
107
+ num_query_token:
108
+ value: 8
109
+ num_workers:
110
+ value: 8
111
+ peft_config:
112
+ value: ""
113
+ peft_dir:
114
+ value: ""
115
+ plm_model:
116
+ value: /nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m
117
+ plm_tune:
118
+ value: freeze
119
+ precision:
120
+ value: bf16-mixed
121
+ prompt:
122
+ value: 'The protein has the following properties: '
123
+ prot_max_len:
124
+ value: 1024
125
+ q_max_len:
126
+ value: 29
127
+ root:
128
+ value: data
129
+ save_every_n_epochs:
130
+ value: 5
131
+ scheduler:
132
+ value: linear_warmup_cosine_lr
133
+ seed:
134
+ value: 42
135
+ stage1_path:
136
+ value: /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt
137
+ stage2_path:
138
+ value: ""
139
+ strategy:
140
+ value: deepspeed
141
+ text_max_len:
142
+ value: 128
143
+ use_wandb_logger:
144
+ value: true
145
+ warmup_lr:
146
+ value: 1e-06
147
+ warmup_steps:
148
+ value: 1000
149
+ weight_decay:
150
+ value: 0.05
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153023-ggclx68a/files/output.log ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2_07041521 exists and is not empty.
2
+ Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.
3
+ LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
4
+
5
+ | Name | Type | Params | Mode
6
+ -------------------------------------------
7
+ 0 | blip2 | Blip2OPT | 7.9 B | train
8
+ -------------------------------------------
9
+ 104 M Trainable params
10
+ 7.8 B Non-trainable params
11
+ 7.9 B Total params
12
+ 31,459.025Total estimated model params size (MB)
13
+ 174 Modules in train mode
14
+ 1203 Modules in eval mode
15
+ Sanity Checking: | | 0/? [00:00<?, ?it/s]
16
+ Traceback (most recent call last):
17
+ File "/nas/shared/kilab/wangyujia/ProtT3/stage2.py", line 130, in <module>
18
+ main(get_args())
19
+ File "/nas/shared/kilab/wangyujia/ProtT3/stage2.py", line 92, in main
20
+ trainer.fit(model, datamodule=dm)#, ckpt_path=args.ckpt_path)
21
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
22
+ call._call_and_handle_interrupt(
23
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py", line 47, in _call_and_handle_interrupt
24
+ return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
25
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/launchers/subprocess_script.py", line 105, in launch
26
+ return function(*args, **kwargs)
27
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 599, in _fit_impl
28
+ self._run(model, ckpt_path=ckpt_path)
29
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1012, in _run
30
+ results = self._run_stage()
31
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1054, in _run_stage
32
+ self._run_sanity_check()
33
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1083, in _run_sanity_check
34
+ val_loop.run()
35
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/loops/utilities.py", line 179, in _decorator
36
+ return loop_run(self, *args, **kwargs)
37
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/loops/evaluation_loop.py", line 138, in run
38
+ batch, batch_idx, dataloader_idx = next(data_fetcher)
39
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/loops/fetchers.py", line 134, in __next__
40
+ batch = super().__next__()
41
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/loops/fetchers.py", line 61, in __next__
42
+ batch = next(self.iterator)
43
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/utilities/combined_loader.py", line 341, in __next__
44
+ out = next(self._iterator)
45
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/utilities/combined_loader.py", line 142, in __next__
46
+ out = next(self.iterators[0])
47
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 708, in __next__
48
+ data = self._next_data()
49
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1480, in _next_data
50
+ return self._process_data(data)
51
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1505, in _process_data
52
+ data.reraise()
53
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/_utils.py", line 733, in reraise
54
+ raise exception
55
+ NameError: Caught NameError in DataLoader worker process 0.
56
+ Original Traceback (most recent call last):
57
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 349, in _worker_loop
58
+ data = fetcher.fetch(index) # type: ignore[possibly-undefined]
59
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
60
+ data = [self.dataset[idx] for idx in possibly_batched_index]
61
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 52, in <listcomp>
62
+ data = [self.dataset[idx] for idx in possibly_batched_index]
63
+ File "/nas/shared/kilab/wangyujia/ProtT3/data_provider/stage1_dm.py", line 397, in __getitem__
64
+ print(return_prompt)
65
+ NameError: name 'return_prompt' is not defined
66
+
67
+ [rank0]: Traceback (most recent call last):
68
+ [rank0]: File "/nas/shared/kilab/wangyujia/ProtT3/stage2.py", line 130, in <module>
69
+ [rank0]: main(get_args())
70
+ [rank0]: File "/nas/shared/kilab/wangyujia/ProtT3/stage2.py", line 92, in main
71
+ [rank0]: trainer.fit(model, datamodule=dm)#, ckpt_path=args.ckpt_path)
72
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
73
+ [rank0]: call._call_and_handle_interrupt(
74
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py", line 47, in _call_and_handle_interrupt
75
+ [rank0]: return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
76
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/launchers/subprocess_script.py", line 105, in launch
77
+ [rank0]: return function(*args, **kwargs)
78
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 599, in _fit_impl
79
+ [rank0]: self._run(model, ckpt_path=ckpt_path)
80
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1012, in _run
81
+ [rank0]: results = self._run_stage()
82
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1054, in _run_stage
83
+ [rank0]: self._run_sanity_check()
84
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1083, in _run_sanity_check
85
+ [rank0]: val_loop.run()
86
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/loops/utilities.py", line 179, in _decorator
87
+ [rank0]: return loop_run(self, *args, **kwargs)
88
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/loops/evaluation_loop.py", line 138, in run
89
+ [rank0]: batch, batch_idx, dataloader_idx = next(data_fetcher)
90
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/loops/fetchers.py", line 134, in __next__
91
+ [rank0]: batch = super().__next__()
92
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/loops/fetchers.py", line 61, in __next__
93
+ [rank0]: batch = next(self.iterator)
94
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/utilities/combined_loader.py", line 341, in __next__
95
+ [rank0]: out = next(self._iterator)
96
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/utilities/combined_loader.py", line 142, in __next__
97
+ [rank0]: out = next(self.iterators[0])
98
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 708, in __next__
99
+ [rank0]: data = self._next_data()
100
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1480, in _next_data
101
+ [rank0]: return self._process_data(data)
102
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1505, in _process_data
103
+ [rank0]: data.reraise()
104
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/_utils.py", line 733, in reraise
105
+ [rank0]: raise exception
106
+ [rank0]: NameError: Caught NameError in DataLoader worker process 0.
107
+ [rank0]: Original Traceback (most recent call last):
108
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 349, in _worker_loop
109
+ [rank0]: data = fetcher.fetch(index) # type: ignore[possibly-undefined]
110
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
111
+ [rank0]: data = [self.dataset[idx] for idx in possibly_batched_index]
112
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 52, in <listcomp>
113
+ [rank0]: data = [self.dataset[idx] for idx in possibly_batched_index]
114
+ [rank0]: File "/nas/shared/kilab/wangyujia/ProtT3/data_provider/stage1_dm.py", line 397, in __getitem__
115
+ [rank0]: print(return_prompt)
116
+ [rank0]: NameError: name 'return_prompt' is not defined
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153023-ggclx68a/files/requirements.txt ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pydantic_core==2.33.2
2
+ psutil==7.0.0
3
+ nvidia-cuda-nvrtc-cu12==12.4.127
4
+ mpmath==1.3.0
5
+ tzdata==2025.2
6
+ contexttimer==0.3.3
7
+ cycler==0.12.1
8
+ python-magic==0.4.27
9
+ pexpect==4.9.0
10
+ sympy==1.13.1
11
+ wrapt==1.17.2
12
+ marisa-trie==1.2.1
13
+ langcodes==3.5.0
14
+ nvidia-nvtx-cu12==12.4.127
15
+ ipython==8.36.0
16
+ opencv-python-headless==4.5.5.64
17
+ MarkupSafe==3.0.2
18
+ jsonschema-specifications==2025.4.1
19
+ wasabi==1.1.3
20
+ blinker==1.9.0
21
+ cfgv==3.4.0
22
+ numpy==2.2.6
23
+ idna==3.10
24
+ nvidia-cufile-cu12==1.11.1.6
25
+ ninja==1.11.1.4
26
+ nvidia-nccl-cu12==2.21.5
27
+ networkx==3.4.2
28
+ certifi==2025.4.26
29
+ deepspeed==0.16.10+b666844f
30
+ pure_eval==0.2.3
31
+ packaging==24.2
32
+ nltk==3.9.1
33
+ contourpy==1.3.2
34
+ pre_commit==4.2.0
35
+ nodeenv==1.9.1
36
+ setuptools==78.1.1
37
+ annotated-types==0.7.0
38
+ multidict==6.4.4
39
+ requests==2.32.3
40
+ tornado==6.5.1
41
+ triton==3.2.0
42
+ pillow==11.2.1
43
+ decord==0.6.0
44
+ shellingham==1.5.4
45
+ streamlit==1.45.1
46
+ pydeck==0.9.1
47
+ confection==0.1.5
48
+ exceptiongroup==1.3.0
49
+ prompt_toolkit==3.0.51
50
+ text-unidecode==1.3
51
+ nvidia-cufft-cu12==11.2.1.3
52
+ antlr4-python3-runtime==4.9.3
53
+ fairscale==0.4.4
54
+ rouge_score==0.1.2
55
+ nvidia-cudnn-cu12==9.1.0.70
56
+ tqdm==4.67.1
57
+ rich==14.0.0
58
+ frozenlist==1.6.0
59
+ webencodings==0.5.1
60
+ altair==5.5.0
61
+ opendatasets==0.1.22
62
+ nvidia-curand-cu12==10.3.5.147
63
+ protobuf==6.31.0
64
+ asttokens==3.0.0
65
+ wheel==0.45.1
66
+ hf-xet==1.1.2
67
+ weasel==0.4.1
68
+ aiosignal==1.3.2
69
+ absl-py==2.2.2
70
+ thinc==8.3.6
71
+ torchvision==0.21.0
72
+ pandas==2.2.3
73
+ fonttools==4.58.0
74
+ bleach==6.2.0
75
+ typing-inspection==0.4.1
76
+ ftfy==6.3.1
77
+ typing_extensions==4.13.2
78
+ nvidia-ml-py==12.575.51
79
+ python-slugify==8.0.4
80
+ lightning-utilities==0.14.3
81
+ py-cpuinfo==9.0.0
82
+ smmap==5.0.2
83
+ regex==2024.11.6
84
+ scikit-image==0.25.2
85
+ iopath==0.1.10
86
+ spacy-legacy==3.0.12
87
+ hjson==3.1.0
88
+ executing==2.2.0
89
+ kiwisolver==1.4.8
90
+ scipy==1.15.3
91
+ aiohappyeyeballs==2.6.1
92
+ toml==0.10.2
93
+ jedi==0.19.2
94
+ GitPython==3.1.44
95
+ ptyprocess==0.7.0
96
+ kaggle==1.7.4.5
97
+ braceexpand==0.1.7
98
+ wcwidth==0.2.13
99
+ nvidia-cuda-runtime-cu12==12.4.127
100
+ pytorch-lightning==2.5.1.post0
101
+ Jinja2==3.1.6
102
+ urllib3==2.4.0
103
+ watchdog==6.0.0
104
+ filelock==3.18.0
105
+ propcache==0.3.1
106
+ torch==2.6.0
107
+ nvidia-cusparse-cu12==12.3.1.170
108
+ cymem==2.0.11
109
+ nvidia-cusolver-cu12==11.6.1.9
110
+ murmurhash==1.0.13
111
+ catalogue==2.0.10
112
+ yarl==1.20.0
113
+ charset-normalizer==3.4.2
114
+ gitdb==4.0.12
115
+ matplotlib==3.10.3
116
+ portalocker==3.1.1
117
+ platformdirs==4.3.8
118
+ async-timeout==5.0.1
119
+ parso==0.8.4
120
+ markdown-it-py==3.0.0
121
+ omegaconf==2.3.0
122
+ cloudpathlib==0.21.1
123
+ nvidia-cusparselt-cu12==0.6.2
124
+ spacy-loggers==1.0.5
125
+ srsly==2.5.1
126
+ identify==2.6.12
127
+ rpds-py==0.25.1
128
+ spacy==3.8.7
129
+ matplotlib-inline==0.1.7
130
+ smart-open==7.1.0
131
+ pydantic==2.11.5
132
+ mdurl==0.1.2
133
+ virtualenv==20.31.2
134
+ pytz==2025.2
135
+ pycocotools==2.0.8
136
+ six==1.17.0
137
+ decorator==5.2.1
138
+ referencing==0.36.2
139
+ sentencepiece==0.2.0
140
+ PyYAML==6.0.2
141
+ pycocoevalcap==1.2
142
+ imageio==2.37.0
143
+ distlib==0.3.9
144
+ pyarrow==20.0.0
145
+ tenacity==9.1.2
146
+ language_data==1.3.0
147
+ nvidia-cuda-cupti-cu12==12.4.127
148
+ blis==1.3.0
149
+ Pygments==2.19.1
150
+ tifffile==2025.5.10
151
+ pyparsing==3.2.3
152
+ cachetools==5.5.2
153
+ safetensors==0.5.3
154
+ attrs==25.3.0
155
+ webdataset==0.2.111
156
+ plotly==6.1.1
157
+ nvidia-cublas-cu12==12.4.5.8
158
+ timm==0.4.12
159
+ torchmetrics==1.7.1
160
+ nvidia-nvjitlink-cu12==12.4.127
161
+ stack-data==0.6.3
162
+ python-dateutil==2.9.0.post0
163
+ lazy_loader==0.4
164
+ traitlets==5.14.3
165
+ einops==0.8.1
166
+ salesforce-lavis==1.0.2
167
+ joblib==1.5.1
168
+ msgpack==1.1.0
169
+ tokenizers==0.21.1
170
+ sentry-sdk==2.29.1
171
+ oss2==2.15.0
172
+ setproctitle==1.3.6
173
+ pip==25.1.1
174
+ cffi==1.17.1
175
+ transformers==4.52.3
176
+ narwhals==1.41.0
177
+ aliyun-python-sdk-core==2.16.0
178
+ jsonschema==4.24.0
179
+ flash-attn==2.7.1.post1
180
+ preshed==3.0.10
181
+ multiprocess==0.70.16
182
+ cryptography==45.0.3
183
+ aliyun-python-sdk-kms==2.16.5
184
+ scikit-learn==1.6.1
185
+ huggingface-hub==0.32.1
186
+ crcmod==1.7
187
+ typer==0.16.0
188
+ web.py==0.62
189
+ docker-pycreds==0.4.0
190
+ xxhash==3.5.0
191
+ bigmodelvis==0.0.1
192
+ datasets==3.6.0
193
+ more-itertools==10.7.0
194
+ yacs==0.1.8
195
+ jmespath==0.10.0
196
+ aiohttp==3.12.2
197
+ opencv-python==4.11.0.86
198
+ pycparser==2.22
199
+ threadpoolctl==3.6.0
200
+ jaraco.functools==4.1.0
201
+ click==8.2.1
202
+ wandb==0.19.11
203
+ opendelta==0.3.2
204
+ pycryptodome==3.23.0
205
+ pathlib==1.0.1
206
+ dill==0.3.8
207
+ fsspec==2025.3.0
208
+ delta-center-client==0.0.4
209
+ cheroot==10.0.1
210
+ typing_extensions==4.12.2
211
+ platformdirs==4.2.2
212
+ jaraco.text==3.12.1
213
+ packaging==24.2
214
+ inflect==7.3.1
215
+ jaraco.context==5.3.0
216
+ wheel==0.45.1
217
+ typeguard==4.3.0
218
+ more-itertools==10.3.0
219
+ tomli==2.0.1
220
+ importlib_metadata==8.0.0
221
+ backports.tarfile==1.2.0
222
+ zipp==3.19.2
223
+ jaraco.collections==5.1.0
224
+ autocommand==2.2.2
225
+ jaraco.functools==4.0.1
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153023-ggclx68a/files/wandb-metadata.json ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.0",
4
+ "startedAt": "2025-07-04T07:30:24.142799Z",
5
+ "args": [
6
+ "--devices",
7
+ "0,1,2,3,4,5,6,7",
8
+ "--mode",
9
+ "train",
10
+ "--filename",
11
+ "stage2_07041521",
12
+ "--num_query_token",
13
+ "8",
14
+ "--save_every_n_epochs",
15
+ "5",
16
+ "--max_epochs",
17
+ "15",
18
+ "--batch_size",
19
+ "32",
20
+ "--precision",
21
+ "bf16-mixed",
22
+ "--num_workers",
23
+ "8",
24
+ "--plm_model",
25
+ "/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m",
26
+ "--bert_name",
27
+ "/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft",
28
+ "--llm_name",
29
+ "/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300",
30
+ "--llm_tune",
31
+ "mid_lora",
32
+ "--mix_dataset",
33
+ "--stage1_path",
34
+ "/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt",
35
+ "--use_wandb_logger"
36
+ ],
37
+ "program": "/nas/shared/kilab/wangyujia/ProtT3/stage2.py",
38
+ "codePath": "stage2.py",
39
+ "email": "gia0603yucca@gmail.com",
40
+ "root": "./all_checkpoints/stage2_07041521/",
41
+ "host": "dsw-265304-b8d7644bb-bs7r7",
42
+ "executable": "/root/miniconda3/envs/protT3/bin/python",
43
+ "codePathLocal": "stage2.py",
44
+ "cpu_count": 64,
45
+ "cpu_count_logical": 64,
46
+ "gpu": "NVIDIA A800-SXM4-80GB",
47
+ "gpu_count": 8,
48
+ "disk": {
49
+ "/": {
50
+ "total": "1623302262784",
51
+ "used": "1266515968"
52
+ }
53
+ },
54
+ "memory": {
55
+ "total": "549755813888"
56
+ },
57
+ "cpu": {
58
+ "count": 64,
59
+ "countLogical": 64
60
+ },
61
+ "gpu_nvidia": [
62
+ {
63
+ "name": "NVIDIA A800-SXM4-80GB",
64
+ "memoryTotal": "85198045184",
65
+ "architecture": "Ampere"
66
+ },
67
+ {
68
+ "name": "NVIDIA A800-SXM4-80GB",
69
+ "memoryTotal": "85198045184",
70
+ "architecture": "Ampere"
71
+ },
72
+ {
73
+ "name": "NVIDIA A800-SXM4-80GB",
74
+ "memoryTotal": "85198045184",
75
+ "architecture": "Ampere"
76
+ },
77
+ {
78
+ "name": "NVIDIA A800-SXM4-80GB",
79
+ "memoryTotal": "85198045184",
80
+ "architecture": "Ampere"
81
+ },
82
+ {
83
+ "name": "NVIDIA A800-SXM4-80GB",
84
+ "memoryTotal": "85198045184",
85
+ "architecture": "Ampere"
86
+ },
87
+ {
88
+ "name": "NVIDIA A800-SXM4-80GB",
89
+ "memoryTotal": "85198045184",
90
+ "architecture": "Ampere"
91
+ },
92
+ {
93
+ "name": "NVIDIA A800-SXM4-80GB",
94
+ "memoryTotal": "85198045184",
95
+ "architecture": "Ampere"
96
+ },
97
+ {
98
+ "name": "NVIDIA A800-SXM4-80GB",
99
+ "memoryTotal": "85198045184",
100
+ "architecture": "Ampere"
101
+ }
102
+ ],
103
+ "cudaVersion": "12.1"
104
+ }
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153023-ggclx68a/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":23}}
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153023-ggclx68a/logs/debug-internal.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-07-04T15:30:24.192995842+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage2_07041521/wandb/run-20250704_153023-ggclx68a/logs/debug-core.log"}
2
+ {"time":"2025-07-04T15:30:27.170454248+08:00","level":"INFO","msg":"created new stream","id":"ggclx68a"}
3
+ {"time":"2025-07-04T15:30:27.170510911+08:00","level":"INFO","msg":"stream: started","id":"ggclx68a"}
4
+ {"time":"2025-07-04T15:30:27.170588716+08:00","level":"INFO","msg":"handler: started","stream_id":"ggclx68a"}
5
+ {"time":"2025-07-04T15:30:27.170589658+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"ggclx68a"}
6
+ {"time":"2025-07-04T15:30:27.170662113+08:00","level":"INFO","msg":"sender: started","stream_id":"ggclx68a"}
7
+ {"time":"2025-07-04T15:30:30.523167583+08:00","level":"INFO","msg":"Starting system monitor"}
8
+ {"time":"2025-07-04T15:30:47.370064277+08:00","level":"INFO","msg":"stream: closing","id":"ggclx68a"}
9
+ {"time":"2025-07-04T15:30:47.370150308+08:00","level":"INFO","msg":"Stopping system monitor"}
10
+ {"time":"2025-07-04T15:30:47.378162244+08:00","level":"INFO","msg":"Stopped system monitor"}
11
+ {"time":"2025-07-04T15:30:50.622126954+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
12
+ {"time":"2025-07-04T15:30:52.339883158+08:00","level":"INFO","msg":"handler: closed","stream_id":"ggclx68a"}
13
+ {"time":"2025-07-04T15:30:52.339922114+08:00","level":"INFO","msg":"sender: closed","stream_id":"ggclx68a"}
14
+ {"time":"2025-07-04T15:30:52.33991556+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"ggclx68a"}
15
+ {"time":"2025-07-04T15:30:52.350227845+08:00","level":"INFO","msg":"stream: closed","id":"ggclx68a"}
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153023-ggclx68a/logs/debug.log ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-07-04 15:30:24,140 INFO MainThread:46730 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
2
+ 2025-07-04 15:30:24,140 INFO MainThread:46730 [wandb_setup.py:_flush():70] Configure stats pid to 46730
3
+ 2025-07-04 15:30:24,140 INFO MainThread:46730 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
4
+ 2025-07-04 15:30:24,140 INFO MainThread:46730 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
5
+ 2025-07-04 15:30:24,140 INFO MainThread:46730 [wandb_setup.py:_flush():70] Loading settings from environment variables
6
+ 2025-07-04 15:30:24,140 INFO MainThread:46730 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage2_07041521/wandb/run-20250704_153023-ggclx68a/logs/debug.log
7
+ 2025-07-04 15:30:24,140 INFO MainThread:46730 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage2_07041521/wandb/run-20250704_153023-ggclx68a/logs/debug-internal.log
8
+ 2025-07-04 15:30:24,140 INFO MainThread:46730 [wandb_init.py:init():852] calling init triggers
9
+ 2025-07-04 15:30:24,140 INFO MainThread:46730 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-07-04 15:30:24,140 INFO MainThread:46730 [wandb_init.py:init():893] starting backend
12
+ 2025-07-04 15:30:24,140 INFO MainThread:46730 [wandb_init.py:init():897] sending inform_init request
13
+ 2025-07-04 15:30:24,142 INFO MainThread:46730 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-07-04 15:30:24,142 INFO MainThread:46730 [wandb_init.py:init():907] backend started and connected
15
+ 2025-07-04 15:30:24,143 INFO MainThread:46730 [wandb_init.py:init():1005] updated telemetry
16
+ 2025-07-04 15:30:24,144 INFO MainThread:46730 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
17
+ 2025-07-04 15:30:30,435 INFO MainThread:46730 [wandb_init.py:init():1104] starting run threads in backend
18
+ 2025-07-04 15:30:30,681 INFO MainThread:46730 [wandb_run.py:_console_start():2573] atexit reg
19
+ 2025-07-04 15:30:30,681 INFO MainThread:46730 [wandb_run.py:_redirect():2421] redirect: wrap_raw
20
+ 2025-07-04 15:30:30,687 INFO MainThread:46730 [wandb_run.py:_redirect():2490] Wrapping output streams.
21
+ 2025-07-04 15:30:30,692 INFO MainThread:46730 [wandb_run.py:_redirect():2513] Redirects installed.
22
+ 2025-07-04 15:30:30,698 INFO MainThread:46730 [wandb_init.py:init():1150] run started, returning control to user process
23
+ 2025-07-04 15:30:39,328 INFO MainThread:46730 [wandb_run.py:_config_callback():1436] config_cb None None {'filename': 'stage2_07041521', 'seed': 42, 'mode': 'train', 'strategy': 'deepspeed', 'accelerator': 'gpu', 'devices': '0,1,2,3,4,5,6,7', 'precision': 'bf16-mixed', 'max_epochs': 15, 'accumulate_grad_batches': 1, 'check_val_every_n_epoch': 1, 'enable_flash': False, 'use_wandb_logger': True, 'mix_dataset': True, 'save_every_n_epochs': 5, 'bert_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft', 'cross_attention_freq': 2, 'num_query_token': 8, 'llm_name': '/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300', 'num_beams': 5, 'do_sample': False, 'max_inference_len': 128, 'min_inference_len': 1, 'llm_tune': 'mid_lora', 'peft_config': '', 'peft_dir': '', 'plm_model': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m', 'plm_tune': 'freeze', 'lora_r': 8, 'lora_alpha': 16, 'lora_dropout': 0.1, 'enbale_gradient_checkpointing': False, 'weight_decay': 0.05, 'init_lr': 0.0001, 'min_lr': 1e-05, 'warmup_lr': 1e-06, 'warmup_steps': 1000, 'lr_decay_rate': 0.9, 'scheduler': 'linear_warmup_cosine_lr', 'stage1_path': '/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt', 'stage2_path': '', 'init_checkpoint': '', 'caption_eval_epoch': 10, 'num_workers': 8, 'batch_size': 32, 'inference_batch_size': 4, 'root': 'data', 'text_max_len': 128, 'q_max_len': 29, 'a_max_len': 36, 'prot_max_len': 1024, 'prompt': 'The protein has the following properties: ', 'filter_side_qa': False}
24
+ 2025-07-04 15:30:47,369 INFO MsgRouterThr:46730 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153023-ggclx68a/run-ggclx68a.wandb ADDED
Binary file (18.1 kB). View file
 
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153250-690krh73/files/config.yaml ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.19.11
4
+ m:
5
+ - "1": trainer/global_step
6
+ "6":
7
+ - 3
8
+ "7": []
9
+ python_version: 3.10.0
10
+ t:
11
+ "1":
12
+ - 1
13
+ - 5
14
+ - 9
15
+ - 11
16
+ - 33
17
+ - 41
18
+ - 49
19
+ - 53
20
+ - 55
21
+ - 63
22
+ - 103
23
+ "2":
24
+ - 1
25
+ - 5
26
+ - 9
27
+ - 11
28
+ - 33
29
+ - 41
30
+ - 49
31
+ - 53
32
+ - 55
33
+ - 63
34
+ - 103
35
+ "3":
36
+ - 7
37
+ - 23
38
+ - 55
39
+ - 66
40
+ "4": 3.10.0
41
+ "5": 0.19.11
42
+ "6": 4.52.3
43
+ "8":
44
+ - 5
45
+ "12": 0.19.11
46
+ "13": linux-x86_64
47
+ a_max_len:
48
+ value: 36
49
+ accelerator:
50
+ value: gpu
51
+ accumulate_grad_batches:
52
+ value: 1
53
+ batch_size:
54
+ value: 32
55
+ bert_name:
56
+ value: /nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft
57
+ caption_eval_epoch:
58
+ value: 10
59
+ check_val_every_n_epoch:
60
+ value: 1
61
+ cross_attention_freq:
62
+ value: 2
63
+ devices:
64
+ value: 0,1,2,3,4,5,6,7
65
+ do_sample:
66
+ value: false
67
+ enable_flash:
68
+ value: false
69
+ enbale_gradient_checkpointing:
70
+ value: false
71
+ filename:
72
+ value: stage2_07041521
73
+ filter_side_qa:
74
+ value: false
75
+ inference_batch_size:
76
+ value: 4
77
+ init_checkpoint:
78
+ value: ""
79
+ init_lr:
80
+ value: 0.0001
81
+ llm_name:
82
+ value: /oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300
83
+ llm_tune:
84
+ value: mid_lora
85
+ lora_alpha:
86
+ value: 16
87
+ lora_dropout:
88
+ value: 0.1
89
+ lora_r:
90
+ value: 8
91
+ lr_decay_rate:
92
+ value: 0.9
93
+ max_epochs:
94
+ value: 15
95
+ max_inference_len:
96
+ value: 128
97
+ min_inference_len:
98
+ value: 1
99
+ min_lr:
100
+ value: 1e-05
101
+ mix_dataset:
102
+ value: true
103
+ mode:
104
+ value: train
105
+ num_beams:
106
+ value: 5
107
+ num_query_token:
108
+ value: 8
109
+ num_workers:
110
+ value: 8
111
+ peft_config:
112
+ value: ""
113
+ peft_dir:
114
+ value: ""
115
+ plm_model:
116
+ value: /nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m
117
+ plm_tune:
118
+ value: freeze
119
+ precision:
120
+ value: bf16-mixed
121
+ prompt:
122
+ value: 'The protein has the following properties: '
123
+ prot_max_len:
124
+ value: 1024
125
+ q_max_len:
126
+ value: 29
127
+ root:
128
+ value: data
129
+ save_every_n_epochs:
130
+ value: 5
131
+ scheduler:
132
+ value: linear_warmup_cosine_lr
133
+ seed:
134
+ value: 42
135
+ stage1_path:
136
+ value: /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt
137
+ stage2_path:
138
+ value: ""
139
+ strategy:
140
+ value: deepspeed
141
+ text_max_len:
142
+ value: 128
143
+ use_wandb_logger:
144
+ value: true
145
+ warmup_lr:
146
+ value: 1e-06
147
+ warmup_steps:
148
+ value: 1000
149
+ weight_decay:
150
+ value: 0.05
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153250-690krh73/files/output.log ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2_07041521 exists and is not empty.
2
+ Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.
3
+ LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
4
+
5
+ | Name | Type | Params | Mode
6
+ -------------------------------------------
7
+ 0 | blip2 | Blip2OPT | 7.9 B | train
8
+ -------------------------------------------
9
+ 104 M Trainable params
10
+ 7.8 B Non-trainable params
11
+ 7.9 B Total params
12
+ 31,459.025Total estimated model params size (MB)
13
+ 174 Modules in train mode
14
+ 1203 Modules in eval mode
15
+ Sanity Checking DataLoader 0: 0%| | 0/2 [00:00<?, ?it/s]
16
+ Traceback (most recent call last):
17
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py", line 47, in _call_and_handle_interrupt
18
+ return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
19
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/launchers/subprocess_script.py", line 105, in launch
20
+ return function(*args, **kwargs)
21
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 599, in _fit_impl
22
+ self._run(model, ckpt_path=ckpt_path)
23
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1012, in _run
24
+ results = self._run_stage()
25
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1054, in _run_stage
26
+ self._run_sanity_check()
27
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1083, in _run_sanity_check
28
+ val_loop.run()
29
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/loops/utilities.py", line 179, in _decorator
30
+ return loop_run(self, *args, **kwargs)
31
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/loops/evaluation_loop.py", line 145, in run
32
+ self._evaluation_step(batch, batch_idx, dataloader_idx, dataloader_iter)
33
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/loops/evaluation_loop.py", line 437, in _evaluation_step
34
+ output = call._call_strategy_hook(trainer, hook_name, *step_args)
35
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py", line 328, in _call_strategy_hook
36
+ output = fn(*args, **kwargs)
37
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py", line 411, in validation_step
38
+ return self._forward_redirection(self.model, self.lightning_module, "validation_step", *args, **kwargs)
39
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py", line 641, in __call__
40
+ wrapper_output = wrapper_module(*args, **kwargs)
41
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
42
+ return self._call_impl(*args, **kwargs)
43
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
44
+ return forward_call(*args, **kwargs)
45
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/deepspeed/utils/nvtx.py", line 20, in wrapped_fn
46
+ ret_val = func(*args, **kwargs)
47
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/deepspeed/runtime/engine.py", line 2054, in forward
48
+ loss = self.module(*inputs, **kwargs)
49
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
50
+ return self._call_impl(*args, **kwargs)
51
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1845, in _call_impl
52
+ return inner()
53
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1793, in inner
54
+ result = forward_call(*args, **kwargs)
55
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py", line 634, in wrapped_forward
56
+ out = method(*_args, **_kwargs)
57
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
58
+ return func(*args, **kwargs)
59
+ File "/nas/shared/kilab/wangyujia/ProtT3/model/blip2_stage2.py", line 119, in validation_step
60
+ loss = self.blip2(batch)
61
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
62
+ return self._call_impl(*args, **kwargs)
63
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
64
+ return forward_call(*args, **kwargs)
65
+ File "/nas/shared/kilab/wangyujia/ProtT3/model/blip2_opt.py", line 222, in forward
66
+ prot_batch, prompt_batch, text_batch = batch
67
+ ValueError: not enough values to unpack (expected 3, got 2)
68
+
69
+ During handling of the above exception, another exception occurred:
70
+
71
+ Traceback (most recent call last):
72
+ File "/nas/shared/kilab/wangyujia/ProtT3/stage2.py", line 130, in <module>
73
+ main(get_args())
74
+ File "/nas/shared/kilab/wangyujia/ProtT3/stage2.py", line 92, in main
75
+ trainer.fit(model, datamodule=dm)#, ckpt_path=args.ckpt_path)
76
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
77
+ call._call_and_handle_interrupt(
78
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py", line 69, in _call_and_handle_interrupt
79
+ trainer._teardown()
80
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1035, in _teardown
81
+ self.strategy.teardown()
82
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/ddp.py", line 419, in teardown
83
+ super().teardown()
84
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/parallel.py", line 134, in teardown
85
+ super().teardown()
86
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py", line 536, in teardown
87
+ self.lightning_module.cpu()
88
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/lightning_fabric/utilities/device_dtype_mixin.py", line 82, in cpu
89
+ return super().cpu()
90
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1121, in cpu
91
+ return self._apply(lambda t: t.cpu())
92
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 903, in _apply
93
+ module._apply(fn)
94
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 903, in _apply
95
+ module._apply(fn)
96
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 903, in _apply
97
+ module._apply(fn)
98
+ [Previous line repeated 4 more times]
99
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 930, in _apply
100
+ param_applied = fn(param)
101
+ File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1121, in <lambda>
102
+ return self._apply(lambda t: t.cpu())
103
+ KeyboardInterrupt
104
+ [rank0]: Traceback (most recent call last):
105
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py", line 47, in _call_and_handle_interrupt
106
+ [rank0]: return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
107
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/launchers/subprocess_script.py", line 105, in launch
108
+ [rank0]: return function(*args, **kwargs)
109
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 599, in _fit_impl
110
+ [rank0]: self._run(model, ckpt_path=ckpt_path)
111
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1012, in _run
112
+ [rank0]: results = self._run_stage()
113
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1054, in _run_stage
114
+ [rank0]: self._run_sanity_check()
115
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1083, in _run_sanity_check
116
+ [rank0]: val_loop.run()
117
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/loops/utilities.py", line 179, in _decorator
118
+ [rank0]: return loop_run(self, *args, **kwargs)
119
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/loops/evaluation_loop.py", line 145, in run
120
+ [rank0]: self._evaluation_step(batch, batch_idx, dataloader_idx, dataloader_iter)
121
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/loops/evaluation_loop.py", line 437, in _evaluation_step
122
+ [rank0]: output = call._call_strategy_hook(trainer, hook_name, *step_args)
123
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py", line 328, in _call_strategy_hook
124
+ [rank0]: output = fn(*args, **kwargs)
125
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py", line 411, in validation_step
126
+ [rank0]: return self._forward_redirection(self.model, self.lightning_module, "validation_step", *args, **kwargs)
127
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py", line 641, in __call__
128
+ [rank0]: wrapper_output = wrapper_module(*args, **kwargs)
129
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
130
+ [rank0]: return self._call_impl(*args, **kwargs)
131
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
132
+ [rank0]: return forward_call(*args, **kwargs)
133
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/deepspeed/utils/nvtx.py", line 20, in wrapped_fn
134
+ [rank0]: ret_val = func(*args, **kwargs)
135
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/deepspeed/runtime/engine.py", line 2054, in forward
136
+ [rank0]: loss = self.module(*inputs, **kwargs)
137
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
138
+ [rank0]: return self._call_impl(*args, **kwargs)
139
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1845, in _call_impl
140
+ [rank0]: return inner()
141
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1793, in inner
142
+ [rank0]: result = forward_call(*args, **kwargs)
143
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py", line 634, in wrapped_forward
144
+ [rank0]: out = method(*_args, **_kwargs)
145
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
146
+ [rank0]: return func(*args, **kwargs)
147
+ [rank0]: File "/nas/shared/kilab/wangyujia/ProtT3/model/blip2_stage2.py", line 119, in validation_step
148
+ [rank0]: loss = self.blip2(batch)
149
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
150
+ [rank0]: return self._call_impl(*args, **kwargs)
151
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
152
+ [rank0]: return forward_call(*args, **kwargs)
153
+ [rank0]: File "/nas/shared/kilab/wangyujia/ProtT3/model/blip2_opt.py", line 222, in forward
154
+ [rank0]: prot_batch, prompt_batch, text_batch = batch
155
+ [rank0]: ValueError: not enough values to unpack (expected 3, got 2)
156
+
157
+ [rank0]: During handling of the above exception, another exception occurred:
158
+
159
+ [rank0]: Traceback (most recent call last):
160
+ [rank0]: File "/nas/shared/kilab/wangyujia/ProtT3/stage2.py", line 130, in <module>
161
+ [rank0]: main(get_args())
162
+ [rank0]: File "/nas/shared/kilab/wangyujia/ProtT3/stage2.py", line 92, in main
163
+ [rank0]: trainer.fit(model, datamodule=dm)#, ckpt_path=args.ckpt_path)
164
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
165
+ [rank0]: call._call_and_handle_interrupt(
166
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py", line 69, in _call_and_handle_interrupt
167
+ [rank0]: trainer._teardown()
168
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1035, in _teardown
169
+ [rank0]: self.strategy.teardown()
170
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/ddp.py", line 419, in teardown
171
+ [rank0]: super().teardown()
172
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/parallel.py", line 134, in teardown
173
+ [rank0]: super().teardown()
174
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py", line 536, in teardown
175
+ [rank0]: self.lightning_module.cpu()
176
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/lightning_fabric/utilities/device_dtype_mixin.py", line 82, in cpu
177
+ [rank0]: return super().cpu()
178
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1121, in cpu
179
+ [rank0]: return self._apply(lambda t: t.cpu())
180
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 903, in _apply
181
+ [rank0]: module._apply(fn)
182
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 903, in _apply
183
+ [rank0]: module._apply(fn)
184
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 903, in _apply
185
+ [rank0]: module._apply(fn)
186
+ [rank0]: [Previous line repeated 4 more times]
187
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 930, in _apply
188
+ [rank0]: param_applied = fn(param)
189
+ [rank0]: File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1121, in <lambda>
190
+ [rank0]: return self._apply(lambda t: t.cpu())
191
+ [rank0]: KeyboardInterrupt
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153250-690krh73/files/requirements.txt ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pydantic_core==2.33.2
2
+ psutil==7.0.0
3
+ nvidia-cuda-nvrtc-cu12==12.4.127
4
+ mpmath==1.3.0
5
+ tzdata==2025.2
6
+ contexttimer==0.3.3
7
+ cycler==0.12.1
8
+ python-magic==0.4.27
9
+ pexpect==4.9.0
10
+ sympy==1.13.1
11
+ wrapt==1.17.2
12
+ marisa-trie==1.2.1
13
+ langcodes==3.5.0
14
+ nvidia-nvtx-cu12==12.4.127
15
+ ipython==8.36.0
16
+ opencv-python-headless==4.5.5.64
17
+ MarkupSafe==3.0.2
18
+ jsonschema-specifications==2025.4.1
19
+ wasabi==1.1.3
20
+ blinker==1.9.0
21
+ cfgv==3.4.0
22
+ numpy==2.2.6
23
+ idna==3.10
24
+ nvidia-cufile-cu12==1.11.1.6
25
+ ninja==1.11.1.4
26
+ nvidia-nccl-cu12==2.21.5
27
+ networkx==3.4.2
28
+ certifi==2025.4.26
29
+ deepspeed==0.16.10+b666844f
30
+ pure_eval==0.2.3
31
+ packaging==24.2
32
+ nltk==3.9.1
33
+ contourpy==1.3.2
34
+ pre_commit==4.2.0
35
+ nodeenv==1.9.1
36
+ setuptools==78.1.1
37
+ annotated-types==0.7.0
38
+ multidict==6.4.4
39
+ requests==2.32.3
40
+ tornado==6.5.1
41
+ triton==3.2.0
42
+ pillow==11.2.1
43
+ decord==0.6.0
44
+ shellingham==1.5.4
45
+ streamlit==1.45.1
46
+ pydeck==0.9.1
47
+ confection==0.1.5
48
+ exceptiongroup==1.3.0
49
+ prompt_toolkit==3.0.51
50
+ text-unidecode==1.3
51
+ nvidia-cufft-cu12==11.2.1.3
52
+ antlr4-python3-runtime==4.9.3
53
+ fairscale==0.4.4
54
+ rouge_score==0.1.2
55
+ nvidia-cudnn-cu12==9.1.0.70
56
+ tqdm==4.67.1
57
+ rich==14.0.0
58
+ frozenlist==1.6.0
59
+ webencodings==0.5.1
60
+ altair==5.5.0
61
+ opendatasets==0.1.22
62
+ nvidia-curand-cu12==10.3.5.147
63
+ protobuf==6.31.0
64
+ asttokens==3.0.0
65
+ wheel==0.45.1
66
+ hf-xet==1.1.2
67
+ weasel==0.4.1
68
+ aiosignal==1.3.2
69
+ absl-py==2.2.2
70
+ thinc==8.3.6
71
+ torchvision==0.21.0
72
+ pandas==2.2.3
73
+ fonttools==4.58.0
74
+ bleach==6.2.0
75
+ typing-inspection==0.4.1
76
+ ftfy==6.3.1
77
+ typing_extensions==4.13.2
78
+ nvidia-ml-py==12.575.51
79
+ python-slugify==8.0.4
80
+ lightning-utilities==0.14.3
81
+ py-cpuinfo==9.0.0
82
+ smmap==5.0.2
83
+ regex==2024.11.6
84
+ scikit-image==0.25.2
85
+ iopath==0.1.10
86
+ spacy-legacy==3.0.12
87
+ hjson==3.1.0
88
+ executing==2.2.0
89
+ kiwisolver==1.4.8
90
+ scipy==1.15.3
91
+ aiohappyeyeballs==2.6.1
92
+ toml==0.10.2
93
+ jedi==0.19.2
94
+ GitPython==3.1.44
95
+ ptyprocess==0.7.0
96
+ kaggle==1.7.4.5
97
+ braceexpand==0.1.7
98
+ wcwidth==0.2.13
99
+ nvidia-cuda-runtime-cu12==12.4.127
100
+ pytorch-lightning==2.5.1.post0
101
+ Jinja2==3.1.6
102
+ urllib3==2.4.0
103
+ watchdog==6.0.0
104
+ filelock==3.18.0
105
+ propcache==0.3.1
106
+ torch==2.6.0
107
+ nvidia-cusparse-cu12==12.3.1.170
108
+ cymem==2.0.11
109
+ nvidia-cusolver-cu12==11.6.1.9
110
+ murmurhash==1.0.13
111
+ catalogue==2.0.10
112
+ yarl==1.20.0
113
+ charset-normalizer==3.4.2
114
+ gitdb==4.0.12
115
+ matplotlib==3.10.3
116
+ portalocker==3.1.1
117
+ platformdirs==4.3.8
118
+ async-timeout==5.0.1
119
+ parso==0.8.4
120
+ markdown-it-py==3.0.0
121
+ omegaconf==2.3.0
122
+ cloudpathlib==0.21.1
123
+ nvidia-cusparselt-cu12==0.6.2
124
+ spacy-loggers==1.0.5
125
+ srsly==2.5.1
126
+ identify==2.6.12
127
+ rpds-py==0.25.1
128
+ spacy==3.8.7
129
+ matplotlib-inline==0.1.7
130
+ smart-open==7.1.0
131
+ pydantic==2.11.5
132
+ mdurl==0.1.2
133
+ virtualenv==20.31.2
134
+ pytz==2025.2
135
+ pycocotools==2.0.8
136
+ six==1.17.0
137
+ decorator==5.2.1
138
+ referencing==0.36.2
139
+ sentencepiece==0.2.0
140
+ PyYAML==6.0.2
141
+ pycocoevalcap==1.2
142
+ imageio==2.37.0
143
+ distlib==0.3.9
144
+ pyarrow==20.0.0
145
+ tenacity==9.1.2
146
+ language_data==1.3.0
147
+ nvidia-cuda-cupti-cu12==12.4.127
148
+ blis==1.3.0
149
+ Pygments==2.19.1
150
+ tifffile==2025.5.10
151
+ pyparsing==3.2.3
152
+ cachetools==5.5.2
153
+ safetensors==0.5.3
154
+ attrs==25.3.0
155
+ webdataset==0.2.111
156
+ plotly==6.1.1
157
+ nvidia-cublas-cu12==12.4.5.8
158
+ timm==0.4.12
159
+ torchmetrics==1.7.1
160
+ nvidia-nvjitlink-cu12==12.4.127
161
+ stack-data==0.6.3
162
+ python-dateutil==2.9.0.post0
163
+ lazy_loader==0.4
164
+ traitlets==5.14.3
165
+ einops==0.8.1
166
+ salesforce-lavis==1.0.2
167
+ joblib==1.5.1
168
+ msgpack==1.1.0
169
+ tokenizers==0.21.1
170
+ sentry-sdk==2.29.1
171
+ oss2==2.15.0
172
+ setproctitle==1.3.6
173
+ pip==25.1.1
174
+ cffi==1.17.1
175
+ transformers==4.52.3
176
+ narwhals==1.41.0
177
+ aliyun-python-sdk-core==2.16.0
178
+ jsonschema==4.24.0
179
+ flash-attn==2.7.1.post1
180
+ preshed==3.0.10
181
+ multiprocess==0.70.16
182
+ cryptography==45.0.3
183
+ aliyun-python-sdk-kms==2.16.5
184
+ scikit-learn==1.6.1
185
+ huggingface-hub==0.32.1
186
+ crcmod==1.7
187
+ typer==0.16.0
188
+ web.py==0.62
189
+ docker-pycreds==0.4.0
190
+ xxhash==3.5.0
191
+ bigmodelvis==0.0.1
192
+ datasets==3.6.0
193
+ more-itertools==10.7.0
194
+ yacs==0.1.8
195
+ jmespath==0.10.0
196
+ aiohttp==3.12.2
197
+ opencv-python==4.11.0.86
198
+ pycparser==2.22
199
+ threadpoolctl==3.6.0
200
+ jaraco.functools==4.1.0
201
+ click==8.2.1
202
+ wandb==0.19.11
203
+ opendelta==0.3.2
204
+ pycryptodome==3.23.0
205
+ pathlib==1.0.1
206
+ dill==0.3.8
207
+ fsspec==2025.3.0
208
+ delta-center-client==0.0.4
209
+ cheroot==10.0.1
210
+ typing_extensions==4.12.2
211
+ platformdirs==4.2.2
212
+ jaraco.text==3.12.1
213
+ packaging==24.2
214
+ inflect==7.3.1
215
+ jaraco.context==5.3.0
216
+ wheel==0.45.1
217
+ typeguard==4.3.0
218
+ more-itertools==10.3.0
219
+ tomli==2.0.1
220
+ importlib_metadata==8.0.0
221
+ backports.tarfile==1.2.0
222
+ zipp==3.19.2
223
+ jaraco.collections==5.1.0
224
+ autocommand==2.2.2
225
+ jaraco.functools==4.0.1
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153250-690krh73/files/wandb-metadata.json ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.0",
4
+ "startedAt": "2025-07-04T07:32:50.809990Z",
5
+ "args": [
6
+ "--devices",
7
+ "0,1,2,3,4,5,6,7",
8
+ "--mode",
9
+ "train",
10
+ "--filename",
11
+ "stage2_07041521",
12
+ "--num_query_token",
13
+ "8",
14
+ "--save_every_n_epochs",
15
+ "5",
16
+ "--max_epochs",
17
+ "15",
18
+ "--batch_size",
19
+ "32",
20
+ "--precision",
21
+ "bf16-mixed",
22
+ "--num_workers",
23
+ "8",
24
+ "--plm_model",
25
+ "/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m",
26
+ "--bert_name",
27
+ "/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft",
28
+ "--llm_name",
29
+ "/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300",
30
+ "--llm_tune",
31
+ "mid_lora",
32
+ "--mix_dataset",
33
+ "--stage1_path",
34
+ "/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt",
35
+ "--use_wandb_logger"
36
+ ],
37
+ "program": "/nas/shared/kilab/wangyujia/ProtT3/stage2.py",
38
+ "codePath": "stage2.py",
39
+ "email": "gia0603yucca@gmail.com",
40
+ "root": "./all_checkpoints/stage2_07041521/",
41
+ "host": "dsw-265304-b8d7644bb-bs7r7",
42
+ "executable": "/root/miniconda3/envs/protT3/bin/python",
43
+ "codePathLocal": "stage2.py",
44
+ "cpu_count": 64,
45
+ "cpu_count_logical": 64,
46
+ "gpu": "NVIDIA A800-SXM4-80GB",
47
+ "gpu_count": 8,
48
+ "disk": {
49
+ "/": {
50
+ "total": "1623302262784",
51
+ "used": "1266544640"
52
+ }
53
+ },
54
+ "memory": {
55
+ "total": "549755813888"
56
+ },
57
+ "cpu": {
58
+ "count": 64,
59
+ "countLogical": 64
60
+ },
61
+ "gpu_nvidia": [
62
+ {
63
+ "name": "NVIDIA A800-SXM4-80GB",
64
+ "memoryTotal": "85198045184",
65
+ "architecture": "Ampere"
66
+ },
67
+ {
68
+ "name": "NVIDIA A800-SXM4-80GB",
69
+ "memoryTotal": "85198045184",
70
+ "architecture": "Ampere"
71
+ },
72
+ {
73
+ "name": "NVIDIA A800-SXM4-80GB",
74
+ "memoryTotal": "85198045184",
75
+ "architecture": "Ampere"
76
+ },
77
+ {
78
+ "name": "NVIDIA A800-SXM4-80GB",
79
+ "memoryTotal": "85198045184",
80
+ "architecture": "Ampere"
81
+ },
82
+ {
83
+ "name": "NVIDIA A800-SXM4-80GB",
84
+ "memoryTotal": "85198045184",
85
+ "architecture": "Ampere"
86
+ },
87
+ {
88
+ "name": "NVIDIA A800-SXM4-80GB",
89
+ "memoryTotal": "85198045184",
90
+ "architecture": "Ampere"
91
+ },
92
+ {
93
+ "name": "NVIDIA A800-SXM4-80GB",
94
+ "memoryTotal": "85198045184",
95
+ "architecture": "Ampere"
96
+ },
97
+ {
98
+ "name": "NVIDIA A800-SXM4-80GB",
99
+ "memoryTotal": "85198045184",
100
+ "architecture": "Ampere"
101
+ }
102
+ ],
103
+ "cudaVersion": "12.1"
104
+ }
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153250-690krh73/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":16}}
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153250-690krh73/logs/debug-internal.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-07-04T15:32:50.852026927+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage2_07041521/wandb/run-20250704_153250-690krh73/logs/debug-core.log"}
2
+ {"time":"2025-07-04T15:32:52.202641273+08:00","level":"INFO","msg":"created new stream","id":"690krh73"}
3
+ {"time":"2025-07-04T15:32:52.202708431+08:00","level":"INFO","msg":"handler: started","stream_id":"690krh73"}
4
+ {"time":"2025-07-04T15:32:52.202727749+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"690krh73"}
5
+ {"time":"2025-07-04T15:32:52.202728265+08:00","level":"INFO","msg":"stream: started","id":"690krh73"}
6
+ {"time":"2025-07-04T15:32:52.202837136+08:00","level":"INFO","msg":"sender: started","stream_id":"690krh73"}
7
+ {"time":"2025-07-04T15:32:53.598926263+08:00","level":"INFO","msg":"Starting system monitor"}
8
+ {"time":"2025-07-04T15:33:07.744102695+08:00","level":"INFO","msg":"stream: closing","id":"690krh73"}
9
+ {"time":"2025-07-04T15:33:07.74416603+08:00","level":"INFO","msg":"Stopping system monitor"}
10
+ {"time":"2025-07-04T15:33:07.76280695+08:00","level":"INFO","msg":"Stopped system monitor"}
11
+ {"time":"2025-07-04T15:33:09.484600235+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
12
+ {"time":"2025-07-04T15:33:11.350265605+08:00","level":"INFO","msg":"handler: closed","stream_id":"690krh73"}
13
+ {"time":"2025-07-04T15:33:11.35028236+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"690krh73"}
14
+ {"time":"2025-07-04T15:33:11.350307246+08:00","level":"INFO","msg":"sender: closed","stream_id":"690krh73"}
15
+ {"time":"2025-07-04T15:33:11.363954115+08:00","level":"INFO","msg":"stream: closed","id":"690krh73"}
ProtT3/all_checkpoints/stage2_07041521/wandb/run-20250704_153250-690krh73/run-690krh73.wandb ADDED
Binary file (24 kB). View file