Add files using upload-large-folder tool
Browse files- BIO/ablation/metal_ion_binding_test.jsonl +718 -0
- ProtT3/all_checkpoints/stage1_06221723/wandb/run-20250622_192645-zmuhvn72/files/wandb-summary.json +1 -0
- ProtT3/all_checkpoints/stage1_06221723/wandb/run-20250622_192645-zmuhvn72/logs/debug-internal.log +15 -0
- ProtT3/all_checkpoints/stage1_06221723/wandb/run-20250622_192645-zmuhvn72/logs/debug.log +24 -0
- ProtT3/all_checkpoints/stage1_06261435/wandb/debug-internal.log +15 -0
- ProtT3/all_checkpoints/stage1_06261435/wandb/debug.log +23 -0
- ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_143705-0goy7nyw/logs/debug-internal.log +3 -0
- ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_143705-0goy7nyw/logs/debug.log +94 -0
- ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/files/output.log +19 -0
- ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/files/requirements.txt +225 -0
- ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/files/wandb-metadata.json +98 -0
- ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/files/wandb-summary.json +1 -0
- ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/logs/debug-internal.log +12 -0
- ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/logs/debug.log +24 -0
- ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/run-coknhy79.wandb +0 -0
- ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/files/output.log +28 -0
- ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/files/requirements.txt +225 -0
- ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/files/wandb-metadata.json +98 -0
- ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/logs/debug-internal.log +15 -0
- ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/logs/debug.log +23 -0
- ProtT3/all_checkpoints/stage1_06262112/wandb/debug-internal.log +73 -0
- ProtT3/all_checkpoints/stage1_06262112/wandb/debug.log +23 -0
- ProtT3/all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/files/output.log +28 -0
- ProtT3/all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/files/requirements.txt +225 -0
- ProtT3/all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/files/wandb-metadata.json +98 -0
- ProtT3/all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/logs/debug-internal.log +73 -0
- ProtT3/all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/logs/debug.log +23 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/debug-internal.log +17 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/debug.log +24 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/files/output.log +16 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/files/requirements.txt +225 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/files/wandb-metadata.json +103 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/logs/debug-internal.log +7 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/logs/debug.log +23 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/run-2bo0nfvt.wandb +0 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/files/output.log +4 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/files/requirements.txt +225 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/files/wandb-metadata.json +103 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/logs/debug-internal.log +7 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/logs/debug.log +22 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/run-ftp1v3gy.wandb +0 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/files/output.log +4 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/files/requirements.txt +225 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/files/wandb-metadata.json +103 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/logs/debug-internal.log +7 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/logs/debug.log +22 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/run-p1815hm9.wandb +0 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_190145-vu5mgolt/files/output.log +4 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_190145-vu5mgolt/files/requirements.txt +225 -0
- ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_190145-vu5mgolt/run-vu5mgolt.wandb +0 -0
BIO/ablation/metal_ion_binding_test.jsonl
ADDED
|
@@ -0,0 +1,718 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 2 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 3 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 4 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 5 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 6 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 7 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 8 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 9 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 10 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 11 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 12 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 13 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 14 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 15 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 16 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 17 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 18 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 19 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 20 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 21 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 22 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 23 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 24 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 25 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 26 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 27 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 28 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 29 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 30 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 31 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 32 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 33 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 34 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 35 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 36 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 37 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 38 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 39 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 40 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 41 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 42 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 43 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 44 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 45 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 46 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 47 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 48 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 49 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 50 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 51 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 52 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 53 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 54 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 55 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 56 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 57 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 58 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 59 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 60 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 61 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 62 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 63 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 64 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 65 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 66 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 67 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 68 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 69 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 70 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 71 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 72 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 73 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 74 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 75 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 76 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 77 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 78 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 79 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 80 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 81 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 82 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 83 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 84 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 85 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 86 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 87 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 88 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 89 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 90 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 91 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 92 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 93 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 94 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 95 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 96 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 97 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 98 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 99 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 100 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 101 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 102 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 103 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 104 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 105 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 106 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 107 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 108 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 109 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 110 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 111 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 112 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 113 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 114 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 115 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 116 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 117 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 118 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 119 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 120 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 121 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 122 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 123 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 124 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 125 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 126 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 127 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 128 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 129 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 130 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 131 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 132 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 133 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 134 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 135 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 136 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 137 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 138 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 139 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 140 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 141 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 142 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 143 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 144 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 145 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 146 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 147 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 148 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 149 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 150 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 151 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 152 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 153 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 154 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 155 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 156 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 157 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 158 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 159 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 160 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 161 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 162 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 163 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 164 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 165 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 166 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 167 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 168 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 169 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 170 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 171 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 172 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 173 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 174 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 175 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 176 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 177 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 178 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 179 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 180 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 181 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 182 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 183 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 184 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 185 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 186 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 187 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 188 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 189 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 190 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 191 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 192 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 193 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 194 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 195 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 196 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 197 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 198 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 199 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 200 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 201 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 202 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 203 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 204 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 205 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 206 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 207 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 208 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 209 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 210 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 211 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 212 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 213 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 214 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 215 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 216 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 217 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 218 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 219 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 220 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 221 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 222 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 223 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 224 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 225 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 226 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 227 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 228 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 229 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 230 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 231 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 232 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 233 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 234 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 235 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 236 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 237 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 238 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 239 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 240 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 241 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 242 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 243 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 244 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 245 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 246 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 247 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 248 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 249 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 250 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 251 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 252 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 253 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 254 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 255 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 256 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 257 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 258 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 259 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 260 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 261 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 262 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 263 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 264 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 265 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 266 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 267 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 268 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 269 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 270 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 271 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 272 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 273 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 274 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 275 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 276 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 277 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 278 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 279 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 280 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 281 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 282 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 283 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 284 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 285 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 286 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 287 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 288 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 289 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 290 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 291 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 292 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 293 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 294 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 295 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 296 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 297 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 298 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 299 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 300 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 301 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 302 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 303 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 304 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 305 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 306 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 307 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 308 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 309 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 310 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 311 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 312 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 313 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 314 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 315 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 316 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 317 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 318 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 319 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 320 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 321 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 322 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 323 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 324 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 325 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 326 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 327 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 328 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 329 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 330 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 331 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 332 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 333 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 334 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 335 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 336 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 337 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 338 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 339 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 340 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 341 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 342 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 343 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 344 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 345 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 346 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 347 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 348 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 349 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 350 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 351 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 352 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 353 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 354 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 355 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 356 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 357 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 358 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 359 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 360 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 361 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 362 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 363 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 364 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 365 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 366 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 367 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 368 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 369 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 370 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 371 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 372 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 373 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 374 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 375 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 376 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 377 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 378 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 379 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 380 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 381 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 382 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 383 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 384 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 385 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 386 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 387 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 388 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 389 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 390 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 391 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 392 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 393 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 394 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 395 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 396 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 397 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 398 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 399 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 400 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 401 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 402 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 403 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 404 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 405 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 406 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 407 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 408 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 409 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 410 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 411 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 412 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 413 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 414 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 415 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 416 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 417 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 418 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 419 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 420 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 421 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 422 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 423 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 424 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 425 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 426 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 427 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 428 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 429 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 430 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 431 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 432 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 433 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 434 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 435 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 436 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 437 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 438 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 439 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 440 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 441 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 442 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 443 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 444 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 445 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 446 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 447 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 448 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 449 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 450 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 451 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 452 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 453 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 454 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 455 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 456 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 457 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 458 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 459 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 460 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 461 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 462 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 463 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 464 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 465 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 466 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 467 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 468 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 469 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 470 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 471 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 472 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 473 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 474 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 475 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 476 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 477 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 478 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 479 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 480 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 481 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 482 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 483 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 484 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 485 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 486 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 487 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 488 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 489 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 490 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 491 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 492 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 493 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 494 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 495 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 496 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 497 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 498 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 499 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 500 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 501 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 502 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 503 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 504 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 505 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 506 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 507 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 508 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 509 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 510 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 511 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 512 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 513 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 514 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 515 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 516 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 517 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 518 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 519 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 520 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 521 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 522 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 523 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 524 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 525 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 526 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 527 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 528 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 529 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 530 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 531 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 532 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 533 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 534 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 535 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 536 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 537 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 538 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 539 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 540 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 541 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 542 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 543 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 544 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 545 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 546 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 547 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 548 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 549 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 550 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 551 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 552 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 553 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 554 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 555 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 556 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 557 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 558 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 559 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 560 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 561 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 562 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 563 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 564 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 565 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 566 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 567 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 568 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 569 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 570 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 571 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 572 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 573 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 574 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 575 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 576 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 577 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 578 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 579 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 580 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 581 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 582 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 583 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 584 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 585 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 586 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 587 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 588 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 589 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 590 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 591 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 592 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 593 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 594 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 595 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 596 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 597 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 598 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 599 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 600 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 601 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 602 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 603 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 604 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 605 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 606 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 607 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 608 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 609 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 610 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 611 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 612 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 613 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 614 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 615 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 616 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 617 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 618 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 619 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 620 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 621 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 622 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 623 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 624 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 625 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 626 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 627 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 628 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 629 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 630 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 631 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 632 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 633 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 634 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 635 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 636 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 637 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 638 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 639 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 640 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 641 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 642 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 643 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 644 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 645 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 646 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 647 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 648 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 649 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 650 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 651 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 652 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 653 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 654 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 655 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 656 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 657 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 658 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 659 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 660 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 661 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 662 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 663 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 664 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 665 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 666 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 667 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 668 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 669 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 670 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 671 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 672 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 673 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 674 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 675 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 676 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 677 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 678 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 679 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 680 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 681 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 682 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 683 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 684 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 685 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 686 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 687 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 688 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 689 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 690 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 691 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 692 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 693 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 694 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 695 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 696 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 697 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 698 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 699 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 700 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 701 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 702 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 703 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 704 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 705 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 706 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 707 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 708 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 709 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 710 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 711 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 712 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 713 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 714 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 715 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 716 |
+
{"reference_answer": "0", "generated_answer": "1"}
|
| 717 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
| 718 |
+
{"reference_answer": "1", "generated_answer": "1"}
|
ProtT3/all_checkpoints/stage1_06221723/wandb/run-20250622_192645-zmuhvn72/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_wandb":{"runtime":42}}
|
ProtT3/all_checkpoints/stage1_06221723/wandb/run-20250622_192645-zmuhvn72/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-06-22T19:26:45.459589355+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage1_06221723/wandb/run-20250622_192645-zmuhvn72/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-06-22T19:26:46.458621861+08:00","level":"INFO","msg":"created new stream","id":"zmuhvn72"}
|
| 3 |
+
{"time":"2025-06-22T19:26:46.458658451+08:00","level":"INFO","msg":"stream: started","id":"zmuhvn72"}
|
| 4 |
+
{"time":"2025-06-22T19:26:46.458694604+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"zmuhvn72"}
|
| 5 |
+
{"time":"2025-06-22T19:26:46.458731318+08:00","level":"INFO","msg":"sender: started","stream_id":"zmuhvn72"}
|
| 6 |
+
{"time":"2025-06-22T19:26:46.45883181+08:00","level":"INFO","msg":"handler: started","stream_id":"zmuhvn72"}
|
| 7 |
+
{"time":"2025-06-22T19:26:47.693527925+08:00","level":"INFO","msg":"Starting system monitor"}
|
| 8 |
+
{"time":"2025-06-22T19:27:27.828788085+08:00","level":"INFO","msg":"stream: closing","id":"zmuhvn72"}
|
| 9 |
+
{"time":"2025-06-22T19:27:27.828880453+08:00","level":"INFO","msg":"Stopping system monitor"}
|
| 10 |
+
{"time":"2025-06-22T19:27:27.829610297+08:00","level":"INFO","msg":"Stopped system monitor"}
|
| 11 |
+
{"time":"2025-06-22T19:27:29.428896311+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 12 |
+
{"time":"2025-06-22T19:27:30.389945053+08:00","level":"INFO","msg":"handler: closed","stream_id":"zmuhvn72"}
|
| 13 |
+
{"time":"2025-06-22T19:27:30.38999824+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"zmuhvn72"}
|
| 14 |
+
{"time":"2025-06-22T19:27:30.390015358+08:00","level":"INFO","msg":"sender: closed","stream_id":"zmuhvn72"}
|
| 15 |
+
{"time":"2025-06-22T19:27:30.395367992+08:00","level":"INFO","msg":"stream: closed","id":"zmuhvn72"}
|
ProtT3/all_checkpoints/stage1_06221723/wandb/run-20250622_192645-zmuhvn72/logs/debug.log
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-06-22 19:26:45,451 INFO MainThread:104393 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
|
| 2 |
+
2025-06-22 19:26:45,451 INFO MainThread:104393 [wandb_setup.py:_flush():70] Configure stats pid to 104393
|
| 3 |
+
2025-06-22 19:26:45,451 INFO MainThread:104393 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
|
| 4 |
+
2025-06-22 19:26:45,451 INFO MainThread:104393 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
|
| 5 |
+
2025-06-22 19:26:45,451 INFO MainThread:104393 [wandb_setup.py:_flush():70] Loading settings from environment variables
|
| 6 |
+
2025-06-22 19:26:45,451 INFO MainThread:104393 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage1_06221723/wandb/run-20250622_192645-zmuhvn72/logs/debug.log
|
| 7 |
+
2025-06-22 19:26:45,451 INFO MainThread:104393 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage1_06221723/wandb/run-20250622_192645-zmuhvn72/logs/debug-internal.log
|
| 8 |
+
2025-06-22 19:26:45,451 INFO MainThread:104393 [wandb_init.py:init():852] calling init triggers
|
| 9 |
+
2025-06-22 19:26:45,451 INFO MainThread:104393 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'_wandb': {}}
|
| 11 |
+
2025-06-22 19:26:45,451 INFO MainThread:104393 [wandb_init.py:init():893] starting backend
|
| 12 |
+
2025-06-22 19:26:45,451 INFO MainThread:104393 [wandb_init.py:init():897] sending inform_init request
|
| 13 |
+
2025-06-22 19:26:45,453 INFO MainThread:104393 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 14 |
+
2025-06-22 19:26:45,454 INFO MainThread:104393 [wandb_init.py:init():907] backend started and connected
|
| 15 |
+
2025-06-22 19:26:45,458 INFO MainThread:104393 [wandb_init.py:init():1005] updated telemetry
|
| 16 |
+
2025-06-22 19:26:45,461 INFO MainThread:104393 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
|
| 17 |
+
2025-06-22 19:26:47,645 INFO MainThread:104393 [wandb_init.py:init():1104] starting run threads in backend
|
| 18 |
+
2025-06-22 19:26:47,826 INFO MainThread:104393 [wandb_run.py:_console_start():2573] atexit reg
|
| 19 |
+
2025-06-22 19:26:47,826 INFO MainThread:104393 [wandb_run.py:_redirect():2421] redirect: wrap_raw
|
| 20 |
+
2025-06-22 19:26:47,829 INFO MainThread:104393 [wandb_run.py:_redirect():2490] Wrapping output streams.
|
| 21 |
+
2025-06-22 19:26:47,830 INFO MainThread:104393 [wandb_run.py:_redirect():2513] Redirects installed.
|
| 22 |
+
2025-06-22 19:26:47,831 INFO MainThread:104393 [wandb_init.py:init():1150] run started, returning control to user process
|
| 23 |
+
2025-06-22 19:26:54,211 INFO MainThread:104393 [wandb_run.py:_config_callback():1436] config_cb None None {'filename': 'stage1_06221723', 'seed': 42, 'mode': 'train', 'strategy': 'deepspeed', 'accelerator': 'gpu', 'devices': '0,1,2,3,4,5,6,7', 'precision': 'bf16-mixed', 'max_epochs': 20, 'check_val_every_n_epoch': 1, 'use_wandb_logger': True, 'mix_dataset': True, 'temperature': 0.1, 'save_every_n_epochs': 5, 'ptm': True, 'lm': True, 'rerank_cand_num': 128, 'plm_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m', 'plm_tune': 'freeze', 'load_4bit': False, 'pool_size': 0, 'bert_hidden_dim': 768, 'bert_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft', 'projection_dim': 256, 'cross_attention_freq': 2, 'num_query_token': 8, 'weight_decay': 0.05, 'init_lr': 0.0001, 'min_lr': 1e-05, 'warmup_lr': 1e-06, 'warmup_steps': 1000, 'lr_decay_rate': 0.9, 'scheduler': 'linear_warmup_cosine_lr', 'init_checkpoint': '', 'retrieval_eval_epoch': 10, 'num_workers': 8, 'batch_size': 32, 'match_batch_size': 64, 'root': 'data', 'text_max_len': 128, 'prot_max_len': 1024, 'prot_aug': 'None'}
|
| 24 |
+
2025-06-22 19:27:27,827 INFO MsgRouterThr:104393 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
|
ProtT3/all_checkpoints/stage1_06261435/wandb/debug-internal.log
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-06-26T14:47:31.30118788+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-06-26T14:47:33.763139479+08:00","level":"INFO","msg":"created new stream","id":"1bz1vyyf"}
|
| 3 |
+
{"time":"2025-06-26T14:47:33.763180996+08:00","level":"INFO","msg":"stream: started","id":"1bz1vyyf"}
|
| 4 |
+
{"time":"2025-06-26T14:47:33.76320552+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"1bz1vyyf"}
|
| 5 |
+
{"time":"2025-06-26T14:47:33.763263895+08:00","level":"INFO","msg":"handler: started","stream_id":"1bz1vyyf"}
|
| 6 |
+
{"time":"2025-06-26T14:47:33.763302435+08:00","level":"INFO","msg":"sender: started","stream_id":"1bz1vyyf"}
|
| 7 |
+
{"time":"2025-06-26T14:47:35.049823143+08:00","level":"INFO","msg":"Starting system monitor"}
|
| 8 |
+
{"time":"2025-06-26T14:59:24.18296941+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06261435/1bz1vyyf/file_stream\": EOF"}
|
| 9 |
+
{"time":"2025-06-26T15:47:20.217171547+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 10 |
+
{"time":"2025-06-26T15:47:52.39492821+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
|
| 11 |
+
{"time":"2025-06-26T15:48:27.272812301+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
|
| 12 |
+
{"time":"2025-06-26T15:49:05.914803533+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 13 |
+
{"time":"2025-06-26T15:49:51.452193247+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06261435/1bz1vyyf/file_stream\": net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)"}
|
| 14 |
+
{"time":"2025-06-26T15:49:52.900302871+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
|
| 15 |
+
{"time":"2025-06-26T15:50:55.486728897+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
ProtT3/all_checkpoints/stage1_06261435/wandb/debug.log
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
|
| 2 |
+
2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_setup.py:_flush():70] Configure stats pid to 13641
|
| 3 |
+
2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
|
| 4 |
+
2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
|
| 5 |
+
2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_setup.py:_flush():70] Loading settings from environment variables
|
| 6 |
+
2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/logs/debug.log
|
| 7 |
+
2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/logs/debug-internal.log
|
| 8 |
+
2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:init():852] calling init triggers
|
| 9 |
+
2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'_wandb': {}}
|
| 11 |
+
2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:init():893] starting backend
|
| 12 |
+
2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:init():897] sending inform_init request
|
| 13 |
+
2025-06-26 14:47:31,294 INFO MainThread:13641 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 14 |
+
2025-06-26 14:47:31,296 INFO MainThread:13641 [wandb_init.py:init():907] backend started and connected
|
| 15 |
+
2025-06-26 14:47:31,300 INFO MainThread:13641 [wandb_init.py:init():1005] updated telemetry
|
| 16 |
+
2025-06-26 14:47:31,303 INFO MainThread:13641 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
|
| 17 |
+
2025-06-26 14:47:35,041 INFO MainThread:13641 [wandb_init.py:init():1104] starting run threads in backend
|
| 18 |
+
2025-06-26 14:47:35,187 INFO MainThread:13641 [wandb_run.py:_console_start():2573] atexit reg
|
| 19 |
+
2025-06-26 14:47:35,187 INFO MainThread:13641 [wandb_run.py:_redirect():2421] redirect: wrap_raw
|
| 20 |
+
2025-06-26 14:47:35,192 INFO MainThread:13641 [wandb_run.py:_redirect():2490] Wrapping output streams.
|
| 21 |
+
2025-06-26 14:47:35,192 INFO MainThread:13641 [wandb_run.py:_redirect():2513] Redirects installed.
|
| 22 |
+
2025-06-26 14:47:35,197 INFO MainThread:13641 [wandb_init.py:init():1150] run started, returning control to user process
|
| 23 |
+
2025-06-26 14:47:41,914 INFO MainThread:13641 [wandb_run.py:_config_callback():1436] config_cb None None {'filename': 'stage1_06261435', 'seed': 42, 'mode': 'train', 'strategy': 'deepspeed', 'accelerator': 'gpu', 'devices': '0,1,2,3,4,5,6,7', 'precision': 'bf16-mixed', 'max_epochs': 30, 'check_val_every_n_epoch': 1, 'use_wandb_logger': True, 'mix_dataset': True, 'temperature': 0.1, 'save_every_n_epochs': 5, 'ptm': True, 'lm': True, 'rerank_cand_num': 128, 'plm_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m', 'plm_tune': 'freeze', 'load_4bit': False, 'pool_size': 0, 'bert_hidden_dim': 768, 'bert_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft', 'projection_dim': 256, 'cross_attention_freq': 2, 'num_query_token': 8, 'weight_decay': 0.05, 'init_lr': 0.0001, 'min_lr': 1e-05, 'warmup_lr': 1e-06, 'warmup_steps': 1000, 'lr_decay_rate': 0.9, 'scheduler': 'linear_warmup_cosine_lr', 'init_checkpoint': '', 'retrieval_eval_epoch': 10, 'num_workers': 8, 'batch_size': 160, 'match_batch_size': 64, 'root': 'data', 'text_max_len': 128, 'prot_max_len': 1024, 'prot_aug': 'None'}
|
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_143705-0goy7nyw/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-06-26T14:37:05.188561402+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage1_06261435/wandb/run-20250626_143705-0goy7nyw/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-06-26T14:37:35.294367385+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)"}
|
| 3 |
+
{"time":"2025-06-26T14:38:07.797500016+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_143705-0goy7nyw/logs/debug.log
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-06-26 14:37:05,174 INFO MainThread:2555 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
|
| 2 |
+
2025-06-26 14:37:05,174 INFO MainThread:2555 [wandb_setup.py:_flush():70] Configure stats pid to 2555
|
| 3 |
+
2025-06-26 14:37:05,174 INFO MainThread:2555 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
|
| 4 |
+
2025-06-26 14:37:05,174 INFO MainThread:2555 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
|
| 5 |
+
2025-06-26 14:37:05,174 INFO MainThread:2555 [wandb_setup.py:_flush():70] Loading settings from environment variables
|
| 6 |
+
2025-06-26 14:37:05,174 INFO MainThread:2555 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage1_06261435/wandb/run-20250626_143705-0goy7nyw/logs/debug.log
|
| 7 |
+
2025-06-26 14:37:05,174 INFO MainThread:2555 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage1_06261435/wandb/run-20250626_143705-0goy7nyw/logs/debug-internal.log
|
| 8 |
+
2025-06-26 14:37:05,174 INFO MainThread:2555 [wandb_init.py:init():852] calling init triggers
|
| 9 |
+
2025-06-26 14:37:05,174 INFO MainThread:2555 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'_wandb': {}}
|
| 11 |
+
2025-06-26 14:37:05,174 INFO MainThread:2555 [wandb_init.py:init():893] starting backend
|
| 12 |
+
2025-06-26 14:37:05,174 INFO MainThread:2555 [wandb_init.py:init():897] sending inform_init request
|
| 13 |
+
2025-06-26 14:37:05,178 INFO MainThread:2555 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 14 |
+
2025-06-26 14:37:05,178 INFO MainThread:2555 [wandb_init.py:init():907] backend started and connected
|
| 15 |
+
2025-06-26 14:37:05,180 INFO MainThread:2555 [wandb_init.py:init():1005] updated telemetry
|
| 16 |
+
2025-06-26 14:37:05,182 INFO MainThread:2555 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
|
| 17 |
+
2025-06-26 14:37:37,418 INFO Thread-3 (wrapped_target):2555 [retry.py:__call__():175] [no run ID] Retry attempt failed:
|
| 18 |
+
Traceback (most recent call last):
|
| 19 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/urllib3/connection.py", line 198, in _new_conn
|
| 20 |
+
sock = connection.create_connection(
|
| 21 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/urllib3/util/connection.py", line 85, in create_connection
|
| 22 |
+
raise err
|
| 23 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/urllib3/util/connection.py", line 73, in create_connection
|
| 24 |
+
sock.connect(sa)
|
| 25 |
+
TimeoutError: timed out
|
| 26 |
+
|
| 27 |
+
The above exception was the direct cause of the following exception:
|
| 28 |
+
|
| 29 |
+
Traceback (most recent call last):
|
| 30 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/urllib3/connectionpool.py", line 787, in urlopen
|
| 31 |
+
response = self._make_request(
|
| 32 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/urllib3/connectionpool.py", line 488, in _make_request
|
| 33 |
+
raise new_e
|
| 34 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/urllib3/connectionpool.py", line 464, in _make_request
|
| 35 |
+
self._validate_conn(conn)
|
| 36 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
|
| 37 |
+
conn.connect()
|
| 38 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/urllib3/connection.py", line 704, in connect
|
| 39 |
+
self.sock = sock = self._new_conn()
|
| 40 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/urllib3/connection.py", line 207, in _new_conn
|
| 41 |
+
raise ConnectTimeoutError(
|
| 42 |
+
urllib3.exceptions.ConnectTimeoutError: (<urllib3.connection.HTTPSConnection object at 0x7fa5a7f7b9a0>, 'Connection to api.wandb.ai timed out. (connect timeout=20)')
|
| 43 |
+
|
| 44 |
+
The above exception was the direct cause of the following exception:
|
| 45 |
+
|
| 46 |
+
Traceback (most recent call last):
|
| 47 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/requests/adapters.py", line 667, in send
|
| 48 |
+
resp = conn.urlopen(
|
| 49 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/urllib3/connectionpool.py", line 841, in urlopen
|
| 50 |
+
retries = retries.increment(
|
| 51 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/urllib3/util/retry.py", line 519, in increment
|
| 52 |
+
raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
|
| 53 |
+
urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='api.wandb.ai', port=443): Max retries exceeded with url: /graphql (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fa5a7f7b9a0>, 'Connection to api.wandb.ai timed out. (connect timeout=20)'))
|
| 54 |
+
|
| 55 |
+
During handling of the above exception, another exception occurred:
|
| 56 |
+
|
| 57 |
+
Traceback (most recent call last):
|
| 58 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/wandb/sdk/lib/retry.py", line 134, in __call__
|
| 59 |
+
result = self._call_fn(*args, **kwargs)
|
| 60 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/wandb/sdk/internal/internal_api.py", line 398, in execute
|
| 61 |
+
return self.client.execute(*args, **kwargs) # type: ignore
|
| 62 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/wandb/vendor/gql-0.2.0/wandb_gql/client.py", line 52, in execute
|
| 63 |
+
result = self._get_result(document, *args, **kwargs)
|
| 64 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/wandb/vendor/gql-0.2.0/wandb_gql/client.py", line 60, in _get_result
|
| 65 |
+
return self.transport.execute(document, *args, **kwargs)
|
| 66 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/wandb/sdk/lib/gql_request.py", line 58, in execute
|
| 67 |
+
request = self.session.post(self.url, **post_args)
|
| 68 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/requests/sessions.py", line 637, in post
|
| 69 |
+
return self.request("POST", url, data=data, json=json, **kwargs)
|
| 70 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/requests/sessions.py", line 589, in request
|
| 71 |
+
resp = self.send(prep, **send_kwargs)
|
| 72 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/requests/sessions.py", line 703, in send
|
| 73 |
+
r = adapter.send(request, **kwargs)
|
| 74 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/requests/adapters.py", line 688, in send
|
| 75 |
+
raise ConnectTimeout(e, request=request)
|
| 76 |
+
requests.exceptions.ConnectTimeout: HTTPSConnectionPool(host='api.wandb.ai', port=443): Max retries exceeded with url: /graphql (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fa5a7f7b9a0>, 'Connection to api.wandb.ai timed out. (connect timeout=20)'))
|
| 77 |
+
2025-06-26 14:38:28,356 WARNING MainThread:2555 [wandb_init.py:init():1681] [no run ID] interrupted
|
| 78 |
+
Traceback (most recent call last):
|
| 79 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/wandb/sdk/wandb_init.py", line 1677, in init
|
| 80 |
+
return wi.init(run_settings, run_config, run_printer)
|
| 81 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/wandb/sdk/wandb_init.py", line 1055, in init
|
| 82 |
+
result = wait_with_progress(
|
| 83 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/wandb/sdk/mailbox/wait_with_progress.py", line 24, in wait_with_progress
|
| 84 |
+
return wait_all_with_progress(
|
| 85 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/wandb/sdk/mailbox/wait_with_progress.py", line 87, in wait_all_with_progress
|
| 86 |
+
return asyncio_compat.run(progress_loop_with_timeout)
|
| 87 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/site-packages/wandb/sdk/lib/asyncio_compat.py", line 30, in run
|
| 88 |
+
return future.result()
|
| 89 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/concurrent/futures/_base.py", line 440, in result
|
| 90 |
+
self._condition.wait(timeout)
|
| 91 |
+
File "/root/miniconda3/envs/protT3/lib/python3.10/threading.py", line 320, in wait
|
| 92 |
+
waiter.acquire()
|
| 93 |
+
KeyboardInterrupt
|
| 94 |
+
2025-06-26 14:38:29,216 INFO MsgRouterThr:2555 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 2 handles.
|
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/files/output.log
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06261435 exists and is not empty.
|
| 2 |
+
Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.
|
| 3 |
+
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
|
| 4 |
+
|
| 5 |
+
| Name | Type | Params | Mode
|
| 6 |
+
------------------------------------------------------
|
| 7 |
+
0 | blip2qformer | Blip2Qformer | 327 M | train
|
| 8 |
+
------------------------------------------------------
|
| 9 |
+
179 M Trainable params
|
| 10 |
+
147 M Non-trainable params
|
| 11 |
+
327 M Total params
|
| 12 |
+
1,309.467 Total estimated model params size (MB)
|
| 13 |
+
5 Modules in train mode
|
| 14 |
+
926 Modules in eval mode
|
| 15 |
+
Epoch 0: 3%|██████▉ | 48/1665 [00:58<32:34, 0.83it/s, v_num=hy79]
|
| 16 |
+
/nas/shared/kilab/wangyujia/ProtT3/model/blip2qformer.py:220: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
|
| 17 |
+
with torch.cuda.amp.autocast(enable_autocast, dtype=torch.float32):
|
| 18 |
+
|
| 19 |
+
Detected KeyboardInterrupt, attempting graceful shutdown ...
|
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/files/requirements.txt
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
attrs==25.3.0
|
| 2 |
+
tqdm==4.67.1
|
| 3 |
+
langcodes==3.5.0
|
| 4 |
+
nvidia-cublas-cu12==12.4.5.8
|
| 5 |
+
tifffile==2025.5.10
|
| 6 |
+
nvidia-cufile-cu12==1.11.1.6
|
| 7 |
+
nltk==3.9.1
|
| 8 |
+
salesforce-lavis==1.0.2
|
| 9 |
+
tzdata==2025.2
|
| 10 |
+
pyparsing==3.2.3
|
| 11 |
+
six==1.17.0
|
| 12 |
+
python-dateutil==2.9.0.post0
|
| 13 |
+
pandas==2.2.3
|
| 14 |
+
pytorch-lightning==2.5.1.post0
|
| 15 |
+
blinker==1.9.0
|
| 16 |
+
opencv-python-headless==4.5.5.64
|
| 17 |
+
nvidia-cusparse-cu12==12.3.1.170
|
| 18 |
+
pytz==2025.2
|
| 19 |
+
async-timeout==5.0.1
|
| 20 |
+
pillow==11.2.1
|
| 21 |
+
parso==0.8.4
|
| 22 |
+
joblib==1.5.1
|
| 23 |
+
contourpy==1.3.2
|
| 24 |
+
triton==3.2.0
|
| 25 |
+
marisa-trie==1.2.1
|
| 26 |
+
PyYAML==6.0.2
|
| 27 |
+
regex==2024.11.6
|
| 28 |
+
idna==3.10
|
| 29 |
+
nvidia-curand-cu12==10.3.5.147
|
| 30 |
+
rpds-py==0.25.1
|
| 31 |
+
aiosignal==1.3.2
|
| 32 |
+
srsly==2.5.1
|
| 33 |
+
confection==0.1.5
|
| 34 |
+
typing-inspection==0.4.1
|
| 35 |
+
packaging==24.2
|
| 36 |
+
distlib==0.3.9
|
| 37 |
+
networkx==3.4.2
|
| 38 |
+
absl-py==2.2.2
|
| 39 |
+
yarl==1.20.0
|
| 40 |
+
lightning-utilities==0.14.3
|
| 41 |
+
executing==2.2.0
|
| 42 |
+
pycocoevalcap==1.2
|
| 43 |
+
wheel==0.45.1
|
| 44 |
+
nvidia-ml-py==12.575.51
|
| 45 |
+
cycler==0.12.1
|
| 46 |
+
wrapt==1.17.2
|
| 47 |
+
jsonschema-specifications==2025.4.1
|
| 48 |
+
protobuf==6.31.0
|
| 49 |
+
mpmath==1.3.0
|
| 50 |
+
certifi==2025.4.26
|
| 51 |
+
py-cpuinfo==9.0.0
|
| 52 |
+
contexttimer==0.3.3
|
| 53 |
+
watchdog==6.0.0
|
| 54 |
+
pexpect==4.9.0
|
| 55 |
+
webencodings==0.5.1
|
| 56 |
+
hf-xet==1.1.2
|
| 57 |
+
cymem==2.0.11
|
| 58 |
+
requests==2.32.3
|
| 59 |
+
timm==0.4.12
|
| 60 |
+
omegaconf==2.3.0
|
| 61 |
+
nvidia-nvjitlink-cu12==12.4.127
|
| 62 |
+
webdataset==0.2.111
|
| 63 |
+
nodeenv==1.9.1
|
| 64 |
+
frozenlist==1.6.0
|
| 65 |
+
annotated-types==0.7.0
|
| 66 |
+
matplotlib-inline==0.1.7
|
| 67 |
+
urllib3==2.4.0
|
| 68 |
+
rich==14.0.0
|
| 69 |
+
GitPython==3.1.44
|
| 70 |
+
lazy_loader==0.4
|
| 71 |
+
msgpack==1.1.0
|
| 72 |
+
prompt_toolkit==3.0.51
|
| 73 |
+
fonttools==4.58.0
|
| 74 |
+
multidict==6.4.4
|
| 75 |
+
blis==1.3.0
|
| 76 |
+
thinc==8.3.6
|
| 77 |
+
nvidia-nvtx-cu12==12.4.127
|
| 78 |
+
torchmetrics==1.7.1
|
| 79 |
+
weasel==0.4.1
|
| 80 |
+
numpy==2.2.6
|
| 81 |
+
cachetools==5.5.2
|
| 82 |
+
Jinja2==3.1.6
|
| 83 |
+
matplotlib==3.10.3
|
| 84 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 85 |
+
Pygments==2.19.1
|
| 86 |
+
tornado==6.5.1
|
| 87 |
+
scipy==1.15.3
|
| 88 |
+
rouge_score==0.1.2
|
| 89 |
+
cloudpathlib==0.21.1
|
| 90 |
+
jedi==0.19.2
|
| 91 |
+
referencing==0.36.2
|
| 92 |
+
decord==0.6.0
|
| 93 |
+
setuptools==78.1.1
|
| 94 |
+
mdurl==0.1.2
|
| 95 |
+
identify==2.6.12
|
| 96 |
+
python-slugify==8.0.4
|
| 97 |
+
portalocker==3.1.1
|
| 98 |
+
catalogue==2.0.10
|
| 99 |
+
platformdirs==4.3.8
|
| 100 |
+
antlr4-python3-runtime==4.9.3
|
| 101 |
+
nvidia-cusolver-cu12==11.6.1.9
|
| 102 |
+
kaggle==1.7.4.5
|
| 103 |
+
pydeck==0.9.1
|
| 104 |
+
pydantic==2.11.5
|
| 105 |
+
nvidia-cufft-cu12==11.2.1.3
|
| 106 |
+
pyarrow==20.0.0
|
| 107 |
+
nvidia-nccl-cu12==2.21.5
|
| 108 |
+
markdown-it-py==3.0.0
|
| 109 |
+
gitdb==4.0.12
|
| 110 |
+
altair==5.5.0
|
| 111 |
+
torchvision==0.21.0
|
| 112 |
+
python-magic==0.4.27
|
| 113 |
+
iopath==0.1.10
|
| 114 |
+
smart-open==7.1.0
|
| 115 |
+
torch==2.6.0
|
| 116 |
+
pycocotools==2.0.8
|
| 117 |
+
fairscale==0.4.4
|
| 118 |
+
traitlets==5.14.3
|
| 119 |
+
pure_eval==0.2.3
|
| 120 |
+
sympy==1.13.1
|
| 121 |
+
nvidia-cusparselt-cu12==0.6.2
|
| 122 |
+
imageio==2.37.0
|
| 123 |
+
stack-data==0.6.3
|
| 124 |
+
shellingham==1.5.4
|
| 125 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
| 126 |
+
einops==0.8.1
|
| 127 |
+
tenacity==9.1.2
|
| 128 |
+
virtualenv==20.31.2
|
| 129 |
+
ptyprocess==0.7.0
|
| 130 |
+
cfgv==3.4.0
|
| 131 |
+
pre_commit==4.2.0
|
| 132 |
+
language_data==1.3.0
|
| 133 |
+
typing_extensions==4.13.2
|
| 134 |
+
propcache==0.3.1
|
| 135 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
| 136 |
+
safetensors==0.5.3
|
| 137 |
+
text-unidecode==1.3
|
| 138 |
+
wcwidth==0.2.13
|
| 139 |
+
charset-normalizer==3.4.2
|
| 140 |
+
aiohappyeyeballs==2.6.1
|
| 141 |
+
ipython==8.36.0
|
| 142 |
+
streamlit==1.45.1
|
| 143 |
+
asttokens==3.0.0
|
| 144 |
+
psutil==7.0.0
|
| 145 |
+
smmap==5.0.2
|
| 146 |
+
exceptiongroup==1.3.0
|
| 147 |
+
murmurhash==1.0.13
|
| 148 |
+
filelock==3.18.0
|
| 149 |
+
plotly==6.1.1
|
| 150 |
+
hjson==3.1.0
|
| 151 |
+
pydantic_core==2.33.2
|
| 152 |
+
ninja==1.11.1.4
|
| 153 |
+
kiwisolver==1.4.8
|
| 154 |
+
spacy-legacy==3.0.12
|
| 155 |
+
opendatasets==0.1.22
|
| 156 |
+
decorator==5.2.1
|
| 157 |
+
spacy==3.8.7
|
| 158 |
+
wasabi==1.1.3
|
| 159 |
+
sentencepiece==0.2.0
|
| 160 |
+
toml==0.10.2
|
| 161 |
+
scikit-image==0.25.2
|
| 162 |
+
deepspeed==0.16.10+b666844f
|
| 163 |
+
ftfy==6.3.1
|
| 164 |
+
bleach==6.2.0
|
| 165 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
| 166 |
+
spacy-loggers==1.0.5
|
| 167 |
+
MarkupSafe==3.0.2
|
| 168 |
+
braceexpand==0.1.7
|
| 169 |
+
oss2==2.15.0
|
| 170 |
+
preshed==3.0.10
|
| 171 |
+
transformers==4.52.3
|
| 172 |
+
aiohttp==3.12.2
|
| 173 |
+
web.py==0.62
|
| 174 |
+
threadpoolctl==3.6.0
|
| 175 |
+
jaraco.functools==4.1.0
|
| 176 |
+
wandb==0.19.11
|
| 177 |
+
sentry-sdk==2.29.1
|
| 178 |
+
tokenizers==0.21.1
|
| 179 |
+
fsspec==2025.3.0
|
| 180 |
+
flash-attn==2.7.1.post1
|
| 181 |
+
opendelta==0.3.2
|
| 182 |
+
opencv-python==4.11.0.86
|
| 183 |
+
click==8.2.1
|
| 184 |
+
docker-pycreds==0.4.0
|
| 185 |
+
typer==0.16.0
|
| 186 |
+
xxhash==3.5.0
|
| 187 |
+
pathlib==1.0.1
|
| 188 |
+
dill==0.3.8
|
| 189 |
+
crcmod==1.7
|
| 190 |
+
bigmodelvis==0.0.1
|
| 191 |
+
datasets==3.6.0
|
| 192 |
+
pycryptodome==3.23.0
|
| 193 |
+
jsonschema==4.24.0
|
| 194 |
+
aliyun-python-sdk-core==2.16.0
|
| 195 |
+
jmespath==0.10.0
|
| 196 |
+
more-itertools==10.7.0
|
| 197 |
+
scikit-learn==1.6.1
|
| 198 |
+
huggingface-hub==0.32.1
|
| 199 |
+
cryptography==45.0.3
|
| 200 |
+
pycparser==2.22
|
| 201 |
+
yacs==0.1.8
|
| 202 |
+
aliyun-python-sdk-kms==2.16.5
|
| 203 |
+
cffi==1.17.1
|
| 204 |
+
delta-center-client==0.0.4
|
| 205 |
+
multiprocess==0.70.16
|
| 206 |
+
setproctitle==1.3.6
|
| 207 |
+
narwhals==1.41.0
|
| 208 |
+
pip==25.1.1
|
| 209 |
+
cheroot==10.0.1
|
| 210 |
+
jaraco.context==5.3.0
|
| 211 |
+
more-itertools==10.3.0
|
| 212 |
+
jaraco.functools==4.0.1
|
| 213 |
+
jaraco.text==3.12.1
|
| 214 |
+
platformdirs==4.2.2
|
| 215 |
+
packaging==24.2
|
| 216 |
+
wheel==0.45.1
|
| 217 |
+
zipp==3.19.2
|
| 218 |
+
inflect==7.3.1
|
| 219 |
+
autocommand==2.2.2
|
| 220 |
+
typeguard==4.3.0
|
| 221 |
+
jaraco.collections==5.1.0
|
| 222 |
+
backports.tarfile==1.2.0
|
| 223 |
+
tomli==2.0.1
|
| 224 |
+
importlib_metadata==8.0.0
|
| 225 |
+
typing_extensions==4.12.2
|
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
|
| 3 |
+
"python": "CPython 3.10.0",
|
| 4 |
+
"startedAt": "2025-06-26T06:40:00.459849Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--devices",
|
| 7 |
+
"0,1,2,3,4,5,6,7",
|
| 8 |
+
"--mode",
|
| 9 |
+
"train",
|
| 10 |
+
"--filename",
|
| 11 |
+
"stage1_06261435",
|
| 12 |
+
"--num_query_token",
|
| 13 |
+
"8",
|
| 14 |
+
"--plm_name",
|
| 15 |
+
"/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m",
|
| 16 |
+
"--bert_name",
|
| 17 |
+
"/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft",
|
| 18 |
+
"--save_every_n_epochs",
|
| 19 |
+
"5",
|
| 20 |
+
"--max_epochs",
|
| 21 |
+
"30",
|
| 22 |
+
"--batch_size",
|
| 23 |
+
"64",
|
| 24 |
+
"--precision",
|
| 25 |
+
"bf16-mixed",
|
| 26 |
+
"--mix_dataset",
|
| 27 |
+
"--num_workers",
|
| 28 |
+
"8",
|
| 29 |
+
"--use_wandb_logger"
|
| 30 |
+
],
|
| 31 |
+
"program": "/nas/shared/kilab/wangyujia/ProtT3/stage1.py",
|
| 32 |
+
"codePath": "stage1.py",
|
| 33 |
+
"email": "gia0603yucca@gmail.com",
|
| 34 |
+
"root": "./all_checkpoints/stage1_06261435/",
|
| 35 |
+
"host": "dsw-265304-cd576ddc5-gh74w",
|
| 36 |
+
"executable": "/root/miniconda3/envs/protT3/bin/python",
|
| 37 |
+
"codePathLocal": "stage1.py",
|
| 38 |
+
"cpu_count": 64,
|
| 39 |
+
"cpu_count_logical": 64,
|
| 40 |
+
"gpu": "NVIDIA A800-SXM4-80GB",
|
| 41 |
+
"gpu_count": 8,
|
| 42 |
+
"disk": {
|
| 43 |
+
"/": {
|
| 44 |
+
"total": "1623302262784",
|
| 45 |
+
"used": "1290833920"
|
| 46 |
+
}
|
| 47 |
+
},
|
| 48 |
+
"memory": {
|
| 49 |
+
"total": "549755813888"
|
| 50 |
+
},
|
| 51 |
+
"cpu": {
|
| 52 |
+
"count": 64,
|
| 53 |
+
"countLogical": 64
|
| 54 |
+
},
|
| 55 |
+
"gpu_nvidia": [
|
| 56 |
+
{
|
| 57 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 58 |
+
"memoryTotal": "85198045184",
|
| 59 |
+
"architecture": "Ampere"
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 63 |
+
"memoryTotal": "85198045184",
|
| 64 |
+
"architecture": "Ampere"
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 68 |
+
"memoryTotal": "85198045184",
|
| 69 |
+
"architecture": "Ampere"
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 73 |
+
"memoryTotal": "85198045184",
|
| 74 |
+
"architecture": "Ampere"
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 78 |
+
"memoryTotal": "85198045184",
|
| 79 |
+
"architecture": "Ampere"
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 83 |
+
"memoryTotal": "85198045184",
|
| 84 |
+
"architecture": "Ampere"
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 88 |
+
"memoryTotal": "85198045184",
|
| 89 |
+
"architecture": "Ampere"
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 93 |
+
"memoryTotal": "85198045184",
|
| 94 |
+
"architecture": "Ampere"
|
| 95 |
+
}
|
| 96 |
+
],
|
| 97 |
+
"cudaVersion": "12.1"
|
| 98 |
+
}
|
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_wandb":{"runtime":87}}
|
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-06-26T14:40:00.459807265+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-06-26T14:40:02.083580433+08:00","level":"INFO","msg":"created new stream","id":"coknhy79"}
|
| 3 |
+
{"time":"2025-06-26T14:40:02.083623333+08:00","level":"INFO","msg":"stream: started","id":"coknhy79"}
|
| 4 |
+
{"time":"2025-06-26T14:40:02.083652289+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"coknhy79"}
|
| 5 |
+
{"time":"2025-06-26T14:40:02.083685885+08:00","level":"INFO","msg":"sender: started","stream_id":"coknhy79"}
|
| 6 |
+
{"time":"2025-06-26T14:40:02.083706361+08:00","level":"INFO","msg":"handler: started","stream_id":"coknhy79"}
|
| 7 |
+
{"time":"2025-06-26T14:40:05.653872904+08:00","level":"INFO","msg":"Starting system monitor"}
|
| 8 |
+
{"time":"2025-06-26T14:40:21.95203173+08:00","level":"ERROR","msg":"request failed","error":"Put \"https://storage.googleapis.com/wandb-production.appspot.com/gia0603yucca/stage1_06261435/coknhy79/wandb-metadata.json?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gorilla-files-url-signer-man%40wandb-production.iam.gserviceaccount.com%2F20250626%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20250626T064005Z&X-Goog-Expires=86399&X-Goog-Signature=2ebb3b79b72a33f0dea008298dd64586dedd8e6bc4d1254ca4ee427dd9bc9031e9639976842b04c6f1e621bb4977fcba6d4c94c21e92008c662e898226494154965b113c395f94636cbca6cfd2249046ec06117750eebbc838bade9d9fa21ae04a11b6eb67dacc0319fb3bb297deca6c80a588a6a41a7b1f22b20c94a59d1f800926208ec84aa369a0a265f5cb9507e60248cf60d0ca950966ead524dec91e347bdb50ed95b2daa080e4e381b4e3dd1d85267b1297dc3c33d3bc683cac980ad150cf1033532af34d80aef288235efe863a3246dad1f2b656d6dbea37ff48485a03307ab8f7d65f6827ff79a95ebc8be7283ace09ee4bdd580361dd389452900a&X-Goog-SignedHeaders=host&X-User=gia0603yucca\": read tcp 10.1.8.160:47468->142.250.73.155:443: read: connection reset by peer","method":"PUT","url":"https://storage.googleapis.com/wandb-production.appspot.com/gia0603yucca/stage1_06261435/coknhy79/wandb-metadata.json?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gorilla-files-url-signer-man%40wandb-production.iam.gserviceaccount.com%2F20250626%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20250626T064005Z&X-Goog-Expires=86399&X-Goog-Signature=2ebb3b79b72a33f0dea008298dd64586dedd8e6bc4d1254ca4ee427dd9bc9031e9639976842b04c6f1e621bb4977fcba6d4c94c21e92008c662e898226494154965b113c395f94636cbca6cfd2249046ec06117750eebbc838bade9d9fa21ae04a11b6eb67dacc0319fb3bb297deca6c80a588a6a41a7b1f22b20c94a59d1f800926208ec84aa369a0a265f5cb9507e60248cf60d0ca950966ead524dec91e347bdb50ed95b2daa080e4e381b4e3dd1d85267b1297dc3c33d3bc683cac980ad150cf1033532af34d80aef288235efe863a3246dad1f2b656d6dbea37ff48485a03307ab8f7d65f6827ff79a95ebc8be7283ace09ee4bdd580361dd389452900a&X-Goog-SignedHeaders=host&X-User=gia0603yucca"}
|
| 9 |
+
{"time":"2025-06-26T14:40:21.952032953+08:00","level":"ERROR","msg":"request failed","error":"Put \"https://storage.googleapis.com/wandb-production.appspot.com/gia0603yucca/stage1_06261435/coknhy79/requirements.txt?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gorilla-files-url-signer-man%40wandb-production.iam.gserviceaccount.com%2F20250626%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20250626T064006Z&X-Goog-Expires=86399&X-Goog-Signature=78ae20776b7f86131b9272da30634723f339c53fbe35baa579e4d622bae104bf8b38c0c3cd5e64009eab167fa1132f4e5d02a2124a12f56f53a2fa10277fb6a7c582a81981d7372e6f2b2b33ff175bb4af1fc83ed04c00275ef4e90cec3133057f39515a0b0b4d55f5e5257bb5e28f9d142c819f9f4848bd11229d9c73dee989c0f69232f269e7efca958b28ec14404ae411af486b22b4b581a6f37ad7a8593025d9d20519451abe9ea912dac4130082c228f58687eb52f37b0911bf5fff35f7c2f75597dc0beebc355bb0a83b0f71cb06e8fadec0b9f798cf0da7e329a991260240180d651b91d4129bbd2f503247ad12cbb6e080076b2824d51383d713ed3b&X-Goog-SignedHeaders=host&X-User=gia0603yucca\": read tcp 10.1.8.160:47468->142.250.73.155:443: read: connection reset by peer","method":"PUT","url":"https://storage.googleapis.com/wandb-production.appspot.com/gia0603yucca/stage1_06261435/coknhy79/requirements.txt?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gorilla-files-url-signer-man%40wandb-production.iam.gserviceaccount.com%2F20250626%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20250626T064006Z&X-Goog-Expires=86399&X-Goog-Signature=78ae20776b7f86131b9272da30634723f339c53fbe35baa579e4d622bae104bf8b38c0c3cd5e64009eab167fa1132f4e5d02a2124a12f56f53a2fa10277fb6a7c582a81981d7372e6f2b2b33ff175bb4af1fc83ed04c00275ef4e90cec3133057f39515a0b0b4d55f5e5257bb5e28f9d142c819f9f4848bd11229d9c73dee989c0f69232f269e7efca958b28ec14404ae411af486b22b4b581a6f37ad7a8593025d9d20519451abe9ea912dac4130082c228f58687eb52f37b0911bf5fff35f7c2f75597dc0beebc355bb0a83b0f71cb06e8fadec0b9f798cf0da7e329a991260240180d651b91d4129bbd2f503247ad12cbb6e080076b2824d51383d713ed3b&X-Goog-SignedHeaders=host&X-User=gia0603yucca"}
|
| 10 |
+
{"time":"2025-06-26T14:41:27.871959935+08:00","level":"INFO","msg":"stream: closing","id":"coknhy79"}
|
| 11 |
+
{"time":"2025-06-26T14:41:27.872028784+08:00","level":"INFO","msg":"Stopping system monitor"}
|
| 12 |
+
{"time":"2025-06-26T14:41:27.940516252+08:00","level":"INFO","msg":"Stopped system monitor"}
|
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/logs/debug.log
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-06-26 14:40:00,443 INFO MainThread:5872 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
|
| 2 |
+
2025-06-26 14:40:00,443 INFO MainThread:5872 [wandb_setup.py:_flush():70] Configure stats pid to 5872
|
| 3 |
+
2025-06-26 14:40:00,443 INFO MainThread:5872 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
|
| 4 |
+
2025-06-26 14:40:00,443 INFO MainThread:5872 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
|
| 5 |
+
2025-06-26 14:40:00,443 INFO MainThread:5872 [wandb_setup.py:_flush():70] Loading settings from environment variables
|
| 6 |
+
2025-06-26 14:40:00,444 INFO MainThread:5872 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/logs/debug.log
|
| 7 |
+
2025-06-26 14:40:00,444 INFO MainThread:5872 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/logs/debug-internal.log
|
| 8 |
+
2025-06-26 14:40:00,444 INFO MainThread:5872 [wandb_init.py:init():852] calling init triggers
|
| 9 |
+
2025-06-26 14:40:00,444 INFO MainThread:5872 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'_wandb': {}}
|
| 11 |
+
2025-06-26 14:40:00,444 INFO MainThread:5872 [wandb_init.py:init():893] starting backend
|
| 12 |
+
2025-06-26 14:40:00,444 INFO MainThread:5872 [wandb_init.py:init():897] sending inform_init request
|
| 13 |
+
2025-06-26 14:40:00,446 INFO MainThread:5872 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 14 |
+
2025-06-26 14:40:00,452 INFO MainThread:5872 [wandb_init.py:init():907] backend started and connected
|
| 15 |
+
2025-06-26 14:40:00,460 INFO MainThread:5872 [wandb_init.py:init():1005] updated telemetry
|
| 16 |
+
2025-06-26 14:40:00,464 INFO MainThread:5872 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
|
| 17 |
+
2025-06-26 14:40:05,603 INFO MainThread:5872 [wandb_init.py:init():1104] starting run threads in backend
|
| 18 |
+
2025-06-26 14:40:05,851 INFO MainThread:5872 [wandb_run.py:_console_start():2573] atexit reg
|
| 19 |
+
2025-06-26 14:40:05,851 INFO MainThread:5872 [wandb_run.py:_redirect():2421] redirect: wrap_raw
|
| 20 |
+
2025-06-26 14:40:05,855 INFO MainThread:5872 [wandb_run.py:_redirect():2490] Wrapping output streams.
|
| 21 |
+
2025-06-26 14:40:05,855 INFO MainThread:5872 [wandb_run.py:_redirect():2513] Redirects installed.
|
| 22 |
+
2025-06-26 14:40:05,857 INFO MainThread:5872 [wandb_init.py:init():1150] run started, returning control to user process
|
| 23 |
+
2025-06-26 14:40:11,773 INFO MainThread:5872 [wandb_run.py:_config_callback():1436] config_cb None None {'filename': 'stage1_06261435', 'seed': 42, 'mode': 'train', 'strategy': 'deepspeed', 'accelerator': 'gpu', 'devices': '0,1,2,3,4,5,6,7', 'precision': 'bf16-mixed', 'max_epochs': 30, 'check_val_every_n_epoch': 1, 'use_wandb_logger': True, 'mix_dataset': True, 'temperature': 0.1, 'save_every_n_epochs': 5, 'ptm': True, 'lm': True, 'rerank_cand_num': 128, 'plm_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m', 'plm_tune': 'freeze', 'load_4bit': False, 'pool_size': 0, 'bert_hidden_dim': 768, 'bert_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft', 'projection_dim': 256, 'cross_attention_freq': 2, 'num_query_token': 8, 'weight_decay': 0.05, 'init_lr': 0.0001, 'min_lr': 1e-05, 'warmup_lr': 1e-06, 'warmup_steps': 1000, 'lr_decay_rate': 0.9, 'scheduler': 'linear_warmup_cosine_lr', 'init_checkpoint': '', 'retrieval_eval_epoch': 10, 'num_workers': 8, 'batch_size': 64, 'match_batch_size': 64, 'root': 'data', 'text_max_len': 128, 'prot_max_len': 1024, 'prot_aug': 'None'}
|
| 24 |
+
2025-06-26 14:41:27,870 INFO MsgRouterThr:5872 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
|
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144000-coknhy79/run-coknhy79.wandb
ADDED
|
Binary file (32.8 kB). View file
|
|
|
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/files/output.log
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06261435 exists and is not empty.
|
| 2 |
+
Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.
|
| 3 |
+
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
|
| 4 |
+
|
| 5 |
+
| Name | Type | Params | Mode
|
| 6 |
+
------------------------------------------------------
|
| 7 |
+
0 | blip2qformer | Blip2Qformer | 327 M | train
|
| 8 |
+
------------------------------------------------------
|
| 9 |
+
179 M Trainable params
|
| 10 |
+
147 M Non-trainable params
|
| 11 |
+
327 M Total params
|
| 12 |
+
1,309.467 Total estimated model params size (MB)
|
| 13 |
+
5 Modules in train mode
|
| 14 |
+
926 Modules in eval mode
|
| 15 |
+
Epoch 9: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 666/666 [31:47<00:00, 0.35it/s, v_num=vyyf]
|
| 16 |
+
/nas/shared/kilab/wangyujia/ProtT3/model/blip2qformer.py:220: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
|
| 17 |
+
with torch.cuda.amp.autocast(enable_autocast, dtype=torch.float32):
|
| 18 |
+
Validation DataLoader 2: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:18<00:00, 0.44it/s]
|
| 19 |
+
/nas/shared/kilab/wangyujia/ProtT3/model/dist_funs.py:18: FutureWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/main/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.
|
| 20 |
+
sd = self.module.state_dict(destination, prefix, keep_vars)
|
| 21 |
+
/nas/shared/kilab/wangyujia/ProtT3/model/blip2_stage1.py:42: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
|
| 22 |
+
return torch.cuda.amp.autocast(dtype=dtype)
|
| 23 |
+
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 157/157 [08:51<00:00, 3.39s/it]
|
| 24 |
+
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1250/1250 [00:01<00:00, 1063.25it/s]
|
| 25 |
+
re-ranking p2t: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████���███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2500/2500 [10:06<00:00, 4.12it/s]
|
| 26 |
+
94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 1179/1250 [00:01<00:00, 1084.47it/s][rank: 1] Child process with PID 14146 terminated with code -6. Forcefully terminating all other processes to avoid zombies 🧟
|
| 27 |
+
re-ranking p2t: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2500/2500 [10:06<00:00, 4.13it/s]
|
| 28 |
+
re-ranking t2p: 58%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 1451/2500 [10:59<08:00, 2.18it/s]
|
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/files/requirements.txt
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
attrs==25.3.0
|
| 2 |
+
tqdm==4.67.1
|
| 3 |
+
langcodes==3.5.0
|
| 4 |
+
nvidia-cublas-cu12==12.4.5.8
|
| 5 |
+
tifffile==2025.5.10
|
| 6 |
+
nvidia-cufile-cu12==1.11.1.6
|
| 7 |
+
nltk==3.9.1
|
| 8 |
+
salesforce-lavis==1.0.2
|
| 9 |
+
tzdata==2025.2
|
| 10 |
+
pyparsing==3.2.3
|
| 11 |
+
six==1.17.0
|
| 12 |
+
python-dateutil==2.9.0.post0
|
| 13 |
+
pandas==2.2.3
|
| 14 |
+
pytorch-lightning==2.5.1.post0
|
| 15 |
+
blinker==1.9.0
|
| 16 |
+
opencv-python-headless==4.5.5.64
|
| 17 |
+
nvidia-cusparse-cu12==12.3.1.170
|
| 18 |
+
pytz==2025.2
|
| 19 |
+
async-timeout==5.0.1
|
| 20 |
+
pillow==11.2.1
|
| 21 |
+
parso==0.8.4
|
| 22 |
+
joblib==1.5.1
|
| 23 |
+
contourpy==1.3.2
|
| 24 |
+
triton==3.2.0
|
| 25 |
+
marisa-trie==1.2.1
|
| 26 |
+
PyYAML==6.0.2
|
| 27 |
+
regex==2024.11.6
|
| 28 |
+
idna==3.10
|
| 29 |
+
nvidia-curand-cu12==10.3.5.147
|
| 30 |
+
rpds-py==0.25.1
|
| 31 |
+
aiosignal==1.3.2
|
| 32 |
+
srsly==2.5.1
|
| 33 |
+
confection==0.1.5
|
| 34 |
+
typing-inspection==0.4.1
|
| 35 |
+
packaging==24.2
|
| 36 |
+
distlib==0.3.9
|
| 37 |
+
networkx==3.4.2
|
| 38 |
+
absl-py==2.2.2
|
| 39 |
+
yarl==1.20.0
|
| 40 |
+
lightning-utilities==0.14.3
|
| 41 |
+
executing==2.2.0
|
| 42 |
+
pycocoevalcap==1.2
|
| 43 |
+
wheel==0.45.1
|
| 44 |
+
nvidia-ml-py==12.575.51
|
| 45 |
+
cycler==0.12.1
|
| 46 |
+
wrapt==1.17.2
|
| 47 |
+
jsonschema-specifications==2025.4.1
|
| 48 |
+
protobuf==6.31.0
|
| 49 |
+
mpmath==1.3.0
|
| 50 |
+
certifi==2025.4.26
|
| 51 |
+
py-cpuinfo==9.0.0
|
| 52 |
+
contexttimer==0.3.3
|
| 53 |
+
watchdog==6.0.0
|
| 54 |
+
pexpect==4.9.0
|
| 55 |
+
webencodings==0.5.1
|
| 56 |
+
hf-xet==1.1.2
|
| 57 |
+
cymem==2.0.11
|
| 58 |
+
requests==2.32.3
|
| 59 |
+
timm==0.4.12
|
| 60 |
+
omegaconf==2.3.0
|
| 61 |
+
nvidia-nvjitlink-cu12==12.4.127
|
| 62 |
+
webdataset==0.2.111
|
| 63 |
+
nodeenv==1.9.1
|
| 64 |
+
frozenlist==1.6.0
|
| 65 |
+
annotated-types==0.7.0
|
| 66 |
+
matplotlib-inline==0.1.7
|
| 67 |
+
urllib3==2.4.0
|
| 68 |
+
rich==14.0.0
|
| 69 |
+
GitPython==3.1.44
|
| 70 |
+
lazy_loader==0.4
|
| 71 |
+
msgpack==1.1.0
|
| 72 |
+
prompt_toolkit==3.0.51
|
| 73 |
+
fonttools==4.58.0
|
| 74 |
+
multidict==6.4.4
|
| 75 |
+
blis==1.3.0
|
| 76 |
+
thinc==8.3.6
|
| 77 |
+
nvidia-nvtx-cu12==12.4.127
|
| 78 |
+
torchmetrics==1.7.1
|
| 79 |
+
weasel==0.4.1
|
| 80 |
+
numpy==2.2.6
|
| 81 |
+
cachetools==5.5.2
|
| 82 |
+
Jinja2==3.1.6
|
| 83 |
+
matplotlib==3.10.3
|
| 84 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 85 |
+
Pygments==2.19.1
|
| 86 |
+
tornado==6.5.1
|
| 87 |
+
scipy==1.15.3
|
| 88 |
+
rouge_score==0.1.2
|
| 89 |
+
cloudpathlib==0.21.1
|
| 90 |
+
jedi==0.19.2
|
| 91 |
+
referencing==0.36.2
|
| 92 |
+
decord==0.6.0
|
| 93 |
+
setuptools==78.1.1
|
| 94 |
+
mdurl==0.1.2
|
| 95 |
+
identify==2.6.12
|
| 96 |
+
python-slugify==8.0.4
|
| 97 |
+
portalocker==3.1.1
|
| 98 |
+
catalogue==2.0.10
|
| 99 |
+
platformdirs==4.3.8
|
| 100 |
+
antlr4-python3-runtime==4.9.3
|
| 101 |
+
nvidia-cusolver-cu12==11.6.1.9
|
| 102 |
+
kaggle==1.7.4.5
|
| 103 |
+
pydeck==0.9.1
|
| 104 |
+
pydantic==2.11.5
|
| 105 |
+
nvidia-cufft-cu12==11.2.1.3
|
| 106 |
+
pyarrow==20.0.0
|
| 107 |
+
nvidia-nccl-cu12==2.21.5
|
| 108 |
+
markdown-it-py==3.0.0
|
| 109 |
+
gitdb==4.0.12
|
| 110 |
+
altair==5.5.0
|
| 111 |
+
torchvision==0.21.0
|
| 112 |
+
python-magic==0.4.27
|
| 113 |
+
iopath==0.1.10
|
| 114 |
+
smart-open==7.1.0
|
| 115 |
+
torch==2.6.0
|
| 116 |
+
pycocotools==2.0.8
|
| 117 |
+
fairscale==0.4.4
|
| 118 |
+
traitlets==5.14.3
|
| 119 |
+
pure_eval==0.2.3
|
| 120 |
+
sympy==1.13.1
|
| 121 |
+
nvidia-cusparselt-cu12==0.6.2
|
| 122 |
+
imageio==2.37.0
|
| 123 |
+
stack-data==0.6.3
|
| 124 |
+
shellingham==1.5.4
|
| 125 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
| 126 |
+
einops==0.8.1
|
| 127 |
+
tenacity==9.1.2
|
| 128 |
+
virtualenv==20.31.2
|
| 129 |
+
ptyprocess==0.7.0
|
| 130 |
+
cfgv==3.4.0
|
| 131 |
+
pre_commit==4.2.0
|
| 132 |
+
language_data==1.3.0
|
| 133 |
+
typing_extensions==4.13.2
|
| 134 |
+
propcache==0.3.1
|
| 135 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
| 136 |
+
safetensors==0.5.3
|
| 137 |
+
text-unidecode==1.3
|
| 138 |
+
wcwidth==0.2.13
|
| 139 |
+
charset-normalizer==3.4.2
|
| 140 |
+
aiohappyeyeballs==2.6.1
|
| 141 |
+
ipython==8.36.0
|
| 142 |
+
streamlit==1.45.1
|
| 143 |
+
asttokens==3.0.0
|
| 144 |
+
psutil==7.0.0
|
| 145 |
+
smmap==5.0.2
|
| 146 |
+
exceptiongroup==1.3.0
|
| 147 |
+
murmurhash==1.0.13
|
| 148 |
+
filelock==3.18.0
|
| 149 |
+
plotly==6.1.1
|
| 150 |
+
hjson==3.1.0
|
| 151 |
+
pydantic_core==2.33.2
|
| 152 |
+
ninja==1.11.1.4
|
| 153 |
+
kiwisolver==1.4.8
|
| 154 |
+
spacy-legacy==3.0.12
|
| 155 |
+
opendatasets==0.1.22
|
| 156 |
+
decorator==5.2.1
|
| 157 |
+
spacy==3.8.7
|
| 158 |
+
wasabi==1.1.3
|
| 159 |
+
sentencepiece==0.2.0
|
| 160 |
+
toml==0.10.2
|
| 161 |
+
scikit-image==0.25.2
|
| 162 |
+
deepspeed==0.16.10+b666844f
|
| 163 |
+
ftfy==6.3.1
|
| 164 |
+
bleach==6.2.0
|
| 165 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
| 166 |
+
spacy-loggers==1.0.5
|
| 167 |
+
MarkupSafe==3.0.2
|
| 168 |
+
braceexpand==0.1.7
|
| 169 |
+
oss2==2.15.0
|
| 170 |
+
preshed==3.0.10
|
| 171 |
+
transformers==4.52.3
|
| 172 |
+
aiohttp==3.12.2
|
| 173 |
+
web.py==0.62
|
| 174 |
+
threadpoolctl==3.6.0
|
| 175 |
+
jaraco.functools==4.1.0
|
| 176 |
+
wandb==0.19.11
|
| 177 |
+
sentry-sdk==2.29.1
|
| 178 |
+
tokenizers==0.21.1
|
| 179 |
+
fsspec==2025.3.0
|
| 180 |
+
flash-attn==2.7.1.post1
|
| 181 |
+
opendelta==0.3.2
|
| 182 |
+
opencv-python==4.11.0.86
|
| 183 |
+
click==8.2.1
|
| 184 |
+
docker-pycreds==0.4.0
|
| 185 |
+
typer==0.16.0
|
| 186 |
+
xxhash==3.5.0
|
| 187 |
+
pathlib==1.0.1
|
| 188 |
+
dill==0.3.8
|
| 189 |
+
crcmod==1.7
|
| 190 |
+
bigmodelvis==0.0.1
|
| 191 |
+
datasets==3.6.0
|
| 192 |
+
pycryptodome==3.23.0
|
| 193 |
+
jsonschema==4.24.0
|
| 194 |
+
aliyun-python-sdk-core==2.16.0
|
| 195 |
+
jmespath==0.10.0
|
| 196 |
+
more-itertools==10.7.0
|
| 197 |
+
scikit-learn==1.6.1
|
| 198 |
+
huggingface-hub==0.32.1
|
| 199 |
+
cryptography==45.0.3
|
| 200 |
+
pycparser==2.22
|
| 201 |
+
yacs==0.1.8
|
| 202 |
+
aliyun-python-sdk-kms==2.16.5
|
| 203 |
+
cffi==1.17.1
|
| 204 |
+
delta-center-client==0.0.4
|
| 205 |
+
multiprocess==0.70.16
|
| 206 |
+
setproctitle==1.3.6
|
| 207 |
+
narwhals==1.41.0
|
| 208 |
+
pip==25.1.1
|
| 209 |
+
cheroot==10.0.1
|
| 210 |
+
jaraco.context==5.3.0
|
| 211 |
+
more-itertools==10.3.0
|
| 212 |
+
jaraco.functools==4.0.1
|
| 213 |
+
jaraco.text==3.12.1
|
| 214 |
+
platformdirs==4.2.2
|
| 215 |
+
packaging==24.2
|
| 216 |
+
wheel==0.45.1
|
| 217 |
+
zipp==3.19.2
|
| 218 |
+
inflect==7.3.1
|
| 219 |
+
autocommand==2.2.2
|
| 220 |
+
typeguard==4.3.0
|
| 221 |
+
jaraco.collections==5.1.0
|
| 222 |
+
backports.tarfile==1.2.0
|
| 223 |
+
tomli==2.0.1
|
| 224 |
+
importlib_metadata==8.0.0
|
| 225 |
+
typing_extensions==4.12.2
|
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
|
| 3 |
+
"python": "CPython 3.10.0",
|
| 4 |
+
"startedAt": "2025-06-26T06:47:31.299461Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--devices",
|
| 7 |
+
"0,1,2,3,4,5,6,7",
|
| 8 |
+
"--mode",
|
| 9 |
+
"train",
|
| 10 |
+
"--filename",
|
| 11 |
+
"stage1_06261435",
|
| 12 |
+
"--num_query_token",
|
| 13 |
+
"8",
|
| 14 |
+
"--plm_name",
|
| 15 |
+
"/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m",
|
| 16 |
+
"--bert_name",
|
| 17 |
+
"/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft",
|
| 18 |
+
"--save_every_n_epochs",
|
| 19 |
+
"5",
|
| 20 |
+
"--max_epochs",
|
| 21 |
+
"30",
|
| 22 |
+
"--batch_size",
|
| 23 |
+
"160",
|
| 24 |
+
"--precision",
|
| 25 |
+
"bf16-mixed",
|
| 26 |
+
"--mix_dataset",
|
| 27 |
+
"--num_workers",
|
| 28 |
+
"8",
|
| 29 |
+
"--use_wandb_logger"
|
| 30 |
+
],
|
| 31 |
+
"program": "/nas/shared/kilab/wangyujia/ProtT3/stage1.py",
|
| 32 |
+
"codePath": "stage1.py",
|
| 33 |
+
"email": "gia0603yucca@gmail.com",
|
| 34 |
+
"root": "./all_checkpoints/stage1_06261435/",
|
| 35 |
+
"host": "dsw-265304-cd576ddc5-gh74w",
|
| 36 |
+
"executable": "/root/miniconda3/envs/protT3/bin/python",
|
| 37 |
+
"codePathLocal": "stage1.py",
|
| 38 |
+
"cpu_count": 64,
|
| 39 |
+
"cpu_count_logical": 64,
|
| 40 |
+
"gpu": "NVIDIA A800-SXM4-80GB",
|
| 41 |
+
"gpu_count": 8,
|
| 42 |
+
"disk": {
|
| 43 |
+
"/": {
|
| 44 |
+
"total": "1623302262784",
|
| 45 |
+
"used": "1290838016"
|
| 46 |
+
}
|
| 47 |
+
},
|
| 48 |
+
"memory": {
|
| 49 |
+
"total": "549755813888"
|
| 50 |
+
},
|
| 51 |
+
"cpu": {
|
| 52 |
+
"count": 64,
|
| 53 |
+
"countLogical": 64
|
| 54 |
+
},
|
| 55 |
+
"gpu_nvidia": [
|
| 56 |
+
{
|
| 57 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 58 |
+
"memoryTotal": "85198045184",
|
| 59 |
+
"architecture": "Ampere"
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 63 |
+
"memoryTotal": "85198045184",
|
| 64 |
+
"architecture": "Ampere"
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 68 |
+
"memoryTotal": "85198045184",
|
| 69 |
+
"architecture": "Ampere"
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 73 |
+
"memoryTotal": "85198045184",
|
| 74 |
+
"architecture": "Ampere"
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 78 |
+
"memoryTotal": "85198045184",
|
| 79 |
+
"architecture": "Ampere"
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 83 |
+
"memoryTotal": "85198045184",
|
| 84 |
+
"architecture": "Ampere"
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 88 |
+
"memoryTotal": "85198045184",
|
| 89 |
+
"architecture": "Ampere"
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 93 |
+
"memoryTotal": "85198045184",
|
| 94 |
+
"architecture": "Ampere"
|
| 95 |
+
}
|
| 96 |
+
],
|
| 97 |
+
"cudaVersion": "12.1"
|
| 98 |
+
}
|
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-06-26T14:47:31.30118788+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-06-26T14:47:33.763139479+08:00","level":"INFO","msg":"created new stream","id":"1bz1vyyf"}
|
| 3 |
+
{"time":"2025-06-26T14:47:33.763180996+08:00","level":"INFO","msg":"stream: started","id":"1bz1vyyf"}
|
| 4 |
+
{"time":"2025-06-26T14:47:33.76320552+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"1bz1vyyf"}
|
| 5 |
+
{"time":"2025-06-26T14:47:33.763263895+08:00","level":"INFO","msg":"handler: started","stream_id":"1bz1vyyf"}
|
| 6 |
+
{"time":"2025-06-26T14:47:33.763302435+08:00","level":"INFO","msg":"sender: started","stream_id":"1bz1vyyf"}
|
| 7 |
+
{"time":"2025-06-26T14:47:35.049823143+08:00","level":"INFO","msg":"Starting system monitor"}
|
| 8 |
+
{"time":"2025-06-26T14:59:24.18296941+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06261435/1bz1vyyf/file_stream\": EOF"}
|
| 9 |
+
{"time":"2025-06-26T15:47:20.217171547+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 10 |
+
{"time":"2025-06-26T15:47:52.39492821+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
|
| 11 |
+
{"time":"2025-06-26T15:48:27.272812301+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
|
| 12 |
+
{"time":"2025-06-26T15:49:05.914803533+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 13 |
+
{"time":"2025-06-26T15:49:51.452193247+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06261435/1bz1vyyf/file_stream\": net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)"}
|
| 14 |
+
{"time":"2025-06-26T15:49:52.900302871+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
|
| 15 |
+
{"time":"2025-06-26T15:50:55.486728897+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
ProtT3/all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/logs/debug.log
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
|
| 2 |
+
2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_setup.py:_flush():70] Configure stats pid to 13641
|
| 3 |
+
2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
|
| 4 |
+
2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
|
| 5 |
+
2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_setup.py:_flush():70] Loading settings from environment variables
|
| 6 |
+
2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/logs/debug.log
|
| 7 |
+
2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage1_06261435/wandb/run-20250626_144731-1bz1vyyf/logs/debug-internal.log
|
| 8 |
+
2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:init():852] calling init triggers
|
| 9 |
+
2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'_wandb': {}}
|
| 11 |
+
2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:init():893] starting backend
|
| 12 |
+
2025-06-26 14:47:31,292 INFO MainThread:13641 [wandb_init.py:init():897] sending inform_init request
|
| 13 |
+
2025-06-26 14:47:31,294 INFO MainThread:13641 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 14 |
+
2025-06-26 14:47:31,296 INFO MainThread:13641 [wandb_init.py:init():907] backend started and connected
|
| 15 |
+
2025-06-26 14:47:31,300 INFO MainThread:13641 [wandb_init.py:init():1005] updated telemetry
|
| 16 |
+
2025-06-26 14:47:31,303 INFO MainThread:13641 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
|
| 17 |
+
2025-06-26 14:47:35,041 INFO MainThread:13641 [wandb_init.py:init():1104] starting run threads in backend
|
| 18 |
+
2025-06-26 14:47:35,187 INFO MainThread:13641 [wandb_run.py:_console_start():2573] atexit reg
|
| 19 |
+
2025-06-26 14:47:35,187 INFO MainThread:13641 [wandb_run.py:_redirect():2421] redirect: wrap_raw
|
| 20 |
+
2025-06-26 14:47:35,192 INFO MainThread:13641 [wandb_run.py:_redirect():2490] Wrapping output streams.
|
| 21 |
+
2025-06-26 14:47:35,192 INFO MainThread:13641 [wandb_run.py:_redirect():2513] Redirects installed.
|
| 22 |
+
2025-06-26 14:47:35,197 INFO MainThread:13641 [wandb_init.py:init():1150] run started, returning control to user process
|
| 23 |
+
2025-06-26 14:47:41,914 INFO MainThread:13641 [wandb_run.py:_config_callback():1436] config_cb None None {'filename': 'stage1_06261435', 'seed': 42, 'mode': 'train', 'strategy': 'deepspeed', 'accelerator': 'gpu', 'devices': '0,1,2,3,4,5,6,7', 'precision': 'bf16-mixed', 'max_epochs': 30, 'check_val_every_n_epoch': 1, 'use_wandb_logger': True, 'mix_dataset': True, 'temperature': 0.1, 'save_every_n_epochs': 5, 'ptm': True, 'lm': True, 'rerank_cand_num': 128, 'plm_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m', 'plm_tune': 'freeze', 'load_4bit': False, 'pool_size': 0, 'bert_hidden_dim': 768, 'bert_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft', 'projection_dim': 256, 'cross_attention_freq': 2, 'num_query_token': 8, 'weight_decay': 0.05, 'init_lr': 0.0001, 'min_lr': 1e-05, 'warmup_lr': 1e-06, 'warmup_steps': 1000, 'lr_decay_rate': 0.9, 'scheduler': 'linear_warmup_cosine_lr', 'init_checkpoint': '', 'retrieval_eval_epoch': 10, 'num_workers': 8, 'batch_size': 160, 'match_batch_size': 64, 'root': 'data', 'text_max_len': 128, 'prot_max_len': 1024, 'prot_aug': 'None'}
|
ProtT3/all_checkpoints/stage1_06262112/wandb/debug-internal.log
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-06-26T21:13:59.919018005+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-06-26T21:14:01.751242877+08:00","level":"INFO","msg":"created new stream","id":"gp8ndo2v"}
|
| 3 |
+
{"time":"2025-06-26T21:14:01.751292945+08:00","level":"INFO","msg":"stream: started","id":"gp8ndo2v"}
|
| 4 |
+
{"time":"2025-06-26T21:14:01.751353982+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"gp8ndo2v"}
|
| 5 |
+
{"time":"2025-06-26T21:14:01.751376676+08:00","level":"INFO","msg":"handler: started","stream_id":"gp8ndo2v"}
|
| 6 |
+
{"time":"2025-06-26T21:14:01.751406784+08:00","level":"INFO","msg":"sender: started","stream_id":"gp8ndo2v"}
|
| 7 |
+
{"time":"2025-06-26T21:14:03.18201785+08:00","level":"INFO","msg":"Starting system monitor"}
|
| 8 |
+
{"time":"2025-06-26T21:39:10.805194559+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:48308->172.67.193.61:443: read: connection timed out"}
|
| 9 |
+
{"time":"2025-06-26T21:45:08.181153524+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:33626->104.21.20.172:443: read: connection timed out"}
|
| 10 |
+
{"time":"2025-06-26T21:50:43.748258238+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:39240->172.67.193.61:443: read: connection reset by peer"}
|
| 11 |
+
{"time":"2025-06-26T21:56:59.349224169+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:41628->104.21.20.172:443: read: connection timed out"}
|
| 12 |
+
{"time":"2025-06-26T22:01:29.173164681+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:41150->172.67.193.61:443: read: connection timed out"}
|
| 13 |
+
{"time":"2025-06-26T22:04:52.9491833+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:40328->104.21.20.172:443: read: connection timed out"}
|
| 14 |
+
{"time":"2025-06-26T22:05:33.372515641+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 15 |
+
{"time":"2025-06-26T22:10:24.214205918+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:39814->104.21.20.172:443: read: connection timed out"}
|
| 16 |
+
{"time":"2025-06-26T22:11:19.608808233+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": unexpected EOF"}
|
| 17 |
+
{"time":"2025-06-26T22:15:15.541207766+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:57490->104.21.20.172:443: read: connection timed out"}
|
| 18 |
+
{"time":"2025-06-26T22:17:53.749178371+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:34226->172.67.193.61:443: read: connection timed out"}
|
| 19 |
+
{"time":"2025-06-26T22:20:43.734188539+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:58158->104.21.20.172:443: read: connection timed out"}
|
| 20 |
+
{"time":"2025-06-26T22:26:37.244674658+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 21 |
+
{"time":"2025-06-26T22:27:27.97084057+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:42482->172.67.193.61:443: read: connection reset by peer"}
|
| 22 |
+
{"time":"2025-06-26T22:32:12.373221258+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:42718->104.21.20.172:443: read: connection timed out"}
|
| 23 |
+
{"time":"2025-06-26T22:33:45.749714178+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:36352->172.67.193.61:443: read: connection reset by peer"}
|
| 24 |
+
{"time":"2025-06-26T22:34:27.154183486+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": unexpected EOF"}
|
| 25 |
+
{"time":"2025-06-26T22:37:03.388715023+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
|
| 26 |
+
{"time":"2025-06-26T22:38:44.053145624+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:56120->104.21.20.172:443: read: connection timed out"}
|
| 27 |
+
{"time":"2025-06-26T22:39:26.21620593+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:37762->172.67.193.61:443: read: connection reset by peer"}
|
| 28 |
+
{"time":"2025-06-26T22:42:48.392517517+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 29 |
+
{"time":"2025-06-26T22:43:20.509939526+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
|
| 30 |
+
{"time":"2025-06-26T22:43:55.50812991+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
|
| 31 |
+
{"time":"2025-06-26T22:44:05.260626832+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": context deadline exceeded"}
|
| 32 |
+
{"time":"2025-06-26T22:44:33.704733361+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 33 |
+
{"time":"2025-06-26T22:47:28.34118454+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:46752->104.21.20.172:443: read: connection timed out"}
|
| 34 |
+
{"time":"2025-06-26T22:48:22.800067638+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:35900->104.21.20.172:443: read: connection reset by peer"}
|
| 35 |
+
{"time":"2025-06-26T22:49:03.396821287+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 36 |
+
{"time":"2025-06-26T22:49:35.881823651+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
|
| 37 |
+
{"time":"2025-06-26T22:50:10.244289946+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
|
| 38 |
+
{"time":"2025-06-26T22:50:48.344767175+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
|
| 39 |
+
{"time":"2025-06-26T22:51:21.302223032+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:40288->172.67.193.61:443: read: connection timed out"}
|
| 40 |
+
{"time":"2025-06-26T22:54:35.861164416+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:35106->104.21.20.172:443: read: connection timed out"}
|
| 41 |
+
{"time":"2025-06-26T22:56:18.401507947+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 42 |
+
{"time":"2025-06-26T22:56:50.726216671+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
|
| 43 |
+
{"time":"2025-06-26T22:57:25.218970516+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 44 |
+
{"time":"2025-06-26T22:58:04.180971507+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
|
| 45 |
+
{"time":"2025-06-26T22:58:09.302334148+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:50854->104.21.20.172:443: read: connection reset by peer"}
|
| 46 |
+
{"time":"2025-06-26T22:58:51.546103299+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 47 |
+
{"time":"2025-06-26T23:00:01.390576029+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 48 |
+
{"time":"2025-06-26T23:02:34.06917085+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:37644->104.21.20.172:443: read: connection timed out"}
|
| 49 |
+
{"time":"2025-06-26T23:05:03.407063467+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 50 |
+
{"time":"2025-06-26T23:06:15.25317264+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:42244->172.67.193.61:443: read: connection timed out"}
|
| 51 |
+
{"time":"2025-06-26T23:06:33.40769986+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
|
| 52 |
+
{"time":"2025-06-26T23:09:18.410102964+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 53 |
+
{"time":"2025-06-26T23:11:33.411287647+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
|
| 54 |
+
{"time":"2025-06-26T23:12:46.933205687+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:55392->104.21.20.172:443: read: connection timed out"}
|
| 55 |
+
{"time":"2025-06-26T23:16:18.41464022+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 56 |
+
{"time":"2025-06-26T23:16:40.753618008+08:00","level":"ERROR","msg":"filestream: json decode error: net/http: request canceled (Client.Timeout or context cancellation while reading body)"}
|
| 57 |
+
{"time":"2025-06-26T23:16:50.454824576+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
|
| 58 |
+
{"time":"2025-06-26T23:17:25.158378302+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 59 |
+
{"time":"2025-06-26T23:18:00.15504283+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": unexpected EOF"}
|
| 60 |
+
{"time":"2025-06-26T23:18:03.880963877+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 61 |
+
{"time":"2025-06-26T23:18:50.836175421+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 62 |
+
{"time":"2025-06-26T23:20:45.653178557+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:38816->104.21.20.172:443: read: connection timed out"}
|
| 63 |
+
{"time":"2025-06-26T23:21:51.085821178+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:43680->172.67.193.61:443: read: connection reset by peer"}
|
| 64 |
+
{"time":"2025-06-26T23:24:50.901213106+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:40734->104.21.20.172:443: read: connection timed out"}
|
| 65 |
+
{"time":"2025-06-26T23:30:06.293188303+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:46360->104.21.20.172:443: read: connection timed out"}
|
| 66 |
+
{"time":"2025-06-26T23:36:26.709172933+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:33352->172.67.193.61:443: read: connection timed out"}
|
| 67 |
+
{"time":"2025-06-26T23:39:53.889169333+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": unexpected EOF"}
|
| 68 |
+
{"time":"2025-06-26T23:42:48.781078984+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": read tcp 10.1.8.160:35500->104.21.20.172:443: read: connection reset by peer"}
|
| 69 |
+
{"time":"2025-06-26T23:44:39.969525919+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:51204->172.67.193.61:443: read: connection reset by peer"}
|
| 70 |
+
{"time":"2025-06-26T23:52:25.685198314+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:41504->172.67.193.61:443: read: connection timed out"}
|
| 71 |
+
{"time":"2025-06-27T00:14:48.690187795+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": read tcp 10.1.8.160:49576->172.67.193.61:443: read: connection reset by peer"}
|
| 72 |
+
{"time":"2025-06-27T00:15:11.63645902+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": unexpected EOF"}
|
| 73 |
+
{"time":"2025-06-27T00:18:18.739690809+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": read tcp 10.1.8.160:55462->172.67.193.61:443: read: connection reset by peer"}
|
ProtT3/all_checkpoints/stage1_06262112/wandb/debug.log
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
|
| 2 |
+
2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_setup.py:_flush():70] Configure stats pid to 183028
|
| 3 |
+
2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
|
| 4 |
+
2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
|
| 5 |
+
2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_setup.py:_flush():70] Loading settings from environment variables
|
| 6 |
+
2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/logs/debug.log
|
| 7 |
+
2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/logs/debug-internal.log
|
| 8 |
+
2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:init():852] calling init triggers
|
| 9 |
+
2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'_wandb': {}}
|
| 11 |
+
2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:init():893] starting backend
|
| 12 |
+
2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:init():897] sending inform_init request
|
| 13 |
+
2025-06-26 21:13:59,906 INFO MainThread:183028 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 14 |
+
2025-06-26 21:13:59,908 INFO MainThread:183028 [wandb_init.py:init():907] backend started and connected
|
| 15 |
+
2025-06-26 21:13:59,909 INFO MainThread:183028 [wandb_init.py:init():1005] updated telemetry
|
| 16 |
+
2025-06-26 21:13:59,911 INFO MainThread:183028 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
|
| 17 |
+
2025-06-26 21:14:03,171 INFO MainThread:183028 [wandb_init.py:init():1104] starting run threads in backend
|
| 18 |
+
2025-06-26 21:14:03,332 INFO MainThread:183028 [wandb_run.py:_console_start():2573] atexit reg
|
| 19 |
+
2025-06-26 21:14:03,333 INFO MainThread:183028 [wandb_run.py:_redirect():2421] redirect: wrap_raw
|
| 20 |
+
2025-06-26 21:14:03,336 INFO MainThread:183028 [wandb_run.py:_redirect():2490] Wrapping output streams.
|
| 21 |
+
2025-06-26 21:14:03,336 INFO MainThread:183028 [wandb_run.py:_redirect():2513] Redirects installed.
|
| 22 |
+
2025-06-26 21:14:03,350 INFO MainThread:183028 [wandb_init.py:init():1150] run started, returning control to user process
|
| 23 |
+
2025-06-26 21:14:09,516 INFO MainThread:183028 [wandb_run.py:_config_callback():1436] config_cb None None {'filename': 'stage1_06262112', 'seed': 42, 'mode': 'train', 'strategy': 'deepspeed', 'accelerator': 'gpu', 'devices': '0,1,2,3,4,5,6,7', 'precision': 'bf16-mixed', 'max_epochs': 30, 'check_val_every_n_epoch': 1, 'use_wandb_logger': True, 'mix_dataset': True, 'temperature': 0.1, 'save_every_n_epochs': 5, 'ptm': True, 'lm': True, 'rerank_cand_num': 128, 'plm_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m', 'plm_tune': 'freeze', 'load_4bit': False, 'pool_size': 0, 'bert_hidden_dim': 768, 'bert_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft', 'projection_dim': 256, 'cross_attention_freq': 2, 'num_query_token': 8, 'weight_decay': 0.05, 'init_lr': 0.0001, 'min_lr': 1e-05, 'warmup_lr': 1e-06, 'warmup_steps': 1000, 'lr_decay_rate': 0.9, 'scheduler': 'linear_warmup_cosine_lr', 'init_checkpoint': '', 'retrieval_eval_epoch': 10, 'num_workers': 8, 'batch_size': 160, 'match_batch_size': 64, 'root': 'data', 'text_max_len': 128, 'prot_max_len': 1024, 'prot_aug': 'None'}
|
ProtT3/all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/files/output.log
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06262112 exists and is not empty.
|
| 2 |
+
Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.
|
| 3 |
+
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
|
| 4 |
+
|
| 5 |
+
| Name | Type | Params | Mode
|
| 6 |
+
------------------------------------------------------
|
| 7 |
+
0 | blip2qformer | Blip2Qformer | 327 M | train
|
| 8 |
+
------------------------------------------------------
|
| 9 |
+
179 M Trainable params
|
| 10 |
+
147 M Non-trainable params
|
| 11 |
+
327 M Total params
|
| 12 |
+
1,309.467 Total estimated model params size (MB)
|
| 13 |
+
5 Modules in train mode
|
| 14 |
+
926 Modules in eval mode
|
| 15 |
+
Epoch 9: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 336/336 [16:02<00:00, 0.35it/s, v_num=do2v]
|
| 16 |
+
/nas/shared/kilab/wangyujia/ProtT3/model/blip2qformer.py:220: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
|
| 17 |
+
with torch.cuda.amp.autocast(enable_autocast, dtype=torch.float32):
|
| 18 |
+
Validation DataLoader 2: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:18<00:00, 0.44it/s]
|
| 19 |
+
/nas/shared/kilab/wangyujia/ProtT3/model/dist_funs.py:18: FutureWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/main/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.
|
| 20 |
+
sd = self.module.state_dict(destination, prefix, keep_vars)
|
| 21 |
+
/nas/shared/kilab/wangyujia/ProtT3/model/blip2_stage1.py:42: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
|
| 22 |
+
return torch.cuda.amp.autocast(dtype=dtype)
|
| 23 |
+
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 157/157 [08:52<00:00, 3.39s/it]
|
| 24 |
+
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1250/1250 [00:01<00:00, 1024.41it/s]
|
| 25 |
+
re-ranking p2t: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████���███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2500/2500 [10:06<00:00, 4.12it/s]
|
| 26 |
+
94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 1177/1250 [00:01<00:00, 1045.73it/s][rank: 1] Child process with PID 183538 terminated with code -6. Forcefully terminating all other processes to avoid zombies 🧟
|
| 27 |
+
re-ranking p2t: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2500/2500 [10:06<00:00, 4.13it/s]
|
| 28 |
+
re-ranking t2p: 57%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 1424/2500 [11:01<08:20, 2.15it/s]
|
ProtT3/all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/files/requirements.txt
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
attrs==25.3.0
|
| 2 |
+
tqdm==4.67.1
|
| 3 |
+
langcodes==3.5.0
|
| 4 |
+
nvidia-cublas-cu12==12.4.5.8
|
| 5 |
+
tifffile==2025.5.10
|
| 6 |
+
nvidia-cufile-cu12==1.11.1.6
|
| 7 |
+
nltk==3.9.1
|
| 8 |
+
salesforce-lavis==1.0.2
|
| 9 |
+
tzdata==2025.2
|
| 10 |
+
pyparsing==3.2.3
|
| 11 |
+
six==1.17.0
|
| 12 |
+
python-dateutil==2.9.0.post0
|
| 13 |
+
pandas==2.2.3
|
| 14 |
+
pytorch-lightning==2.5.1.post0
|
| 15 |
+
blinker==1.9.0
|
| 16 |
+
opencv-python-headless==4.5.5.64
|
| 17 |
+
nvidia-cusparse-cu12==12.3.1.170
|
| 18 |
+
pytz==2025.2
|
| 19 |
+
async-timeout==5.0.1
|
| 20 |
+
pillow==11.2.1
|
| 21 |
+
parso==0.8.4
|
| 22 |
+
joblib==1.5.1
|
| 23 |
+
contourpy==1.3.2
|
| 24 |
+
triton==3.2.0
|
| 25 |
+
marisa-trie==1.2.1
|
| 26 |
+
PyYAML==6.0.2
|
| 27 |
+
regex==2024.11.6
|
| 28 |
+
idna==3.10
|
| 29 |
+
nvidia-curand-cu12==10.3.5.147
|
| 30 |
+
rpds-py==0.25.1
|
| 31 |
+
aiosignal==1.3.2
|
| 32 |
+
srsly==2.5.1
|
| 33 |
+
confection==0.1.5
|
| 34 |
+
typing-inspection==0.4.1
|
| 35 |
+
packaging==24.2
|
| 36 |
+
distlib==0.3.9
|
| 37 |
+
networkx==3.4.2
|
| 38 |
+
absl-py==2.2.2
|
| 39 |
+
yarl==1.20.0
|
| 40 |
+
lightning-utilities==0.14.3
|
| 41 |
+
executing==2.2.0
|
| 42 |
+
pycocoevalcap==1.2
|
| 43 |
+
wheel==0.45.1
|
| 44 |
+
nvidia-ml-py==12.575.51
|
| 45 |
+
cycler==0.12.1
|
| 46 |
+
wrapt==1.17.2
|
| 47 |
+
jsonschema-specifications==2025.4.1
|
| 48 |
+
protobuf==6.31.0
|
| 49 |
+
mpmath==1.3.0
|
| 50 |
+
certifi==2025.4.26
|
| 51 |
+
py-cpuinfo==9.0.0
|
| 52 |
+
contexttimer==0.3.3
|
| 53 |
+
watchdog==6.0.0
|
| 54 |
+
pexpect==4.9.0
|
| 55 |
+
webencodings==0.5.1
|
| 56 |
+
hf-xet==1.1.2
|
| 57 |
+
cymem==2.0.11
|
| 58 |
+
requests==2.32.3
|
| 59 |
+
timm==0.4.12
|
| 60 |
+
omegaconf==2.3.0
|
| 61 |
+
nvidia-nvjitlink-cu12==12.4.127
|
| 62 |
+
webdataset==0.2.111
|
| 63 |
+
nodeenv==1.9.1
|
| 64 |
+
frozenlist==1.6.0
|
| 65 |
+
annotated-types==0.7.0
|
| 66 |
+
matplotlib-inline==0.1.7
|
| 67 |
+
urllib3==2.4.0
|
| 68 |
+
rich==14.0.0
|
| 69 |
+
GitPython==3.1.44
|
| 70 |
+
lazy_loader==0.4
|
| 71 |
+
msgpack==1.1.0
|
| 72 |
+
prompt_toolkit==3.0.51
|
| 73 |
+
fonttools==4.58.0
|
| 74 |
+
multidict==6.4.4
|
| 75 |
+
blis==1.3.0
|
| 76 |
+
thinc==8.3.6
|
| 77 |
+
nvidia-nvtx-cu12==12.4.127
|
| 78 |
+
torchmetrics==1.7.1
|
| 79 |
+
weasel==0.4.1
|
| 80 |
+
numpy==2.2.6
|
| 81 |
+
cachetools==5.5.2
|
| 82 |
+
Jinja2==3.1.6
|
| 83 |
+
matplotlib==3.10.3
|
| 84 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 85 |
+
Pygments==2.19.1
|
| 86 |
+
tornado==6.5.1
|
| 87 |
+
scipy==1.15.3
|
| 88 |
+
rouge_score==0.1.2
|
| 89 |
+
cloudpathlib==0.21.1
|
| 90 |
+
jedi==0.19.2
|
| 91 |
+
referencing==0.36.2
|
| 92 |
+
decord==0.6.0
|
| 93 |
+
setuptools==78.1.1
|
| 94 |
+
mdurl==0.1.2
|
| 95 |
+
identify==2.6.12
|
| 96 |
+
python-slugify==8.0.4
|
| 97 |
+
portalocker==3.1.1
|
| 98 |
+
catalogue==2.0.10
|
| 99 |
+
platformdirs==4.3.8
|
| 100 |
+
antlr4-python3-runtime==4.9.3
|
| 101 |
+
nvidia-cusolver-cu12==11.6.1.9
|
| 102 |
+
kaggle==1.7.4.5
|
| 103 |
+
pydeck==0.9.1
|
| 104 |
+
pydantic==2.11.5
|
| 105 |
+
nvidia-cufft-cu12==11.2.1.3
|
| 106 |
+
pyarrow==20.0.0
|
| 107 |
+
nvidia-nccl-cu12==2.21.5
|
| 108 |
+
markdown-it-py==3.0.0
|
| 109 |
+
gitdb==4.0.12
|
| 110 |
+
altair==5.5.0
|
| 111 |
+
torchvision==0.21.0
|
| 112 |
+
python-magic==0.4.27
|
| 113 |
+
iopath==0.1.10
|
| 114 |
+
smart-open==7.1.0
|
| 115 |
+
torch==2.6.0
|
| 116 |
+
pycocotools==2.0.8
|
| 117 |
+
fairscale==0.4.4
|
| 118 |
+
traitlets==5.14.3
|
| 119 |
+
pure_eval==0.2.3
|
| 120 |
+
sympy==1.13.1
|
| 121 |
+
nvidia-cusparselt-cu12==0.6.2
|
| 122 |
+
imageio==2.37.0
|
| 123 |
+
stack-data==0.6.3
|
| 124 |
+
shellingham==1.5.4
|
| 125 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
| 126 |
+
einops==0.8.1
|
| 127 |
+
tenacity==9.1.2
|
| 128 |
+
virtualenv==20.31.2
|
| 129 |
+
ptyprocess==0.7.0
|
| 130 |
+
cfgv==3.4.0
|
| 131 |
+
pre_commit==4.2.0
|
| 132 |
+
language_data==1.3.0
|
| 133 |
+
typing_extensions==4.13.2
|
| 134 |
+
propcache==0.3.1
|
| 135 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
| 136 |
+
safetensors==0.5.3
|
| 137 |
+
text-unidecode==1.3
|
| 138 |
+
wcwidth==0.2.13
|
| 139 |
+
charset-normalizer==3.4.2
|
| 140 |
+
aiohappyeyeballs==2.6.1
|
| 141 |
+
ipython==8.36.0
|
| 142 |
+
streamlit==1.45.1
|
| 143 |
+
asttokens==3.0.0
|
| 144 |
+
psutil==7.0.0
|
| 145 |
+
smmap==5.0.2
|
| 146 |
+
exceptiongroup==1.3.0
|
| 147 |
+
murmurhash==1.0.13
|
| 148 |
+
filelock==3.18.0
|
| 149 |
+
plotly==6.1.1
|
| 150 |
+
hjson==3.1.0
|
| 151 |
+
pydantic_core==2.33.2
|
| 152 |
+
ninja==1.11.1.4
|
| 153 |
+
kiwisolver==1.4.8
|
| 154 |
+
spacy-legacy==3.0.12
|
| 155 |
+
opendatasets==0.1.22
|
| 156 |
+
decorator==5.2.1
|
| 157 |
+
spacy==3.8.7
|
| 158 |
+
wasabi==1.1.3
|
| 159 |
+
sentencepiece==0.2.0
|
| 160 |
+
toml==0.10.2
|
| 161 |
+
scikit-image==0.25.2
|
| 162 |
+
deepspeed==0.16.10+b666844f
|
| 163 |
+
ftfy==6.3.1
|
| 164 |
+
bleach==6.2.0
|
| 165 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
| 166 |
+
spacy-loggers==1.0.5
|
| 167 |
+
MarkupSafe==3.0.2
|
| 168 |
+
braceexpand==0.1.7
|
| 169 |
+
oss2==2.15.0
|
| 170 |
+
preshed==3.0.10
|
| 171 |
+
transformers==4.52.3
|
| 172 |
+
aiohttp==3.12.2
|
| 173 |
+
web.py==0.62
|
| 174 |
+
threadpoolctl==3.6.0
|
| 175 |
+
jaraco.functools==4.1.0
|
| 176 |
+
wandb==0.19.11
|
| 177 |
+
sentry-sdk==2.29.1
|
| 178 |
+
tokenizers==0.21.1
|
| 179 |
+
fsspec==2025.3.0
|
| 180 |
+
flash-attn==2.7.1.post1
|
| 181 |
+
opendelta==0.3.2
|
| 182 |
+
opencv-python==4.11.0.86
|
| 183 |
+
click==8.2.1
|
| 184 |
+
docker-pycreds==0.4.0
|
| 185 |
+
typer==0.16.0
|
| 186 |
+
xxhash==3.5.0
|
| 187 |
+
pathlib==1.0.1
|
| 188 |
+
dill==0.3.8
|
| 189 |
+
crcmod==1.7
|
| 190 |
+
bigmodelvis==0.0.1
|
| 191 |
+
datasets==3.6.0
|
| 192 |
+
pycryptodome==3.23.0
|
| 193 |
+
jsonschema==4.24.0
|
| 194 |
+
aliyun-python-sdk-core==2.16.0
|
| 195 |
+
jmespath==0.10.0
|
| 196 |
+
more-itertools==10.7.0
|
| 197 |
+
scikit-learn==1.6.1
|
| 198 |
+
huggingface-hub==0.32.1
|
| 199 |
+
cryptography==45.0.3
|
| 200 |
+
pycparser==2.22
|
| 201 |
+
yacs==0.1.8
|
| 202 |
+
aliyun-python-sdk-kms==2.16.5
|
| 203 |
+
cffi==1.17.1
|
| 204 |
+
delta-center-client==0.0.4
|
| 205 |
+
multiprocess==0.70.16
|
| 206 |
+
setproctitle==1.3.6
|
| 207 |
+
narwhals==1.41.0
|
| 208 |
+
pip==25.1.1
|
| 209 |
+
cheroot==10.0.1
|
| 210 |
+
jaraco.context==5.3.0
|
| 211 |
+
more-itertools==10.3.0
|
| 212 |
+
jaraco.functools==4.0.1
|
| 213 |
+
jaraco.text==3.12.1
|
| 214 |
+
platformdirs==4.2.2
|
| 215 |
+
packaging==24.2
|
| 216 |
+
wheel==0.45.1
|
| 217 |
+
zipp==3.19.2
|
| 218 |
+
inflect==7.3.1
|
| 219 |
+
autocommand==2.2.2
|
| 220 |
+
typeguard==4.3.0
|
| 221 |
+
jaraco.collections==5.1.0
|
| 222 |
+
backports.tarfile==1.2.0
|
| 223 |
+
tomli==2.0.1
|
| 224 |
+
importlib_metadata==8.0.0
|
| 225 |
+
typing_extensions==4.12.2
|
ProtT3/all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
|
| 3 |
+
"python": "CPython 3.10.0",
|
| 4 |
+
"startedAt": "2025-06-26T13:13:59.908975Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--devices",
|
| 7 |
+
"0,1,2,3,4,5,6,7",
|
| 8 |
+
"--mode",
|
| 9 |
+
"train",
|
| 10 |
+
"--filename",
|
| 11 |
+
"stage1_06262112",
|
| 12 |
+
"--num_query_token",
|
| 13 |
+
"8",
|
| 14 |
+
"--plm_name",
|
| 15 |
+
"/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m",
|
| 16 |
+
"--bert_name",
|
| 17 |
+
"/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft",
|
| 18 |
+
"--save_every_n_epochs",
|
| 19 |
+
"5",
|
| 20 |
+
"--max_epochs",
|
| 21 |
+
"30",
|
| 22 |
+
"--batch_size",
|
| 23 |
+
"160",
|
| 24 |
+
"--precision",
|
| 25 |
+
"bf16-mixed",
|
| 26 |
+
"--mix_dataset",
|
| 27 |
+
"--num_workers",
|
| 28 |
+
"8",
|
| 29 |
+
"--use_wandb_logger"
|
| 30 |
+
],
|
| 31 |
+
"program": "/nas/shared/kilab/wangyujia/ProtT3/stage1.py",
|
| 32 |
+
"codePath": "stage1.py",
|
| 33 |
+
"email": "gia0603yucca@gmail.com",
|
| 34 |
+
"root": "./all_checkpoints/stage1_06262112/",
|
| 35 |
+
"host": "dsw-265304-cd576ddc5-gh74w",
|
| 36 |
+
"executable": "/root/miniconda3/envs/protT3/bin/python",
|
| 37 |
+
"codePathLocal": "stage1.py",
|
| 38 |
+
"cpu_count": 64,
|
| 39 |
+
"cpu_count_logical": 64,
|
| 40 |
+
"gpu": "NVIDIA A800-SXM4-80GB",
|
| 41 |
+
"gpu_count": 8,
|
| 42 |
+
"disk": {
|
| 43 |
+
"/": {
|
| 44 |
+
"total": "1623302262784",
|
| 45 |
+
"used": "1290924032"
|
| 46 |
+
}
|
| 47 |
+
},
|
| 48 |
+
"memory": {
|
| 49 |
+
"total": "549755813888"
|
| 50 |
+
},
|
| 51 |
+
"cpu": {
|
| 52 |
+
"count": 64,
|
| 53 |
+
"countLogical": 64
|
| 54 |
+
},
|
| 55 |
+
"gpu_nvidia": [
|
| 56 |
+
{
|
| 57 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 58 |
+
"memoryTotal": "85198045184",
|
| 59 |
+
"architecture": "Ampere"
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 63 |
+
"memoryTotal": "85198045184",
|
| 64 |
+
"architecture": "Ampere"
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 68 |
+
"memoryTotal": "85198045184",
|
| 69 |
+
"architecture": "Ampere"
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 73 |
+
"memoryTotal": "85198045184",
|
| 74 |
+
"architecture": "Ampere"
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 78 |
+
"memoryTotal": "85198045184",
|
| 79 |
+
"architecture": "Ampere"
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 83 |
+
"memoryTotal": "85198045184",
|
| 84 |
+
"architecture": "Ampere"
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 88 |
+
"memoryTotal": "85198045184",
|
| 89 |
+
"architecture": "Ampere"
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 93 |
+
"memoryTotal": "85198045184",
|
| 94 |
+
"architecture": "Ampere"
|
| 95 |
+
}
|
| 96 |
+
],
|
| 97 |
+
"cudaVersion": "12.1"
|
| 98 |
+
}
|
ProtT3/all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-06-26T21:13:59.919018005+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-06-26T21:14:01.751242877+08:00","level":"INFO","msg":"created new stream","id":"gp8ndo2v"}
|
| 3 |
+
{"time":"2025-06-26T21:14:01.751292945+08:00","level":"INFO","msg":"stream: started","id":"gp8ndo2v"}
|
| 4 |
+
{"time":"2025-06-26T21:14:01.751353982+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"gp8ndo2v"}
|
| 5 |
+
{"time":"2025-06-26T21:14:01.751376676+08:00","level":"INFO","msg":"handler: started","stream_id":"gp8ndo2v"}
|
| 6 |
+
{"time":"2025-06-26T21:14:01.751406784+08:00","level":"INFO","msg":"sender: started","stream_id":"gp8ndo2v"}
|
| 7 |
+
{"time":"2025-06-26T21:14:03.18201785+08:00","level":"INFO","msg":"Starting system monitor"}
|
| 8 |
+
{"time":"2025-06-26T21:39:10.805194559+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:48308->172.67.193.61:443: read: connection timed out"}
|
| 9 |
+
{"time":"2025-06-26T21:45:08.181153524+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:33626->104.21.20.172:443: read: connection timed out"}
|
| 10 |
+
{"time":"2025-06-26T21:50:43.748258238+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:39240->172.67.193.61:443: read: connection reset by peer"}
|
| 11 |
+
{"time":"2025-06-26T21:56:59.349224169+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:41628->104.21.20.172:443: read: connection timed out"}
|
| 12 |
+
{"time":"2025-06-26T22:01:29.173164681+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:41150->172.67.193.61:443: read: connection timed out"}
|
| 13 |
+
{"time":"2025-06-26T22:04:52.9491833+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:40328->104.21.20.172:443: read: connection timed out"}
|
| 14 |
+
{"time":"2025-06-26T22:05:33.372515641+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 15 |
+
{"time":"2025-06-26T22:10:24.214205918+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:39814->104.21.20.172:443: read: connection timed out"}
|
| 16 |
+
{"time":"2025-06-26T22:11:19.608808233+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": unexpected EOF"}
|
| 17 |
+
{"time":"2025-06-26T22:15:15.541207766+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:57490->104.21.20.172:443: read: connection timed out"}
|
| 18 |
+
{"time":"2025-06-26T22:17:53.749178371+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:34226->172.67.193.61:443: read: connection timed out"}
|
| 19 |
+
{"time":"2025-06-26T22:20:43.734188539+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:58158->104.21.20.172:443: read: connection timed out"}
|
| 20 |
+
{"time":"2025-06-26T22:26:37.244674658+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 21 |
+
{"time":"2025-06-26T22:27:27.97084057+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:42482->172.67.193.61:443: read: connection reset by peer"}
|
| 22 |
+
{"time":"2025-06-26T22:32:12.373221258+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:42718->104.21.20.172:443: read: connection timed out"}
|
| 23 |
+
{"time":"2025-06-26T22:33:45.749714178+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:36352->172.67.193.61:443: read: connection reset by peer"}
|
| 24 |
+
{"time":"2025-06-26T22:34:27.154183486+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": unexpected EOF"}
|
| 25 |
+
{"time":"2025-06-26T22:37:03.388715023+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
|
| 26 |
+
{"time":"2025-06-26T22:38:44.053145624+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:56120->104.21.20.172:443: read: connection timed out"}
|
| 27 |
+
{"time":"2025-06-26T22:39:26.21620593+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:37762->172.67.193.61:443: read: connection reset by peer"}
|
| 28 |
+
{"time":"2025-06-26T22:42:48.392517517+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 29 |
+
{"time":"2025-06-26T22:43:20.509939526+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
|
| 30 |
+
{"time":"2025-06-26T22:43:55.50812991+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
|
| 31 |
+
{"time":"2025-06-26T22:44:05.260626832+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": context deadline exceeded"}
|
| 32 |
+
{"time":"2025-06-26T22:44:33.704733361+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 33 |
+
{"time":"2025-06-26T22:47:28.34118454+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:46752->104.21.20.172:443: read: connection timed out"}
|
| 34 |
+
{"time":"2025-06-26T22:48:22.800067638+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:35900->104.21.20.172:443: read: connection reset by peer"}
|
| 35 |
+
{"time":"2025-06-26T22:49:03.396821287+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 36 |
+
{"time":"2025-06-26T22:49:35.881823651+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
|
| 37 |
+
{"time":"2025-06-26T22:50:10.244289946+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
|
| 38 |
+
{"time":"2025-06-26T22:50:48.344767175+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
|
| 39 |
+
{"time":"2025-06-26T22:51:21.302223032+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:40288->172.67.193.61:443: read: connection timed out"}
|
| 40 |
+
{"time":"2025-06-26T22:54:35.861164416+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:35106->104.21.20.172:443: read: connection timed out"}
|
| 41 |
+
{"time":"2025-06-26T22:56:18.401507947+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 42 |
+
{"time":"2025-06-26T22:56:50.726216671+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
|
| 43 |
+
{"time":"2025-06-26T22:57:25.218970516+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 44 |
+
{"time":"2025-06-26T22:58:04.180971507+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
|
| 45 |
+
{"time":"2025-06-26T22:58:09.302334148+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:50854->104.21.20.172:443: read: connection reset by peer"}
|
| 46 |
+
{"time":"2025-06-26T22:58:51.546103299+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 47 |
+
{"time":"2025-06-26T23:00:01.390576029+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 48 |
+
{"time":"2025-06-26T23:02:34.06917085+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:37644->104.21.20.172:443: read: connection timed out"}
|
| 49 |
+
{"time":"2025-06-26T23:05:03.407063467+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 50 |
+
{"time":"2025-06-26T23:06:15.25317264+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:42244->172.67.193.61:443: read: connection timed out"}
|
| 51 |
+
{"time":"2025-06-26T23:06:33.40769986+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
|
| 52 |
+
{"time":"2025-06-26T23:09:18.410102964+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 53 |
+
{"time":"2025-06-26T23:11:33.411287647+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
|
| 54 |
+
{"time":"2025-06-26T23:12:46.933205687+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:55392->104.21.20.172:443: read: connection timed out"}
|
| 55 |
+
{"time":"2025-06-26T23:16:18.41464022+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 56 |
+
{"time":"2025-06-26T23:16:40.753618008+08:00","level":"ERROR","msg":"filestream: json decode error: net/http: request canceled (Client.Timeout or context cancellation while reading body)"}
|
| 57 |
+
{"time":"2025-06-26T23:16:50.454824576+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": context deadline exceeded"}
|
| 58 |
+
{"time":"2025-06-26T23:17:25.158378302+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 59 |
+
{"time":"2025-06-26T23:18:00.15504283+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": unexpected EOF"}
|
| 60 |
+
{"time":"2025-06-26T23:18:03.880963877+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 61 |
+
{"time":"2025-06-26T23:18:50.836175421+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
| 62 |
+
{"time":"2025-06-26T23:20:45.653178557+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:38816->104.21.20.172:443: read: connection timed out"}
|
| 63 |
+
{"time":"2025-06-26T23:21:51.085821178+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:43680->172.67.193.61:443: read: connection reset by peer"}
|
| 64 |
+
{"time":"2025-06-26T23:24:50.901213106+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:40734->104.21.20.172:443: read: connection timed out"}
|
| 65 |
+
{"time":"2025-06-26T23:30:06.293188303+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:46360->104.21.20.172:443: read: connection timed out"}
|
| 66 |
+
{"time":"2025-06-26T23:36:26.709172933+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:33352->172.67.193.61:443: read: connection timed out"}
|
| 67 |
+
{"time":"2025-06-26T23:39:53.889169333+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": unexpected EOF"}
|
| 68 |
+
{"time":"2025-06-26T23:42:48.781078984+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": read tcp 10.1.8.160:35500->104.21.20.172:443: read: connection reset by peer"}
|
| 69 |
+
{"time":"2025-06-26T23:44:39.969525919+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:51204->172.67.193.61:443: read: connection reset by peer"}
|
| 70 |
+
{"time":"2025-06-26T23:52:25.685198314+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage1_06262112/gp8ndo2v/file_stream\": read tcp 10.1.8.160:41504->172.67.193.61:443: read: connection timed out"}
|
| 71 |
+
{"time":"2025-06-27T00:14:48.690187795+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": read tcp 10.1.8.160:49576->172.67.193.61:443: read: connection reset by peer"}
|
| 72 |
+
{"time":"2025-06-27T00:15:11.63645902+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": unexpected EOF"}
|
| 73 |
+
{"time":"2025-06-27T00:18:18.739690809+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": read tcp 10.1.8.160:55462->172.67.193.61:443: read: connection reset by peer"}
|
ProtT3/all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/logs/debug.log
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
|
| 2 |
+
2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_setup.py:_flush():70] Configure stats pid to 183028
|
| 3 |
+
2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
|
| 4 |
+
2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
|
| 5 |
+
2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_setup.py:_flush():70] Loading settings from environment variables
|
| 6 |
+
2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/logs/debug.log
|
| 7 |
+
2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage1_06262112/wandb/run-20250626_211359-gp8ndo2v/logs/debug-internal.log
|
| 8 |
+
2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:init():852] calling init triggers
|
| 9 |
+
2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'_wandb': {}}
|
| 11 |
+
2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:init():893] starting backend
|
| 12 |
+
2025-06-26 21:13:59,905 INFO MainThread:183028 [wandb_init.py:init():897] sending inform_init request
|
| 13 |
+
2025-06-26 21:13:59,906 INFO MainThread:183028 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 14 |
+
2025-06-26 21:13:59,908 INFO MainThread:183028 [wandb_init.py:init():907] backend started and connected
|
| 15 |
+
2025-06-26 21:13:59,909 INFO MainThread:183028 [wandb_init.py:init():1005] updated telemetry
|
| 16 |
+
2025-06-26 21:13:59,911 INFO MainThread:183028 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
|
| 17 |
+
2025-06-26 21:14:03,171 INFO MainThread:183028 [wandb_init.py:init():1104] starting run threads in backend
|
| 18 |
+
2025-06-26 21:14:03,332 INFO MainThread:183028 [wandb_run.py:_console_start():2573] atexit reg
|
| 19 |
+
2025-06-26 21:14:03,333 INFO MainThread:183028 [wandb_run.py:_redirect():2421] redirect: wrap_raw
|
| 20 |
+
2025-06-26 21:14:03,336 INFO MainThread:183028 [wandb_run.py:_redirect():2490] Wrapping output streams.
|
| 21 |
+
2025-06-26 21:14:03,336 INFO MainThread:183028 [wandb_run.py:_redirect():2513] Redirects installed.
|
| 22 |
+
2025-06-26 21:14:03,350 INFO MainThread:183028 [wandb_init.py:init():1150] run started, returning control to user process
|
| 23 |
+
2025-06-26 21:14:09,516 INFO MainThread:183028 [wandb_run.py:_config_callback():1436] config_cb None None {'filename': 'stage1_06262112', 'seed': 42, 'mode': 'train', 'strategy': 'deepspeed', 'accelerator': 'gpu', 'devices': '0,1,2,3,4,5,6,7', 'precision': 'bf16-mixed', 'max_epochs': 30, 'check_val_every_n_epoch': 1, 'use_wandb_logger': True, 'mix_dataset': True, 'temperature': 0.1, 'save_every_n_epochs': 5, 'ptm': True, 'lm': True, 'rerank_cand_num': 128, 'plm_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m', 'plm_tune': 'freeze', 'load_4bit': False, 'pool_size': 0, 'bert_hidden_dim': 768, 'bert_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft', 'projection_dim': 256, 'cross_attention_freq': 2, 'num_query_token': 8, 'weight_decay': 0.05, 'init_lr': 0.0001, 'min_lr': 1e-05, 'warmup_lr': 1e-06, 'warmup_steps': 1000, 'lr_decay_rate': 0.9, 'scheduler': 'linear_warmup_cosine_lr', 'init_checkpoint': '', 'retrieval_eval_epoch': 10, 'num_workers': 8, 'batch_size': 160, 'match_batch_size': 64, 'root': 'data', 'text_max_len': 128, 'prot_max_len': 1024, 'prot_aug': 'None'}
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/debug-internal.log
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-07-07T20:02:29.75666986+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_200229-yex1pcwt/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-07-07T20:02:30.922635932+08:00","level":"INFO","msg":"created new stream","id":"yex1pcwt"}
|
| 3 |
+
{"time":"2025-07-07T20:02:30.922678667+08:00","level":"INFO","msg":"stream: started","id":"yex1pcwt"}
|
| 4 |
+
{"time":"2025-07-07T20:02:30.922713833+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"yex1pcwt"}
|
| 5 |
+
{"time":"2025-07-07T20:02:30.922757284+08:00","level":"INFO","msg":"sender: started","stream_id":"yex1pcwt"}
|
| 6 |
+
{"time":"2025-07-07T20:02:30.92278615+08:00","level":"INFO","msg":"handler: started","stream_id":"yex1pcwt"}
|
| 7 |
+
{"time":"2025-07-07T20:02:32.296458789+08:00","level":"INFO","msg":"Starting system monitor"}
|
| 8 |
+
{"time":"2025-07-08T01:01:50.39071972+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/graphql\": read tcp 10.1.4.159:39416->104.21.20.172:443: read: connection reset by peer"}
|
| 9 |
+
{"time":"2025-07-08T01:07:45.887474022+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.bandw.top/files/gia0603yucca/stage2.5_mol_instruction/yex1pcwt/file_stream\": read tcp 10.1.4.159:48838->172.67.193.61:443: read: connection timed out"}
|
| 10 |
+
{"time":"2025-07-08T05:20:12.207426797+08:00","level":"INFO","msg":"stream: closing","id":"yex1pcwt"}
|
| 11 |
+
{"time":"2025-07-08T05:20:12.207468139+08:00","level":"INFO","msg":"Stopping system monitor"}
|
| 12 |
+
{"time":"2025-07-08T05:20:12.208684636+08:00","level":"INFO","msg":"Stopped system monitor"}
|
| 13 |
+
{"time":"2025-07-08T05:20:13.938647534+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 14 |
+
{"time":"2025-07-08T05:20:14.977621841+08:00","level":"INFO","msg":"handler: closed","stream_id":"yex1pcwt"}
|
| 15 |
+
{"time":"2025-07-08T05:20:14.977653692+08:00","level":"INFO","msg":"sender: closed","stream_id":"yex1pcwt"}
|
| 16 |
+
{"time":"2025-07-08T05:20:14.977651902+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"yex1pcwt"}
|
| 17 |
+
{"time":"2025-07-08T05:20:14.982274952+08:00","level":"INFO","msg":"stream: closed","id":"yex1pcwt"}
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/debug.log
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
|
| 2 |
+
2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_setup.py:_flush():70] Configure stats pid to 129761
|
| 3 |
+
2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
|
| 4 |
+
2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
|
| 5 |
+
2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_setup.py:_flush():70] Loading settings from environment variables
|
| 6 |
+
2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_200229-yex1pcwt/logs/debug.log
|
| 7 |
+
2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_200229-yex1pcwt/logs/debug-internal.log
|
| 8 |
+
2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_init.py:init():852] calling init triggers
|
| 9 |
+
2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'_wandb': {}}
|
| 11 |
+
2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_init.py:init():893] starting backend
|
| 12 |
+
2025-07-07 20:02:29,743 INFO MainThread:129761 [wandb_init.py:init():897] sending inform_init request
|
| 13 |
+
2025-07-07 20:02:29,745 INFO MainThread:129761 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 14 |
+
2025-07-07 20:02:29,748 INFO MainThread:129761 [wandb_init.py:init():907] backend started and connected
|
| 15 |
+
2025-07-07 20:02:29,754 INFO MainThread:129761 [wandb_init.py:init():1005] updated telemetry
|
| 16 |
+
2025-07-07 20:02:29,762 INFO MainThread:129761 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
|
| 17 |
+
2025-07-07 20:02:32,258 INFO MainThread:129761 [wandb_init.py:init():1104] starting run threads in backend
|
| 18 |
+
2025-07-07 20:02:32,427 INFO MainThread:129761 [wandb_run.py:_console_start():2573] atexit reg
|
| 19 |
+
2025-07-07 20:02:32,427 INFO MainThread:129761 [wandb_run.py:_redirect():2421] redirect: wrap_raw
|
| 20 |
+
2025-07-07 20:02:32,457 INFO MainThread:129761 [wandb_run.py:_redirect():2490] Wrapping output streams.
|
| 21 |
+
2025-07-07 20:02:32,462 INFO MainThread:129761 [wandb_run.py:_redirect():2513] Redirects installed.
|
| 22 |
+
2025-07-07 20:02:32,463 INFO MainThread:129761 [wandb_init.py:init():1150] run started, returning control to user process
|
| 23 |
+
2025-07-07 20:02:40,689 INFO MainThread:129761 [wandb_run.py:_config_callback():1436] config_cb None None {'filename': 'stage2.5_mol_instruction', 'seed': 42, 'mode': 'train', 'strategy': 'deepspeed', 'accelerator': 'gpu', 'devices': '0,1,2,3,4,5,6,7', 'precision': 'bf16-mixed', 'max_epochs': 10, 'accumulate_grad_batches': 1, 'check_val_every_n_epoch': 1, 'enable_flash': False, 'use_wandb_logger': True, 'mix_dataset': False, 'save_every_n_epochs': 1, 'bert_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft', 'cross_attention_freq': 2, 'num_query_token': 8, 'llm_name': '/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300', 'num_beams': 5, 'do_sample': False, 'max_inference_len': 128, 'min_inference_len': 1, 'llm_tune': 'mid_lora', 'peft_config': '', 'peft_dir': '', 'plm_model': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m', 'plm_tune': 'freeze', 'lora_r': 8, 'lora_alpha': 16, 'lora_dropout': 0.1, 'enbale_gradient_checkpointing': False, 'weight_decay': 0.05, 'init_lr': 0.0001, 'min_lr': 1e-05, 'warmup_lr': 1e-06, 'warmup_steps': 1000, 'lr_decay_rate': 0.9, 'scheduler': 'linear_warmup_cosine_lr', 'stage1_path': '/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt', 'stage2_path': '', 'init_checkpoint': '', 'caption_eval_epoch': 10, 'num_workers': 8, 'batch_size': 2, 'inference_batch_size': 4, 'root': 'data', 'text_max_len': 1024, 'q_max_len': 29, 'a_max_len': 36, 'prot_max_len': 1024, 'prompt': 'The protein has the following properties: ', 'filter_side_qa': False}
|
| 24 |
+
2025-07-08 05:20:12,205 INFO MsgRouterThr:129761 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/files/output.log
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2.5_mol_instruction exists and is not empty.
|
| 2 |
+
Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.
|
| 3 |
+
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
|
| 4 |
+
|
| 5 |
+
| Name | Type | Params | Mode
|
| 6 |
+
-------------------------------------------
|
| 7 |
+
0 | blip2 | Blip2OPT | 7.9 B | train
|
| 8 |
+
-------------------------------------------
|
| 9 |
+
104 M Trainable params
|
| 10 |
+
7.8 B Non-trainable params
|
| 11 |
+
7.9 B Total params
|
| 12 |
+
31,459.025Total estimated model params size (MB)
|
| 13 |
+
174 Modules in train mode
|
| 14 |
+
1203 Modules in eval mode
|
| 15 |
+
Epoch 0: 0%| | 0/410 [00:00<?, ?it/s]
|
| 16 |
+
[rank: 1] Child process with PID 111788 terminated with code 1. Forcefully terminating all other processes to avoid zombies 🧟
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/files/requirements.txt
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pydantic_core==2.33.2
|
| 2 |
+
psutil==7.0.0
|
| 3 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
| 4 |
+
mpmath==1.3.0
|
| 5 |
+
tzdata==2025.2
|
| 6 |
+
contexttimer==0.3.3
|
| 7 |
+
cycler==0.12.1
|
| 8 |
+
python-magic==0.4.27
|
| 9 |
+
pexpect==4.9.0
|
| 10 |
+
sympy==1.13.1
|
| 11 |
+
wrapt==1.17.2
|
| 12 |
+
marisa-trie==1.2.1
|
| 13 |
+
langcodes==3.5.0
|
| 14 |
+
nvidia-nvtx-cu12==12.4.127
|
| 15 |
+
ipython==8.36.0
|
| 16 |
+
opencv-python-headless==4.5.5.64
|
| 17 |
+
MarkupSafe==3.0.2
|
| 18 |
+
jsonschema-specifications==2025.4.1
|
| 19 |
+
wasabi==1.1.3
|
| 20 |
+
blinker==1.9.0
|
| 21 |
+
cfgv==3.4.0
|
| 22 |
+
numpy==2.2.6
|
| 23 |
+
idna==3.10
|
| 24 |
+
nvidia-cufile-cu12==1.11.1.6
|
| 25 |
+
ninja==1.11.1.4
|
| 26 |
+
nvidia-nccl-cu12==2.21.5
|
| 27 |
+
networkx==3.4.2
|
| 28 |
+
certifi==2025.4.26
|
| 29 |
+
deepspeed==0.16.10+b666844f
|
| 30 |
+
pure_eval==0.2.3
|
| 31 |
+
packaging==24.2
|
| 32 |
+
nltk==3.9.1
|
| 33 |
+
contourpy==1.3.2
|
| 34 |
+
pre_commit==4.2.0
|
| 35 |
+
nodeenv==1.9.1
|
| 36 |
+
setuptools==78.1.1
|
| 37 |
+
annotated-types==0.7.0
|
| 38 |
+
multidict==6.4.4
|
| 39 |
+
requests==2.32.3
|
| 40 |
+
tornado==6.5.1
|
| 41 |
+
triton==3.2.0
|
| 42 |
+
pillow==11.2.1
|
| 43 |
+
decord==0.6.0
|
| 44 |
+
shellingham==1.5.4
|
| 45 |
+
streamlit==1.45.1
|
| 46 |
+
pydeck==0.9.1
|
| 47 |
+
confection==0.1.5
|
| 48 |
+
exceptiongroup==1.3.0
|
| 49 |
+
prompt_toolkit==3.0.51
|
| 50 |
+
text-unidecode==1.3
|
| 51 |
+
nvidia-cufft-cu12==11.2.1.3
|
| 52 |
+
antlr4-python3-runtime==4.9.3
|
| 53 |
+
fairscale==0.4.4
|
| 54 |
+
rouge_score==0.1.2
|
| 55 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 56 |
+
tqdm==4.67.1
|
| 57 |
+
rich==14.0.0
|
| 58 |
+
frozenlist==1.6.0
|
| 59 |
+
webencodings==0.5.1
|
| 60 |
+
altair==5.5.0
|
| 61 |
+
opendatasets==0.1.22
|
| 62 |
+
nvidia-curand-cu12==10.3.5.147
|
| 63 |
+
protobuf==6.31.0
|
| 64 |
+
asttokens==3.0.0
|
| 65 |
+
wheel==0.45.1
|
| 66 |
+
hf-xet==1.1.2
|
| 67 |
+
weasel==0.4.1
|
| 68 |
+
aiosignal==1.3.2
|
| 69 |
+
absl-py==2.2.2
|
| 70 |
+
thinc==8.3.6
|
| 71 |
+
torchvision==0.21.0
|
| 72 |
+
pandas==2.2.3
|
| 73 |
+
fonttools==4.58.0
|
| 74 |
+
bleach==6.2.0
|
| 75 |
+
typing-inspection==0.4.1
|
| 76 |
+
ftfy==6.3.1
|
| 77 |
+
typing_extensions==4.13.2
|
| 78 |
+
nvidia-ml-py==12.575.51
|
| 79 |
+
python-slugify==8.0.4
|
| 80 |
+
lightning-utilities==0.14.3
|
| 81 |
+
py-cpuinfo==9.0.0
|
| 82 |
+
smmap==5.0.2
|
| 83 |
+
regex==2024.11.6
|
| 84 |
+
scikit-image==0.25.2
|
| 85 |
+
iopath==0.1.10
|
| 86 |
+
spacy-legacy==3.0.12
|
| 87 |
+
hjson==3.1.0
|
| 88 |
+
executing==2.2.0
|
| 89 |
+
kiwisolver==1.4.8
|
| 90 |
+
scipy==1.15.3
|
| 91 |
+
aiohappyeyeballs==2.6.1
|
| 92 |
+
toml==0.10.2
|
| 93 |
+
jedi==0.19.2
|
| 94 |
+
GitPython==3.1.44
|
| 95 |
+
ptyprocess==0.7.0
|
| 96 |
+
kaggle==1.7.4.5
|
| 97 |
+
braceexpand==0.1.7
|
| 98 |
+
wcwidth==0.2.13
|
| 99 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
| 100 |
+
pytorch-lightning==2.5.1.post0
|
| 101 |
+
Jinja2==3.1.6
|
| 102 |
+
urllib3==2.4.0
|
| 103 |
+
watchdog==6.0.0
|
| 104 |
+
filelock==3.18.0
|
| 105 |
+
propcache==0.3.1
|
| 106 |
+
torch==2.6.0
|
| 107 |
+
nvidia-cusparse-cu12==12.3.1.170
|
| 108 |
+
cymem==2.0.11
|
| 109 |
+
nvidia-cusolver-cu12==11.6.1.9
|
| 110 |
+
murmurhash==1.0.13
|
| 111 |
+
catalogue==2.0.10
|
| 112 |
+
yarl==1.20.0
|
| 113 |
+
charset-normalizer==3.4.2
|
| 114 |
+
gitdb==4.0.12
|
| 115 |
+
matplotlib==3.10.3
|
| 116 |
+
portalocker==3.1.1
|
| 117 |
+
platformdirs==4.3.8
|
| 118 |
+
async-timeout==5.0.1
|
| 119 |
+
parso==0.8.4
|
| 120 |
+
markdown-it-py==3.0.0
|
| 121 |
+
omegaconf==2.3.0
|
| 122 |
+
cloudpathlib==0.21.1
|
| 123 |
+
nvidia-cusparselt-cu12==0.6.2
|
| 124 |
+
spacy-loggers==1.0.5
|
| 125 |
+
srsly==2.5.1
|
| 126 |
+
identify==2.6.12
|
| 127 |
+
rpds-py==0.25.1
|
| 128 |
+
spacy==3.8.7
|
| 129 |
+
matplotlib-inline==0.1.7
|
| 130 |
+
smart-open==7.1.0
|
| 131 |
+
pydantic==2.11.5
|
| 132 |
+
mdurl==0.1.2
|
| 133 |
+
virtualenv==20.31.2
|
| 134 |
+
pytz==2025.2
|
| 135 |
+
pycocotools==2.0.8
|
| 136 |
+
six==1.17.0
|
| 137 |
+
decorator==5.2.1
|
| 138 |
+
referencing==0.36.2
|
| 139 |
+
sentencepiece==0.2.0
|
| 140 |
+
PyYAML==6.0.2
|
| 141 |
+
pycocoevalcap==1.2
|
| 142 |
+
imageio==2.37.0
|
| 143 |
+
distlib==0.3.9
|
| 144 |
+
pyarrow==20.0.0
|
| 145 |
+
tenacity==9.1.2
|
| 146 |
+
language_data==1.3.0
|
| 147 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
| 148 |
+
blis==1.3.0
|
| 149 |
+
Pygments==2.19.1
|
| 150 |
+
tifffile==2025.5.10
|
| 151 |
+
pyparsing==3.2.3
|
| 152 |
+
cachetools==5.5.2
|
| 153 |
+
safetensors==0.5.3
|
| 154 |
+
attrs==25.3.0
|
| 155 |
+
webdataset==0.2.111
|
| 156 |
+
plotly==6.1.1
|
| 157 |
+
nvidia-cublas-cu12==12.4.5.8
|
| 158 |
+
timm==0.4.12
|
| 159 |
+
torchmetrics==1.7.1
|
| 160 |
+
nvidia-nvjitlink-cu12==12.4.127
|
| 161 |
+
stack-data==0.6.3
|
| 162 |
+
python-dateutil==2.9.0.post0
|
| 163 |
+
lazy_loader==0.4
|
| 164 |
+
traitlets==5.14.3
|
| 165 |
+
einops==0.8.1
|
| 166 |
+
salesforce-lavis==1.0.2
|
| 167 |
+
joblib==1.5.1
|
| 168 |
+
msgpack==1.1.0
|
| 169 |
+
tokenizers==0.21.1
|
| 170 |
+
sentry-sdk==2.29.1
|
| 171 |
+
oss2==2.15.0
|
| 172 |
+
setproctitle==1.3.6
|
| 173 |
+
pip==25.1.1
|
| 174 |
+
cffi==1.17.1
|
| 175 |
+
transformers==4.52.3
|
| 176 |
+
narwhals==1.41.0
|
| 177 |
+
aliyun-python-sdk-core==2.16.0
|
| 178 |
+
jsonschema==4.24.0
|
| 179 |
+
flash-attn==2.7.1.post1
|
| 180 |
+
preshed==3.0.10
|
| 181 |
+
multiprocess==0.70.16
|
| 182 |
+
cryptography==45.0.3
|
| 183 |
+
aliyun-python-sdk-kms==2.16.5
|
| 184 |
+
scikit-learn==1.6.1
|
| 185 |
+
huggingface-hub==0.32.1
|
| 186 |
+
crcmod==1.7
|
| 187 |
+
typer==0.16.0
|
| 188 |
+
web.py==0.62
|
| 189 |
+
docker-pycreds==0.4.0
|
| 190 |
+
xxhash==3.5.0
|
| 191 |
+
bigmodelvis==0.0.1
|
| 192 |
+
datasets==3.6.0
|
| 193 |
+
more-itertools==10.7.0
|
| 194 |
+
yacs==0.1.8
|
| 195 |
+
jmespath==0.10.0
|
| 196 |
+
aiohttp==3.12.2
|
| 197 |
+
opencv-python==4.11.0.86
|
| 198 |
+
pycparser==2.22
|
| 199 |
+
threadpoolctl==3.6.0
|
| 200 |
+
jaraco.functools==4.1.0
|
| 201 |
+
click==8.2.1
|
| 202 |
+
wandb==0.19.11
|
| 203 |
+
opendelta==0.3.2
|
| 204 |
+
pycryptodome==3.23.0
|
| 205 |
+
pathlib==1.0.1
|
| 206 |
+
dill==0.3.8
|
| 207 |
+
fsspec==2025.3.0
|
| 208 |
+
delta-center-client==0.0.4
|
| 209 |
+
cheroot==10.0.1
|
| 210 |
+
typing_extensions==4.12.2
|
| 211 |
+
platformdirs==4.2.2
|
| 212 |
+
jaraco.text==3.12.1
|
| 213 |
+
packaging==24.2
|
| 214 |
+
inflect==7.3.1
|
| 215 |
+
jaraco.context==5.3.0
|
| 216 |
+
wheel==0.45.1
|
| 217 |
+
typeguard==4.3.0
|
| 218 |
+
more-itertools==10.3.0
|
| 219 |
+
tomli==2.0.1
|
| 220 |
+
importlib_metadata==8.0.0
|
| 221 |
+
backports.tarfile==1.2.0
|
| 222 |
+
zipp==3.19.2
|
| 223 |
+
jaraco.collections==5.1.0
|
| 224 |
+
autocommand==2.2.2
|
| 225 |
+
jaraco.functools==4.0.1
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
|
| 3 |
+
"python": "CPython 3.10.0",
|
| 4 |
+
"startedAt": "2025-07-07T10:49:42.891959Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--devices",
|
| 7 |
+
"0,1,2,3,4,5,6,7",
|
| 8 |
+
"--mode",
|
| 9 |
+
"train",
|
| 10 |
+
"--filename",
|
| 11 |
+
"stage2.5_mol_instruction",
|
| 12 |
+
"--num_query_token",
|
| 13 |
+
"8",
|
| 14 |
+
"--save_every_n_epochs",
|
| 15 |
+
"1",
|
| 16 |
+
"--max_epochs",
|
| 17 |
+
"10",
|
| 18 |
+
"--batch_size",
|
| 19 |
+
"32",
|
| 20 |
+
"--precision",
|
| 21 |
+
"bf16-mixed",
|
| 22 |
+
"--num_workers",
|
| 23 |
+
"8",
|
| 24 |
+
"--plm_model",
|
| 25 |
+
"/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m",
|
| 26 |
+
"--bert_name",
|
| 27 |
+
"/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft",
|
| 28 |
+
"--llm_name",
|
| 29 |
+
"/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300",
|
| 30 |
+
"--llm_tune",
|
| 31 |
+
"mid_lora",
|
| 32 |
+
"--stage1_path",
|
| 33 |
+
"/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt",
|
| 34 |
+
"--use_wandb_logger"
|
| 35 |
+
],
|
| 36 |
+
"program": "/nas/shared/kilab/wangyujia/ProtT3/stage2.py",
|
| 37 |
+
"codePath": "stage2.py",
|
| 38 |
+
"email": "gia0603yucca@gmail.com",
|
| 39 |
+
"root": "./all_checkpoints/stage2.5_mol_instruction/",
|
| 40 |
+
"host": "dsw-265304-7f6db6b4bb-g4b9r",
|
| 41 |
+
"executable": "/root/miniconda3/envs/protT3/bin/python",
|
| 42 |
+
"codePathLocal": "stage2.py",
|
| 43 |
+
"cpu_count": 64,
|
| 44 |
+
"cpu_count_logical": 64,
|
| 45 |
+
"gpu": "NVIDIA A800-SXM4-80GB",
|
| 46 |
+
"gpu_count": 8,
|
| 47 |
+
"disk": {
|
| 48 |
+
"/": {
|
| 49 |
+
"total": "1623302262784",
|
| 50 |
+
"used": "1260912640"
|
| 51 |
+
}
|
| 52 |
+
},
|
| 53 |
+
"memory": {
|
| 54 |
+
"total": "549755813888"
|
| 55 |
+
},
|
| 56 |
+
"cpu": {
|
| 57 |
+
"count": 64,
|
| 58 |
+
"countLogical": 64
|
| 59 |
+
},
|
| 60 |
+
"gpu_nvidia": [
|
| 61 |
+
{
|
| 62 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 63 |
+
"memoryTotal": "85198045184",
|
| 64 |
+
"architecture": "Ampere"
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 68 |
+
"memoryTotal": "85198045184",
|
| 69 |
+
"architecture": "Ampere"
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 73 |
+
"memoryTotal": "85198045184",
|
| 74 |
+
"architecture": "Ampere"
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 78 |
+
"memoryTotal": "85198045184",
|
| 79 |
+
"architecture": "Ampere"
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 83 |
+
"memoryTotal": "85198045184",
|
| 84 |
+
"architecture": "Ampere"
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 88 |
+
"memoryTotal": "85198045184",
|
| 89 |
+
"architecture": "Ampere"
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 93 |
+
"memoryTotal": "85198045184",
|
| 94 |
+
"architecture": "Ampere"
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 98 |
+
"memoryTotal": "85198045184",
|
| 99 |
+
"architecture": "Ampere"
|
| 100 |
+
}
|
| 101 |
+
],
|
| 102 |
+
"cudaVersion": "12.1"
|
| 103 |
+
}
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-07-07T18:49:42.893783985+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-07-07T18:49:43.936695317+08:00","level":"INFO","msg":"created new stream","id":"2bo0nfvt"}
|
| 3 |
+
{"time":"2025-07-07T18:49:43.936731645+08:00","level":"INFO","msg":"stream: started","id":"2bo0nfvt"}
|
| 4 |
+
{"time":"2025-07-07T18:49:43.936758154+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"2bo0nfvt"}
|
| 5 |
+
{"time":"2025-07-07T18:49:43.936774344+08:00","level":"INFO","msg":"sender: started","stream_id":"2bo0nfvt"}
|
| 6 |
+
{"time":"2025-07-07T18:49:43.936811419+08:00","level":"INFO","msg":"handler: started","stream_id":"2bo0nfvt"}
|
| 7 |
+
{"time":"2025-07-07T18:49:45.07418554+08:00","level":"INFO","msg":"Starting system monitor"}
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/logs/debug.log
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-07-07 18:49:42,884 INFO MainThread:111335 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
|
| 2 |
+
2025-07-07 18:49:42,884 INFO MainThread:111335 [wandb_setup.py:_flush():70] Configure stats pid to 111335
|
| 3 |
+
2025-07-07 18:49:42,884 INFO MainThread:111335 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
|
| 4 |
+
2025-07-07 18:49:42,884 INFO MainThread:111335 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
|
| 5 |
+
2025-07-07 18:49:42,884 INFO MainThread:111335 [wandb_setup.py:_flush():70] Loading settings from environment variables
|
| 6 |
+
2025-07-07 18:49:42,884 INFO MainThread:111335 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/logs/debug.log
|
| 7 |
+
2025-07-07 18:49:42,884 INFO MainThread:111335 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/logs/debug-internal.log
|
| 8 |
+
2025-07-07 18:49:42,884 INFO MainThread:111335 [wandb_init.py:init():852] calling init triggers
|
| 9 |
+
2025-07-07 18:49:42,884 INFO MainThread:111335 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'_wandb': {}}
|
| 11 |
+
2025-07-07 18:49:42,884 INFO MainThread:111335 [wandb_init.py:init():893] starting backend
|
| 12 |
+
2025-07-07 18:49:42,884 INFO MainThread:111335 [wandb_init.py:init():897] sending inform_init request
|
| 13 |
+
2025-07-07 18:49:42,886 INFO MainThread:111335 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 14 |
+
2025-07-07 18:49:42,888 INFO MainThread:111335 [wandb_init.py:init():907] backend started and connected
|
| 15 |
+
2025-07-07 18:49:42,893 INFO MainThread:111335 [wandb_init.py:init():1005] updated telemetry
|
| 16 |
+
2025-07-07 18:49:42,896 INFO MainThread:111335 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
|
| 17 |
+
2025-07-07 18:49:45,025 INFO MainThread:111335 [wandb_init.py:init():1104] starting run threads in backend
|
| 18 |
+
2025-07-07 18:49:45,210 INFO MainThread:111335 [wandb_run.py:_console_start():2573] atexit reg
|
| 19 |
+
2025-07-07 18:49:45,210 INFO MainThread:111335 [wandb_run.py:_redirect():2421] redirect: wrap_raw
|
| 20 |
+
2025-07-07 18:49:45,214 INFO MainThread:111335 [wandb_run.py:_redirect():2490] Wrapping output streams.
|
| 21 |
+
2025-07-07 18:49:45,214 INFO MainThread:111335 [wandb_run.py:_redirect():2513] Redirects installed.
|
| 22 |
+
2025-07-07 18:49:45,216 INFO MainThread:111335 [wandb_init.py:init():1150] run started, returning control to user process
|
| 23 |
+
2025-07-07 18:49:53,747 INFO MainThread:111335 [wandb_run.py:_config_callback():1436] config_cb None None {'filename': 'stage2.5_mol_instruction', 'seed': 42, 'mode': 'train', 'strategy': 'deepspeed', 'accelerator': 'gpu', 'devices': '0,1,2,3,4,5,6,7', 'precision': 'bf16-mixed', 'max_epochs': 10, 'accumulate_grad_batches': 1, 'check_val_every_n_epoch': 1, 'enable_flash': False, 'use_wandb_logger': True, 'mix_dataset': False, 'save_every_n_epochs': 1, 'bert_name': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft', 'cross_attention_freq': 2, 'num_query_token': 8, 'llm_name': '/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300', 'num_beams': 5, 'do_sample': False, 'max_inference_len': 128, 'min_inference_len': 1, 'llm_tune': 'mid_lora', 'peft_config': '', 'peft_dir': '', 'plm_model': '/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m', 'plm_tune': 'freeze', 'lora_r': 8, 'lora_alpha': 16, 'lora_dropout': 0.1, 'enbale_gradient_checkpointing': False, 'weight_decay': 0.05, 'init_lr': 0.0001, 'min_lr': 1e-05, 'warmup_lr': 1e-06, 'warmup_steps': 1000, 'lr_decay_rate': 0.9, 'scheduler': 'linear_warmup_cosine_lr', 'stage1_path': '/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt', 'stage2_path': '', 'init_checkpoint': '', 'caption_eval_epoch': 10, 'num_workers': 8, 'batch_size': 32, 'inference_batch_size': 4, 'root': 'data', 'text_max_len': 1024, 'q_max_len': 29, 'a_max_len': 36, 'prot_max_len': 1024, 'prompt': 'The protein has the following properties: ', 'filter_side_qa': False}
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_184942-2bo0nfvt/run-2bo0nfvt.wandb
ADDED
|
File without changes
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/files/output.log
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2.5_mol_instruction exists and is not empty.
|
| 2 |
+
Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.
|
| 3 |
+
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
|
| 4 |
+
[rank: 5] Child process with PID 116132 terminated with code 1. Forcefully terminating all other processes to avoid zombies 🧟
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/files/requirements.txt
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pydantic_core==2.33.2
|
| 2 |
+
psutil==7.0.0
|
| 3 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
| 4 |
+
mpmath==1.3.0
|
| 5 |
+
tzdata==2025.2
|
| 6 |
+
contexttimer==0.3.3
|
| 7 |
+
cycler==0.12.1
|
| 8 |
+
python-magic==0.4.27
|
| 9 |
+
pexpect==4.9.0
|
| 10 |
+
sympy==1.13.1
|
| 11 |
+
wrapt==1.17.2
|
| 12 |
+
marisa-trie==1.2.1
|
| 13 |
+
langcodes==3.5.0
|
| 14 |
+
nvidia-nvtx-cu12==12.4.127
|
| 15 |
+
ipython==8.36.0
|
| 16 |
+
opencv-python-headless==4.5.5.64
|
| 17 |
+
MarkupSafe==3.0.2
|
| 18 |
+
jsonschema-specifications==2025.4.1
|
| 19 |
+
wasabi==1.1.3
|
| 20 |
+
blinker==1.9.0
|
| 21 |
+
cfgv==3.4.0
|
| 22 |
+
numpy==2.2.6
|
| 23 |
+
idna==3.10
|
| 24 |
+
nvidia-cufile-cu12==1.11.1.6
|
| 25 |
+
ninja==1.11.1.4
|
| 26 |
+
nvidia-nccl-cu12==2.21.5
|
| 27 |
+
networkx==3.4.2
|
| 28 |
+
certifi==2025.4.26
|
| 29 |
+
deepspeed==0.16.10+b666844f
|
| 30 |
+
pure_eval==0.2.3
|
| 31 |
+
packaging==24.2
|
| 32 |
+
nltk==3.9.1
|
| 33 |
+
contourpy==1.3.2
|
| 34 |
+
pre_commit==4.2.0
|
| 35 |
+
nodeenv==1.9.1
|
| 36 |
+
setuptools==78.1.1
|
| 37 |
+
annotated-types==0.7.0
|
| 38 |
+
multidict==6.4.4
|
| 39 |
+
requests==2.32.3
|
| 40 |
+
tornado==6.5.1
|
| 41 |
+
triton==3.2.0
|
| 42 |
+
pillow==11.2.1
|
| 43 |
+
decord==0.6.0
|
| 44 |
+
shellingham==1.5.4
|
| 45 |
+
streamlit==1.45.1
|
| 46 |
+
pydeck==0.9.1
|
| 47 |
+
confection==0.1.5
|
| 48 |
+
exceptiongroup==1.3.0
|
| 49 |
+
prompt_toolkit==3.0.51
|
| 50 |
+
text-unidecode==1.3
|
| 51 |
+
nvidia-cufft-cu12==11.2.1.3
|
| 52 |
+
antlr4-python3-runtime==4.9.3
|
| 53 |
+
fairscale==0.4.4
|
| 54 |
+
rouge_score==0.1.2
|
| 55 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 56 |
+
tqdm==4.67.1
|
| 57 |
+
rich==14.0.0
|
| 58 |
+
frozenlist==1.6.0
|
| 59 |
+
webencodings==0.5.1
|
| 60 |
+
altair==5.5.0
|
| 61 |
+
opendatasets==0.1.22
|
| 62 |
+
nvidia-curand-cu12==10.3.5.147
|
| 63 |
+
protobuf==6.31.0
|
| 64 |
+
asttokens==3.0.0
|
| 65 |
+
wheel==0.45.1
|
| 66 |
+
hf-xet==1.1.2
|
| 67 |
+
weasel==0.4.1
|
| 68 |
+
aiosignal==1.3.2
|
| 69 |
+
absl-py==2.2.2
|
| 70 |
+
thinc==8.3.6
|
| 71 |
+
torchvision==0.21.0
|
| 72 |
+
pandas==2.2.3
|
| 73 |
+
fonttools==4.58.0
|
| 74 |
+
bleach==6.2.0
|
| 75 |
+
typing-inspection==0.4.1
|
| 76 |
+
ftfy==6.3.1
|
| 77 |
+
typing_extensions==4.13.2
|
| 78 |
+
nvidia-ml-py==12.575.51
|
| 79 |
+
python-slugify==8.0.4
|
| 80 |
+
lightning-utilities==0.14.3
|
| 81 |
+
py-cpuinfo==9.0.0
|
| 82 |
+
smmap==5.0.2
|
| 83 |
+
regex==2024.11.6
|
| 84 |
+
scikit-image==0.25.2
|
| 85 |
+
iopath==0.1.10
|
| 86 |
+
spacy-legacy==3.0.12
|
| 87 |
+
hjson==3.1.0
|
| 88 |
+
executing==2.2.0
|
| 89 |
+
kiwisolver==1.4.8
|
| 90 |
+
scipy==1.15.3
|
| 91 |
+
aiohappyeyeballs==2.6.1
|
| 92 |
+
toml==0.10.2
|
| 93 |
+
jedi==0.19.2
|
| 94 |
+
GitPython==3.1.44
|
| 95 |
+
ptyprocess==0.7.0
|
| 96 |
+
kaggle==1.7.4.5
|
| 97 |
+
braceexpand==0.1.7
|
| 98 |
+
wcwidth==0.2.13
|
| 99 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
| 100 |
+
pytorch-lightning==2.5.1.post0
|
| 101 |
+
Jinja2==3.1.6
|
| 102 |
+
urllib3==2.4.0
|
| 103 |
+
watchdog==6.0.0
|
| 104 |
+
filelock==3.18.0
|
| 105 |
+
propcache==0.3.1
|
| 106 |
+
torch==2.6.0
|
| 107 |
+
nvidia-cusparse-cu12==12.3.1.170
|
| 108 |
+
cymem==2.0.11
|
| 109 |
+
nvidia-cusolver-cu12==11.6.1.9
|
| 110 |
+
murmurhash==1.0.13
|
| 111 |
+
catalogue==2.0.10
|
| 112 |
+
yarl==1.20.0
|
| 113 |
+
charset-normalizer==3.4.2
|
| 114 |
+
gitdb==4.0.12
|
| 115 |
+
matplotlib==3.10.3
|
| 116 |
+
portalocker==3.1.1
|
| 117 |
+
platformdirs==4.3.8
|
| 118 |
+
async-timeout==5.0.1
|
| 119 |
+
parso==0.8.4
|
| 120 |
+
markdown-it-py==3.0.0
|
| 121 |
+
omegaconf==2.3.0
|
| 122 |
+
cloudpathlib==0.21.1
|
| 123 |
+
nvidia-cusparselt-cu12==0.6.2
|
| 124 |
+
spacy-loggers==1.0.5
|
| 125 |
+
srsly==2.5.1
|
| 126 |
+
identify==2.6.12
|
| 127 |
+
rpds-py==0.25.1
|
| 128 |
+
spacy==3.8.7
|
| 129 |
+
matplotlib-inline==0.1.7
|
| 130 |
+
smart-open==7.1.0
|
| 131 |
+
pydantic==2.11.5
|
| 132 |
+
mdurl==0.1.2
|
| 133 |
+
virtualenv==20.31.2
|
| 134 |
+
pytz==2025.2
|
| 135 |
+
pycocotools==2.0.8
|
| 136 |
+
six==1.17.0
|
| 137 |
+
decorator==5.2.1
|
| 138 |
+
referencing==0.36.2
|
| 139 |
+
sentencepiece==0.2.0
|
| 140 |
+
PyYAML==6.0.2
|
| 141 |
+
pycocoevalcap==1.2
|
| 142 |
+
imageio==2.37.0
|
| 143 |
+
distlib==0.3.9
|
| 144 |
+
pyarrow==20.0.0
|
| 145 |
+
tenacity==9.1.2
|
| 146 |
+
language_data==1.3.0
|
| 147 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
| 148 |
+
blis==1.3.0
|
| 149 |
+
Pygments==2.19.1
|
| 150 |
+
tifffile==2025.5.10
|
| 151 |
+
pyparsing==3.2.3
|
| 152 |
+
cachetools==5.5.2
|
| 153 |
+
safetensors==0.5.3
|
| 154 |
+
attrs==25.3.0
|
| 155 |
+
webdataset==0.2.111
|
| 156 |
+
plotly==6.1.1
|
| 157 |
+
nvidia-cublas-cu12==12.4.5.8
|
| 158 |
+
timm==0.4.12
|
| 159 |
+
torchmetrics==1.7.1
|
| 160 |
+
nvidia-nvjitlink-cu12==12.4.127
|
| 161 |
+
stack-data==0.6.3
|
| 162 |
+
python-dateutil==2.9.0.post0
|
| 163 |
+
lazy_loader==0.4
|
| 164 |
+
traitlets==5.14.3
|
| 165 |
+
einops==0.8.1
|
| 166 |
+
salesforce-lavis==1.0.2
|
| 167 |
+
joblib==1.5.1
|
| 168 |
+
msgpack==1.1.0
|
| 169 |
+
tokenizers==0.21.1
|
| 170 |
+
sentry-sdk==2.29.1
|
| 171 |
+
oss2==2.15.0
|
| 172 |
+
setproctitle==1.3.6
|
| 173 |
+
pip==25.1.1
|
| 174 |
+
cffi==1.17.1
|
| 175 |
+
transformers==4.52.3
|
| 176 |
+
narwhals==1.41.0
|
| 177 |
+
aliyun-python-sdk-core==2.16.0
|
| 178 |
+
jsonschema==4.24.0
|
| 179 |
+
flash-attn==2.7.1.post1
|
| 180 |
+
preshed==3.0.10
|
| 181 |
+
multiprocess==0.70.16
|
| 182 |
+
cryptography==45.0.3
|
| 183 |
+
aliyun-python-sdk-kms==2.16.5
|
| 184 |
+
scikit-learn==1.6.1
|
| 185 |
+
huggingface-hub==0.32.1
|
| 186 |
+
crcmod==1.7
|
| 187 |
+
typer==0.16.0
|
| 188 |
+
web.py==0.62
|
| 189 |
+
docker-pycreds==0.4.0
|
| 190 |
+
xxhash==3.5.0
|
| 191 |
+
bigmodelvis==0.0.1
|
| 192 |
+
datasets==3.6.0
|
| 193 |
+
more-itertools==10.7.0
|
| 194 |
+
yacs==0.1.8
|
| 195 |
+
jmespath==0.10.0
|
| 196 |
+
aiohttp==3.12.2
|
| 197 |
+
opencv-python==4.11.0.86
|
| 198 |
+
pycparser==2.22
|
| 199 |
+
threadpoolctl==3.6.0
|
| 200 |
+
jaraco.functools==4.1.0
|
| 201 |
+
click==8.2.1
|
| 202 |
+
wandb==0.19.11
|
| 203 |
+
opendelta==0.3.2
|
| 204 |
+
pycryptodome==3.23.0
|
| 205 |
+
pathlib==1.0.1
|
| 206 |
+
dill==0.3.8
|
| 207 |
+
fsspec==2025.3.0
|
| 208 |
+
delta-center-client==0.0.4
|
| 209 |
+
cheroot==10.0.1
|
| 210 |
+
typing_extensions==4.12.2
|
| 211 |
+
platformdirs==4.2.2
|
| 212 |
+
jaraco.text==3.12.1
|
| 213 |
+
packaging==24.2
|
| 214 |
+
inflect==7.3.1
|
| 215 |
+
jaraco.context==5.3.0
|
| 216 |
+
wheel==0.45.1
|
| 217 |
+
typeguard==4.3.0
|
| 218 |
+
more-itertools==10.3.0
|
| 219 |
+
tomli==2.0.1
|
| 220 |
+
importlib_metadata==8.0.0
|
| 221 |
+
backports.tarfile==1.2.0
|
| 222 |
+
zipp==3.19.2
|
| 223 |
+
jaraco.collections==5.1.0
|
| 224 |
+
autocommand==2.2.2
|
| 225 |
+
jaraco.functools==4.0.1
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
|
| 3 |
+
"python": "CPython 3.10.0",
|
| 4 |
+
"startedAt": "2025-07-07T10:52:39.364663Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--devices",
|
| 7 |
+
"0,1,2,3,4,5,6,7",
|
| 8 |
+
"--mode",
|
| 9 |
+
"train",
|
| 10 |
+
"--filename",
|
| 11 |
+
"stage2.5_mol_instruction",
|
| 12 |
+
"--num_query_token",
|
| 13 |
+
"8",
|
| 14 |
+
"--save_every_n_epochs",
|
| 15 |
+
"1",
|
| 16 |
+
"--max_epochs",
|
| 17 |
+
"10",
|
| 18 |
+
"--batch_size",
|
| 19 |
+
"2",
|
| 20 |
+
"--precision",
|
| 21 |
+
"bf16-mixed",
|
| 22 |
+
"--num_workers",
|
| 23 |
+
"8",
|
| 24 |
+
"--plm_model",
|
| 25 |
+
"/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m",
|
| 26 |
+
"--bert_name",
|
| 27 |
+
"/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft",
|
| 28 |
+
"--llm_name",
|
| 29 |
+
"/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300",
|
| 30 |
+
"--llm_tune",
|
| 31 |
+
"mid_lora",
|
| 32 |
+
"--stage1_path",
|
| 33 |
+
"/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt",
|
| 34 |
+
"--use_wandb_logger"
|
| 35 |
+
],
|
| 36 |
+
"program": "/nas/shared/kilab/wangyujia/ProtT3/stage2.py",
|
| 37 |
+
"codePath": "stage2.py",
|
| 38 |
+
"email": "gia0603yucca@gmail.com",
|
| 39 |
+
"root": "./all_checkpoints/stage2.5_mol_instruction/",
|
| 40 |
+
"host": "dsw-265304-7f6db6b4bb-g4b9r",
|
| 41 |
+
"executable": "/root/miniconda3/envs/protT3/bin/python",
|
| 42 |
+
"codePathLocal": "stage2.py",
|
| 43 |
+
"cpu_count": 64,
|
| 44 |
+
"cpu_count_logical": 64,
|
| 45 |
+
"gpu": "NVIDIA A800-SXM4-80GB",
|
| 46 |
+
"gpu_count": 8,
|
| 47 |
+
"disk": {
|
| 48 |
+
"/": {
|
| 49 |
+
"total": "1623302262784",
|
| 50 |
+
"used": "1260916736"
|
| 51 |
+
}
|
| 52 |
+
},
|
| 53 |
+
"memory": {
|
| 54 |
+
"total": "549755813888"
|
| 55 |
+
},
|
| 56 |
+
"cpu": {
|
| 57 |
+
"count": 64,
|
| 58 |
+
"countLogical": 64
|
| 59 |
+
},
|
| 60 |
+
"gpu_nvidia": [
|
| 61 |
+
{
|
| 62 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 63 |
+
"memoryTotal": "85198045184",
|
| 64 |
+
"architecture": "Ampere"
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 68 |
+
"memoryTotal": "85198045184",
|
| 69 |
+
"architecture": "Ampere"
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 73 |
+
"memoryTotal": "85198045184",
|
| 74 |
+
"architecture": "Ampere"
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 78 |
+
"memoryTotal": "85198045184",
|
| 79 |
+
"architecture": "Ampere"
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 83 |
+
"memoryTotal": "85198045184",
|
| 84 |
+
"architecture": "Ampere"
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 88 |
+
"memoryTotal": "85198045184",
|
| 89 |
+
"architecture": "Ampere"
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 93 |
+
"memoryTotal": "85198045184",
|
| 94 |
+
"architecture": "Ampere"
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 98 |
+
"memoryTotal": "85198045184",
|
| 99 |
+
"architecture": "Ampere"
|
| 100 |
+
}
|
| 101 |
+
],
|
| 102 |
+
"cudaVersion": "12.1"
|
| 103 |
+
}
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-07-07T18:52:39.366061072+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-07-07T18:52:40.447272944+08:00","level":"INFO","msg":"created new stream","id":"ftp1v3gy"}
|
| 3 |
+
{"time":"2025-07-07T18:52:40.447330666+08:00","level":"INFO","msg":"stream: started","id":"ftp1v3gy"}
|
| 4 |
+
{"time":"2025-07-07T18:52:40.447402585+08:00","level":"INFO","msg":"handler: started","stream_id":"ftp1v3gy"}
|
| 5 |
+
{"time":"2025-07-07T18:52:40.447405198+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"ftp1v3gy"}
|
| 6 |
+
{"time":"2025-07-07T18:52:40.447445082+08:00","level":"INFO","msg":"sender: started","stream_id":"ftp1v3gy"}
|
| 7 |
+
{"time":"2025-07-07T18:52:41.658224222+08:00","level":"INFO","msg":"Starting system monitor"}
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/logs/debug.log
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-07-07 18:52:39,355 INFO MainThread:115698 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
|
| 2 |
+
2025-07-07 18:52:39,356 INFO MainThread:115698 [wandb_setup.py:_flush():70] Configure stats pid to 115698
|
| 3 |
+
2025-07-07 18:52:39,356 INFO MainThread:115698 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
|
| 4 |
+
2025-07-07 18:52:39,356 INFO MainThread:115698 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
|
| 5 |
+
2025-07-07 18:52:39,356 INFO MainThread:115698 [wandb_setup.py:_flush():70] Loading settings from environment variables
|
| 6 |
+
2025-07-07 18:52:39,356 INFO MainThread:115698 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/logs/debug.log
|
| 7 |
+
2025-07-07 18:52:39,356 INFO MainThread:115698 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/logs/debug-internal.log
|
| 8 |
+
2025-07-07 18:52:39,356 INFO MainThread:115698 [wandb_init.py:init():852] calling init triggers
|
| 9 |
+
2025-07-07 18:52:39,356 INFO MainThread:115698 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'_wandb': {}}
|
| 11 |
+
2025-07-07 18:52:39,356 INFO MainThread:115698 [wandb_init.py:init():893] starting backend
|
| 12 |
+
2025-07-07 18:52:39,356 INFO MainThread:115698 [wandb_init.py:init():897] sending inform_init request
|
| 13 |
+
2025-07-07 18:52:39,357 INFO MainThread:115698 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 14 |
+
2025-07-07 18:52:39,361 INFO MainThread:115698 [wandb_init.py:init():907] backend started and connected
|
| 15 |
+
2025-07-07 18:52:39,365 INFO MainThread:115698 [wandb_init.py:init():1005] updated telemetry
|
| 16 |
+
2025-07-07 18:52:39,368 INFO MainThread:115698 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
|
| 17 |
+
2025-07-07 18:52:41,647 INFO MainThread:115698 [wandb_init.py:init():1104] starting run threads in backend
|
| 18 |
+
2025-07-07 18:52:41,785 INFO MainThread:115698 [wandb_run.py:_console_start():2573] atexit reg
|
| 19 |
+
2025-07-07 18:52:41,785 INFO MainThread:115698 [wandb_run.py:_redirect():2421] redirect: wrap_raw
|
| 20 |
+
2025-07-07 18:52:41,788 INFO MainThread:115698 [wandb_run.py:_redirect():2490] Wrapping output streams.
|
| 21 |
+
2025-07-07 18:52:41,788 INFO MainThread:115698 [wandb_run.py:_redirect():2513] Redirects installed.
|
| 22 |
+
2025-07-07 18:52:41,789 INFO MainThread:115698 [wandb_init.py:init():1150] run started, returning control to user process
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185239-ftp1v3gy/run-ftp1v3gy.wandb
ADDED
|
File without changes
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/files/output.log
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2.5_mol_instruction exists and is not empty.
|
| 2 |
+
Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.
|
| 3 |
+
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
|
| 4 |
+
[rank: 5] Child process with PID 118872 terminated with code 1. Forcefully terminating all other processes to avoid zombies 🧟
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/files/requirements.txt
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pydantic_core==2.33.2
|
| 2 |
+
psutil==7.0.0
|
| 3 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
| 4 |
+
mpmath==1.3.0
|
| 5 |
+
tzdata==2025.2
|
| 6 |
+
contexttimer==0.3.3
|
| 7 |
+
cycler==0.12.1
|
| 8 |
+
python-magic==0.4.27
|
| 9 |
+
pexpect==4.9.0
|
| 10 |
+
sympy==1.13.1
|
| 11 |
+
wrapt==1.17.2
|
| 12 |
+
marisa-trie==1.2.1
|
| 13 |
+
langcodes==3.5.0
|
| 14 |
+
nvidia-nvtx-cu12==12.4.127
|
| 15 |
+
ipython==8.36.0
|
| 16 |
+
opencv-python-headless==4.5.5.64
|
| 17 |
+
MarkupSafe==3.0.2
|
| 18 |
+
jsonschema-specifications==2025.4.1
|
| 19 |
+
wasabi==1.1.3
|
| 20 |
+
blinker==1.9.0
|
| 21 |
+
cfgv==3.4.0
|
| 22 |
+
numpy==2.2.6
|
| 23 |
+
idna==3.10
|
| 24 |
+
nvidia-cufile-cu12==1.11.1.6
|
| 25 |
+
ninja==1.11.1.4
|
| 26 |
+
nvidia-nccl-cu12==2.21.5
|
| 27 |
+
networkx==3.4.2
|
| 28 |
+
certifi==2025.4.26
|
| 29 |
+
deepspeed==0.16.10+b666844f
|
| 30 |
+
pure_eval==0.2.3
|
| 31 |
+
packaging==24.2
|
| 32 |
+
nltk==3.9.1
|
| 33 |
+
contourpy==1.3.2
|
| 34 |
+
pre_commit==4.2.0
|
| 35 |
+
nodeenv==1.9.1
|
| 36 |
+
setuptools==78.1.1
|
| 37 |
+
annotated-types==0.7.0
|
| 38 |
+
multidict==6.4.4
|
| 39 |
+
requests==2.32.3
|
| 40 |
+
tornado==6.5.1
|
| 41 |
+
triton==3.2.0
|
| 42 |
+
pillow==11.2.1
|
| 43 |
+
decord==0.6.0
|
| 44 |
+
shellingham==1.5.4
|
| 45 |
+
streamlit==1.45.1
|
| 46 |
+
pydeck==0.9.1
|
| 47 |
+
confection==0.1.5
|
| 48 |
+
exceptiongroup==1.3.0
|
| 49 |
+
prompt_toolkit==3.0.51
|
| 50 |
+
text-unidecode==1.3
|
| 51 |
+
nvidia-cufft-cu12==11.2.1.3
|
| 52 |
+
antlr4-python3-runtime==4.9.3
|
| 53 |
+
fairscale==0.4.4
|
| 54 |
+
rouge_score==0.1.2
|
| 55 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 56 |
+
tqdm==4.67.1
|
| 57 |
+
rich==14.0.0
|
| 58 |
+
frozenlist==1.6.0
|
| 59 |
+
webencodings==0.5.1
|
| 60 |
+
altair==5.5.0
|
| 61 |
+
opendatasets==0.1.22
|
| 62 |
+
nvidia-curand-cu12==10.3.5.147
|
| 63 |
+
protobuf==6.31.0
|
| 64 |
+
asttokens==3.0.0
|
| 65 |
+
wheel==0.45.1
|
| 66 |
+
hf-xet==1.1.2
|
| 67 |
+
weasel==0.4.1
|
| 68 |
+
aiosignal==1.3.2
|
| 69 |
+
absl-py==2.2.2
|
| 70 |
+
thinc==8.3.6
|
| 71 |
+
torchvision==0.21.0
|
| 72 |
+
pandas==2.2.3
|
| 73 |
+
fonttools==4.58.0
|
| 74 |
+
bleach==6.2.0
|
| 75 |
+
typing-inspection==0.4.1
|
| 76 |
+
ftfy==6.3.1
|
| 77 |
+
typing_extensions==4.13.2
|
| 78 |
+
nvidia-ml-py==12.575.51
|
| 79 |
+
python-slugify==8.0.4
|
| 80 |
+
lightning-utilities==0.14.3
|
| 81 |
+
py-cpuinfo==9.0.0
|
| 82 |
+
smmap==5.0.2
|
| 83 |
+
regex==2024.11.6
|
| 84 |
+
scikit-image==0.25.2
|
| 85 |
+
iopath==0.1.10
|
| 86 |
+
spacy-legacy==3.0.12
|
| 87 |
+
hjson==3.1.0
|
| 88 |
+
executing==2.2.0
|
| 89 |
+
kiwisolver==1.4.8
|
| 90 |
+
scipy==1.15.3
|
| 91 |
+
aiohappyeyeballs==2.6.1
|
| 92 |
+
toml==0.10.2
|
| 93 |
+
jedi==0.19.2
|
| 94 |
+
GitPython==3.1.44
|
| 95 |
+
ptyprocess==0.7.0
|
| 96 |
+
kaggle==1.7.4.5
|
| 97 |
+
braceexpand==0.1.7
|
| 98 |
+
wcwidth==0.2.13
|
| 99 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
| 100 |
+
pytorch-lightning==2.5.1.post0
|
| 101 |
+
Jinja2==3.1.6
|
| 102 |
+
urllib3==2.4.0
|
| 103 |
+
watchdog==6.0.0
|
| 104 |
+
filelock==3.18.0
|
| 105 |
+
propcache==0.3.1
|
| 106 |
+
torch==2.6.0
|
| 107 |
+
nvidia-cusparse-cu12==12.3.1.170
|
| 108 |
+
cymem==2.0.11
|
| 109 |
+
nvidia-cusolver-cu12==11.6.1.9
|
| 110 |
+
murmurhash==1.0.13
|
| 111 |
+
catalogue==2.0.10
|
| 112 |
+
yarl==1.20.0
|
| 113 |
+
charset-normalizer==3.4.2
|
| 114 |
+
gitdb==4.0.12
|
| 115 |
+
matplotlib==3.10.3
|
| 116 |
+
portalocker==3.1.1
|
| 117 |
+
platformdirs==4.3.8
|
| 118 |
+
async-timeout==5.0.1
|
| 119 |
+
parso==0.8.4
|
| 120 |
+
markdown-it-py==3.0.0
|
| 121 |
+
omegaconf==2.3.0
|
| 122 |
+
cloudpathlib==0.21.1
|
| 123 |
+
nvidia-cusparselt-cu12==0.6.2
|
| 124 |
+
spacy-loggers==1.0.5
|
| 125 |
+
srsly==2.5.1
|
| 126 |
+
identify==2.6.12
|
| 127 |
+
rpds-py==0.25.1
|
| 128 |
+
spacy==3.8.7
|
| 129 |
+
matplotlib-inline==0.1.7
|
| 130 |
+
smart-open==7.1.0
|
| 131 |
+
pydantic==2.11.5
|
| 132 |
+
mdurl==0.1.2
|
| 133 |
+
virtualenv==20.31.2
|
| 134 |
+
pytz==2025.2
|
| 135 |
+
pycocotools==2.0.8
|
| 136 |
+
six==1.17.0
|
| 137 |
+
decorator==5.2.1
|
| 138 |
+
referencing==0.36.2
|
| 139 |
+
sentencepiece==0.2.0
|
| 140 |
+
PyYAML==6.0.2
|
| 141 |
+
pycocoevalcap==1.2
|
| 142 |
+
imageio==2.37.0
|
| 143 |
+
distlib==0.3.9
|
| 144 |
+
pyarrow==20.0.0
|
| 145 |
+
tenacity==9.1.2
|
| 146 |
+
language_data==1.3.0
|
| 147 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
| 148 |
+
blis==1.3.0
|
| 149 |
+
Pygments==2.19.1
|
| 150 |
+
tifffile==2025.5.10
|
| 151 |
+
pyparsing==3.2.3
|
| 152 |
+
cachetools==5.5.2
|
| 153 |
+
safetensors==0.5.3
|
| 154 |
+
attrs==25.3.0
|
| 155 |
+
webdataset==0.2.111
|
| 156 |
+
plotly==6.1.1
|
| 157 |
+
nvidia-cublas-cu12==12.4.5.8
|
| 158 |
+
timm==0.4.12
|
| 159 |
+
torchmetrics==1.7.1
|
| 160 |
+
nvidia-nvjitlink-cu12==12.4.127
|
| 161 |
+
stack-data==0.6.3
|
| 162 |
+
python-dateutil==2.9.0.post0
|
| 163 |
+
lazy_loader==0.4
|
| 164 |
+
traitlets==5.14.3
|
| 165 |
+
einops==0.8.1
|
| 166 |
+
salesforce-lavis==1.0.2
|
| 167 |
+
joblib==1.5.1
|
| 168 |
+
msgpack==1.1.0
|
| 169 |
+
tokenizers==0.21.1
|
| 170 |
+
sentry-sdk==2.29.1
|
| 171 |
+
oss2==2.15.0
|
| 172 |
+
setproctitle==1.3.6
|
| 173 |
+
pip==25.1.1
|
| 174 |
+
cffi==1.17.1
|
| 175 |
+
transformers==4.52.3
|
| 176 |
+
narwhals==1.41.0
|
| 177 |
+
aliyun-python-sdk-core==2.16.0
|
| 178 |
+
jsonschema==4.24.0
|
| 179 |
+
flash-attn==2.7.1.post1
|
| 180 |
+
preshed==3.0.10
|
| 181 |
+
multiprocess==0.70.16
|
| 182 |
+
cryptography==45.0.3
|
| 183 |
+
aliyun-python-sdk-kms==2.16.5
|
| 184 |
+
scikit-learn==1.6.1
|
| 185 |
+
huggingface-hub==0.32.1
|
| 186 |
+
crcmod==1.7
|
| 187 |
+
typer==0.16.0
|
| 188 |
+
web.py==0.62
|
| 189 |
+
docker-pycreds==0.4.0
|
| 190 |
+
xxhash==3.5.0
|
| 191 |
+
bigmodelvis==0.0.1
|
| 192 |
+
datasets==3.6.0
|
| 193 |
+
more-itertools==10.7.0
|
| 194 |
+
yacs==0.1.8
|
| 195 |
+
jmespath==0.10.0
|
| 196 |
+
aiohttp==3.12.2
|
| 197 |
+
opencv-python==4.11.0.86
|
| 198 |
+
pycparser==2.22
|
| 199 |
+
threadpoolctl==3.6.0
|
| 200 |
+
jaraco.functools==4.1.0
|
| 201 |
+
click==8.2.1
|
| 202 |
+
wandb==0.19.11
|
| 203 |
+
opendelta==0.3.2
|
| 204 |
+
pycryptodome==3.23.0
|
| 205 |
+
pathlib==1.0.1
|
| 206 |
+
dill==0.3.8
|
| 207 |
+
fsspec==2025.3.0
|
| 208 |
+
delta-center-client==0.0.4
|
| 209 |
+
cheroot==10.0.1
|
| 210 |
+
typing_extensions==4.12.2
|
| 211 |
+
platformdirs==4.2.2
|
| 212 |
+
jaraco.text==3.12.1
|
| 213 |
+
packaging==24.2
|
| 214 |
+
inflect==7.3.1
|
| 215 |
+
jaraco.context==5.3.0
|
| 216 |
+
wheel==0.45.1
|
| 217 |
+
typeguard==4.3.0
|
| 218 |
+
more-itertools==10.3.0
|
| 219 |
+
tomli==2.0.1
|
| 220 |
+
importlib_metadata==8.0.0
|
| 221 |
+
backports.tarfile==1.2.0
|
| 222 |
+
zipp==3.19.2
|
| 223 |
+
jaraco.collections==5.1.0
|
| 224 |
+
autocommand==2.2.2
|
| 225 |
+
jaraco.functools==4.0.1
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
|
| 3 |
+
"python": "CPython 3.10.0",
|
| 4 |
+
"startedAt": "2025-07-07T10:54:40.521235Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--devices",
|
| 7 |
+
"0,1,2,3,4,5,6,7",
|
| 8 |
+
"--mode",
|
| 9 |
+
"train",
|
| 10 |
+
"--filename",
|
| 11 |
+
"stage2.5_mol_instruction",
|
| 12 |
+
"--num_query_token",
|
| 13 |
+
"8",
|
| 14 |
+
"--save_every_n_epochs",
|
| 15 |
+
"1",
|
| 16 |
+
"--max_epochs",
|
| 17 |
+
"10",
|
| 18 |
+
"--batch_size",
|
| 19 |
+
"1",
|
| 20 |
+
"--precision",
|
| 21 |
+
"bf16-mixed",
|
| 22 |
+
"--num_workers",
|
| 23 |
+
"8",
|
| 24 |
+
"--plm_model",
|
| 25 |
+
"/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m",
|
| 26 |
+
"--bert_name",
|
| 27 |
+
"/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft",
|
| 28 |
+
"--llm_name",
|
| 29 |
+
"/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300",
|
| 30 |
+
"--llm_tune",
|
| 31 |
+
"mid_lora",
|
| 32 |
+
"--stage1_path",
|
| 33 |
+
"/nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt",
|
| 34 |
+
"--use_wandb_logger"
|
| 35 |
+
],
|
| 36 |
+
"program": "/nas/shared/kilab/wangyujia/ProtT3/stage2.py",
|
| 37 |
+
"codePath": "stage2.py",
|
| 38 |
+
"email": "gia0603yucca@gmail.com",
|
| 39 |
+
"root": "./all_checkpoints/stage2.5_mol_instruction/",
|
| 40 |
+
"host": "dsw-265304-7f6db6b4bb-g4b9r",
|
| 41 |
+
"executable": "/root/miniconda3/envs/protT3/bin/python",
|
| 42 |
+
"codePathLocal": "stage2.py",
|
| 43 |
+
"cpu_count": 64,
|
| 44 |
+
"cpu_count_logical": 64,
|
| 45 |
+
"gpu": "NVIDIA A800-SXM4-80GB",
|
| 46 |
+
"gpu_count": 8,
|
| 47 |
+
"disk": {
|
| 48 |
+
"/": {
|
| 49 |
+
"total": "1623302262784",
|
| 50 |
+
"used": "1260920832"
|
| 51 |
+
}
|
| 52 |
+
},
|
| 53 |
+
"memory": {
|
| 54 |
+
"total": "549755813888"
|
| 55 |
+
},
|
| 56 |
+
"cpu": {
|
| 57 |
+
"count": 64,
|
| 58 |
+
"countLogical": 64
|
| 59 |
+
},
|
| 60 |
+
"gpu_nvidia": [
|
| 61 |
+
{
|
| 62 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 63 |
+
"memoryTotal": "85198045184",
|
| 64 |
+
"architecture": "Ampere"
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 68 |
+
"memoryTotal": "85198045184",
|
| 69 |
+
"architecture": "Ampere"
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 73 |
+
"memoryTotal": "85198045184",
|
| 74 |
+
"architecture": "Ampere"
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 78 |
+
"memoryTotal": "85198045184",
|
| 79 |
+
"architecture": "Ampere"
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 83 |
+
"memoryTotal": "85198045184",
|
| 84 |
+
"architecture": "Ampere"
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 88 |
+
"memoryTotal": "85198045184",
|
| 89 |
+
"architecture": "Ampere"
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 93 |
+
"memoryTotal": "85198045184",
|
| 94 |
+
"architecture": "Ampere"
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"name": "NVIDIA A800-SXM4-80GB",
|
| 98 |
+
"memoryTotal": "85198045184",
|
| 99 |
+
"architecture": "Ampere"
|
| 100 |
+
}
|
| 101 |
+
],
|
| 102 |
+
"cudaVersion": "12.1"
|
| 103 |
+
}
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-07-07T18:54:40.526100632+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.11","symlink path":"all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-07-07T18:54:41.559415905+08:00","level":"INFO","msg":"created new stream","id":"p1815hm9"}
|
| 3 |
+
{"time":"2025-07-07T18:54:41.559467213+08:00","level":"INFO","msg":"stream: started","id":"p1815hm9"}
|
| 4 |
+
{"time":"2025-07-07T18:54:41.559497666+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"p1815hm9"}
|
| 5 |
+
{"time":"2025-07-07T18:54:41.559545046+08:00","level":"INFO","msg":"handler: started","stream_id":"p1815hm9"}
|
| 6 |
+
{"time":"2025-07-07T18:54:41.559594522+08:00","level":"INFO","msg":"sender: started","stream_id":"p1815hm9"}
|
| 7 |
+
{"time":"2025-07-07T18:54:42.859018211+08:00","level":"INFO","msg":"Starting system monitor"}
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/logs/debug.log
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-07-07 18:54:40,510 INFO MainThread:118453 [wandb_setup.py:_flush():70] Current SDK version is 0.19.11
|
| 2 |
+
2025-07-07 18:54:40,510 INFO MainThread:118453 [wandb_setup.py:_flush():70] Configure stats pid to 118453
|
| 3 |
+
2025-07-07 18:54:40,511 INFO MainThread:118453 [wandb_setup.py:_flush():70] Loading settings from /root/.config/wandb/settings
|
| 4 |
+
2025-07-07 18:54:40,511 INFO MainThread:118453 [wandb_setup.py:_flush():70] Loading settings from /nas/shared/kilab/wangyujia/ProtT3/wandb/settings
|
| 5 |
+
2025-07-07 18:54:40,511 INFO MainThread:118453 [wandb_setup.py:_flush():70] Loading settings from environment variables
|
| 6 |
+
2025-07-07 18:54:40,511 INFO MainThread:118453 [wandb_init.py:setup_run_log_directory():724] Logging user logs to ./all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/logs/debug.log
|
| 7 |
+
2025-07-07 18:54:40,511 INFO MainThread:118453 [wandb_init.py:setup_run_log_directory():725] Logging internal logs to ./all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/logs/debug-internal.log
|
| 8 |
+
2025-07-07 18:54:40,511 INFO MainThread:118453 [wandb_init.py:init():852] calling init triggers
|
| 9 |
+
2025-07-07 18:54:40,511 INFO MainThread:118453 [wandb_init.py:init():857] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'_wandb': {}}
|
| 11 |
+
2025-07-07 18:54:40,511 INFO MainThread:118453 [wandb_init.py:init():893] starting backend
|
| 12 |
+
2025-07-07 18:54:40,511 INFO MainThread:118453 [wandb_init.py:init():897] sending inform_init request
|
| 13 |
+
2025-07-07 18:54:40,512 INFO MainThread:118453 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 14 |
+
2025-07-07 18:54:40,521 INFO MainThread:118453 [wandb_init.py:init():907] backend started and connected
|
| 15 |
+
2025-07-07 18:54:40,522 INFO MainThread:118453 [wandb_init.py:init():1005] updated telemetry
|
| 16 |
+
2025-07-07 18:54:40,523 INFO MainThread:118453 [wandb_init.py:init():1029] communicating run to backend with 90.0 second timeout
|
| 17 |
+
2025-07-07 18:54:42,809 INFO MainThread:118453 [wandb_init.py:init():1104] starting run threads in backend
|
| 18 |
+
2025-07-07 18:54:42,991 INFO MainThread:118453 [wandb_run.py:_console_start():2573] atexit reg
|
| 19 |
+
2025-07-07 18:54:42,991 INFO MainThread:118453 [wandb_run.py:_redirect():2421] redirect: wrap_raw
|
| 20 |
+
2025-07-07 18:54:42,995 INFO MainThread:118453 [wandb_run.py:_redirect():2490] Wrapping output streams.
|
| 21 |
+
2025-07-07 18:54:42,999 INFO MainThread:118453 [wandb_run.py:_redirect():2513] Redirects installed.
|
| 22 |
+
2025-07-07 18:54:43,000 INFO MainThread:118453 [wandb_init.py:init():1150] run started, returning control to user process
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_185440-p1815hm9/run-p1815hm9.wandb
ADDED
|
File without changes
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_190145-vu5mgolt/files/output.log
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/root/miniconda3/envs/protT3/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2.5_mol_instruction exists and is not empty.
|
| 2 |
+
Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.
|
| 3 |
+
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
|
| 4 |
+
[rank: 5] Child process with PID 122036 terminated with code 1. Forcefully terminating all other processes to avoid zombies 🧟
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_190145-vu5mgolt/files/requirements.txt
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pydantic_core==2.33.2
|
| 2 |
+
psutil==7.0.0
|
| 3 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
| 4 |
+
mpmath==1.3.0
|
| 5 |
+
tzdata==2025.2
|
| 6 |
+
contexttimer==0.3.3
|
| 7 |
+
cycler==0.12.1
|
| 8 |
+
python-magic==0.4.27
|
| 9 |
+
pexpect==4.9.0
|
| 10 |
+
sympy==1.13.1
|
| 11 |
+
wrapt==1.17.2
|
| 12 |
+
marisa-trie==1.2.1
|
| 13 |
+
langcodes==3.5.0
|
| 14 |
+
nvidia-nvtx-cu12==12.4.127
|
| 15 |
+
ipython==8.36.0
|
| 16 |
+
opencv-python-headless==4.5.5.64
|
| 17 |
+
MarkupSafe==3.0.2
|
| 18 |
+
jsonschema-specifications==2025.4.1
|
| 19 |
+
wasabi==1.1.3
|
| 20 |
+
blinker==1.9.0
|
| 21 |
+
cfgv==3.4.0
|
| 22 |
+
numpy==2.2.6
|
| 23 |
+
idna==3.10
|
| 24 |
+
nvidia-cufile-cu12==1.11.1.6
|
| 25 |
+
ninja==1.11.1.4
|
| 26 |
+
nvidia-nccl-cu12==2.21.5
|
| 27 |
+
networkx==3.4.2
|
| 28 |
+
certifi==2025.4.26
|
| 29 |
+
deepspeed==0.16.10+b666844f
|
| 30 |
+
pure_eval==0.2.3
|
| 31 |
+
packaging==24.2
|
| 32 |
+
nltk==3.9.1
|
| 33 |
+
contourpy==1.3.2
|
| 34 |
+
pre_commit==4.2.0
|
| 35 |
+
nodeenv==1.9.1
|
| 36 |
+
setuptools==78.1.1
|
| 37 |
+
annotated-types==0.7.0
|
| 38 |
+
multidict==6.4.4
|
| 39 |
+
requests==2.32.3
|
| 40 |
+
tornado==6.5.1
|
| 41 |
+
triton==3.2.0
|
| 42 |
+
pillow==11.2.1
|
| 43 |
+
decord==0.6.0
|
| 44 |
+
shellingham==1.5.4
|
| 45 |
+
streamlit==1.45.1
|
| 46 |
+
pydeck==0.9.1
|
| 47 |
+
confection==0.1.5
|
| 48 |
+
exceptiongroup==1.3.0
|
| 49 |
+
prompt_toolkit==3.0.51
|
| 50 |
+
text-unidecode==1.3
|
| 51 |
+
nvidia-cufft-cu12==11.2.1.3
|
| 52 |
+
antlr4-python3-runtime==4.9.3
|
| 53 |
+
fairscale==0.4.4
|
| 54 |
+
rouge_score==0.1.2
|
| 55 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 56 |
+
tqdm==4.67.1
|
| 57 |
+
rich==14.0.0
|
| 58 |
+
frozenlist==1.6.0
|
| 59 |
+
webencodings==0.5.1
|
| 60 |
+
altair==5.5.0
|
| 61 |
+
opendatasets==0.1.22
|
| 62 |
+
nvidia-curand-cu12==10.3.5.147
|
| 63 |
+
protobuf==6.31.0
|
| 64 |
+
asttokens==3.0.0
|
| 65 |
+
wheel==0.45.1
|
| 66 |
+
hf-xet==1.1.2
|
| 67 |
+
weasel==0.4.1
|
| 68 |
+
aiosignal==1.3.2
|
| 69 |
+
absl-py==2.2.2
|
| 70 |
+
thinc==8.3.6
|
| 71 |
+
torchvision==0.21.0
|
| 72 |
+
pandas==2.2.3
|
| 73 |
+
fonttools==4.58.0
|
| 74 |
+
bleach==6.2.0
|
| 75 |
+
typing-inspection==0.4.1
|
| 76 |
+
ftfy==6.3.1
|
| 77 |
+
typing_extensions==4.13.2
|
| 78 |
+
nvidia-ml-py==12.575.51
|
| 79 |
+
python-slugify==8.0.4
|
| 80 |
+
lightning-utilities==0.14.3
|
| 81 |
+
py-cpuinfo==9.0.0
|
| 82 |
+
smmap==5.0.2
|
| 83 |
+
regex==2024.11.6
|
| 84 |
+
scikit-image==0.25.2
|
| 85 |
+
iopath==0.1.10
|
| 86 |
+
spacy-legacy==3.0.12
|
| 87 |
+
hjson==3.1.0
|
| 88 |
+
executing==2.2.0
|
| 89 |
+
kiwisolver==1.4.8
|
| 90 |
+
scipy==1.15.3
|
| 91 |
+
aiohappyeyeballs==2.6.1
|
| 92 |
+
toml==0.10.2
|
| 93 |
+
jedi==0.19.2
|
| 94 |
+
GitPython==3.1.44
|
| 95 |
+
ptyprocess==0.7.0
|
| 96 |
+
kaggle==1.7.4.5
|
| 97 |
+
braceexpand==0.1.7
|
| 98 |
+
wcwidth==0.2.13
|
| 99 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
| 100 |
+
pytorch-lightning==2.5.1.post0
|
| 101 |
+
Jinja2==3.1.6
|
| 102 |
+
urllib3==2.4.0
|
| 103 |
+
watchdog==6.0.0
|
| 104 |
+
filelock==3.18.0
|
| 105 |
+
propcache==0.3.1
|
| 106 |
+
torch==2.6.0
|
| 107 |
+
nvidia-cusparse-cu12==12.3.1.170
|
| 108 |
+
cymem==2.0.11
|
| 109 |
+
nvidia-cusolver-cu12==11.6.1.9
|
| 110 |
+
murmurhash==1.0.13
|
| 111 |
+
catalogue==2.0.10
|
| 112 |
+
yarl==1.20.0
|
| 113 |
+
charset-normalizer==3.4.2
|
| 114 |
+
gitdb==4.0.12
|
| 115 |
+
matplotlib==3.10.3
|
| 116 |
+
portalocker==3.1.1
|
| 117 |
+
platformdirs==4.3.8
|
| 118 |
+
async-timeout==5.0.1
|
| 119 |
+
parso==0.8.4
|
| 120 |
+
markdown-it-py==3.0.0
|
| 121 |
+
omegaconf==2.3.0
|
| 122 |
+
cloudpathlib==0.21.1
|
| 123 |
+
nvidia-cusparselt-cu12==0.6.2
|
| 124 |
+
spacy-loggers==1.0.5
|
| 125 |
+
srsly==2.5.1
|
| 126 |
+
identify==2.6.12
|
| 127 |
+
rpds-py==0.25.1
|
| 128 |
+
spacy==3.8.7
|
| 129 |
+
matplotlib-inline==0.1.7
|
| 130 |
+
smart-open==7.1.0
|
| 131 |
+
pydantic==2.11.5
|
| 132 |
+
mdurl==0.1.2
|
| 133 |
+
virtualenv==20.31.2
|
| 134 |
+
pytz==2025.2
|
| 135 |
+
pycocotools==2.0.8
|
| 136 |
+
six==1.17.0
|
| 137 |
+
decorator==5.2.1
|
| 138 |
+
referencing==0.36.2
|
| 139 |
+
sentencepiece==0.2.0
|
| 140 |
+
PyYAML==6.0.2
|
| 141 |
+
pycocoevalcap==1.2
|
| 142 |
+
imageio==2.37.0
|
| 143 |
+
distlib==0.3.9
|
| 144 |
+
pyarrow==20.0.0
|
| 145 |
+
tenacity==9.1.2
|
| 146 |
+
language_data==1.3.0
|
| 147 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
| 148 |
+
blis==1.3.0
|
| 149 |
+
Pygments==2.19.1
|
| 150 |
+
tifffile==2025.5.10
|
| 151 |
+
pyparsing==3.2.3
|
| 152 |
+
cachetools==5.5.2
|
| 153 |
+
safetensors==0.5.3
|
| 154 |
+
attrs==25.3.0
|
| 155 |
+
webdataset==0.2.111
|
| 156 |
+
plotly==6.1.1
|
| 157 |
+
nvidia-cublas-cu12==12.4.5.8
|
| 158 |
+
timm==0.4.12
|
| 159 |
+
torchmetrics==1.7.1
|
| 160 |
+
nvidia-nvjitlink-cu12==12.4.127
|
| 161 |
+
stack-data==0.6.3
|
| 162 |
+
python-dateutil==2.9.0.post0
|
| 163 |
+
lazy_loader==0.4
|
| 164 |
+
traitlets==5.14.3
|
| 165 |
+
einops==0.8.1
|
| 166 |
+
salesforce-lavis==1.0.2
|
| 167 |
+
joblib==1.5.1
|
| 168 |
+
msgpack==1.1.0
|
| 169 |
+
tokenizers==0.21.1
|
| 170 |
+
sentry-sdk==2.29.1
|
| 171 |
+
oss2==2.15.0
|
| 172 |
+
setproctitle==1.3.6
|
| 173 |
+
pip==25.1.1
|
| 174 |
+
cffi==1.17.1
|
| 175 |
+
transformers==4.52.3
|
| 176 |
+
narwhals==1.41.0
|
| 177 |
+
aliyun-python-sdk-core==2.16.0
|
| 178 |
+
jsonschema==4.24.0
|
| 179 |
+
flash-attn==2.7.1.post1
|
| 180 |
+
preshed==3.0.10
|
| 181 |
+
multiprocess==0.70.16
|
| 182 |
+
cryptography==45.0.3
|
| 183 |
+
aliyun-python-sdk-kms==2.16.5
|
| 184 |
+
scikit-learn==1.6.1
|
| 185 |
+
huggingface-hub==0.32.1
|
| 186 |
+
crcmod==1.7
|
| 187 |
+
typer==0.16.0
|
| 188 |
+
web.py==0.62
|
| 189 |
+
docker-pycreds==0.4.0
|
| 190 |
+
xxhash==3.5.0
|
| 191 |
+
bigmodelvis==0.0.1
|
| 192 |
+
datasets==3.6.0
|
| 193 |
+
more-itertools==10.7.0
|
| 194 |
+
yacs==0.1.8
|
| 195 |
+
jmespath==0.10.0
|
| 196 |
+
aiohttp==3.12.2
|
| 197 |
+
opencv-python==4.11.0.86
|
| 198 |
+
pycparser==2.22
|
| 199 |
+
threadpoolctl==3.6.0
|
| 200 |
+
jaraco.functools==4.1.0
|
| 201 |
+
click==8.2.1
|
| 202 |
+
wandb==0.19.11
|
| 203 |
+
opendelta==0.3.2
|
| 204 |
+
pycryptodome==3.23.0
|
| 205 |
+
pathlib==1.0.1
|
| 206 |
+
dill==0.3.8
|
| 207 |
+
fsspec==2025.3.0
|
| 208 |
+
delta-center-client==0.0.4
|
| 209 |
+
cheroot==10.0.1
|
| 210 |
+
typing_extensions==4.12.2
|
| 211 |
+
platformdirs==4.2.2
|
| 212 |
+
jaraco.text==3.12.1
|
| 213 |
+
packaging==24.2
|
| 214 |
+
inflect==7.3.1
|
| 215 |
+
jaraco.context==5.3.0
|
| 216 |
+
wheel==0.45.1
|
| 217 |
+
typeguard==4.3.0
|
| 218 |
+
more-itertools==10.3.0
|
| 219 |
+
tomli==2.0.1
|
| 220 |
+
importlib_metadata==8.0.0
|
| 221 |
+
backports.tarfile==1.2.0
|
| 222 |
+
zipp==3.19.2
|
| 223 |
+
jaraco.collections==5.1.0
|
| 224 |
+
autocommand==2.2.2
|
| 225 |
+
jaraco.functools==4.0.1
|
ProtT3/all_checkpoints/stage2.5_mol_instruction/wandb/run-20250707_190145-vu5mgolt/run-vu5mgolt.wandb
ADDED
|
File without changes
|