arashakb committed
Commit 984216a · verified · 1 Parent(s): 0666eb1

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
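The commit message below is the default one emitted by huggingface_hub's upload_folder, so the commit was most likely produced by something like the following sketch; the local folder path and repo_id are assumptions, not recorded in the commit itself.

from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters",  # assumed local path
    path_in_repo="prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters",  # matches the paths in this diff
    repo_id="arashakb/<repo-name>",  # hypothetical target repo
    commit_message="Upload folder using huggingface_hub",
)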
Files changed (50)
  1. .gitattributes +10 -0
  2. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/checkpoints/latest-checkpoint.pt +3 -0
  3. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/checkpoints/step-055444-epoch-01-loss=0.6558.pt +3 -0
  4. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/config.json +59 -0
  5. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/config.yaml +52 -0
  6. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7.jsonl +3 -0
  7. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/run-metrics.jsonl +1 -0
  8. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/debug-internal.log +15 -0
  9. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/debug.log +23 -0
  10. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/files/config.yaml +100 -0
  11. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/files/output.log +2 -0
  12. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/files/requirements.txt +78 -0
  13. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/files/wandb-metadata.json +94 -0
  14. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/files/wandb-summary.json +1 -0
  15. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/logs/debug-core.log +10 -0
  16. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/logs/debug-internal.log +15 -0
  17. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/logs/debug.log +23 -0
  18. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/run-6hhs8vrz.wandb +3 -0
  19. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/files/config.yaml +100 -0
  20. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/files/output.log +2 -0
  21. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/files/requirements.txt +94 -0
  22. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/files/wandb-metadata.json +94 -0
  23. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/files/wandb-summary.json +1 -0
  24. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/logs/debug-core.log +10 -0
  25. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/logs/debug-internal.log +15 -0
  26. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/logs/debug.log +23 -0
  27. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/run-e4gyde5j.wandb +0 -0
  28. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/files/config.yaml +99 -0
  29. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/files/output.log +45 -0
  30. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/files/requirements.txt +94 -0
  31. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/files/wandb-metadata.json +94 -0
  32. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/files/wandb-summary.json +1 -0
  33. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/logs/debug-core.log +14 -0
  34. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/logs/debug-internal.log +15 -0
  35. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/logs/debug.log +23 -0
  36. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/run-cpx6iuc8.wandb +0 -0
  37. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/files/config.yaml +99 -0
  38. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/files/output.log +15 -0
  39. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/files/requirements.txt +94 -0
  40. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/files/wandb-metadata.json +94 -0
  41. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/files/wandb-summary.json +1 -0
  42. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/logs/debug-core.log +13 -0
  43. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/logs/debug-internal.log +15 -0
  44. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/logs/debug.log +23 -0
  45. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/run-722cxxmu.wandb +0 -0
  46. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100320-p7tc08z7/files/config.yaml +99 -0
  47. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100320-p7tc08z7/files/output.log +15 -0
  48. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100320-p7tc08z7/files/requirements.txt +94 -0
  49. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100320-p7tc08z7/files/wandb-metadata.json +94 -0
  50. prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100320-p7tc08z7/files/wandb-summary.json +1 -0
.gitattributes CHANGED
@@ -33,3 +33,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7.jsonl filter=lfs diff=lfs merge=lfs -text
+ prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/latest-run/run-6hhs8vrz.wandb filter=lfs diff=lfs merge=lfs -text
+ prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250422_100515-lvbcavkp/run-lvbcavkp.wandb filter=lfs diff=lfs merge=lfs -text
+ prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250422_111805-th1rdka5/run-th1rdka5.wandb filter=lfs diff=lfs merge=lfs -text
+ prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250502_114824-6hhs8vrz/run-6hhs8vrz.wandb filter=lfs diff=lfs merge=lfs -text
+ prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7.jsonl filter=lfs diff=lfs merge=lfs -text
+ prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/run-6hhs8vrz.wandb filter=lfs diff=lfs merge=lfs -text
+ prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100515-lvbcavkp/run-lvbcavkp.wandb filter=lfs diff=lfs merge=lfs -text
+ prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_111805-th1rdka5/run-th1rdka5.wandb filter=lfs diff=lfs merge=lfs -text
+ prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250502_114824-6hhs8vrz/run-6hhs8vrz.wandb filter=lfs diff=lfs merge=lfs -text
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/checkpoints/latest-checkpoint.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:521937aca5962573d06f63e6fd4cfff59566869578a4eb8dade3e5c7cfcb19ae
+ size 7247200761
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/checkpoints/step-055444-epoch-01-loss=0.6558.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:521937aca5962573d06f63e6fd4cfff59566869578a4eb8dade3e5c7cfcb19ae
+ size 7247200761
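Both checkpoint pointers above carry the same oid and size (7,247,200,761 bytes, about 6.75 GiB), so latest-checkpoint.pt appears to reference the same single LFS object as the epoch-1 checkpoint. A minimal sketch for fetching it with huggingface_hub; repo_id is a hypothetical assumption.

from huggingface_hub import hf_hub_download

ckpt_path = hf_hub_download(
    repo_id="arashakb/<repo-name>",  # hypothetical
    filename="prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/checkpoints/latest-checkpoint.pt",
)
print(ckpt_path)  # local cache path of the ~6.75 GiB checkpoint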
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/config.json ADDED
@@ -0,0 +1,59 @@
+ {
+   "dataset": {
+     "align_stage_components": [
+       "download/llava-laion-cc-sbu-558k/chat.json",
+       "download/llava-laion-cc-sbu-558k"
+     ],
+     "dataset_id": "llava-v15",
+     "dataset_root_dir": "data",
+     "finetune_stage_components": [
+       "download/llava-v1.5-instruct/llava_v1_5_mix665k.json",
+       "download/llava-v1.5-instruct"
+     ],
+     "type": "llava-v15"
+   },
+   "hf_token": "hf_token.txt",
+   "model": {
+     "align_epochs": 1,
+     "align_global_batch_size": 8,
+     "align_learning_rate": 0.001,
+     "align_lr_scheduler_type": "linear-warmup+cosine-decay",
+     "align_max_grad_norm": 1.0,
+     "align_max_steps": null,
+     "align_per_device_batch_size": 8,
+     "align_train_strategy": "fsdp-shard-grad-op",
+     "align_warmup_ratio": 0.03,
+     "align_weight_decay": 0.0,
+     "arch_specifier": "no-align+fused-gelu-mlp",
+     "enable_gradient_checkpointing": true,
+     "enable_mixed_precision_training": true,
+     "finetune_epochs": 2,
+     "finetune_global_batch_size": 24,
+     "finetune_learning_rate": 2e-05,
+     "finetune_lr_scheduler_type": "linear-warmup+cosine-decay",
+     "finetune_max_grad_norm": 1.0,
+     "finetune_max_steps": null,
+     "finetune_per_device_batch_size": 6,
+     "finetune_train_strategy": "fsdp-full-shard",
+     "finetune_warmup_ratio": 0.03,
+     "finetune_weight_decay": 0.1,
+     "image_resize_strategy": "resize-naive",
+     "llm_backbone_id": "qwen25-1_5b-extra",
+     "llm_max_length": 32768,
+     "model_id": "prism-qwen25-extra-dinosiglip-224px+1_5b",
+     "reduce_in_full_precision": false,
+     "type": "prism-qwen25-extra-dinosiglip-224px+1_5b",
+     "vision_backbone_id": "dinosiglip-vit-so-224px"
+   },
+   "pretrained_checkpoint": null,
+   "run_id": "prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7",
+   "run_root_dir": "runs",
+   "seed": 7,
+   "stage": "finetune",
+   "trackers": [
+     "jsonl",
+     "wandb"
+   ],
+   "wandb_entity": "arash-akbari-stu-northeastern-university",
+   "wandb_project": "Moxin-VLM"
+ }
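A minimal sketch of how the batch-size fields in this config compose, assuming the trainer derives gradient accumulation as global batch over (per-device batch x device count); world_size is an assumption, since the config does not record how many GPUs the run actually used.

import json

# Load the config above and derive the implied gradient-accumulation factor.
model = json.load(open("config.json"))["model"]
world_size = 4  # hypothetical: 24 = 6 per device * 4 devices * 1 accumulation step
grad_accum_steps = model["finetune_global_batch_size"] // (
    model["finetune_per_device_batch_size"] * world_size
)
print(grad_accum_steps)  # -> 1 under these assumptions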
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/config.yaml ADDED
@@ -0,0 +1,52 @@
+ dataset:
+   align_stage_components:
+   - download/llava-laion-cc-sbu-558k/chat.json
+   - download/llava-laion-cc-sbu-558k
+   dataset_id: llava-v15
+   dataset_root_dir: data
+   finetune_stage_components:
+   - download/llava-v1.5-instruct/llava_v1_5_mix665k.json
+   - download/llava-v1.5-instruct
+   type: llava-v15
+ hf_token: hf_token.txt
+ model:
+   align_epochs: 1
+   align_global_batch_size: 8
+   align_learning_rate: 0.001
+   align_lr_scheduler_type: linear-warmup+cosine-decay
+   align_max_grad_norm: 1.0
+   align_max_steps: null
+   align_per_device_batch_size: 8
+   align_train_strategy: fsdp-shard-grad-op
+   align_warmup_ratio: 0.03
+   align_weight_decay: 0.0
+   arch_specifier: no-align+fused-gelu-mlp
+   enable_gradient_checkpointing: true
+   enable_mixed_precision_training: true
+   finetune_epochs: 2
+   finetune_global_batch_size: 24
+   finetune_learning_rate: 2.0e-05
+   finetune_lr_scheduler_type: linear-warmup+cosine-decay
+   finetune_max_grad_norm: 1.0
+   finetune_max_steps: null
+   finetune_per_device_batch_size: 6
+   finetune_train_strategy: fsdp-full-shard
+   finetune_warmup_ratio: 0.03
+   finetune_weight_decay: 0.1
+   image_resize_strategy: resize-naive
+   llm_backbone_id: qwen25-1_5b-extra
+   llm_max_length: 32768
+   model_id: prism-qwen25-extra-dinosiglip-224px+1_5b
+   reduce_in_full_precision: false
+   type: prism-qwen25-extra-dinosiglip-224px+1_5b
+   vision_backbone_id: dinosiglip-vit-so-224px
+ pretrained_checkpoint: null
+ run_id: prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7
+ run_root_dir: runs
+ seed: 7
+ stage: finetune
+ trackers:
+ - jsonl
+ - wandb
+ wandb_entity: arash-akbari-stu-northeastern-university
+ wandb_project: Moxin-VLM
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0960f58bd0a805c19ffbfe2ec514c34218a1bdf2636b4444f0aa326ec2e87333
+ size 11339854
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/run-metrics.jsonl ADDED
@@ -0,0 +1 @@
+ {"hparams": {"dataset": {"align_stage_components": ["download/llava-laion-cc-sbu-558k/chat.json", "download/llava-laion-cc-sbu-558k"], "dataset_id": "llava-v15", "dataset_root_dir": "data", "finetune_stage_components": ["download/llava-v1.5-instruct/llava_v1_5_mix665k.json", "download/llava-v1.5-instruct"], "type": "llava-v15"}, "hf_token": "hf_token.txt", "model": {"align_epochs": 1, "align_global_batch_size": 8, "align_learning_rate": 0.001, "align_lr_scheduler_type": "linear-warmup+cosine-decay", "align_max_grad_norm": 1.0, "align_max_steps": null, "align_per_device_batch_size": 8, "align_train_strategy": "fsdp-shard-grad-op", "align_warmup_ratio": 0.03, "align_weight_decay": 0.0, "arch_specifier": "no-align+fused-gelu-mlp", "enable_gradient_checkpointing": true, "enable_mixed_precision_training": true, "finetune_epochs": 2, "finetune_global_batch_size": 24, "finetune_learning_rate": 2e-05, "finetune_lr_scheduler_type": "linear-warmup+cosine-decay", "finetune_max_grad_norm": 1.0, "finetune_max_steps": null, "finetune_per_device_batch_size": 6, "finetune_train_strategy": "fsdp-full-shard", "finetune_warmup_ratio": 0.03, "finetune_weight_decay": 0.1, "image_resize_strategy": "resize-naive", "llm_backbone_id": "qwen25-1_5b-extra", "llm_max_length": 32768, "model_id": "prism-qwen25-extra-dinosiglip-224px+1_5b", "reduce_in_full_precision": false, "type": "prism-qwen25-extra-dinosiglip-224px+1_5b", "vision_backbone_id": "dinosiglip-vit-so-224px"}, "pretrained_checkpoint": null, "run_id": "prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7", "run_root_dir": "runs", "seed": 7, "stage": "finetune", "trackers": ["jsonl", "wandb"], "wandb_entity": "arash-akbari-stu-northeastern-university", "wandb_project": "Moxin-VLM"}, "run_id": "prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7"}
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/debug-internal.log ADDED
@@ -0,0 +1,15 @@
+ {"time":"2025-05-02T11:48:24.512248306-07:00","level":"INFO","msg":"stream: starting","core version":"0.19.9","symlink path":"runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250502_114824-6hhs8vrz/logs/debug-core.log"}
+ {"time":"2025-05-02T11:48:25.628691797-07:00","level":"INFO","msg":"created new stream","id":"6hhs8vrz"}
+ {"time":"2025-05-02T11:48:25.62873432-07:00","level":"INFO","msg":"stream: started","id":"6hhs8vrz"}
+ {"time":"2025-05-02T11:48:25.62879471-07:00","level":"INFO","msg":"handler: started","stream_id":"6hhs8vrz"}
+ {"time":"2025-05-02T11:48:25.628819016-07:00","level":"INFO","msg":"writer: Do: started","stream_id":"6hhs8vrz"}
+ {"time":"2025-05-02T11:48:25.628835531-07:00","level":"INFO","msg":"sender: started","stream_id":"6hhs8vrz"}
+ {"time":"2025-05-02T11:48:25.959126298-07:00","level":"INFO","msg":"Starting system monitor"}
+ {"time":"2025-05-02T11:50:45.633111697-07:00","level":"INFO","msg":"stream: closing","id":"6hhs8vrz"}
+ {"time":"2025-05-02T11:50:45.633155452-07:00","level":"INFO","msg":"Stopping system monitor"}
+ {"time":"2025-05-02T11:50:45.633187911-07:00","level":"INFO","msg":"Stopped system monitor"}
+ {"time":"2025-05-02T11:50:46.081032653-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+ {"time":"2025-05-02T11:50:46.287259059-07:00","level":"INFO","msg":"handler: closed","stream_id":"6hhs8vrz"}
+ {"time":"2025-05-02T11:50:46.287318307-07:00","level":"INFO","msg":"writer: Close: closed","stream_id":"6hhs8vrz"}
+ {"time":"2025-05-02T11:50:46.287337876-07:00","level":"INFO","msg":"sender: closed","stream_id":"6hhs8vrz"}
+ {"time":"2025-05-02T11:50:46.287531815-07:00","level":"INFO","msg":"stream: closed","id":"6hhs8vrz"}
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/debug.log ADDED
@@ -0,0 +1,23 @@
+ 2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_setup.py:_flush():67] Current SDK version is 0.19.9
+ 2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_setup.py:_flush():67] Configure stats pid to 1932762
+ 2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_setup.py:_flush():67] Loading settings from /home/user1/.config/wandb/settings
+ 2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_setup.py:_flush():67] Loading settings from /home/user1/arashwork/prismatic-vlms/wandb/settings
+ 2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_setup.py:_flush():67] Loading settings from environment variables
+ 2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:setup_run_log_directory():662] Logging user logs to runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250502_114824-6hhs8vrz/logs/debug.log
+ 2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:setup_run_log_directory():663] Logging internal logs to runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250502_114824-6hhs8vrz/logs/debug-internal.log
+ 2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:init():781] calling init triggers
+ 2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:init():786] wandb.init called with sweep_config: {}
+ config: {'model': {'type': 'prism-qwen25-extra-dinosiglip-224px+1_5b', 'model_id': 'prism-qwen25-extra-dinosiglip-224px+1_5b', 'arch_specifier': 'no-align+fused-gelu-mlp', 'vision_backbone_id': 'dinosiglip-vit-so-224px', 'llm_backbone_id': 'qwen25-1_5b-extra', 'image_resize_strategy': 'resize-naive', 'llm_max_length': 32768, 'align_epochs': 1, 'align_max_steps': None, 'align_global_batch_size': 8, 'align_per_device_batch_size': 8, 'align_learning_rate': 0.001, 'align_weight_decay': 0.0, 'align_max_grad_norm': 1.0, 'align_lr_scheduler_type': 'linear-warmup+cosine-decay', 'align_warmup_ratio': 0.03, 'align_train_strategy': 'fsdp-shard-grad-op', 'finetune_epochs': 2, 'finetune_max_steps': None, 'finetune_global_batch_size': 24, 'finetune_per_device_batch_size': 6, 'finetune_learning_rate': 2e-05, 'finetune_weight_decay': 0.1, 'finetune_max_grad_norm': 1.0, 'finetune_lr_scheduler_type': 'linear-warmup+cosine-decay', 'finetune_warmup_ratio': 0.03, 'finetune_train_strategy': 'fsdp-full-shard', 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': False}, 'dataset': {'type': 'llava-v15', 'dataset_id': 'llava-v15', 'align_stage_components': ['download/llava-laion-cc-sbu-558k/chat.json', 'download/llava-laion-cc-sbu-558k'], 'finetune_stage_components': ['download/llava-v1.5-instruct/llava_v1_5_mix665k.json', 'download/llava-v1.5-instruct'], 'dataset_root_dir': 'data'}, 'stage': 'finetune', 'pretrained_checkpoint': None, 'run_id': 'prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7', 'run_root_dir': 'runs', 'seed': 7, 'hf_token': 'hf_token.txt', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'Moxin-VLM', 'wandb_entity': 'arash-akbari-stu-northeastern-university', '_wandb': {}}
+ 2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:init():809] starting backend
+ 2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:init():813] sending inform_init request
+ 2025-05-02 11:48:24,510 INFO MainThread:1932762 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+ 2025-05-02 11:48:24,510 INFO MainThread:1932762 [wandb_init.py:init():823] backend started and connected
+ 2025-05-02 11:48:24,512 INFO MainThread:1932762 [wandb_init.py:init():915] updated telemetry
+ 2025-05-02 11:48:24,517 INFO MainThread:1932762 [wandb_init.py:init():939] communicating run to backend with 90.0 second timeout
+ 2025-05-02 11:48:25,957 INFO MainThread:1932762 [wandb_init.py:init():1014] starting run threads in backend
+ 2025-05-02 11:48:26,000 INFO MainThread:1932762 [wandb_run.py:_console_start():2454] atexit reg
+ 2025-05-02 11:48:26,000 INFO MainThread:1932762 [wandb_run.py:_redirect():2306] redirect: wrap_raw
+ 2025-05-02 11:48:26,000 INFO MainThread:1932762 [wandb_run.py:_redirect():2371] Wrapping output streams.
+ 2025-05-02 11:48:26,000 INFO MainThread:1932762 [wandb_run.py:_redirect():2394] Redirects installed.
+ 2025-05-02 11:48:26,002 INFO MainThread:1932762 [wandb_init.py:init():1056] run started, returning control to user process
+ 2025-05-02 11:50:45,632 INFO MsgRouterThr:1932762 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/files/config.yaml ADDED
@@ -0,0 +1,100 @@
+ _wandb:
+   value:
+     cli_version: 0.19.9
+     m: []
+     python_version: 3.10.16
+     t:
+       "1":
+       - 1
+       - 11
+       - 41
+       - 49
+       - 55
+       - 63
+       - 71
+       - 98
+       "2":
+       - 1
+       - 11
+       - 41
+       - 49
+       - 55
+       - 63
+       - 71
+       - 98
+       "3":
+       - 13
+       - 16
+       - 23
+       - 55
+       - 61
+       "4": 3.10.16
+       "5": 0.19.9
+       "6": 4.38.1
+       "8":
+       - 5
+       "12": 0.19.9
+       "13": linux-x86_64
+ dataset:
+   value:
+     align_stage_components:
+     - download/llava-laion-cc-sbu-558k/chat.json
+     - download/llava-laion-cc-sbu-558k
+     dataset_id: llava-v15
+     dataset_root_dir: data
+     finetune_stage_components:
+     - download/llava-v1.5-instruct/llava_v1_5_mix665k.json
+     - download/llava-v1.5-instruct
+     type: llava-v15
+ hf_token:
+   value: hf_token.txt
+ model:
+   value:
+     align_epochs: 1
+     align_global_batch_size: 8
+     align_learning_rate: 0.001
+     align_lr_scheduler_type: linear-warmup+cosine-decay
+     align_max_grad_norm: 1
+     align_max_steps: null
+     align_per_device_batch_size: 8
+     align_train_strategy: fsdp-shard-grad-op
+     align_warmup_ratio: 0.03
+     align_weight_decay: 0
+     arch_specifier: no-align+fused-gelu-mlp
+     enable_gradient_checkpointing: true
+     enable_mixed_precision_training: true
+     finetune_epochs: 2
+     finetune_global_batch_size: 24
+     finetune_learning_rate: 2e-05
+     finetune_lr_scheduler_type: linear-warmup+cosine-decay
+     finetune_max_grad_norm: 1
+     finetune_max_steps: null
+     finetune_per_device_batch_size: 6
+     finetune_train_strategy: fsdp-full-shard
+     finetune_warmup_ratio: 0.03
+     finetune_weight_decay: 0.1
+     image_resize_strategy: resize-naive
+     llm_backbone_id: qwen25-1_5b-extra
+     llm_max_length: 32768
+     model_id: prism-qwen25-extra-dinosiglip-224px+1_5b
+     reduce_in_full_precision: false
+     type: prism-qwen25-extra-dinosiglip-224px+1_5b
+     vision_backbone_id: dinosiglip-vit-so-224px
+ pretrained_checkpoint:
+   value: null
+ run_id:
+   value: prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7
+ run_root_dir:
+   value: runs
+ seed:
+   value: 7
+ stage:
+   value: finetune
+ trackers:
+   value:
+   - jsonl
+   - wandb
+ wandb_entity:
+   value: arash-akbari-stu-northeastern-university
+ wandb_project:
+   value: Moxin-VLM
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/files/output.log ADDED
@@ -0,0 +1,2 @@
+ 05/02 [11:48:26] INFO | >> [*] Starting Training Loop pretrain.py:238
+ =>> [Global Step] 000112 =>> LR :: 0.000001 -- Loss :: 2.4465: 0%| | 112/55442 [02:15<16:32:24, 1.08s/it]
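The 55,442-step total in the progress bar is consistent with a 665,298-example finetune set: the batch-size-2 run later in this diff reports 665,298 total steps (2 epochs x 665,298 / 2), and at a global batch size of 24 for 2 epochs the same example count gives exactly 55,442. A quick check:

import math

# Derive the optimizer-step total from the example count implied by the other run.
examples, global_batch, epochs = 665_298, 24, 2
steps_per_epoch = math.ceil(examples / global_batch)  # 27,721
print(epochs * steps_per_epoch)  # -> 55442, matching 112/55442 above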
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/files/requirements.txt ADDED
@@ -0,0 +1,78 @@
+ pyyaml-include==1.4.1
+ torchaudio==2.2.0
+ psutil==7.0.0
+ nvidia-cufft-cu12==11.0.2.54
+ nvidia-nvtx-cu12==12.1.105
+ wheel==0.45.1
+ nvidia-cuda-cupti-cu12==12.1.105
+ pillow==11.1.0
+ draccus==0.10.0
+ Jinja2==3.1.6
+ ninja==1.11.1.4
+ MarkupSafe==3.0.2
+ nvidia-cublas-cu12==12.1.3.1
+ tqdm==4.67.1
+ fsspec==2025.3.2
+ wandb==0.19.9
+ GitPython==3.1.44
+ click==8.1.8
+ timm==0.9.10
+ gitdb==4.0.12
+ nvidia-nvjitlink-cu12==12.8.93
+ nvidia-cudnn-cu12==8.9.2.26
+ mergedeep==1.3.4
+ annotated-types==0.7.0
+ protobuf==5.29.4
+ huggingface-hub==0.30.2
+ mdurl==0.1.2
+ urllib3==2.3.0
+ typing_extensions==4.13.1
+ numpy==1.26.4
+ torchvision==0.17.0
+ nvidia-cusparse-cu12==12.1.0.106
+ networkx==3.4.2
+ regex==2024.11.6
+ mypy-extensions==1.0.0
+ pip==25.0
+ sentencepiece==0.2.0
+ Pygments==2.19.1
+ smmap==5.0.2
+ sympy==1.13.3
+ setuptools==75.8.0
+ nvidia-nccl-cu12==2.19.3
+ charset-normalizer==3.4.1
+ typing-inspection==0.4.0
+ platformdirs==4.3.7
+ packaging==24.2
+ setproctitle==1.3.5
+ idna==3.10
+ markdown-it-py==3.0.0
+ safetensors==0.5.3
+ rich==14.0.0
+ requests==2.32.3
+ sentry-sdk==2.25.1
+ jsonlines==4.0.0
+ transformers==4.38.1
+ PyYAML==6.0.2
+ pydantic_core==2.33.1
+ flash-attn==2.5.5
+ mpmath==1.3.0
+ attrs==25.3.0
+ einops==0.8.1
+ nvidia-cuda-runtime-cu12==12.1.105
+ nvidia-curand-cu12==10.3.2.106
+ filelock==3.18.0
+ prismatic==0.0.2
+ certifi==2025.1.31
+ peft==0.5.0
+ typing-inspect==0.9.0
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ accelerate==0.25.0
+ pydantic==2.11.3
+ six==1.17.0
+ nvidia-cusolver-cu12==11.4.5.107
+ torch==2.2.0
+ docker-pycreds==0.4.0
+ tokenizers==0.15.2
+ toml==0.10.2
+ triton==2.2.0
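A minimal sketch for checking a fresh environment against what are likely the load-bearing pins in this snapshot (torch 2.2.0, transformers 4.38.1, flash-attn 2.5.5) before trying to resume the run; which pins matter most is a judgment call, not something the file itself states.

from importlib.metadata import PackageNotFoundError, version

# Compare installed versions against the pins recorded above.
for pkg, want in {"torch": "2.2.0", "transformers": "4.38.1", "flash-attn": "2.5.5"}.items():
    try:
        have = version(pkg)
    except PackageNotFoundError:
        have = "missing"
    print(f"{pkg}: pinned {want}, installed {have}")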
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/files/wandb-metadata.json ADDED
@@ -0,0 +1,94 @@
+ {
+   "os": "Linux-6.8.0-52-generic-x86_64-with-glibc2.35",
+   "python": "CPython 3.10.16",
+   "startedAt": "2025-05-02T18:48:24.510790Z",
+   "args": [
+     "--model.type",
+     "prism-qwen25-extra-dinosiglip-224px+1_5b",
+     "--wandb_project",
+     "Moxin-VLM",
+     "--wandb_entity",
+     "arash-akbari-stu-northeastern-university",
+     "--model.enable_mixed_precision_training",
+     "True"
+   ],
+   "program": "/home/user1/arashwork/prismatic-vlms/scripts/pretrain.py",
+   "codePath": "scripts/pretrain.py",
+   "git": {
+     "remote": "git@github.com:arashakb/prismatic-vlms.git",
+     "commit": "f94a585b2bb45d34a6947f2c80f2378f2af6ca66"
+   },
+   "email": "arash.akbari.stu@gmail.com",
+   "root": "runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7",
+   "host": "nnmc72",
+   "executable": "/home/user1/anaconda3/envs/prism/bin/python",
+   "codePathLocal": "scripts/pretrain.py",
+   "cpu_count": 48,
+   "cpu_count_logical": 96,
+   "gpu": "NVIDIA H100 NVL",
+   "gpu_count": 8,
+   "disk": {
+     "/": {
+       "total": "30476149334016",
+       "used": "19451255332864"
+     }
+   },
+   "memory": {
+     "total": "811294752768"
+   },
+   "cpu": {
+     "count": 48,
+     "countLogical": 96
+   },
+   "gpu_nvidia": [
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     }
+   ],
+   "cudaVersion": "12.4"
+ }
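The metadata above records eight H100 NVL GPUs at 100,485,038,080 bytes each; converting to GiB is a quick sanity check on the hardware, using nothing beyond the recorded numbers.

# Byte-to-GiB conversion of the memoryTotal values recorded above.
per_gpu_bytes = 100_485_038_080
print(round(per_gpu_bytes / 2**30, 1))      # ~93.6 GiB per GPU
print(round(8 * per_gpu_bytes / 2**30, 1))  # ~748.7 GiB across the node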
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
+ {"Finetune/Learning Rate":1.3469633193024655e-06,"Finetune/Step":112,"Finetune/Loss (Raw)":2.335305690765381,"Finetune/Step Time":1.2350007040160043,"_step":112,"Finetune/Loss":2.4464573860168457,"_runtime":141.122340856,"_wandb":{"runtime":141},"_timestamp":1.7462118443747003e+09}
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/logs/debug-core.log ADDED
@@ -0,0 +1,10 @@
+ {"time":"2025-05-02T11:48:23.936394432-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpsxhf49n8/port-1932762.txt","pid":1932762,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+ {"time":"2025-05-02T11:48:23.938033658-07:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":1932762}
+ {"time":"2025-05-02T11:48:23.938396597-07:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":36947,"Zone":""}}
+ {"time":"2025-05-02T11:48:24.08241367-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:39034"}
+ {"time":"2025-05-02T11:48:24.511951866-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"6hhs8vrz","id":"127.0.0.1:39034"}
+ {"time":"2025-05-02T11:48:25.628737745-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"6hhs8vrz","id":"127.0.0.1:39034"}
+ {"time":"2025-05-02T11:50:45.632975375-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:39034"}
+ {"time":"2025-05-02T11:50:45.633108663-07:00","level":"INFO","msg":"connection: closing","id":"127.0.0.1:39034"}
+ {"time":"2025-05-02T11:50:45.63317393-07:00","level":"INFO","msg":"server is shutting down"}
+ {"time":"2025-05-02T11:50:45.633311554-07:00","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:39034"}
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/logs/debug-internal.log ADDED
@@ -0,0 +1,15 @@
+ {"time":"2025-05-02T11:48:24.512248306-07:00","level":"INFO","msg":"stream: starting","core version":"0.19.9","symlink path":"runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250502_114824-6hhs8vrz/logs/debug-core.log"}
+ {"time":"2025-05-02T11:48:25.628691797-07:00","level":"INFO","msg":"created new stream","id":"6hhs8vrz"}
+ {"time":"2025-05-02T11:48:25.62873432-07:00","level":"INFO","msg":"stream: started","id":"6hhs8vrz"}
+ {"time":"2025-05-02T11:48:25.62879471-07:00","level":"INFO","msg":"handler: started","stream_id":"6hhs8vrz"}
+ {"time":"2025-05-02T11:48:25.628819016-07:00","level":"INFO","msg":"writer: Do: started","stream_id":"6hhs8vrz"}
+ {"time":"2025-05-02T11:48:25.628835531-07:00","level":"INFO","msg":"sender: started","stream_id":"6hhs8vrz"}
+ {"time":"2025-05-02T11:48:25.959126298-07:00","level":"INFO","msg":"Starting system monitor"}
+ {"time":"2025-05-02T11:50:45.633111697-07:00","level":"INFO","msg":"stream: closing","id":"6hhs8vrz"}
+ {"time":"2025-05-02T11:50:45.633155452-07:00","level":"INFO","msg":"Stopping system monitor"}
+ {"time":"2025-05-02T11:50:45.633187911-07:00","level":"INFO","msg":"Stopped system monitor"}
+ {"time":"2025-05-02T11:50:46.081032653-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+ {"time":"2025-05-02T11:50:46.287259059-07:00","level":"INFO","msg":"handler: closed","stream_id":"6hhs8vrz"}
+ {"time":"2025-05-02T11:50:46.287318307-07:00","level":"INFO","msg":"writer: Close: closed","stream_id":"6hhs8vrz"}
+ {"time":"2025-05-02T11:50:46.287337876-07:00","level":"INFO","msg":"sender: closed","stream_id":"6hhs8vrz"}
+ {"time":"2025-05-02T11:50:46.287531815-07:00","level":"INFO","msg":"stream: closed","id":"6hhs8vrz"}
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/logs/debug.log ADDED
@@ -0,0 +1,23 @@
+ 2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_setup.py:_flush():67] Current SDK version is 0.19.9
+ 2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_setup.py:_flush():67] Configure stats pid to 1932762
+ 2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_setup.py:_flush():67] Loading settings from /home/user1/.config/wandb/settings
+ 2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_setup.py:_flush():67] Loading settings from /home/user1/arashwork/prismatic-vlms/wandb/settings
+ 2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_setup.py:_flush():67] Loading settings from environment variables
+ 2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:setup_run_log_directory():662] Logging user logs to runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250502_114824-6hhs8vrz/logs/debug.log
+ 2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:setup_run_log_directory():663] Logging internal logs to runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250502_114824-6hhs8vrz/logs/debug-internal.log
+ 2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:init():781] calling init triggers
+ 2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:init():786] wandb.init called with sweep_config: {}
+ config: {'model': {'type': 'prism-qwen25-extra-dinosiglip-224px+1_5b', 'model_id': 'prism-qwen25-extra-dinosiglip-224px+1_5b', 'arch_specifier': 'no-align+fused-gelu-mlp', 'vision_backbone_id': 'dinosiglip-vit-so-224px', 'llm_backbone_id': 'qwen25-1_5b-extra', 'image_resize_strategy': 'resize-naive', 'llm_max_length': 32768, 'align_epochs': 1, 'align_max_steps': None, 'align_global_batch_size': 8, 'align_per_device_batch_size': 8, 'align_learning_rate': 0.001, 'align_weight_decay': 0.0, 'align_max_grad_norm': 1.0, 'align_lr_scheduler_type': 'linear-warmup+cosine-decay', 'align_warmup_ratio': 0.03, 'align_train_strategy': 'fsdp-shard-grad-op', 'finetune_epochs': 2, 'finetune_max_steps': None, 'finetune_global_batch_size': 24, 'finetune_per_device_batch_size': 6, 'finetune_learning_rate': 2e-05, 'finetune_weight_decay': 0.1, 'finetune_max_grad_norm': 1.0, 'finetune_lr_scheduler_type': 'linear-warmup+cosine-decay', 'finetune_warmup_ratio': 0.03, 'finetune_train_strategy': 'fsdp-full-shard', 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': False}, 'dataset': {'type': 'llava-v15', 'dataset_id': 'llava-v15', 'align_stage_components': ['download/llava-laion-cc-sbu-558k/chat.json', 'download/llava-laion-cc-sbu-558k'], 'finetune_stage_components': ['download/llava-v1.5-instruct/llava_v1_5_mix665k.json', 'download/llava-v1.5-instruct'], 'dataset_root_dir': 'data'}, 'stage': 'finetune', 'pretrained_checkpoint': None, 'run_id': 'prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7', 'run_root_dir': 'runs', 'seed': 7, 'hf_token': 'hf_token.txt', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'Moxin-VLM', 'wandb_entity': 'arash-akbari-stu-northeastern-university', '_wandb': {}}
+ 2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:init():809] starting backend
+ 2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:init():813] sending inform_init request
+ 2025-05-02 11:48:24,510 INFO MainThread:1932762 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+ 2025-05-02 11:48:24,510 INFO MainThread:1932762 [wandb_init.py:init():823] backend started and connected
+ 2025-05-02 11:48:24,512 INFO MainThread:1932762 [wandb_init.py:init():915] updated telemetry
+ 2025-05-02 11:48:24,517 INFO MainThread:1932762 [wandb_init.py:init():939] communicating run to backend with 90.0 second timeout
+ 2025-05-02 11:48:25,957 INFO MainThread:1932762 [wandb_init.py:init():1014] starting run threads in backend
+ 2025-05-02 11:48:26,000 INFO MainThread:1932762 [wandb_run.py:_console_start():2454] atexit reg
+ 2025-05-02 11:48:26,000 INFO MainThread:1932762 [wandb_run.py:_redirect():2306] redirect: wrap_raw
+ 2025-05-02 11:48:26,000 INFO MainThread:1932762 [wandb_run.py:_redirect():2371] Wrapping output streams.
+ 2025-05-02 11:48:26,000 INFO MainThread:1932762 [wandb_run.py:_redirect():2394] Redirects installed.
+ 2025-05-02 11:48:26,002 INFO MainThread:1932762 [wandb_init.py:init():1056] run started, returning control to user process
+ 2025-05-02 11:50:45,632 INFO MsgRouterThr:1932762 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/run-6hhs8vrz.wandb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:514635c836cd32f2106e2050edf14ff70f51a51daf2b1e5aa29387effe9be97f
+ size 143258
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/files/config.yaml ADDED
@@ -0,0 +1,100 @@
+ _wandb:
+   value:
+     cli_version: 0.19.9
+     m: []
+     python_version: 3.10.16
+     t:
+       "1":
+       - 1
+       - 11
+       - 41
+       - 49
+       - 55
+       - 63
+       - 71
+       - 98
+       "2":
+       - 1
+       - 11
+       - 41
+       - 49
+       - 55
+       - 63
+       - 71
+       - 98
+       "3":
+       - 13
+       - 16
+       - 23
+       - 55
+       - 61
+       "4": 3.10.16
+       "5": 0.19.9
+       "6": 4.51.1
+       "8":
+       - 5
+       "12": 0.19.9
+       "13": linux-x86_64
+ dataset:
+   value:
+     align_stage_components:
+     - download/llava-laion-cc-sbu-558k/chat.json
+     - download/llava-laion-cc-sbu-558k
+     dataset_id: llava-v15
+     dataset_root_dir: data
+     finetune_stage_components:
+     - download/llava-v1.5-instruct/llava_v1_5_mix665k.json
+     - download/llava-v1.5-instruct
+     type: llava-v15
+ hf_token:
+   value: hf_token.txt
+ model:
+   value:
+     align_epochs: 1
+     align_global_batch_size: 8
+     align_learning_rate: 0.001
+     align_lr_scheduler_type: linear-warmup+cosine-decay
+     align_max_grad_norm: 1
+     align_max_steps: null
+     align_per_device_batch_size: 8
+     align_train_strategy: fsdp-shard-grad-op
+     align_warmup_ratio: 0.03
+     align_weight_decay: 0
+     arch_specifier: no-align+fused-gelu-mlp
+     enable_gradient_checkpointing: true
+     enable_mixed_precision_training: true
+     finetune_epochs: 2
+     finetune_global_batch_size: 2
+     finetune_learning_rate: 2e-05
+     finetune_lr_scheduler_type: linear-warmup+cosine-decay
+     finetune_max_grad_norm: 1
+     finetune_max_steps: null
+     finetune_per_device_batch_size: 2
+     finetune_train_strategy: fsdp-full-shard
+     finetune_warmup_ratio: 0.03
+     finetune_weight_decay: 0.1
+     image_resize_strategy: resize-naive
+     llm_backbone_id: qwen25-1_5b-extra
+     llm_max_length: 32768
+     model_id: prism-qwen25-extra-dinosiglip-224px+1_5b
+     reduce_in_full_precision: false
+     type: prism-qwen25-extra-dinosiglip-224px+1_5b
+     vision_backbone_id: dinosiglip-vit-so-224px
+ pretrained_checkpoint:
+   value: null
+ run_id:
+   value: prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7
+ run_root_dir:
+   value: runs
+ seed:
+   value: 7
+ stage:
+   value: finetune
+ trackers:
+   value:
+   - jsonl
+   - wandb
+ wandb_entity:
+   value: arash-akbari-stu-northeastern-university
+ wandb_project:
+   value: Qwen25-Extra-DINOSigLIP-224px-1_5B-full-finetune
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/files/output.log ADDED
@@ -0,0 +1,2 @@
+ 04/22 [09:52:04] INFO | >> [*] Starting Training Loop pretrain.py:238
+ =>> [Global Step] 000015 =>> LR :: 0.000000 -- Loss :: 2.7283: 0%| | 15/665298 [00:11<46:11:45, 4.00it/s]
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/files/requirements.txt ADDED
@@ -0,0 +1,94 @@
+ pyyaml-include==1.4.1
+ torchaudio==2.2.0
+ psutil==7.0.0
+ nvidia-cufft-cu12==11.0.2.54
+ nvidia-nvtx-cu12==12.1.105
+ wheel==0.45.1
+ nvidia-cuda-cupti-cu12==12.1.105
+ pillow==11.1.0
+ draccus==0.10.0
+ Jinja2==3.1.6
+ ninja==1.11.1.4
+ MarkupSafe==3.0.2
+ nvidia-cublas-cu12==12.1.3.1
+ tqdm==4.67.1
+ fsspec==2025.3.2
+ wandb==0.19.9
+ GitPython==3.1.44
+ click==8.1.8
+ timm==0.9.10
+ gitdb==4.0.12
+ nvidia-nvjitlink-cu12==12.8.93
+ nvidia-cudnn-cu12==8.9.2.26
+ mergedeep==1.3.4
+ annotated-types==0.7.0
+ protobuf==5.29.4
+ huggingface-hub==0.30.2
+ mdurl==0.1.2
+ urllib3==2.3.0
+ typing_extensions==4.13.1
+ numpy==1.26.4
+ torchvision==0.17.0
+ nvidia-cusparse-cu12==12.1.0.106
+ networkx==3.4.2
+ regex==2024.11.6
+ mypy-extensions==1.0.0
+ pip==25.0
+ peft==0.15.1
+ sentencepiece==0.2.0
+ Pygments==2.19.1
+ smmap==5.0.2
+ sympy==1.13.3
+ setuptools==75.8.0
+ nvidia-nccl-cu12==2.19.3
+ tokenizers==0.21.1
+ charset-normalizer==3.4.1
+ typing-inspection==0.4.0
+ platformdirs==4.3.7
+ packaging==24.2
+ setproctitle==1.3.5
+ idna==3.10
+ markdown-it-py==3.0.0
+ safetensors==0.5.3
+ rich==14.0.0
+ transformers==4.51.1
+ requests==2.32.3
+ sentry-sdk==2.25.1
+ jsonlines==4.0.0
+ PyYAML==6.0.2
+ pydantic_core==2.33.1
+ flash-attn==2.5.5
+ mpmath==1.3.0
+ attrs==25.3.0
+ einops==0.8.1
+ nvidia-cuda-runtime-cu12==12.1.105
+ nvidia-curand-cu12==10.3.2.106
+ filelock==3.18.0
+ prismatic==0.0.2
+ certifi==2025.1.31
+ accelerate==1.6.0
+ typing-inspect==0.9.0
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ pydantic==2.11.3
+ six==1.17.0
+ nvidia-cusolver-cu12==11.4.5.107
+ torch==2.2.0
+ docker-pycreds==0.4.0
+ toml==0.10.2
+ triton==2.2.0
+ importlib_metadata==8.0.0
+ tomli==2.0.1
+ zipp==3.19.2
+ jaraco.context==5.3.0
+ inflect==7.3.1
+ autocommand==2.2.2
+ typing_extensions==4.12.2
+ jaraco.collections==5.1.0
+ jaraco.functools==4.0.1
+ packaging==24.2
+ wheel==0.43.0
+ backports.tarfile==1.2.0
+ platformdirs==4.2.2
+ more-itertools==10.3.0
+ jaraco.text==3.12.1
+ typeguard==4.3.0
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/files/wandb-metadata.json ADDED
@@ -0,0 +1,94 @@
+ {
+   "os": "Linux-6.8.0-52-generic-x86_64-with-glibc2.35",
+   "python": "CPython 3.10.16",
+   "startedAt": "2025-04-22T16:52:03.056575Z",
+   "args": [
+     "--model.type",
+     "prism-qwen25-extra-dinosiglip-224px+1_5b",
+     "--wandb_project",
+     "Qwen25-Extra-DINOSigLIP-224px-1_5B-full-finetune",
+     "--wandb_entity",
+     "arash-akbari-stu-northeastern-university",
+     "--model.enable_mixed_precision_training",
+     "True"
+   ],
+   "program": "/home/user1/arashwork/prismatic-vlms/scripts/pretrain.py",
+   "codePath": "scripts/pretrain.py",
+   "git": {
+     "remote": "git@github.com:arashakb/prismatic-vlms.git",
+     "commit": "8082f651fe3ba730b13899691a222fdef82d2c66"
+   },
+   "email": "arash.akbari.stu@gmail.com",
+   "root": "runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7",
+   "host": "nnmc72",
+   "executable": "/home/user1/anaconda3/envs/prism/bin/python",
+   "codePathLocal": "scripts/pretrain.py",
+   "cpu_count": 48,
+   "cpu_count_logical": 96,
+   "gpu": "NVIDIA H100 NVL",
+   "gpu_count": 8,
+   "disk": {
+     "/": {
+       "total": "30476149334016",
+       "used": "18270204149760"
+     }
+   },
+   "memory": {
+     "total": "811294752768"
+   },
+   "cpu": {
+     "count": 48,
+     "countLogical": 96
+   },
+   "gpu_nvidia": [
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     }
+   ],
+   "cudaVersion": "12.4"
+ }
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
+ {"Finetune/Step Time":0.97449951171875,"Finetune/Loss":2.72833514213562,"_wandb":{"runtime":17},"Finetune/Learning Rate":1.5031566289207335e-08,"Finetune/Loss (Raw)":2.510840654373169,"_step":15,"_timestamp":1.7453407389854724e+09,"Finetune/Step":15,"_runtime":17.423786677}
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/logs/debug-core.log ADDED
@@ -0,0 +1,10 @@
+ {"time":"2025-04-22T09:52:02.705725804-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmvl60rms/port-491736.txt","pid":491736,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+ {"time":"2025-04-22T09:52:02.706961119-07:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":491736}
+ {"time":"2025-04-22T09:52:02.706929822-07:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":39925,"Zone":""}}
+ {"time":"2025-04-22T09:52:02.895174529-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:55192"}
+ {"time":"2025-04-22T09:52:03.059360916-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"e4gyde5j","id":"127.0.0.1:55192"}
+ {"time":"2025-04-22T09:52:03.956493803-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"e4gyde5j","id":"127.0.0.1:55192"}
+ {"time":"2025-04-22T09:52:20.480226436-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:55192"}
+ {"time":"2025-04-22T09:52:20.480361056-07:00","level":"INFO","msg":"connection: closing","id":"127.0.0.1:55192"}
+ {"time":"2025-04-22T09:52:20.480421636-07:00","level":"INFO","msg":"server is shutting down"}
+ {"time":"2025-04-22T09:52:20.480583807-07:00","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:55192"}
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/logs/debug-internal.log ADDED
@@ -0,0 +1,15 @@
+ {"time":"2025-04-22T09:52:03.059719599-07:00","level":"INFO","msg":"stream: starting","core version":"0.19.9","symlink path":"runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250422_095203-e4gyde5j/logs/debug-core.log"}
+ {"time":"2025-04-22T09:52:03.956429127-07:00","level":"INFO","msg":"created new stream","id":"e4gyde5j"}
+ {"time":"2025-04-22T09:52:03.956484028-07:00","level":"INFO","msg":"stream: started","id":"e4gyde5j"}
+ {"time":"2025-04-22T09:52:03.956526962-07:00","level":"INFO","msg":"handler: started","stream_id":"e4gyde5j"}
+ {"time":"2025-04-22T09:52:03.956515976-07:00","level":"INFO","msg":"writer: Do: started","stream_id":"e4gyde5j"}
+ {"time":"2025-04-22T09:52:03.956572249-07:00","level":"INFO","msg":"sender: started","stream_id":"e4gyde5j"}
+ {"time":"2025-04-22T09:52:04.277054892-07:00","level":"INFO","msg":"Starting system monitor"}
+ {"time":"2025-04-22T09:52:20.480344461-07:00","level":"INFO","msg":"stream: closing","id":"e4gyde5j"}
+ {"time":"2025-04-22T09:52:20.480372753-07:00","level":"INFO","msg":"Stopping system monitor"}
+ {"time":"2025-04-22T09:52:20.480413764-07:00","level":"INFO","msg":"Stopped system monitor"}
+ {"time":"2025-04-22T09:52:20.820531468-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+ {"time":"2025-04-22T09:52:21.041731773-07:00","level":"INFO","msg":"handler: closed","stream_id":"e4gyde5j"}
+ {"time":"2025-04-22T09:52:21.041794947-07:00","level":"INFO","msg":"sender: closed","stream_id":"e4gyde5j"}
+ {"time":"2025-04-22T09:52:21.041797381-07:00","level":"INFO","msg":"writer: Close: closed","stream_id":"e4gyde5j"}
+ {"time":"2025-04-22T09:52:21.042007493-07:00","level":"INFO","msg":"stream: closed","id":"e4gyde5j"}
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/logs/debug.log ADDED
@@ -0,0 +1,23 @@
+ 2025-04-22 09:52:03,050 INFO MainThread:491736 [wandb_setup.py:_flush():67] Current SDK version is 0.19.9
+ 2025-04-22 09:52:03,050 INFO MainThread:491736 [wandb_setup.py:_flush():67] Configure stats pid to 491736
+ 2025-04-22 09:52:03,050 INFO MainThread:491736 [wandb_setup.py:_flush():67] Loading settings from /home/user1/.config/wandb/settings
+ 2025-04-22 09:52:03,050 INFO MainThread:491736 [wandb_setup.py:_flush():67] Loading settings from /home/user1/arashwork/prismatic-vlms/wandb/settings
+ 2025-04-22 09:52:03,050 INFO MainThread:491736 [wandb_setup.py:_flush():67] Loading settings from environment variables
+ 2025-04-22 09:52:03,050 INFO MainThread:491736 [wandb_init.py:setup_run_log_directory():662] Logging user logs to runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250422_095203-e4gyde5j/logs/debug.log
+ 2025-04-22 09:52:03,051 INFO MainThread:491736 [wandb_init.py:setup_run_log_directory():663] Logging internal logs to runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250422_095203-e4gyde5j/logs/debug-internal.log
+ 2025-04-22 09:52:03,051 INFO MainThread:491736 [wandb_init.py:init():781] calling init triggers
+ 2025-04-22 09:52:03,051 INFO MainThread:491736 [wandb_init.py:init():786] wandb.init called with sweep_config: {}
+ config: {'model': {'type': 'prism-qwen25-extra-dinosiglip-224px+1_5b', 'model_id': 'prism-qwen25-extra-dinosiglip-224px+1_5b', 'arch_specifier': 'no-align+fused-gelu-mlp', 'vision_backbone_id': 'dinosiglip-vit-so-224px', 'llm_backbone_id': 'qwen25-1_5b-extra', 'image_resize_strategy': 'resize-naive', 'llm_max_length': 32768, 'align_epochs': 1, 'align_max_steps': None, 'align_global_batch_size': 8, 'align_per_device_batch_size': 8, 'align_learning_rate': 0.001, 'align_weight_decay': 0.0, 'align_max_grad_norm': 1.0, 'align_lr_scheduler_type': 'linear-warmup+cosine-decay', 'align_warmup_ratio': 0.03, 'align_train_strategy': 'fsdp-shard-grad-op', 'finetune_epochs': 2, 'finetune_max_steps': None, 'finetune_global_batch_size': 2, 'finetune_per_device_batch_size': 2, 'finetune_learning_rate': 2e-05, 'finetune_weight_decay': 0.1, 'finetune_max_grad_norm': 1.0, 'finetune_lr_scheduler_type': 'linear-warmup+cosine-decay', 'finetune_warmup_ratio': 0.03, 'finetune_train_strategy': 'fsdp-full-shard', 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': False}, 'dataset': {'type': 'llava-v15', 'dataset_id': 'llava-v15', 'align_stage_components': ['download/llava-laion-cc-sbu-558k/chat.json', 'download/llava-laion-cc-sbu-558k'], 'finetune_stage_components': ['download/llava-v1.5-instruct/llava_v1_5_mix665k.json', 'download/llava-v1.5-instruct'], 'dataset_root_dir': 'data'}, 'stage': 'finetune', 'pretrained_checkpoint': None, 'run_id': 'prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7', 'run_root_dir': 'runs', 'seed': 7, 'hf_token': 'hf_token.txt', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'Qwen25-Extra-DINOSigLIP-224px-1_5B-full-finetune', 'wandb_entity': 'arash-akbari-stu-northeastern-university', '_wandb': {}}
+ 2025-04-22 09:52:03,051 INFO MainThread:491736 [wandb_init.py:init():809] starting backend
+ 2025-04-22 09:52:03,051 INFO MainThread:491736 [wandb_init.py:init():813] sending inform_init request
+ 2025-04-22 09:52:03,056 INFO MainThread:491736 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+ 2025-04-22 09:52:03,056 INFO MainThread:491736 [wandb_init.py:init():823] backend started and connected
+ 2025-04-22 09:52:03,058 INFO MainThread:491736 [wandb_init.py:init():915] updated telemetry
+ 2025-04-22 09:52:03,064 INFO MainThread:491736 [wandb_init.py:init():939] communicating run to backend with 90.0 second timeout
+ 2025-04-22 09:52:04,234 INFO MainThread:491736 [wandb_init.py:init():1014] starting run threads in backend
+ 2025-04-22 09:52:04,313 INFO MainThread:491736 [wandb_run.py:_console_start():2454] atexit reg
+ 2025-04-22 09:52:04,314 INFO MainThread:491736 [wandb_run.py:_redirect():2306] redirect: wrap_raw
+ 2025-04-22 09:52:04,314 INFO MainThread:491736 [wandb_run.py:_redirect():2371] Wrapping output streams.
+ 2025-04-22 09:52:04,314 INFO MainThread:491736 [wandb_run.py:_redirect():2394] Redirects installed.
+ 2025-04-22 09:52:04,315 INFO MainThread:491736 [wandb_init.py:init():1056] run started, returning control to user process
+ 2025-04-22 09:52:20,479 INFO MsgRouterThr:491736 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/run-e4gyde5j.wandb ADDED
Binary file (21.2 kB).
 
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/files/config.yaml ADDED
@@ -0,0 +1,99 @@
+ _wandb:
+   value:
+     cli_version: 0.19.9
+     m: []
+     python_version: 3.10.16
+     t:
+       "1":
+         - 1
+         - 11
+         - 41
+         - 49
+         - 55
+         - 63
+         - 71
+         - 98
+       "2":
+         - 1
+         - 11
+         - 41
+         - 49
+         - 55
+         - 63
+         - 71
+         - 98
+       "3":
+         - 13
+         - 16
+         - 23
+         - 55
+       "4": 3.10.16
+       "5": 0.19.9
+       "6": 4.51.1
+       "8":
+         - 5
+       "12": 0.19.9
+       "13": linux-x86_64
+ dataset:
+   value:
+     align_stage_components:
+       - download/llava-laion-cc-sbu-558k/chat.json
+       - download/llava-laion-cc-sbu-558k
+     dataset_id: llava-v15
+     dataset_root_dir: data
+     finetune_stage_components:
+       - download/llava-v1.5-instruct/llava_v1_5_mix665k.json
+       - download/llava-v1.5-instruct
+     type: llava-v15
+ hf_token:
+   value: hf_token.txt
+ model:
+   value:
+     align_epochs: 1
+     align_global_batch_size: 8
+     align_learning_rate: 0.001
+     align_lr_scheduler_type: linear-warmup+cosine-decay
+     align_max_grad_norm: 1
+     align_max_steps: null
+     align_per_device_batch_size: 8
+     align_train_strategy: fsdp-shard-grad-op
+     align_warmup_ratio: 0.03
+     align_weight_decay: 0
+     arch_specifier: no-align+fused-gelu-mlp
+     enable_gradient_checkpointing: true
+     enable_mixed_precision_training: true
+     finetune_epochs: 2
+     finetune_global_batch_size: 128
+     finetune_learning_rate: 2e-05
+     finetune_lr_scheduler_type: linear-warmup+cosine-decay
+     finetune_max_grad_norm: 1
+     finetune_max_steps: null
+     finetune_per_device_batch_size: 16
+     finetune_train_strategy: fsdp-full-shard
+     finetune_warmup_ratio: 0.03
+     finetune_weight_decay: 0.1
+     image_resize_strategy: resize-naive
+     llm_backbone_id: qwen25-1_5b-extra
+     llm_max_length: 32768
+     model_id: prism-qwen25-extra-dinosiglip-224px+1_5b
+     reduce_in_full_precision: false
+     type: prism-qwen25-extra-dinosiglip-224px+1_5b
+     vision_backbone_id: dinosiglip-vit-so-224px
+ pretrained_checkpoint:
+   value: null
+ run_id:
+   value: prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7
+ run_root_dir:
+   value: runs
+ seed:
+   value: 7
+ stage:
+   value: finetune
+ trackers:
+   value:
+     - jsonl
+     - wandb
+ wandb_entity:
+   value: arash-akbari-stu-northeastern-university
+ wandb_project:
+   value: Qwen25-Extra-DINOSigLIP-224px-1_5B-full-finetune
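This config.yaml records the run launched with the largest finetune batch in this commit (global 128, per-device 16); the output.log below shows it dying with a CUDA OOM, and the two later run directories walk the batch down to 64/8 and then 56/8. A minimal relaunch sketch follows, under the assumption that the draccus dotted-flag CLI recorded in wandb-metadata.json (`--model.type`, `--model.enable_mixed_precision_training`) also exposes the batch-size fields stored under `model:` above; the torchrun invocation and flag names are inferred, not taken from this commit.

```python
# Hypothetical relaunch sketch -- not part of this commit. Flag names are
# inferred from the config keys above plus the draccus-style dotted flags
# seen in wandb-metadata.json; verify them against scripts/pretrain.py.
import subprocess

subprocess.run(
    [
        "torchrun", "--standalone", "--nnodes", "1", "--nproc-per-node", "8",
        "scripts/pretrain.py",
        "--model.type", "prism-qwen25-extra-dinosiglip-224px+1_5b",
        # Halve this run's 128/16 batch settings -- the same values the next
        # recorded run (run-20250422_100009-722cxxmu) actually used:
        "--model.finetune_global_batch_size", "64",
        "--model.finetune_per_device_batch_size", "8",
    ],
    check=True,
)
```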
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/files/output.log ADDED
@@ -0,0 +1,45 @@
+ 04/22 [09:58:06] INFO | >> [*] Starting Training Loop pretrain.py:238
+ Traceback (most recent call last):
+   File "/home/user1/arashwork/prismatic-vlms/scripts/pretrain.py", line 252, in <module>
+     pretrain()
+   File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/draccus/argparsing.py", line 225, in wrapper_inner
+     response = fn(cfg, *args, **kwargs)
+   File "/home/user1/arashwork/prismatic-vlms/scripts/pretrain.py", line 239, in pretrain
+     train_strategy.run_training(train_dataset, collator, metrics, stage=cfg.stage, seed=cfg.seed)
+   File "/home/user1/arashwork/prismatic-vlms/prismatic/training/strategies/base_strategy.py", line 183, in run_training
+     output: CausalLMOutputWithPast = self.vlm(
+   File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
+     return self._call_impl(*args, **kwargs)
+   File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
+     return forward_call(*args, **kwargs)
+   File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 849, in forward
+     output = self._fsdp_wrapped_module(*args, **kwargs)
+   File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
+     return self._call_impl(*args, **kwargs)
+   File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
+     return forward_call(*args, **kwargs)
+   File "/home/user1/arashwork/prismatic-vlms/prismatic/models/vlms/prismatic.py", line 410, in forward
+     return self.llm_backbone(
+   File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
+     return self._call_impl(*args, **kwargs)
+   File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
+     return forward_call(*args, **kwargs)
+   File "/home/user1/arashwork/prismatic-vlms/prismatic/models/backbones/llm/base_llm.py", line 229, in forward
+     output: CausalLMOutputWithPast = self.llm(
+   File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
+     return self._call_impl(*args, **kwargs)
+   File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
+     return forward_call(*args, **kwargs)
+   File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/transformers/utils/generic.py", line 965, in wrapper
+     output = func(self, *args, **kwargs)
+   File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/transformers/utils/deprecation.py", line 172, in wrapped_func
+     return func(*args, **kwargs)
+   File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 843, in forward
+     loss = self.loss_function(logits=logits, labels=labels, vocab_size=self.config.vocab_size, **kwargs)
+   File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/transformers/loss/loss_utils.py", line 63, in ForCausalLMLoss
+     loss = fixed_cross_entropy(logits, shift_labels, num_items_in_batch, ignore_index, **kwargs)
+   File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/transformers/loss/loss_utils.py", line 35, in fixed_cross_entropy
+     loss = nn.functional.cross_entropy(source, target, ignore_index=ignore_index, reduction=reduction)
+   File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/nn/functional.py", line 3059, in cross_entropy
+     return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
+ torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 49.14 GiB. GPU 0 has a total capacity of 93.00 GiB of which 6.57 GiB is free. Including non-PyTorch memory, this process has 86.41 GiB memory in use. Of the allocated memory 83.47 GiB is allocated by PyTorch, and 1.97 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
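The allocator hint at the end of this traceback is the one concrete suggestion PyTorch itself makes: with 1.97 GiB reserved but unallocated, enabling expandable segments can recover fragmentation losses. A minimal sketch of applying it, assuming only that the variable is set before CUDA is first initialized (e.g. at the very top of scripts/pretrain.py, or exported in the shell before launching):

```python
# Minimal sketch: apply the allocator setting suggested by the OOM message.
# PYTORCH_CUDA_ALLOC_CONF is read when the CUDA caching allocator is first
# initialized, so it must be set before any CUDA work happens.
import os

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import torch  # imported only after the env var is in place

print(torch.cuda.is_available())  # allocator now uses expandable segments
```

Note this only mitigates fragmentation; the 49.14 GiB single allocation here appears to come from materializing full-vocabulary logits inside the cross-entropy loss, so reducing the per-device batch size (as the later runs in this commit do) is the more direct fix.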
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/files/requirements.txt ADDED
@@ -0,0 +1,94 @@
+ pyyaml-include==1.4.1
+ torchaudio==2.2.0
+ psutil==7.0.0
+ nvidia-cufft-cu12==11.0.2.54
+ nvidia-nvtx-cu12==12.1.105
+ wheel==0.45.1
+ nvidia-cuda-cupti-cu12==12.1.105
+ pillow==11.1.0
+ draccus==0.10.0
+ Jinja2==3.1.6
+ ninja==1.11.1.4
+ MarkupSafe==3.0.2
+ nvidia-cublas-cu12==12.1.3.1
+ tqdm==4.67.1
+ fsspec==2025.3.2
+ wandb==0.19.9
+ GitPython==3.1.44
+ click==8.1.8
+ timm==0.9.10
+ gitdb==4.0.12
+ nvidia-nvjitlink-cu12==12.8.93
+ nvidia-cudnn-cu12==8.9.2.26
+ mergedeep==1.3.4
+ annotated-types==0.7.0
+ protobuf==5.29.4
+ huggingface-hub==0.30.2
+ mdurl==0.1.2
+ urllib3==2.3.0
+ typing_extensions==4.13.1
+ numpy==1.26.4
+ torchvision==0.17.0
+ nvidia-cusparse-cu12==12.1.0.106
+ networkx==3.4.2
+ regex==2024.11.6
+ mypy-extensions==1.0.0
+ pip==25.0
+ peft==0.15.1
+ sentencepiece==0.2.0
+ Pygments==2.19.1
+ smmap==5.0.2
+ sympy==1.13.3
+ setuptools==75.8.0
+ nvidia-nccl-cu12==2.19.3
+ tokenizers==0.21.1
+ charset-normalizer==3.4.1
+ typing-inspection==0.4.0
+ platformdirs==4.3.7
+ packaging==24.2
+ setproctitle==1.3.5
+ idna==3.10
+ markdown-it-py==3.0.0
+ safetensors==0.5.3
+ rich==14.0.0
+ transformers==4.51.1
+ requests==2.32.3
+ sentry-sdk==2.25.1
+ jsonlines==4.0.0
+ PyYAML==6.0.2
+ pydantic_core==2.33.1
+ flash-attn==2.5.5
+ mpmath==1.3.0
+ attrs==25.3.0
+ einops==0.8.1
+ nvidia-cuda-runtime-cu12==12.1.105
+ nvidia-curand-cu12==10.3.2.106
+ filelock==3.18.0
+ prismatic==0.0.2
+ certifi==2025.1.31
+ accelerate==1.6.0
+ typing-inspect==0.9.0
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ pydantic==2.11.3
+ six==1.17.0
+ nvidia-cusolver-cu12==11.4.5.107
+ torch==2.2.0
+ docker-pycreds==0.4.0
+ toml==0.10.2
+ triton==2.2.0
+ importlib_metadata==8.0.0
+ tomli==2.0.1
+ zipp==3.19.2
+ jaraco.context==5.3.0
+ inflect==7.3.1
+ autocommand==2.2.2
+ typing_extensions==4.12.2
+ jaraco.collections==5.1.0
+ jaraco.functools==4.0.1
+ packaging==24.2
+ wheel==0.43.0
+ backports.tarfile==1.2.0
+ platformdirs==4.2.2
+ more-itertools==10.3.0
+ jaraco.text==3.12.1
+ typeguard==4.3.0
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/files/wandb-metadata.json ADDED
@@ -0,0 +1,94 @@
+ {
+   "os": "Linux-6.8.0-52-generic-x86_64-with-glibc2.35",
+   "python": "CPython 3.10.16",
+   "startedAt": "2025-04-22T16:58:05.167030Z",
+   "args": [
+     "--model.type",
+     "prism-qwen25-extra-dinosiglip-224px+1_5b",
+     "--wandb_project",
+     "Qwen25-Extra-DINOSigLIP-224px-1_5B-full-finetune",
+     "--wandb_entity",
+     "arash-akbari-stu-northeastern-university",
+     "--model.enable_mixed_precision_training",
+     "True"
+   ],
+   "program": "/home/user1/arashwork/prismatic-vlms/scripts/pretrain.py",
+   "codePath": "scripts/pretrain.py",
+   "git": {
+     "remote": "git@github.com:arashakb/prismatic-vlms.git",
+     "commit": "8082f651fe3ba730b13899691a222fdef82d2c66"
+   },
+   "email": "arash.akbari.stu@gmail.com",
+   "root": "runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7",
+   "host": "nnmc72",
+   "executable": "/home/user1/anaconda3/envs/prism/bin/python",
+   "codePathLocal": "scripts/pretrain.py",
+   "cpu_count": 48,
+   "cpu_count_logical": 96,
+   "gpu": "NVIDIA H100 NVL",
+   "gpu_count": 8,
+   "disk": {
+     "/": {
+       "total": "30476149334016",
+       "used": "18270204944384"
+     }
+   },
+   "memory": {
+     "total": "811294752768"
+   },
+   "cpu": {
+     "count": 48,
+     "countLogical": 96
+   },
+   "gpu_nvidia": [
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     }
+   ],
+   "cudaVersion": "12.4"
+ }
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
+ {"_wandb":{"runtime":15}}
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
+ {"time":"2025-04-22T09:58:04.848042565-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp7evbevo7/port-494110.txt","pid":494110,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+ {"time":"2025-04-22T09:58:04.849131413-07:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":494110}
+ {"time":"2025-04-22T09:58:04.849105444-07:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":34993,"Zone":""}}
+ {"time":"2025-04-22T09:58:05.037650239-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:42808"}
+ {"time":"2025-04-22T09:58:05.168079456-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"cpx6iuc8","id":"127.0.0.1:42808"}
+ {"time":"2025-04-22T09:58:05.747564085-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"cpx6iuc8","id":"127.0.0.1:42808"}
+ {"time":"2025-04-22T09:58:21.093776359-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:42808"}
+ {"time":"2025-04-22T09:58:21.093854295-07:00","level":"INFO","msg":"server is shutting down"}
+ {"time":"2025-04-22T09:58:21.093848236-07:00","level":"INFO","msg":"connection: closing","id":"127.0.0.1:42808"}
+ {"time":"2025-04-22T09:58:21.094006501-07:00","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:42808"}
+ {"time":"2025-04-22T09:58:21.1669656-07:00","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:34993->127.0.0.1:42808: use of closed network connection","id":"127.0.0.1:42808"}
+ {"time":"2025-04-22T09:58:21.728742764-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:42808"}
+ {"time":"2025-04-22T09:58:21.728765407-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:42808"}
+ {"time":"2025-04-22T09:58:21.728779829-07:00","level":"INFO","msg":"server is closed"}
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/logs/debug-internal.log ADDED
@@ -0,0 +1,15 @@
+ {"time":"2025-04-22T09:58:05.168284731-07:00","level":"INFO","msg":"stream: starting","core version":"0.19.9","symlink path":"runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250422_095805-cpx6iuc8/logs/debug-core.log"}
+ {"time":"2025-04-22T09:58:05.74750679-07:00","level":"INFO","msg":"created new stream","id":"cpx6iuc8"}
+ {"time":"2025-04-22T09:58:05.747559949-07:00","level":"INFO","msg":"stream: started","id":"cpx6iuc8"}
+ {"time":"2025-04-22T09:58:05.74760112-07:00","level":"INFO","msg":"sender: started","stream_id":"cpx6iuc8"}
+ {"time":"2025-04-22T09:58:05.747609823-07:00","level":"INFO","msg":"writer: Do: started","stream_id":"cpx6iuc8"}
+ {"time":"2025-04-22T09:58:05.747646928-07:00","level":"INFO","msg":"handler: started","stream_id":"cpx6iuc8"}
+ {"time":"2025-04-22T09:58:06.048552657-07:00","level":"INFO","msg":"Starting system monitor"}
+ {"time":"2025-04-22T09:58:21.093879462-07:00","level":"INFO","msg":"stream: closing","id":"cpx6iuc8"}
+ {"time":"2025-04-22T09:58:21.093897119-07:00","level":"INFO","msg":"Stopping system monitor"}
+ {"time":"2025-04-22T09:58:21.093946622-07:00","level":"INFO","msg":"Stopped system monitor"}
+ {"time":"2025-04-22T09:58:21.630469097-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+ {"time":"2025-04-22T09:58:21.728396559-07:00","level":"INFO","msg":"handler: closed","stream_id":"cpx6iuc8"}
+ {"time":"2025-04-22T09:58:21.728458532-07:00","level":"INFO","msg":"writer: Close: closed","stream_id":"cpx6iuc8"}
+ {"time":"2025-04-22T09:58:21.728467014-07:00","level":"INFO","msg":"sender: closed","stream_id":"cpx6iuc8"}
+ {"time":"2025-04-22T09:58:21.728627693-07:00","level":"INFO","msg":"stream: closed","id":"cpx6iuc8"}
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/logs/debug.log ADDED
@@ -0,0 +1,23 @@
+ 2025-04-22 09:58:05,162 INFO MainThread:494110 [wandb_setup.py:_flush():67] Current SDK version is 0.19.9
+ 2025-04-22 09:58:05,162 INFO MainThread:494110 [wandb_setup.py:_flush():67] Configure stats pid to 494110
+ 2025-04-22 09:58:05,162 INFO MainThread:494110 [wandb_setup.py:_flush():67] Loading settings from /home/user1/.config/wandb/settings
+ 2025-04-22 09:58:05,162 INFO MainThread:494110 [wandb_setup.py:_flush():67] Loading settings from /home/user1/arashwork/prismatic-vlms/wandb/settings
+ 2025-04-22 09:58:05,162 INFO MainThread:494110 [wandb_setup.py:_flush():67] Loading settings from environment variables
+ 2025-04-22 09:58:05,162 INFO MainThread:494110 [wandb_init.py:setup_run_log_directory():662] Logging user logs to runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250422_095805-cpx6iuc8/logs/debug.log
+ 2025-04-22 09:58:05,162 INFO MainThread:494110 [wandb_init.py:setup_run_log_directory():663] Logging internal logs to runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250422_095805-cpx6iuc8/logs/debug-internal.log
+ 2025-04-22 09:58:05,162 INFO MainThread:494110 [wandb_init.py:init():781] calling init triggers
+ 2025-04-22 09:58:05,162 INFO MainThread:494110 [wandb_init.py:init():786] wandb.init called with sweep_config: {}
+ config: {'model': {'type': 'prism-qwen25-extra-dinosiglip-224px+1_5b', 'model_id': 'prism-qwen25-extra-dinosiglip-224px+1_5b', 'arch_specifier': 'no-align+fused-gelu-mlp', 'vision_backbone_id': 'dinosiglip-vit-so-224px', 'llm_backbone_id': 'qwen25-1_5b-extra', 'image_resize_strategy': 'resize-naive', 'llm_max_length': 32768, 'align_epochs': 1, 'align_max_steps': None, 'align_global_batch_size': 8, 'align_per_device_batch_size': 8, 'align_learning_rate': 0.001, 'align_weight_decay': 0.0, 'align_max_grad_norm': 1.0, 'align_lr_scheduler_type': 'linear-warmup+cosine-decay', 'align_warmup_ratio': 0.03, 'align_train_strategy': 'fsdp-shard-grad-op', 'finetune_epochs': 2, 'finetune_max_steps': None, 'finetune_global_batch_size': 128, 'finetune_per_device_batch_size': 16, 'finetune_learning_rate': 2e-05, 'finetune_weight_decay': 0.1, 'finetune_max_grad_norm': 1.0, 'finetune_lr_scheduler_type': 'linear-warmup+cosine-decay', 'finetune_warmup_ratio': 0.03, 'finetune_train_strategy': 'fsdp-full-shard', 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': False}, 'dataset': {'type': 'llava-v15', 'dataset_id': 'llava-v15', 'align_stage_components': ['download/llava-laion-cc-sbu-558k/chat.json', 'download/llava-laion-cc-sbu-558k'], 'finetune_stage_components': ['download/llava-v1.5-instruct/llava_v1_5_mix665k.json', 'download/llava-v1.5-instruct'], 'dataset_root_dir': 'data'}, 'stage': 'finetune', 'pretrained_checkpoint': None, 'run_id': 'prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7', 'run_root_dir': 'runs', 'seed': 7, 'hf_token': 'hf_token.txt', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'Qwen25-Extra-DINOSigLIP-224px-1_5B-full-finetune', 'wandb_entity': 'arash-akbari-stu-northeastern-university', '_wandb': {}}
+ 2025-04-22 09:58:05,162 INFO MainThread:494110 [wandb_init.py:init():809] starting backend
+ 2025-04-22 09:58:05,162 INFO MainThread:494110 [wandb_init.py:init():813] sending inform_init request
+ 2025-04-22 09:58:05,166 INFO MainThread:494110 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+ 2025-04-22 09:58:05,166 INFO MainThread:494110 [wandb_init.py:init():823] backend started and connected
+ 2025-04-22 09:58:05,169 INFO MainThread:494110 [wandb_init.py:init():915] updated telemetry
+ 2025-04-22 09:58:05,175 INFO MainThread:494110 [wandb_init.py:init():939] communicating run to backend with 90.0 second timeout
+ 2025-04-22 09:58:06,046 INFO MainThread:494110 [wandb_init.py:init():1014] starting run threads in backend
+ 2025-04-22 09:58:06,086 INFO MainThread:494110 [wandb_run.py:_console_start():2454] atexit reg
+ 2025-04-22 09:58:06,086 INFO MainThread:494110 [wandb_run.py:_redirect():2306] redirect: wrap_raw
+ 2025-04-22 09:58:06,087 INFO MainThread:494110 [wandb_run.py:_redirect():2371] Wrapping output streams.
+ 2025-04-22 09:58:06,087 INFO MainThread:494110 [wandb_run.py:_redirect():2394] Redirects installed.
+ 2025-04-22 09:58:06,088 INFO MainThread:494110 [wandb_init.py:init():1056] run started, returning control to user process
+ 2025-04-22 09:58:21,093 INFO MsgRouterThr:494110 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 2 handles.
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/run-cpx6iuc8.wandb ADDED
Binary file (13.9 kB).
 
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/files/config.yaml ADDED
@@ -0,0 +1,99 @@
+ _wandb:
+   value:
+     cli_version: 0.19.9
+     m: []
+     python_version: 3.10.16
+     t:
+       "1":
+         - 1
+         - 11
+         - 41
+         - 49
+         - 55
+         - 63
+         - 71
+         - 98
+       "2":
+         - 1
+         - 11
+         - 41
+         - 49
+         - 55
+         - 63
+         - 71
+         - 98
+       "3":
+         - 13
+         - 16
+         - 23
+         - 55
+       "4": 3.10.16
+       "5": 0.19.9
+       "6": 4.51.1
+       "8":
+         - 5
+       "12": 0.19.9
+       "13": linux-x86_64
+ dataset:
+   value:
+     align_stage_components:
+       - download/llava-laion-cc-sbu-558k/chat.json
+       - download/llava-laion-cc-sbu-558k
+     dataset_id: llava-v15
+     dataset_root_dir: data
+     finetune_stage_components:
+       - download/llava-v1.5-instruct/llava_v1_5_mix665k.json
+       - download/llava-v1.5-instruct
+     type: llava-v15
+ hf_token:
+   value: hf_token.txt
+ model:
+   value:
+     align_epochs: 1
+     align_global_batch_size: 8
+     align_learning_rate: 0.001
+     align_lr_scheduler_type: linear-warmup+cosine-decay
+     align_max_grad_norm: 1
+     align_max_steps: null
+     align_per_device_batch_size: 8
+     align_train_strategy: fsdp-shard-grad-op
+     align_warmup_ratio: 0.03
+     align_weight_decay: 0
+     arch_specifier: no-align+fused-gelu-mlp
+     enable_gradient_checkpointing: true
+     enable_mixed_precision_training: true
+     finetune_epochs: 2
+     finetune_global_batch_size: 64
+     finetune_learning_rate: 2e-05
+     finetune_lr_scheduler_type: linear-warmup+cosine-decay
+     finetune_max_grad_norm: 1
+     finetune_max_steps: null
+     finetune_per_device_batch_size: 8
+     finetune_train_strategy: fsdp-full-shard
+     finetune_warmup_ratio: 0.03
+     finetune_weight_decay: 0.1
+     image_resize_strategy: resize-naive
+     llm_backbone_id: qwen25-1_5b-extra
+     llm_max_length: 32768
+     model_id: prism-qwen25-extra-dinosiglip-224px+1_5b
+     reduce_in_full_precision: false
+     type: prism-qwen25-extra-dinosiglip-224px+1_5b
+     vision_backbone_id: dinosiglip-vit-so-224px
+ pretrained_checkpoint:
+   value: null
+ run_id:
+   value: prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7
+ run_root_dir:
+   value: runs
+ seed:
+   value: 7
+ stage:
+   value: finetune
+ trackers:
+   value:
+     - jsonl
+     - wandb
+ wandb_entity:
+   value: arash-akbari-stu-northeastern-university
+ wandb_project:
+   value: Qwen25-Extra-DINOSigLIP-224px-1_5B-full-finetune
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/files/output.log ADDED
@@ -0,0 +1,15 @@
+ 04/22 [10:00:10] INFO | >> [*] Starting Training Loop pretrain.py:238
+ Traceback (most recent call last):
+   File "/home/user1/arashwork/prismatic-vlms/scripts/pretrain.py", line 252, in <module>
+     pretrain()
+   File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/draccus/argparsing.py", line 225, in wrapper_inner
+     response = fn(cfg, *args, **kwargs)
+   File "/home/user1/arashwork/prismatic-vlms/scripts/pretrain.py", line 239, in pretrain
+     train_strategy.run_training(train_dataset, collator, metrics, stage=cfg.stage, seed=cfg.seed)
+   File "/home/user1/arashwork/prismatic-vlms/prismatic/training/strategies/base_strategy.py", line 208, in run_training
+     normalized_loss.backward()
+   File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/_tensor.py", line 522, in backward
+     torch.autograd.backward(
+   File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/autograd/__init__.py", line 266, in backward
+     Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
+ torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 24.57 GiB. GPU 0 has a total capacity of 93.00 GiB of which 23.65 GiB is free. Including non-PyTorch memory, this process has 69.33 GiB memory in use. Of the allocated memory 66.95 GiB is allocated by PyTorch, and 1.40 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/files/requirements.txt ADDED
@@ -0,0 +1,94 @@
+ pyyaml-include==1.4.1
+ torchaudio==2.2.0
+ psutil==7.0.0
+ nvidia-cufft-cu12==11.0.2.54
+ nvidia-nvtx-cu12==12.1.105
+ wheel==0.45.1
+ nvidia-cuda-cupti-cu12==12.1.105
+ pillow==11.1.0
+ draccus==0.10.0
+ Jinja2==3.1.6
+ ninja==1.11.1.4
+ MarkupSafe==3.0.2
+ nvidia-cublas-cu12==12.1.3.1
+ tqdm==4.67.1
+ fsspec==2025.3.2
+ wandb==0.19.9
+ GitPython==3.1.44
+ click==8.1.8
+ timm==0.9.10
+ gitdb==4.0.12
+ nvidia-nvjitlink-cu12==12.8.93
+ nvidia-cudnn-cu12==8.9.2.26
+ mergedeep==1.3.4
+ annotated-types==0.7.0
+ protobuf==5.29.4
+ huggingface-hub==0.30.2
+ mdurl==0.1.2
+ urllib3==2.3.0
+ typing_extensions==4.13.1
+ numpy==1.26.4
+ torchvision==0.17.0
+ nvidia-cusparse-cu12==12.1.0.106
+ networkx==3.4.2
+ regex==2024.11.6
+ mypy-extensions==1.0.0
+ pip==25.0
+ peft==0.15.1
+ sentencepiece==0.2.0
+ Pygments==2.19.1
+ smmap==5.0.2
+ sympy==1.13.3
+ setuptools==75.8.0
+ nvidia-nccl-cu12==2.19.3
+ tokenizers==0.21.1
+ charset-normalizer==3.4.1
+ typing-inspection==0.4.0
+ platformdirs==4.3.7
+ packaging==24.2
+ setproctitle==1.3.5
+ idna==3.10
+ markdown-it-py==3.0.0
+ safetensors==0.5.3
+ rich==14.0.0
+ transformers==4.51.1
+ requests==2.32.3
+ sentry-sdk==2.25.1
+ jsonlines==4.0.0
+ PyYAML==6.0.2
+ pydantic_core==2.33.1
+ flash-attn==2.5.5
+ mpmath==1.3.0
+ attrs==25.3.0
+ einops==0.8.1
+ nvidia-cuda-runtime-cu12==12.1.105
+ nvidia-curand-cu12==10.3.2.106
+ filelock==3.18.0
+ prismatic==0.0.2
+ certifi==2025.1.31
+ accelerate==1.6.0
+ typing-inspect==0.9.0
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ pydantic==2.11.3
+ six==1.17.0
+ nvidia-cusolver-cu12==11.4.5.107
+ torch==2.2.0
+ docker-pycreds==0.4.0
+ toml==0.10.2
+ triton==2.2.0
+ importlib_metadata==8.0.0
+ tomli==2.0.1
+ zipp==3.19.2
+ jaraco.context==5.3.0
+ inflect==7.3.1
+ autocommand==2.2.2
+ typing_extensions==4.12.2
+ jaraco.collections==5.1.0
+ jaraco.functools==4.0.1
+ packaging==24.2
+ wheel==0.43.0
+ backports.tarfile==1.2.0
+ platformdirs==4.2.2
+ more-itertools==10.3.0
+ jaraco.text==3.12.1
+ typeguard==4.3.0
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/files/wandb-metadata.json ADDED
@@ -0,0 +1,94 @@
+ {
+   "os": "Linux-6.8.0-52-generic-x86_64-with-glibc2.35",
+   "python": "CPython 3.10.16",
+   "startedAt": "2025-04-22T17:00:09.283012Z",
+   "args": [
+     "--model.type",
+     "prism-qwen25-extra-dinosiglip-224px+1_5b",
+     "--wandb_project",
+     "Qwen25-Extra-DINOSigLIP-224px-1_5B-full-finetune",
+     "--wandb_entity",
+     "arash-akbari-stu-northeastern-university",
+     "--model.enable_mixed_precision_training",
+     "True"
+   ],
+   "program": "/home/user1/arashwork/prismatic-vlms/scripts/pretrain.py",
+   "codePath": "scripts/pretrain.py",
+   "git": {
+     "remote": "git@github.com:arashakb/prismatic-vlms.git",
+     "commit": "8082f651fe3ba730b13899691a222fdef82d2c66"
+   },
+   "email": "arash.akbari.stu@gmail.com",
+   "root": "runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7",
+   "host": "nnmc72",
+   "executable": "/home/user1/anaconda3/envs/prism/bin/python",
+   "codePathLocal": "scripts/pretrain.py",
+   "cpu_count": 48,
+   "cpu_count_logical": 96,
+   "gpu": "NVIDIA H100 NVL",
+   "gpu_count": 8,
+   "disk": {
+     "/": {
+       "total": "30476149334016",
+       "used": "18270205763584"
+     }
+   },
+   "memory": {
+     "total": "811294752768"
+   },
+   "cpu": {
+     "count": 48,
+     "countLogical": 96
+   },
+   "gpu_nvidia": [
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     }
+   ],
+   "cudaVersion": "12.4"
+ }
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
+ {"_wandb":{"runtime":13}}
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/logs/debug-core.log ADDED
@@ -0,0 +1,13 @@
+ {"time":"2025-04-22T10:00:08.948681376-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpki64cq_w/port-497136.txt","pid":497136,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+ {"time":"2025-04-22T10:00:08.949621573-07:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":497136}
+ {"time":"2025-04-22T10:00:08.949626079-07:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":44241,"Zone":""}}
+ {"time":"2025-04-22T10:00:09.137709691-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:56432"}
+ {"time":"2025-04-22T10:00:09.285230582-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"722cxxmu","id":"127.0.0.1:56432"}
+ {"time":"2025-04-22T10:00:09.9820411-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"722cxxmu","id":"127.0.0.1:56432"}
+ {"time":"2025-04-22T10:00:22.283734161-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:56432"}
+ {"time":"2025-04-22T10:00:22.283869291-07:00","level":"INFO","msg":"server is shutting down"}
+ {"time":"2025-04-22T10:00:22.283862411-07:00","level":"INFO","msg":"connection: closing","id":"127.0.0.1:56432"}
+ {"time":"2025-04-22T10:00:22.28400978-07:00","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:56432"}
+ {"time":"2025-04-22T10:00:22.820172517-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:56432"}
+ {"time":"2025-04-22T10:00:22.820205436-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:56432"}
+ {"time":"2025-04-22T10:00:22.820226758-07:00","level":"INFO","msg":"server is closed"}
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/logs/debug-internal.log ADDED
@@ -0,0 +1,15 @@
+ {"time":"2025-04-22T10:00:09.285532831-07:00","level":"INFO","msg":"stream: starting","core version":"0.19.9","symlink path":"runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250422_100009-722cxxmu/logs/debug-core.log"}
+ {"time":"2025-04-22T10:00:09.98198035-07:00","level":"INFO","msg":"created new stream","id":"722cxxmu"}
+ {"time":"2025-04-22T10:00:09.98203457-07:00","level":"INFO","msg":"stream: started","id":"722cxxmu"}
+ {"time":"2025-04-22T10:00:09.982109321-07:00","level":"INFO","msg":"writer: Do: started","stream_id":"722cxxmu"}
+ {"time":"2025-04-22T10:00:09.982130132-07:00","level":"INFO","msg":"handler: started","stream_id":"722cxxmu"}
+ {"time":"2025-04-22T10:00:09.982171173-07:00","level":"INFO","msg":"sender: started","stream_id":"722cxxmu"}
+ {"time":"2025-04-22T10:00:10.212439726-07:00","level":"INFO","msg":"Starting system monitor"}
+ {"time":"2025-04-22T10:00:22.283892446-07:00","level":"INFO","msg":"stream: closing","id":"722cxxmu"}
+ {"time":"2025-04-22T10:00:22.283943562-07:00","level":"INFO","msg":"Stopping system monitor"}
+ {"time":"2025-04-22T10:00:22.284020005-07:00","level":"INFO","msg":"Stopped system monitor"}
+ {"time":"2025-04-22T10:00:22.67777009-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+ {"time":"2025-04-22T10:00:22.819743269-07:00","level":"INFO","msg":"handler: closed","stream_id":"722cxxmu"}
+ {"time":"2025-04-22T10:00:22.819852882-07:00","level":"INFO","msg":"sender: closed","stream_id":"722cxxmu"}
+ {"time":"2025-04-22T10:00:22.819847424-07:00","level":"INFO","msg":"writer: Close: closed","stream_id":"722cxxmu"}
+ {"time":"2025-04-22T10:00:22.819995704-07:00","level":"INFO","msg":"stream: closed","id":"722cxxmu"}
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/logs/debug.log ADDED
@@ -0,0 +1,23 @@
+ 2025-04-22 10:00:09,276 INFO MainThread:497136 [wandb_setup.py:_flush():67] Current SDK version is 0.19.9
+ 2025-04-22 10:00:09,276 INFO MainThread:497136 [wandb_setup.py:_flush():67] Configure stats pid to 497136
+ 2025-04-22 10:00:09,276 INFO MainThread:497136 [wandb_setup.py:_flush():67] Loading settings from /home/user1/.config/wandb/settings
+ 2025-04-22 10:00:09,276 INFO MainThread:497136 [wandb_setup.py:_flush():67] Loading settings from /home/user1/arashwork/prismatic-vlms/wandb/settings
+ 2025-04-22 10:00:09,276 INFO MainThread:497136 [wandb_setup.py:_flush():67] Loading settings from environment variables
+ 2025-04-22 10:00:09,276 INFO MainThread:497136 [wandb_init.py:setup_run_log_directory():662] Logging user logs to runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250422_100009-722cxxmu/logs/debug.log
+ 2025-04-22 10:00:09,276 INFO MainThread:497136 [wandb_init.py:setup_run_log_directory():663] Logging internal logs to runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250422_100009-722cxxmu/logs/debug-internal.log
+ 2025-04-22 10:00:09,276 INFO MainThread:497136 [wandb_init.py:init():781] calling init triggers
+ 2025-04-22 10:00:09,277 INFO MainThread:497136 [wandb_init.py:init():786] wandb.init called with sweep_config: {}
+ config: {'model': {'type': 'prism-qwen25-extra-dinosiglip-224px+1_5b', 'model_id': 'prism-qwen25-extra-dinosiglip-224px+1_5b', 'arch_specifier': 'no-align+fused-gelu-mlp', 'vision_backbone_id': 'dinosiglip-vit-so-224px', 'llm_backbone_id': 'qwen25-1_5b-extra', 'image_resize_strategy': 'resize-naive', 'llm_max_length': 32768, 'align_epochs': 1, 'align_max_steps': None, 'align_global_batch_size': 8, 'align_per_device_batch_size': 8, 'align_learning_rate': 0.001, 'align_weight_decay': 0.0, 'align_max_grad_norm': 1.0, 'align_lr_scheduler_type': 'linear-warmup+cosine-decay', 'align_warmup_ratio': 0.03, 'align_train_strategy': 'fsdp-shard-grad-op', 'finetune_epochs': 2, 'finetune_max_steps': None, 'finetune_global_batch_size': 64, 'finetune_per_device_batch_size': 8, 'finetune_learning_rate': 2e-05, 'finetune_weight_decay': 0.1, 'finetune_max_grad_norm': 1.0, 'finetune_lr_scheduler_type': 'linear-warmup+cosine-decay', 'finetune_warmup_ratio': 0.03, 'finetune_train_strategy': 'fsdp-full-shard', 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': False}, 'dataset': {'type': 'llava-v15', 'dataset_id': 'llava-v15', 'align_stage_components': ['download/llava-laion-cc-sbu-558k/chat.json', 'download/llava-laion-cc-sbu-558k'], 'finetune_stage_components': ['download/llava-v1.5-instruct/llava_v1_5_mix665k.json', 'download/llava-v1.5-instruct'], 'dataset_root_dir': 'data'}, 'stage': 'finetune', 'pretrained_checkpoint': None, 'run_id': 'prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7', 'run_root_dir': 'runs', 'seed': 7, 'hf_token': 'hf_token.txt', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'Qwen25-Extra-DINOSigLIP-224px-1_5B-full-finetune', 'wandb_entity': 'arash-akbari-stu-northeastern-university', '_wandb': {}}
+ 2025-04-22 10:00:09,277 INFO MainThread:497136 [wandb_init.py:init():809] starting backend
+ 2025-04-22 10:00:09,277 INFO MainThread:497136 [wandb_init.py:init():813] sending inform_init request
+ 2025-04-22 10:00:09,282 INFO MainThread:497136 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+ 2025-04-22 10:00:09,282 INFO MainThread:497136 [wandb_init.py:init():823] backend started and connected
+ 2025-04-22 10:00:09,285 INFO MainThread:497136 [wandb_init.py:init():915] updated telemetry
+ 2025-04-22 10:00:09,290 INFO MainThread:497136 [wandb_init.py:init():939] communicating run to backend with 90.0 second timeout
+ 2025-04-22 10:00:10,209 INFO MainThread:497136 [wandb_init.py:init():1014] starting run threads in backend
+ 2025-04-22 10:00:10,279 INFO MainThread:497136 [wandb_run.py:_console_start():2454] atexit reg
+ 2025-04-22 10:00:10,279 INFO MainThread:497136 [wandb_run.py:_redirect():2306] redirect: wrap_raw
+ 2025-04-22 10:00:10,280 INFO MainThread:497136 [wandb_run.py:_redirect():2371] Wrapping output streams.
+ 2025-04-22 10:00:10,280 INFO MainThread:497136 [wandb_run.py:_redirect():2394] Redirects installed.
+ 2025-04-22 10:00:10,281 INFO MainThread:497136 [wandb_init.py:init():1056] run started, returning control to user process
+ 2025-04-22 10:00:22,283 INFO MsgRouterThr:497136 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/run-722cxxmu.wandb ADDED
Binary file (5.85 kB).
 
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100320-p7tc08z7/files/config.yaml ADDED
@@ -0,0 +1,99 @@
+ _wandb:
+   value:
+     cli_version: 0.19.9
+     m: []
+     python_version: 3.10.16
+     t:
+       "1":
+         - 1
+         - 11
+         - 41
+         - 49
+         - 55
+         - 63
+         - 71
+         - 98
+       "2":
+         - 1
+         - 11
+         - 41
+         - 49
+         - 55
+         - 63
+         - 71
+         - 98
+       "3":
+         - 13
+         - 16
+         - 23
+         - 55
+       "4": 3.10.16
+       "5": 0.19.9
+       "6": 4.51.1
+       "8":
+         - 5
+       "12": 0.19.9
+       "13": linux-x86_64
+ dataset:
+   value:
+     align_stage_components:
+       - download/llava-laion-cc-sbu-558k/chat.json
+       - download/llava-laion-cc-sbu-558k
+     dataset_id: llava-v15
+     dataset_root_dir: data
+     finetune_stage_components:
+       - download/llava-v1.5-instruct/llava_v1_5_mix665k.json
+       - download/llava-v1.5-instruct
+     type: llava-v15
+ hf_token:
+   value: hf_token.txt
+ model:
+   value:
+     align_epochs: 1
+     align_global_batch_size: 8
+     align_learning_rate: 0.001
+     align_lr_scheduler_type: linear-warmup+cosine-decay
+     align_max_grad_norm: 1
+     align_max_steps: null
+     align_per_device_batch_size: 8
+     align_train_strategy: fsdp-shard-grad-op
+     align_warmup_ratio: 0.03
+     align_weight_decay: 0
+     arch_specifier: no-align+fused-gelu-mlp
+     enable_gradient_checkpointing: true
+     enable_mixed_precision_training: true
+     finetune_epochs: 2
+     finetune_global_batch_size: 56
+     finetune_learning_rate: 2e-05
+     finetune_lr_scheduler_type: linear-warmup+cosine-decay
+     finetune_max_grad_norm: 1
+     finetune_max_steps: null
+     finetune_per_device_batch_size: 8
+     finetune_train_strategy: fsdp-full-shard
+     finetune_warmup_ratio: 0.03
+     finetune_weight_decay: 0.1
+     image_resize_strategy: resize-naive
+     llm_backbone_id: qwen25-1_5b-extra
+     llm_max_length: 32768
+     model_id: prism-qwen25-extra-dinosiglip-224px+1_5b
+     reduce_in_full_precision: false
+     type: prism-qwen25-extra-dinosiglip-224px+1_5b
+     vision_backbone_id: dinosiglip-vit-so-224px
+ pretrained_checkpoint:
+   value: null
+ run_id:
+   value: prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7
+ run_root_dir:
+   value: runs
+ seed:
+   value: 7
+ stage:
+   value: finetune
+ trackers:
+   value:
+     - jsonl
+     - wandb
+ wandb_entity:
+   value: arash-akbari-stu-northeastern-university
+ wandb_project:
+   value: Qwen25-Extra-DINOSigLIP-224px-1_5B-full-finetune
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100320-p7tc08z7/files/output.log ADDED
@@ -0,0 +1,15 @@
+ 04/22 [10:03:21] INFO | >> [*] Starting Training Loop pretrain.py:238
+ Traceback (most recent call last):
+   File "/home/user1/arashwork/prismatic-vlms/scripts/pretrain.py", line 252, in <module>
+     pretrain()
+   File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/draccus/argparsing.py", line 225, in wrapper_inner
+     response = fn(cfg, *args, **kwargs)
+   File "/home/user1/arashwork/prismatic-vlms/scripts/pretrain.py", line 239, in pretrain
+     train_strategy.run_training(train_dataset, collator, metrics, stage=cfg.stage, seed=cfg.seed)
+   File "/home/user1/arashwork/prismatic-vlms/prismatic/training/strategies/base_strategy.py", line 208, in run_training
+     normalized_loss.backward()
+   File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/_tensor.py", line 522, in backward
+     torch.autograd.backward(
+   File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/autograd/__init__.py", line 266, in backward
+     Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
+ torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 24.57 GiB. GPU 0 has a total capacity of 93.00 GiB of which 23.56 GiB is free. Including non-PyTorch memory, this process has 69.42 GiB memory in use. Of the allocated memory 67.09 GiB is allocated by PyTorch, and 1.35 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100320-p7tc08z7/files/requirements.txt ADDED
@@ -0,0 +1,94 @@
+ pyyaml-include==1.4.1
+ torchaudio==2.2.0
+ psutil==7.0.0
+ nvidia-cufft-cu12==11.0.2.54
+ nvidia-nvtx-cu12==12.1.105
+ wheel==0.45.1
+ nvidia-cuda-cupti-cu12==12.1.105
+ pillow==11.1.0
+ draccus==0.10.0
+ Jinja2==3.1.6
+ ninja==1.11.1.4
+ MarkupSafe==3.0.2
+ nvidia-cublas-cu12==12.1.3.1
+ tqdm==4.67.1
+ fsspec==2025.3.2
+ wandb==0.19.9
+ GitPython==3.1.44
+ click==8.1.8
+ timm==0.9.10
+ gitdb==4.0.12
+ nvidia-nvjitlink-cu12==12.8.93
+ nvidia-cudnn-cu12==8.9.2.26
+ mergedeep==1.3.4
+ annotated-types==0.7.0
+ protobuf==5.29.4
+ huggingface-hub==0.30.2
+ mdurl==0.1.2
+ urllib3==2.3.0
+ typing_extensions==4.13.1
+ numpy==1.26.4
+ torchvision==0.17.0
+ nvidia-cusparse-cu12==12.1.0.106
+ networkx==3.4.2
+ regex==2024.11.6
+ mypy-extensions==1.0.0
+ pip==25.0
+ peft==0.15.1
+ sentencepiece==0.2.0
+ Pygments==2.19.1
+ smmap==5.0.2
+ sympy==1.13.3
+ setuptools==75.8.0
+ nvidia-nccl-cu12==2.19.3
+ tokenizers==0.21.1
+ charset-normalizer==3.4.1
+ typing-inspection==0.4.0
+ platformdirs==4.3.7
+ packaging==24.2
+ setproctitle==1.3.5
+ idna==3.10
+ markdown-it-py==3.0.0
+ safetensors==0.5.3
+ rich==14.0.0
+ transformers==4.51.1
+ requests==2.32.3
+ sentry-sdk==2.25.1
+ jsonlines==4.0.0
+ PyYAML==6.0.2
+ pydantic_core==2.33.1
+ flash-attn==2.5.5
+ mpmath==1.3.0
+ attrs==25.3.0
+ einops==0.8.1
+ nvidia-cuda-runtime-cu12==12.1.105
+ nvidia-curand-cu12==10.3.2.106
+ filelock==3.18.0
+ prismatic==0.0.2
+ certifi==2025.1.31
+ accelerate==1.6.0
+ typing-inspect==0.9.0
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ pydantic==2.11.3
+ six==1.17.0
+ nvidia-cusolver-cu12==11.4.5.107
+ torch==2.2.0
+ docker-pycreds==0.4.0
+ toml==0.10.2
+ triton==2.2.0
+ importlib_metadata==8.0.0
+ tomli==2.0.1
+ zipp==3.19.2
+ jaraco.context==5.3.0
+ inflect==7.3.1
+ autocommand==2.2.2
+ typing_extensions==4.12.2
+ jaraco.collections==5.1.0
+ jaraco.functools==4.0.1
+ packaging==24.2
+ wheel==0.43.0
+ backports.tarfile==1.2.0
+ platformdirs==4.2.2
+ more-itertools==10.3.0
+ jaraco.text==3.12.1
+ typeguard==4.3.0
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100320-p7tc08z7/files/wandb-metadata.json ADDED
@@ -0,0 +1,94 @@
+ {
+   "os": "Linux-6.8.0-52-generic-x86_64-with-glibc2.35",
+   "python": "CPython 3.10.16",
+   "startedAt": "2025-04-22T17:03:20.553675Z",
+   "args": [
+     "--model.type",
+     "prism-qwen25-extra-dinosiglip-224px+1_5b",
+     "--wandb_project",
+     "Qwen25-Extra-DINOSigLIP-224px-1_5B-full-finetune",
+     "--wandb_entity",
+     "arash-akbari-stu-northeastern-university",
+     "--model.enable_mixed_precision_training",
+     "True"
+   ],
+   "program": "/home/user1/arashwork/prismatic-vlms/scripts/pretrain.py",
+   "codePath": "scripts/pretrain.py",
+   "git": {
+     "remote": "git@github.com:arashakb/prismatic-vlms.git",
+     "commit": "8082f651fe3ba730b13899691a222fdef82d2c66"
+   },
+   "email": "arash.akbari.stu@gmail.com",
+   "root": "runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7",
+   "host": "nnmc72",
+   "executable": "/home/user1/anaconda3/envs/prism/bin/python",
+   "codePathLocal": "scripts/pretrain.py",
+   "cpu_count": 48,
+   "cpu_count_logical": 96,
+   "gpu": "NVIDIA H100 NVL",
+   "gpu_count": 8,
+   "disk": {
+     "/": {
+       "total": "30476149334016",
+       "used": "18270206685184"
+     }
+   },
+   "memory": {
+     "total": "811294752768"
+   },
+   "cpu": {
+     "count": 48,
+     "countLogical": 96
+   },
+   "gpu_nvidia": [
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     },
+     {
+       "name": "NVIDIA H100 NVL",
+       "memoryTotal": "100485038080",
+       "cudaCores": 16896,
+       "architecture": "Hopper"
+     }
+   ],
+   "cudaVersion": "12.4"
+ }
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100320-p7tc08z7/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
+ {"_wandb":{"runtime":11}}