Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +10 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/checkpoints/latest-checkpoint.pt +3 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/checkpoints/step-055444-epoch-01-loss=0.6558.pt +3 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/config.json +59 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/config.yaml +52 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7.jsonl +3 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/run-metrics.jsonl +1 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/debug-internal.log +15 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/debug.log +23 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/files/config.yaml +100 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/files/output.log +2 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/files/requirements.txt +78 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/files/wandb-metadata.json +94 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/files/wandb-summary.json +1 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/logs/debug-core.log +10 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/logs/debug-internal.log +15 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/logs/debug.log +23 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/run-6hhs8vrz.wandb +3 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/files/config.yaml +100 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/files/output.log +2 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/files/requirements.txt +94 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/files/wandb-metadata.json +94 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/files/wandb-summary.json +1 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/logs/debug-core.log +10 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/logs/debug-internal.log +15 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/logs/debug.log +23 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/run-e4gyde5j.wandb +0 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/files/config.yaml +99 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/files/output.log +45 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/files/requirements.txt +94 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/files/wandb-metadata.json +94 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/files/wandb-summary.json +1 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/logs/debug-core.log +14 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/logs/debug-internal.log +15 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/logs/debug.log +23 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/run-cpx6iuc8.wandb +0 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/files/config.yaml +99 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/files/output.log +15 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/files/requirements.txt +94 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/files/wandb-metadata.json +94 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/files/wandb-summary.json +1 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/logs/debug-core.log +13 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/logs/debug-internal.log +15 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/logs/debug.log +23 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/run-722cxxmu.wandb +0 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100320-p7tc08z7/files/config.yaml +99 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100320-p7tc08z7/files/output.log +15 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100320-p7tc08z7/files/requirements.txt +94 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100320-p7tc08z7/files/wandb-metadata.json +94 -0
- prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100320-p7tc08z7/files/wandb-summary.json +1 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/latest-run/run-6hhs8vrz.wandb filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250422_100515-lvbcavkp/run-lvbcavkp.wandb filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250422_111805-th1rdka5/run-th1rdka5.wandb filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250502_114824-6hhs8vrz/run-6hhs8vrz.wandb filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/run-6hhs8vrz.wandb filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100515-lvbcavkp/run-lvbcavkp.wandb filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_111805-th1rdka5/run-th1rdka5.wandb filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250502_114824-6hhs8vrz/run-6hhs8vrz.wandb filter=lfs diff=lfs merge=lfs -text
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/checkpoints/latest-checkpoint.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:521937aca5962573d06f63e6fd4cfff59566869578a4eb8dade3e5c7cfcb19ae
|
| 3 |
+
size 7247200761
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/checkpoints/step-055444-epoch-01-loss=0.6558.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:521937aca5962573d06f63e6fd4cfff59566869578a4eb8dade3e5c7cfcb19ae
|
| 3 |
+
size 7247200761
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": {
|
| 3 |
+
"align_stage_components": [
|
| 4 |
+
"download/llava-laion-cc-sbu-558k/chat.json",
|
| 5 |
+
"download/llava-laion-cc-sbu-558k"
|
| 6 |
+
],
|
| 7 |
+
"dataset_id": "llava-v15",
|
| 8 |
+
"dataset_root_dir": "data",
|
| 9 |
+
"finetune_stage_components": [
|
| 10 |
+
"download/llava-v1.5-instruct/llava_v1_5_mix665k.json",
|
| 11 |
+
"download/llava-v1.5-instruct"
|
| 12 |
+
],
|
| 13 |
+
"type": "llava-v15"
|
| 14 |
+
},
|
| 15 |
+
"hf_token": "hf_token.txt",
|
| 16 |
+
"model": {
|
| 17 |
+
"align_epochs": 1,
|
| 18 |
+
"align_global_batch_size": 8,
|
| 19 |
+
"align_learning_rate": 0.001,
|
| 20 |
+
"align_lr_scheduler_type": "linear-warmup+cosine-decay",
|
| 21 |
+
"align_max_grad_norm": 1.0,
|
| 22 |
+
"align_max_steps": null,
|
| 23 |
+
"align_per_device_batch_size": 8,
|
| 24 |
+
"align_train_strategy": "fsdp-shard-grad-op",
|
| 25 |
+
"align_warmup_ratio": 0.03,
|
| 26 |
+
"align_weight_decay": 0.0,
|
| 27 |
+
"arch_specifier": "no-align+fused-gelu-mlp",
|
| 28 |
+
"enable_gradient_checkpointing": true,
|
| 29 |
+
"enable_mixed_precision_training": true,
|
| 30 |
+
"finetune_epochs": 2,
|
| 31 |
+
"finetune_global_batch_size": 24,
|
| 32 |
+
"finetune_learning_rate": 2e-05,
|
| 33 |
+
"finetune_lr_scheduler_type": "linear-warmup+cosine-decay",
|
| 34 |
+
"finetune_max_grad_norm": 1.0,
|
| 35 |
+
"finetune_max_steps": null,
|
| 36 |
+
"finetune_per_device_batch_size": 6,
|
| 37 |
+
"finetune_train_strategy": "fsdp-full-shard",
|
| 38 |
+
"finetune_warmup_ratio": 0.03,
|
| 39 |
+
"finetune_weight_decay": 0.1,
|
| 40 |
+
"image_resize_strategy": "resize-naive",
|
| 41 |
+
"llm_backbone_id": "qwen25-1_5b-extra",
|
| 42 |
+
"llm_max_length": 32768,
|
| 43 |
+
"model_id": "prism-qwen25-extra-dinosiglip-224px+1_5b",
|
| 44 |
+
"reduce_in_full_precision": false,
|
| 45 |
+
"type": "prism-qwen25-extra-dinosiglip-224px+1_5b",
|
| 46 |
+
"vision_backbone_id": "dinosiglip-vit-so-224px"
|
| 47 |
+
},
|
| 48 |
+
"pretrained_checkpoint": null,
|
| 49 |
+
"run_id": "prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7",
|
| 50 |
+
"run_root_dir": "runs",
|
| 51 |
+
"seed": 7,
|
| 52 |
+
"stage": "finetune",
|
| 53 |
+
"trackers": [
|
| 54 |
+
"jsonl",
|
| 55 |
+
"wandb"
|
| 56 |
+
],
|
| 57 |
+
"wandb_entity": "arash-akbari-stu-northeastern-university",
|
| 58 |
+
"wandb_project": "Moxin-VLM"
|
| 59 |
+
}
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/config.yaml
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset:
|
| 2 |
+
align_stage_components:
|
| 3 |
+
- download/llava-laion-cc-sbu-558k/chat.json
|
| 4 |
+
- download/llava-laion-cc-sbu-558k
|
| 5 |
+
dataset_id: llava-v15
|
| 6 |
+
dataset_root_dir: data
|
| 7 |
+
finetune_stage_components:
|
| 8 |
+
- download/llava-v1.5-instruct/llava_v1_5_mix665k.json
|
| 9 |
+
- download/llava-v1.5-instruct
|
| 10 |
+
type: llava-v15
|
| 11 |
+
hf_token: hf_token.txt
|
| 12 |
+
model:
|
| 13 |
+
align_epochs: 1
|
| 14 |
+
align_global_batch_size: 8
|
| 15 |
+
align_learning_rate: 0.001
|
| 16 |
+
align_lr_scheduler_type: linear-warmup+cosine-decay
|
| 17 |
+
align_max_grad_norm: 1.0
|
| 18 |
+
align_max_steps: null
|
| 19 |
+
align_per_device_batch_size: 8
|
| 20 |
+
align_train_strategy: fsdp-shard-grad-op
|
| 21 |
+
align_warmup_ratio: 0.03
|
| 22 |
+
align_weight_decay: 0.0
|
| 23 |
+
arch_specifier: no-align+fused-gelu-mlp
|
| 24 |
+
enable_gradient_checkpointing: true
|
| 25 |
+
enable_mixed_precision_training: true
|
| 26 |
+
finetune_epochs: 2
|
| 27 |
+
finetune_global_batch_size: 24
|
| 28 |
+
finetune_learning_rate: 2.0e-05
|
| 29 |
+
finetune_lr_scheduler_type: linear-warmup+cosine-decay
|
| 30 |
+
finetune_max_grad_norm: 1.0
|
| 31 |
+
finetune_max_steps: null
|
| 32 |
+
finetune_per_device_batch_size: 6
|
| 33 |
+
finetune_train_strategy: fsdp-full-shard
|
| 34 |
+
finetune_warmup_ratio: 0.03
|
| 35 |
+
finetune_weight_decay: 0.1
|
| 36 |
+
image_resize_strategy: resize-naive
|
| 37 |
+
llm_backbone_id: qwen25-1_5b-extra
|
| 38 |
+
llm_max_length: 32768
|
| 39 |
+
model_id: prism-qwen25-extra-dinosiglip-224px+1_5b
|
| 40 |
+
reduce_in_full_precision: false
|
| 41 |
+
type: prism-qwen25-extra-dinosiglip-224px+1_5b
|
| 42 |
+
vision_backbone_id: dinosiglip-vit-so-224px
|
| 43 |
+
pretrained_checkpoint: null
|
| 44 |
+
run_id: prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7
|
| 45 |
+
run_root_dir: runs
|
| 46 |
+
seed: 7
|
| 47 |
+
stage: finetune
|
| 48 |
+
trackers:
|
| 49 |
+
- jsonl
|
| 50 |
+
- wandb
|
| 51 |
+
wandb_entity: arash-akbari-stu-northeastern-university
|
| 52 |
+
wandb_project: Moxin-VLM
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0960f58bd0a805c19ffbfe2ec514c34218a1bdf2636b4444f0aa326ec2e87333
|
| 3 |
+
size 11339854
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/run-metrics.jsonl
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"hparams": {"dataset": {"align_stage_components": ["download/llava-laion-cc-sbu-558k/chat.json", "download/llava-laion-cc-sbu-558k"], "dataset_id": "llava-v15", "dataset_root_dir": "data", "finetune_stage_components": ["download/llava-v1.5-instruct/llava_v1_5_mix665k.json", "download/llava-v1.5-instruct"], "type": "llava-v15"}, "hf_token": "hf_token.txt", "model": {"align_epochs": 1, "align_global_batch_size": 8, "align_learning_rate": 0.001, "align_lr_scheduler_type": "linear-warmup+cosine-decay", "align_max_grad_norm": 1.0, "align_max_steps": null, "align_per_device_batch_size": 8, "align_train_strategy": "fsdp-shard-grad-op", "align_warmup_ratio": 0.03, "align_weight_decay": 0.0, "arch_specifier": "no-align+fused-gelu-mlp", "enable_gradient_checkpointing": true, "enable_mixed_precision_training": true, "finetune_epochs": 2, "finetune_global_batch_size": 24, "finetune_learning_rate": 2e-05, "finetune_lr_scheduler_type": "linear-warmup+cosine-decay", "finetune_max_grad_norm": 1.0, "finetune_max_steps": null, "finetune_per_device_batch_size": 6, "finetune_train_strategy": "fsdp-full-shard", "finetune_warmup_ratio": 0.03, "finetune_weight_decay": 0.1, "image_resize_strategy": "resize-naive", "llm_backbone_id": "qwen25-1_5b-extra", "llm_max_length": 32768, "model_id": "prism-qwen25-extra-dinosiglip-224px+1_5b", "reduce_in_full_precision": false, "type": "prism-qwen25-extra-dinosiglip-224px+1_5b", "vision_backbone_id": "dinosiglip-vit-so-224px"}, "pretrained_checkpoint": null, "run_id": "prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7", "run_root_dir": "runs", "seed": 7, "stage": "finetune", "trackers": ["jsonl", "wandb"], "wandb_entity": "arash-akbari-stu-northeastern-university", "wandb_project": "Moxin-VLM"}, "run_id": "prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7"}
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/debug-internal.log
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-05-02T11:48:24.512248306-07:00","level":"INFO","msg":"stream: starting","core version":"0.19.9","symlink path":"runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250502_114824-6hhs8vrz/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-05-02T11:48:25.628691797-07:00","level":"INFO","msg":"created new stream","id":"6hhs8vrz"}
|
| 3 |
+
{"time":"2025-05-02T11:48:25.62873432-07:00","level":"INFO","msg":"stream: started","id":"6hhs8vrz"}
|
| 4 |
+
{"time":"2025-05-02T11:48:25.62879471-07:00","level":"INFO","msg":"handler: started","stream_id":"6hhs8vrz"}
|
| 5 |
+
{"time":"2025-05-02T11:48:25.628819016-07:00","level":"INFO","msg":"writer: Do: started","stream_id":"6hhs8vrz"}
|
| 6 |
+
{"time":"2025-05-02T11:48:25.628835531-07:00","level":"INFO","msg":"sender: started","stream_id":"6hhs8vrz"}
|
| 7 |
+
{"time":"2025-05-02T11:48:25.959126298-07:00","level":"INFO","msg":"Starting system monitor"}
|
| 8 |
+
{"time":"2025-05-02T11:50:45.633111697-07:00","level":"INFO","msg":"stream: closing","id":"6hhs8vrz"}
|
| 9 |
+
{"time":"2025-05-02T11:50:45.633155452-07:00","level":"INFO","msg":"Stopping system monitor"}
|
| 10 |
+
{"time":"2025-05-02T11:50:45.633187911-07:00","level":"INFO","msg":"Stopped system monitor"}
|
| 11 |
+
{"time":"2025-05-02T11:50:46.081032653-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 12 |
+
{"time":"2025-05-02T11:50:46.287259059-07:00","level":"INFO","msg":"handler: closed","stream_id":"6hhs8vrz"}
|
| 13 |
+
{"time":"2025-05-02T11:50:46.287318307-07:00","level":"INFO","msg":"writer: Close: closed","stream_id":"6hhs8vrz"}
|
| 14 |
+
{"time":"2025-05-02T11:50:46.287337876-07:00","level":"INFO","msg":"sender: closed","stream_id":"6hhs8vrz"}
|
| 15 |
+
{"time":"2025-05-02T11:50:46.287531815-07:00","level":"INFO","msg":"stream: closed","id":"6hhs8vrz"}
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/debug.log
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_setup.py:_flush():67] Current SDK version is 0.19.9
|
| 2 |
+
2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_setup.py:_flush():67] Configure stats pid to 1932762
|
| 3 |
+
2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_setup.py:_flush():67] Loading settings from /home/user1/.config/wandb/settings
|
| 4 |
+
2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_setup.py:_flush():67] Loading settings from /home/user1/arashwork/prismatic-vlms/wandb/settings
|
| 5 |
+
2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_setup.py:_flush():67] Loading settings from environment variables
|
| 6 |
+
2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:setup_run_log_directory():662] Logging user logs to runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250502_114824-6hhs8vrz/logs/debug.log
|
| 7 |
+
2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:setup_run_log_directory():663] Logging internal logs to runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250502_114824-6hhs8vrz/logs/debug-internal.log
|
| 8 |
+
2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:init():781] calling init triggers
|
| 9 |
+
2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:init():786] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'model': {'type': 'prism-qwen25-extra-dinosiglip-224px+1_5b', 'model_id': 'prism-qwen25-extra-dinosiglip-224px+1_5b', 'arch_specifier': 'no-align+fused-gelu-mlp', 'vision_backbone_id': 'dinosiglip-vit-so-224px', 'llm_backbone_id': 'qwen25-1_5b-extra', 'image_resize_strategy': 'resize-naive', 'llm_max_length': 32768, 'align_epochs': 1, 'align_max_steps': None, 'align_global_batch_size': 8, 'align_per_device_batch_size': 8, 'align_learning_rate': 0.001, 'align_weight_decay': 0.0, 'align_max_grad_norm': 1.0, 'align_lr_scheduler_type': 'linear-warmup+cosine-decay', 'align_warmup_ratio': 0.03, 'align_train_strategy': 'fsdp-shard-grad-op', 'finetune_epochs': 2, 'finetune_max_steps': None, 'finetune_global_batch_size': 24, 'finetune_per_device_batch_size': 6, 'finetune_learning_rate': 2e-05, 'finetune_weight_decay': 0.1, 'finetune_max_grad_norm': 1.0, 'finetune_lr_scheduler_type': 'linear-warmup+cosine-decay', 'finetune_warmup_ratio': 0.03, 'finetune_train_strategy': 'fsdp-full-shard', 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': False}, 'dataset': {'type': 'llava-v15', 'dataset_id': 'llava-v15', 'align_stage_components': ['download/llava-laion-cc-sbu-558k/chat.json', 'download/llava-laion-cc-sbu-558k'], 'finetune_stage_components': ['download/llava-v1.5-instruct/llava_v1_5_mix665k.json', 'download/llava-v1.5-instruct'], 'dataset_root_dir': 'data'}, 'stage': 'finetune', 'pretrained_checkpoint': None, 'run_id': 'prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7', 'run_root_dir': 'runs', 'seed': 7, 'hf_token': 'hf_token.txt', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'Moxin-VLM', 'wandb_entity': 'arash-akbari-stu-northeastern-university', '_wandb': {}}
|
| 11 |
+
2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:init():809] starting backend
|
| 12 |
+
2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:init():813] sending inform_init request
|
| 13 |
+
2025-05-02 11:48:24,510 INFO MainThread:1932762 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 14 |
+
2025-05-02 11:48:24,510 INFO MainThread:1932762 [wandb_init.py:init():823] backend started and connected
|
| 15 |
+
2025-05-02 11:48:24,512 INFO MainThread:1932762 [wandb_init.py:init():915] updated telemetry
|
| 16 |
+
2025-05-02 11:48:24,517 INFO MainThread:1932762 [wandb_init.py:init():939] communicating run to backend with 90.0 second timeout
|
| 17 |
+
2025-05-02 11:48:25,957 INFO MainThread:1932762 [wandb_init.py:init():1014] starting run threads in backend
|
| 18 |
+
2025-05-02 11:48:26,000 INFO MainThread:1932762 [wandb_run.py:_console_start():2454] atexit reg
|
| 19 |
+
2025-05-02 11:48:26,000 INFO MainThread:1932762 [wandb_run.py:_redirect():2306] redirect: wrap_raw
|
| 20 |
+
2025-05-02 11:48:26,000 INFO MainThread:1932762 [wandb_run.py:_redirect():2371] Wrapping output streams.
|
| 21 |
+
2025-05-02 11:48:26,000 INFO MainThread:1932762 [wandb_run.py:_redirect():2394] Redirects installed.
|
| 22 |
+
2025-05-02 11:48:26,002 INFO MainThread:1932762 [wandb_init.py:init():1056] run started, returning control to user process
|
| 23 |
+
2025-05-02 11:50:45,632 INFO MsgRouterThr:1932762 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/files/config.yaml
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_wandb:
|
| 2 |
+
value:
|
| 3 |
+
cli_version: 0.19.9
|
| 4 |
+
m: []
|
| 5 |
+
python_version: 3.10.16
|
| 6 |
+
t:
|
| 7 |
+
"1":
|
| 8 |
+
- 1
|
| 9 |
+
- 11
|
| 10 |
+
- 41
|
| 11 |
+
- 49
|
| 12 |
+
- 55
|
| 13 |
+
- 63
|
| 14 |
+
- 71
|
| 15 |
+
- 98
|
| 16 |
+
"2":
|
| 17 |
+
- 1
|
| 18 |
+
- 11
|
| 19 |
+
- 41
|
| 20 |
+
- 49
|
| 21 |
+
- 55
|
| 22 |
+
- 63
|
| 23 |
+
- 71
|
| 24 |
+
- 98
|
| 25 |
+
"3":
|
| 26 |
+
- 13
|
| 27 |
+
- 16
|
| 28 |
+
- 23
|
| 29 |
+
- 55
|
| 30 |
+
- 61
|
| 31 |
+
"4": 3.10.16
|
| 32 |
+
"5": 0.19.9
|
| 33 |
+
"6": 4.38.1
|
| 34 |
+
"8":
|
| 35 |
+
- 5
|
| 36 |
+
"12": 0.19.9
|
| 37 |
+
"13": linux-x86_64
|
| 38 |
+
dataset:
|
| 39 |
+
value:
|
| 40 |
+
align_stage_components:
|
| 41 |
+
- download/llava-laion-cc-sbu-558k/chat.json
|
| 42 |
+
- download/llava-laion-cc-sbu-558k
|
| 43 |
+
dataset_id: llava-v15
|
| 44 |
+
dataset_root_dir: data
|
| 45 |
+
finetune_stage_components:
|
| 46 |
+
- download/llava-v1.5-instruct/llava_v1_5_mix665k.json
|
| 47 |
+
- download/llava-v1.5-instruct
|
| 48 |
+
type: llava-v15
|
| 49 |
+
hf_token:
|
| 50 |
+
value: hf_token.txt
|
| 51 |
+
model:
|
| 52 |
+
value:
|
| 53 |
+
align_epochs: 1
|
| 54 |
+
align_global_batch_size: 8
|
| 55 |
+
align_learning_rate: 0.001
|
| 56 |
+
align_lr_scheduler_type: linear-warmup+cosine-decay
|
| 57 |
+
align_max_grad_norm: 1
|
| 58 |
+
align_max_steps: null
|
| 59 |
+
align_per_device_batch_size: 8
|
| 60 |
+
align_train_strategy: fsdp-shard-grad-op
|
| 61 |
+
align_warmup_ratio: 0.03
|
| 62 |
+
align_weight_decay: 0
|
| 63 |
+
arch_specifier: no-align+fused-gelu-mlp
|
| 64 |
+
enable_gradient_checkpointing: true
|
| 65 |
+
enable_mixed_precision_training: true
|
| 66 |
+
finetune_epochs: 2
|
| 67 |
+
finetune_global_batch_size: 24
|
| 68 |
+
finetune_learning_rate: 2e-05
|
| 69 |
+
finetune_lr_scheduler_type: linear-warmup+cosine-decay
|
| 70 |
+
finetune_max_grad_norm: 1
|
| 71 |
+
finetune_max_steps: null
|
| 72 |
+
finetune_per_device_batch_size: 6
|
| 73 |
+
finetune_train_strategy: fsdp-full-shard
|
| 74 |
+
finetune_warmup_ratio: 0.03
|
| 75 |
+
finetune_weight_decay: 0.1
|
| 76 |
+
image_resize_strategy: resize-naive
|
| 77 |
+
llm_backbone_id: qwen25-1_5b-extra
|
| 78 |
+
llm_max_length: 32768
|
| 79 |
+
model_id: prism-qwen25-extra-dinosiglip-224px+1_5b
|
| 80 |
+
reduce_in_full_precision: false
|
| 81 |
+
type: prism-qwen25-extra-dinosiglip-224px+1_5b
|
| 82 |
+
vision_backbone_id: dinosiglip-vit-so-224px
|
| 83 |
+
pretrained_checkpoint:
|
| 84 |
+
value: null
|
| 85 |
+
run_id:
|
| 86 |
+
value: prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7
|
| 87 |
+
run_root_dir:
|
| 88 |
+
value: runs
|
| 89 |
+
seed:
|
| 90 |
+
value: 7
|
| 91 |
+
stage:
|
| 92 |
+
value: finetune
|
| 93 |
+
trackers:
|
| 94 |
+
value:
|
| 95 |
+
- jsonl
|
| 96 |
+
- wandb
|
| 97 |
+
wandb_entity:
|
| 98 |
+
value: arash-akbari-stu-northeastern-university
|
| 99 |
+
wandb_project:
|
| 100 |
+
value: Moxin-VLM
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/files/output.log
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
05/02 [11:48:26] INFO | >> [*] Starting Training Loop pretrain.py:238
|
| 2 |
+
=>> [Global Step] 000112 =>> LR :: 0.000001 -- Loss :: 2.4465: 0%| | 112/55442 [02:15<16:32:24, 1.08s/it]
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/files/requirements.txt
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pyyaml-include==1.4.1
|
| 2 |
+
torchaudio==2.2.0
|
| 3 |
+
psutil==7.0.0
|
| 4 |
+
nvidia-cufft-cu12==11.0.2.54
|
| 5 |
+
nvidia-nvtx-cu12==12.1.105
|
| 6 |
+
wheel==0.45.1
|
| 7 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
| 8 |
+
pillow==11.1.0
|
| 9 |
+
draccus==0.10.0
|
| 10 |
+
Jinja2==3.1.6
|
| 11 |
+
ninja==1.11.1.4
|
| 12 |
+
MarkupSafe==3.0.2
|
| 13 |
+
nvidia-cublas-cu12==12.1.3.1
|
| 14 |
+
tqdm==4.67.1
|
| 15 |
+
fsspec==2025.3.2
|
| 16 |
+
wandb==0.19.9
|
| 17 |
+
GitPython==3.1.44
|
| 18 |
+
click==8.1.8
|
| 19 |
+
timm==0.9.10
|
| 20 |
+
gitdb==4.0.12
|
| 21 |
+
nvidia-nvjitlink-cu12==12.8.93
|
| 22 |
+
nvidia-cudnn-cu12==8.9.2.26
|
| 23 |
+
mergedeep==1.3.4
|
| 24 |
+
annotated-types==0.7.0
|
| 25 |
+
protobuf==5.29.4
|
| 26 |
+
huggingface-hub==0.30.2
|
| 27 |
+
mdurl==0.1.2
|
| 28 |
+
urllib3==2.3.0
|
| 29 |
+
typing_extensions==4.13.1
|
| 30 |
+
numpy==1.26.4
|
| 31 |
+
torchvision==0.17.0
|
| 32 |
+
nvidia-cusparse-cu12==12.1.0.106
|
| 33 |
+
networkx==3.4.2
|
| 34 |
+
regex==2024.11.6
|
| 35 |
+
mypy-extensions==1.0.0
|
| 36 |
+
pip==25.0
|
| 37 |
+
sentencepiece==0.2.0
|
| 38 |
+
Pygments==2.19.1
|
| 39 |
+
smmap==5.0.2
|
| 40 |
+
sympy==1.13.3
|
| 41 |
+
setuptools==75.8.0
|
| 42 |
+
nvidia-nccl-cu12==2.19.3
|
| 43 |
+
charset-normalizer==3.4.1
|
| 44 |
+
typing-inspection==0.4.0
|
| 45 |
+
platformdirs==4.3.7
|
| 46 |
+
packaging==24.2
|
| 47 |
+
setproctitle==1.3.5
|
| 48 |
+
idna==3.10
|
| 49 |
+
markdown-it-py==3.0.0
|
| 50 |
+
safetensors==0.5.3
|
| 51 |
+
rich==14.0.0
|
| 52 |
+
requests==2.32.3
|
| 53 |
+
sentry-sdk==2.25.1
|
| 54 |
+
jsonlines==4.0.0
|
| 55 |
+
transformers==4.38.1
|
| 56 |
+
PyYAML==6.0.2
|
| 57 |
+
pydantic_core==2.33.1
|
| 58 |
+
flash-attn==2.5.5
|
| 59 |
+
mpmath==1.3.0
|
| 60 |
+
attrs==25.3.0
|
| 61 |
+
einops==0.8.1
|
| 62 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
| 63 |
+
nvidia-curand-cu12==10.3.2.106
|
| 64 |
+
filelock==3.18.0
|
| 65 |
+
prismatic==0.0.2
|
| 66 |
+
certifi==2025.1.31
|
| 67 |
+
peft==0.5.0
|
| 68 |
+
typing-inspect==0.9.0
|
| 69 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
| 70 |
+
accelerate==0.25.0
|
| 71 |
+
pydantic==2.11.3
|
| 72 |
+
six==1.17.0
|
| 73 |
+
nvidia-cusolver-cu12==11.4.5.107
|
| 74 |
+
torch==2.2.0
|
| 75 |
+
docker-pycreds==0.4.0
|
| 76 |
+
tokenizers==0.15.2
|
| 77 |
+
toml==0.10.2
|
| 78 |
+
triton==2.2.0
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-6.8.0-52-generic-x86_64-with-glibc2.35",
|
| 3 |
+
"python": "CPython 3.10.16",
|
| 4 |
+
"startedAt": "2025-05-02T18:48:24.510790Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--model.type",
|
| 7 |
+
"prism-qwen25-extra-dinosiglip-224px+1_5b",
|
| 8 |
+
"--wandb_project",
|
| 9 |
+
"Moxin-VLM",
|
| 10 |
+
"--wandb_entity",
|
| 11 |
+
"arash-akbari-stu-northeastern-university",
|
| 12 |
+
"--model.enable_mixed_precision_training",
|
| 13 |
+
"True"
|
| 14 |
+
],
|
| 15 |
+
"program": "/home/user1/arashwork/prismatic-vlms/scripts/pretrain.py",
|
| 16 |
+
"codePath": "scripts/pretrain.py",
|
| 17 |
+
"git": {
|
| 18 |
+
"remote": "git@github.com:arashakb/prismatic-vlms.git",
|
| 19 |
+
"commit": "f94a585b2bb45d34a6947f2c80f2378f2af6ca66"
|
| 20 |
+
},
|
| 21 |
+
"email": "arash.akbari.stu@gmail.com",
|
| 22 |
+
"root": "runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7",
|
| 23 |
+
"host": "nnmc72",
|
| 24 |
+
"executable": "/home/user1/anaconda3/envs/prism/bin/python",
|
| 25 |
+
"codePathLocal": "scripts/pretrain.py",
|
| 26 |
+
"cpu_count": 48,
|
| 27 |
+
"cpu_count_logical": 96,
|
| 28 |
+
"gpu": "NVIDIA H100 NVL",
|
| 29 |
+
"gpu_count": 8,
|
| 30 |
+
"disk": {
|
| 31 |
+
"/": {
|
| 32 |
+
"total": "30476149334016",
|
| 33 |
+
"used": "19451255332864"
|
| 34 |
+
}
|
| 35 |
+
},
|
| 36 |
+
"memory": {
|
| 37 |
+
"total": "811294752768"
|
| 38 |
+
},
|
| 39 |
+
"cpu": {
|
| 40 |
+
"count": 48,
|
| 41 |
+
"countLogical": 96
|
| 42 |
+
},
|
| 43 |
+
"gpu_nvidia": [
|
| 44 |
+
{
|
| 45 |
+
"name": "NVIDIA H100 NVL",
|
| 46 |
+
"memoryTotal": "100485038080",
|
| 47 |
+
"cudaCores": 16896,
|
| 48 |
+
"architecture": "Hopper"
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "NVIDIA H100 NVL",
|
| 52 |
+
"memoryTotal": "100485038080",
|
| 53 |
+
"cudaCores": 16896,
|
| 54 |
+
"architecture": "Hopper"
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"name": "NVIDIA H100 NVL",
|
| 58 |
+
"memoryTotal": "100485038080",
|
| 59 |
+
"cudaCores": 16896,
|
| 60 |
+
"architecture": "Hopper"
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"name": "NVIDIA H100 NVL",
|
| 64 |
+
"memoryTotal": "100485038080",
|
| 65 |
+
"cudaCores": 16896,
|
| 66 |
+
"architecture": "Hopper"
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"name": "NVIDIA H100 NVL",
|
| 70 |
+
"memoryTotal": "100485038080",
|
| 71 |
+
"cudaCores": 16896,
|
| 72 |
+
"architecture": "Hopper"
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"name": "NVIDIA H100 NVL",
|
| 76 |
+
"memoryTotal": "100485038080",
|
| 77 |
+
"cudaCores": 16896,
|
| 78 |
+
"architecture": "Hopper"
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"name": "NVIDIA H100 NVL",
|
| 82 |
+
"memoryTotal": "100485038080",
|
| 83 |
+
"cudaCores": 16896,
|
| 84 |
+
"architecture": "Hopper"
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"name": "NVIDIA H100 NVL",
|
| 88 |
+
"memoryTotal": "100485038080",
|
| 89 |
+
"cudaCores": 16896,
|
| 90 |
+
"architecture": "Hopper"
|
| 91 |
+
}
|
| 92 |
+
],
|
| 93 |
+
"cudaVersion": "12.4"
|
| 94 |
+
}
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"Finetune/Learning Rate":1.3469633193024655e-06,"Finetune/Step":112,"Finetune/Loss (Raw)":2.335305690765381,"Finetune/Step Time":1.2350007040160043,"_step":112,"Finetune/Loss":2.4464573860168457,"_runtime":141.122340856,"_wandb":{"runtime":141},"_timestamp":1.7462118443747003e+09}
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/logs/debug-core.log
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-05-02T11:48:23.936394432-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpsxhf49n8/port-1932762.txt","pid":1932762,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
|
| 2 |
+
{"time":"2025-05-02T11:48:23.938033658-07:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":1932762}
|
| 3 |
+
{"time":"2025-05-02T11:48:23.938396597-07:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":36947,"Zone":""}}
|
| 4 |
+
{"time":"2025-05-02T11:48:24.08241367-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:39034"}
|
| 5 |
+
{"time":"2025-05-02T11:48:24.511951866-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"6hhs8vrz","id":"127.0.0.1:39034"}
|
| 6 |
+
{"time":"2025-05-02T11:48:25.628737745-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"6hhs8vrz","id":"127.0.0.1:39034"}
|
| 7 |
+
{"time":"2025-05-02T11:50:45.632975375-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:39034"}
|
| 8 |
+
{"time":"2025-05-02T11:50:45.633108663-07:00","level":"INFO","msg":"connection: closing","id":"127.0.0.1:39034"}
|
| 9 |
+
{"time":"2025-05-02T11:50:45.63317393-07:00","level":"INFO","msg":"server is shutting down"}
|
| 10 |
+
{"time":"2025-05-02T11:50:45.633311554-07:00","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:39034"}
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-05-02T11:48:24.512248306-07:00","level":"INFO","msg":"stream: starting","core version":"0.19.9","symlink path":"runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250502_114824-6hhs8vrz/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-05-02T11:48:25.628691797-07:00","level":"INFO","msg":"created new stream","id":"6hhs8vrz"}
|
| 3 |
+
{"time":"2025-05-02T11:48:25.62873432-07:00","level":"INFO","msg":"stream: started","id":"6hhs8vrz"}
|
| 4 |
+
{"time":"2025-05-02T11:48:25.62879471-07:00","level":"INFO","msg":"handler: started","stream_id":"6hhs8vrz"}
|
| 5 |
+
{"time":"2025-05-02T11:48:25.628819016-07:00","level":"INFO","msg":"writer: Do: started","stream_id":"6hhs8vrz"}
|
| 6 |
+
{"time":"2025-05-02T11:48:25.628835531-07:00","level":"INFO","msg":"sender: started","stream_id":"6hhs8vrz"}
|
| 7 |
+
{"time":"2025-05-02T11:48:25.959126298-07:00","level":"INFO","msg":"Starting system monitor"}
|
| 8 |
+
{"time":"2025-05-02T11:50:45.633111697-07:00","level":"INFO","msg":"stream: closing","id":"6hhs8vrz"}
|
| 9 |
+
{"time":"2025-05-02T11:50:45.633155452-07:00","level":"INFO","msg":"Stopping system monitor"}
|
| 10 |
+
{"time":"2025-05-02T11:50:45.633187911-07:00","level":"INFO","msg":"Stopped system monitor"}
|
| 11 |
+
{"time":"2025-05-02T11:50:46.081032653-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 12 |
+
{"time":"2025-05-02T11:50:46.287259059-07:00","level":"INFO","msg":"handler: closed","stream_id":"6hhs8vrz"}
|
| 13 |
+
{"time":"2025-05-02T11:50:46.287318307-07:00","level":"INFO","msg":"writer: Close: closed","stream_id":"6hhs8vrz"}
|
| 14 |
+
{"time":"2025-05-02T11:50:46.287337876-07:00","level":"INFO","msg":"sender: closed","stream_id":"6hhs8vrz"}
|
| 15 |
+
{"time":"2025-05-02T11:50:46.287531815-07:00","level":"INFO","msg":"stream: closed","id":"6hhs8vrz"}
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/logs/debug.log
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_setup.py:_flush():67] Current SDK version is 0.19.9
|
| 2 |
+
2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_setup.py:_flush():67] Configure stats pid to 1932762
|
| 3 |
+
2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_setup.py:_flush():67] Loading settings from /home/user1/.config/wandb/settings
|
| 4 |
+
2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_setup.py:_flush():67] Loading settings from /home/user1/arashwork/prismatic-vlms/wandb/settings
|
| 5 |
+
2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_setup.py:_flush():67] Loading settings from environment variables
|
| 6 |
+
2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:setup_run_log_directory():662] Logging user logs to runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250502_114824-6hhs8vrz/logs/debug.log
|
| 7 |
+
2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:setup_run_log_directory():663] Logging internal logs to runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250502_114824-6hhs8vrz/logs/debug-internal.log
|
| 8 |
+
2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:init():781] calling init triggers
|
| 9 |
+
2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:init():786] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'model': {'type': 'prism-qwen25-extra-dinosiglip-224px+1_5b', 'model_id': 'prism-qwen25-extra-dinosiglip-224px+1_5b', 'arch_specifier': 'no-align+fused-gelu-mlp', 'vision_backbone_id': 'dinosiglip-vit-so-224px', 'llm_backbone_id': 'qwen25-1_5b-extra', 'image_resize_strategy': 'resize-naive', 'llm_max_length': 32768, 'align_epochs': 1, 'align_max_steps': None, 'align_global_batch_size': 8, 'align_per_device_batch_size': 8, 'align_learning_rate': 0.001, 'align_weight_decay': 0.0, 'align_max_grad_norm': 1.0, 'align_lr_scheduler_type': 'linear-warmup+cosine-decay', 'align_warmup_ratio': 0.03, 'align_train_strategy': 'fsdp-shard-grad-op', 'finetune_epochs': 2, 'finetune_max_steps': None, 'finetune_global_batch_size': 24, 'finetune_per_device_batch_size': 6, 'finetune_learning_rate': 2e-05, 'finetune_weight_decay': 0.1, 'finetune_max_grad_norm': 1.0, 'finetune_lr_scheduler_type': 'linear-warmup+cosine-decay', 'finetune_warmup_ratio': 0.03, 'finetune_train_strategy': 'fsdp-full-shard', 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': False}, 'dataset': {'type': 'llava-v15', 'dataset_id': 'llava-v15', 'align_stage_components': ['download/llava-laion-cc-sbu-558k/chat.json', 'download/llava-laion-cc-sbu-558k'], 'finetune_stage_components': ['download/llava-v1.5-instruct/llava_v1_5_mix665k.json', 'download/llava-v1.5-instruct'], 'dataset_root_dir': 'data'}, 'stage': 'finetune', 'pretrained_checkpoint': None, 'run_id': 'prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7', 'run_root_dir': 'runs', 'seed': 7, 'hf_token': 'hf_token.txt', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'Moxin-VLM', 'wandb_entity': 'arash-akbari-stu-northeastern-university', '_wandb': {}}
|
| 11 |
+
2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:init():809] starting backend
|
| 12 |
+
2025-05-02 11:48:24,503 INFO MainThread:1932762 [wandb_init.py:init():813] sending inform_init request
|
| 13 |
+
2025-05-02 11:48:24,510 INFO MainThread:1932762 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 14 |
+
2025-05-02 11:48:24,510 INFO MainThread:1932762 [wandb_init.py:init():823] backend started and connected
|
| 15 |
+
2025-05-02 11:48:24,512 INFO MainThread:1932762 [wandb_init.py:init():915] updated telemetry
|
| 16 |
+
2025-05-02 11:48:24,517 INFO MainThread:1932762 [wandb_init.py:init():939] communicating run to backend with 90.0 second timeout
|
| 17 |
+
2025-05-02 11:48:25,957 INFO MainThread:1932762 [wandb_init.py:init():1014] starting run threads in backend
|
| 18 |
+
2025-05-02 11:48:26,000 INFO MainThread:1932762 [wandb_run.py:_console_start():2454] atexit reg
|
| 19 |
+
2025-05-02 11:48:26,000 INFO MainThread:1932762 [wandb_run.py:_redirect():2306] redirect: wrap_raw
|
| 20 |
+
2025-05-02 11:48:26,000 INFO MainThread:1932762 [wandb_run.py:_redirect():2371] Wrapping output streams.
|
| 21 |
+
2025-05-02 11:48:26,000 INFO MainThread:1932762 [wandb_run.py:_redirect():2394] Redirects installed.
|
| 22 |
+
2025-05-02 11:48:26,002 INFO MainThread:1932762 [wandb_init.py:init():1056] run started, returning control to user process
|
| 23 |
+
2025-05-02 11:50:45,632 INFO MsgRouterThr:1932762 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/latest-run/run-6hhs8vrz.wandb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:514635c836cd32f2106e2050edf14ff70f51a51daf2b1e5aa29387effe9be97f
|
| 3 |
+
size 143258
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/files/config.yaml
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_wandb:
|
| 2 |
+
value:
|
| 3 |
+
cli_version: 0.19.9
|
| 4 |
+
m: []
|
| 5 |
+
python_version: 3.10.16
|
| 6 |
+
t:
|
| 7 |
+
"1":
|
| 8 |
+
- 1
|
| 9 |
+
- 11
|
| 10 |
+
- 41
|
| 11 |
+
- 49
|
| 12 |
+
- 55
|
| 13 |
+
- 63
|
| 14 |
+
- 71
|
| 15 |
+
- 98
|
| 16 |
+
"2":
|
| 17 |
+
- 1
|
| 18 |
+
- 11
|
| 19 |
+
- 41
|
| 20 |
+
- 49
|
| 21 |
+
- 55
|
| 22 |
+
- 63
|
| 23 |
+
- 71
|
| 24 |
+
- 98
|
| 25 |
+
"3":
|
| 26 |
+
- 13
|
| 27 |
+
- 16
|
| 28 |
+
- 23
|
| 29 |
+
- 55
|
| 30 |
+
- 61
|
| 31 |
+
"4": 3.10.16
|
| 32 |
+
"5": 0.19.9
|
| 33 |
+
"6": 4.51.1
|
| 34 |
+
"8":
|
| 35 |
+
- 5
|
| 36 |
+
"12": 0.19.9
|
| 37 |
+
"13": linux-x86_64
|
| 38 |
+
dataset:
|
| 39 |
+
value:
|
| 40 |
+
align_stage_components:
|
| 41 |
+
- download/llava-laion-cc-sbu-558k/chat.json
|
| 42 |
+
- download/llava-laion-cc-sbu-558k
|
| 43 |
+
dataset_id: llava-v15
|
| 44 |
+
dataset_root_dir: data
|
| 45 |
+
finetune_stage_components:
|
| 46 |
+
- download/llava-v1.5-instruct/llava_v1_5_mix665k.json
|
| 47 |
+
- download/llava-v1.5-instruct
|
| 48 |
+
type: llava-v15
|
| 49 |
+
hf_token:
|
| 50 |
+
value: hf_token.txt
|
| 51 |
+
model:
|
| 52 |
+
value:
|
| 53 |
+
align_epochs: 1
|
| 54 |
+
align_global_batch_size: 8
|
| 55 |
+
align_learning_rate: 0.001
|
| 56 |
+
align_lr_scheduler_type: linear-warmup+cosine-decay
|
| 57 |
+
align_max_grad_norm: 1
|
| 58 |
+
align_max_steps: null
|
| 59 |
+
align_per_device_batch_size: 8
|
| 60 |
+
align_train_strategy: fsdp-shard-grad-op
|
| 61 |
+
align_warmup_ratio: 0.03
|
| 62 |
+
align_weight_decay: 0
|
| 63 |
+
arch_specifier: no-align+fused-gelu-mlp
|
| 64 |
+
enable_gradient_checkpointing: true
|
| 65 |
+
enable_mixed_precision_training: true
|
| 66 |
+
finetune_epochs: 2
|
| 67 |
+
finetune_global_batch_size: 2
|
| 68 |
+
finetune_learning_rate: 2e-05
|
| 69 |
+
finetune_lr_scheduler_type: linear-warmup+cosine-decay
|
| 70 |
+
finetune_max_grad_norm: 1
|
| 71 |
+
finetune_max_steps: null
|
| 72 |
+
finetune_per_device_batch_size: 2
|
| 73 |
+
finetune_train_strategy: fsdp-full-shard
|
| 74 |
+
finetune_warmup_ratio: 0.03
|
| 75 |
+
finetune_weight_decay: 0.1
|
| 76 |
+
image_resize_strategy: resize-naive
|
| 77 |
+
llm_backbone_id: qwen25-1_5b-extra
|
| 78 |
+
llm_max_length: 32768
|
| 79 |
+
model_id: prism-qwen25-extra-dinosiglip-224px+1_5b
|
| 80 |
+
reduce_in_full_precision: false
|
| 81 |
+
type: prism-qwen25-extra-dinosiglip-224px+1_5b
|
| 82 |
+
vision_backbone_id: dinosiglip-vit-so-224px
|
| 83 |
+
pretrained_checkpoint:
|
| 84 |
+
value: null
|
| 85 |
+
run_id:
|
| 86 |
+
value: prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7
|
| 87 |
+
run_root_dir:
|
| 88 |
+
value: runs
|
| 89 |
+
seed:
|
| 90 |
+
value: 7
|
| 91 |
+
stage:
|
| 92 |
+
value: finetune
|
| 93 |
+
trackers:
|
| 94 |
+
value:
|
| 95 |
+
- jsonl
|
| 96 |
+
- wandb
|
| 97 |
+
wandb_entity:
|
| 98 |
+
value: arash-akbari-stu-northeastern-university
|
| 99 |
+
wandb_project:
|
| 100 |
+
value: Qwen25-Extra-DINOSigLIP-224px-1_5B-full-finetune
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/files/output.log
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
04/22 [09:52:04] INFO | >> [*] Starting Training Loop pretrain.py:238
|
| 2 |
+
=>> [Global Step] 000015 =>> LR :: 0.000000 -- Loss :: 2.7283: 0%| | 15/665298 [00:11<46:11:45, 4.00it/s]
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/files/requirements.txt
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pyyaml-include==1.4.1
|
| 2 |
+
torchaudio==2.2.0
|
| 3 |
+
psutil==7.0.0
|
| 4 |
+
nvidia-cufft-cu12==11.0.2.54
|
| 5 |
+
nvidia-nvtx-cu12==12.1.105
|
| 6 |
+
wheel==0.45.1
|
| 7 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
| 8 |
+
pillow==11.1.0
|
| 9 |
+
draccus==0.10.0
|
| 10 |
+
Jinja2==3.1.6
|
| 11 |
+
ninja==1.11.1.4
|
| 12 |
+
MarkupSafe==3.0.2
|
| 13 |
+
nvidia-cublas-cu12==12.1.3.1
|
| 14 |
+
tqdm==4.67.1
|
| 15 |
+
fsspec==2025.3.2
|
| 16 |
+
wandb==0.19.9
|
| 17 |
+
GitPython==3.1.44
|
| 18 |
+
click==8.1.8
|
| 19 |
+
timm==0.9.10
|
| 20 |
+
gitdb==4.0.12
|
| 21 |
+
nvidia-nvjitlink-cu12==12.8.93
|
| 22 |
+
nvidia-cudnn-cu12==8.9.2.26
|
| 23 |
+
mergedeep==1.3.4
|
| 24 |
+
annotated-types==0.7.0
|
| 25 |
+
protobuf==5.29.4
|
| 26 |
+
huggingface-hub==0.30.2
|
| 27 |
+
mdurl==0.1.2
|
| 28 |
+
urllib3==2.3.0
|
| 29 |
+
typing_extensions==4.13.1
|
| 30 |
+
numpy==1.26.4
|
| 31 |
+
torchvision==0.17.0
|
| 32 |
+
nvidia-cusparse-cu12==12.1.0.106
|
| 33 |
+
networkx==3.4.2
|
| 34 |
+
regex==2024.11.6
|
| 35 |
+
mypy-extensions==1.0.0
|
| 36 |
+
pip==25.0
|
| 37 |
+
peft==0.15.1
|
| 38 |
+
sentencepiece==0.2.0
|
| 39 |
+
Pygments==2.19.1
|
| 40 |
+
smmap==5.0.2
|
| 41 |
+
sympy==1.13.3
|
| 42 |
+
setuptools==75.8.0
|
| 43 |
+
nvidia-nccl-cu12==2.19.3
|
| 44 |
+
tokenizers==0.21.1
|
| 45 |
+
charset-normalizer==3.4.1
|
| 46 |
+
typing-inspection==0.4.0
|
| 47 |
+
platformdirs==4.3.7
|
| 48 |
+
packaging==24.2
|
| 49 |
+
setproctitle==1.3.5
|
| 50 |
+
idna==3.10
|
| 51 |
+
markdown-it-py==3.0.0
|
| 52 |
+
safetensors==0.5.3
|
| 53 |
+
rich==14.0.0
|
| 54 |
+
transformers==4.51.1
|
| 55 |
+
requests==2.32.3
|
| 56 |
+
sentry-sdk==2.25.1
|
| 57 |
+
jsonlines==4.0.0
|
| 58 |
+
PyYAML==6.0.2
|
| 59 |
+
pydantic_core==2.33.1
|
| 60 |
+
flash-attn==2.5.5
|
| 61 |
+
mpmath==1.3.0
|
| 62 |
+
attrs==25.3.0
|
| 63 |
+
einops==0.8.1
|
| 64 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
| 65 |
+
nvidia-curand-cu12==10.3.2.106
|
| 66 |
+
filelock==3.18.0
|
| 67 |
+
prismatic==0.0.2
|
| 68 |
+
certifi==2025.1.31
|
| 69 |
+
accelerate==1.6.0
|
| 70 |
+
typing-inspect==0.9.0
|
| 71 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
| 72 |
+
pydantic==2.11.3
|
| 73 |
+
six==1.17.0
|
| 74 |
+
nvidia-cusolver-cu12==11.4.5.107
|
| 75 |
+
torch==2.2.0
|
| 76 |
+
docker-pycreds==0.4.0
|
| 77 |
+
toml==0.10.2
|
| 78 |
+
triton==2.2.0
|
| 79 |
+
importlib_metadata==8.0.0
|
| 80 |
+
tomli==2.0.1
|
| 81 |
+
zipp==3.19.2
|
| 82 |
+
jaraco.context==5.3.0
|
| 83 |
+
inflect==7.3.1
|
| 84 |
+
autocommand==2.2.2
|
| 85 |
+
typing_extensions==4.12.2
|
| 86 |
+
jaraco.collections==5.1.0
|
| 87 |
+
jaraco.functools==4.0.1
|
| 88 |
+
packaging==24.2
|
| 89 |
+
wheel==0.43.0
|
| 90 |
+
backports.tarfile==1.2.0
|
| 91 |
+
platformdirs==4.2.2
|
| 92 |
+
more-itertools==10.3.0
|
| 93 |
+
jaraco.text==3.12.1
|
| 94 |
+
typeguard==4.3.0
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-6.8.0-52-generic-x86_64-with-glibc2.35",
|
| 3 |
+
"python": "CPython 3.10.16",
|
| 4 |
+
"startedAt": "2025-04-22T16:52:03.056575Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--model.type",
|
| 7 |
+
"prism-qwen25-extra-dinosiglip-224px+1_5b",
|
| 8 |
+
"--wandb_project",
|
| 9 |
+
"Qwen25-Extra-DINOSigLIP-224px-1_5B-full-finetune",
|
| 10 |
+
"--wandb_entity",
|
| 11 |
+
"arash-akbari-stu-northeastern-university",
|
| 12 |
+
"--model.enable_mixed_precision_training",
|
| 13 |
+
"True"
|
| 14 |
+
],
|
| 15 |
+
"program": "/home/user1/arashwork/prismatic-vlms/scripts/pretrain.py",
|
| 16 |
+
"codePath": "scripts/pretrain.py",
|
| 17 |
+
"git": {
|
| 18 |
+
"remote": "git@github.com:arashakb/prismatic-vlms.git",
|
| 19 |
+
"commit": "8082f651fe3ba730b13899691a222fdef82d2c66"
|
| 20 |
+
},
|
| 21 |
+
"email": "arash.akbari.stu@gmail.com",
|
| 22 |
+
"root": "runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7",
|
| 23 |
+
"host": "nnmc72",
|
| 24 |
+
"executable": "/home/user1/anaconda3/envs/prism/bin/python",
|
| 25 |
+
"codePathLocal": "scripts/pretrain.py",
|
| 26 |
+
"cpu_count": 48,
|
| 27 |
+
"cpu_count_logical": 96,
|
| 28 |
+
"gpu": "NVIDIA H100 NVL",
|
| 29 |
+
"gpu_count": 8,
|
| 30 |
+
"disk": {
|
| 31 |
+
"/": {
|
| 32 |
+
"total": "30476149334016",
|
| 33 |
+
"used": "18270204149760"
|
| 34 |
+
}
|
| 35 |
+
},
|
| 36 |
+
"memory": {
|
| 37 |
+
"total": "811294752768"
|
| 38 |
+
},
|
| 39 |
+
"cpu": {
|
| 40 |
+
"count": 48,
|
| 41 |
+
"countLogical": 96
|
| 42 |
+
},
|
| 43 |
+
"gpu_nvidia": [
|
| 44 |
+
{
|
| 45 |
+
"name": "NVIDIA H100 NVL",
|
| 46 |
+
"memoryTotal": "100485038080",
|
| 47 |
+
"cudaCores": 16896,
|
| 48 |
+
"architecture": "Hopper"
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "NVIDIA H100 NVL",
|
| 52 |
+
"memoryTotal": "100485038080",
|
| 53 |
+
"cudaCores": 16896,
|
| 54 |
+
"architecture": "Hopper"
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"name": "NVIDIA H100 NVL",
|
| 58 |
+
"memoryTotal": "100485038080",
|
| 59 |
+
"cudaCores": 16896,
|
| 60 |
+
"architecture": "Hopper"
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"name": "NVIDIA H100 NVL",
|
| 64 |
+
"memoryTotal": "100485038080",
|
| 65 |
+
"cudaCores": 16896,
|
| 66 |
+
"architecture": "Hopper"
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"name": "NVIDIA H100 NVL",
|
| 70 |
+
"memoryTotal": "100485038080",
|
| 71 |
+
"cudaCores": 16896,
|
| 72 |
+
"architecture": "Hopper"
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"name": "NVIDIA H100 NVL",
|
| 76 |
+
"memoryTotal": "100485038080",
|
| 77 |
+
"cudaCores": 16896,
|
| 78 |
+
"architecture": "Hopper"
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"name": "NVIDIA H100 NVL",
|
| 82 |
+
"memoryTotal": "100485038080",
|
| 83 |
+
"cudaCores": 16896,
|
| 84 |
+
"architecture": "Hopper"
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"name": "NVIDIA H100 NVL",
|
| 88 |
+
"memoryTotal": "100485038080",
|
| 89 |
+
"cudaCores": 16896,
|
| 90 |
+
"architecture": "Hopper"
|
| 91 |
+
}
|
| 92 |
+
],
|
| 93 |
+
"cudaVersion": "12.4"
|
| 94 |
+
}
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"Finetune/Step Time":0.97449951171875,"Finetune/Loss":2.72833514213562,"_wandb":{"runtime":17},"Finetune/Learning Rate":1.5031566289207335e-08,"Finetune/Loss (Raw)":2.510840654373169,"_step":15,"_timestamp":1.7453407389854724e+09,"Finetune/Step":15,"_runtime":17.423786677}
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/logs/debug-core.log
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-04-22T09:52:02.705725804-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmvl60rms/port-491736.txt","pid":491736,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
|
| 2 |
+
{"time":"2025-04-22T09:52:02.706961119-07:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":491736}
|
| 3 |
+
{"time":"2025-04-22T09:52:02.706929822-07:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":39925,"Zone":""}}
|
| 4 |
+
{"time":"2025-04-22T09:52:02.895174529-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:55192"}
|
| 5 |
+
{"time":"2025-04-22T09:52:03.059360916-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"e4gyde5j","id":"127.0.0.1:55192"}
|
| 6 |
+
{"time":"2025-04-22T09:52:03.956493803-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"e4gyde5j","id":"127.0.0.1:55192"}
|
| 7 |
+
{"time":"2025-04-22T09:52:20.480226436-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:55192"}
|
| 8 |
+
{"time":"2025-04-22T09:52:20.480361056-07:00","level":"INFO","msg":"connection: closing","id":"127.0.0.1:55192"}
|
| 9 |
+
{"time":"2025-04-22T09:52:20.480421636-07:00","level":"INFO","msg":"server is shutting down"}
|
| 10 |
+
{"time":"2025-04-22T09:52:20.480583807-07:00","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:55192"}
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-04-22T09:52:03.059719599-07:00","level":"INFO","msg":"stream: starting","core version":"0.19.9","symlink path":"runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250422_095203-e4gyde5j/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-04-22T09:52:03.956429127-07:00","level":"INFO","msg":"created new stream","id":"e4gyde5j"}
|
| 3 |
+
{"time":"2025-04-22T09:52:03.956484028-07:00","level":"INFO","msg":"stream: started","id":"e4gyde5j"}
|
| 4 |
+
{"time":"2025-04-22T09:52:03.956526962-07:00","level":"INFO","msg":"handler: started","stream_id":"e4gyde5j"}
|
| 5 |
+
{"time":"2025-04-22T09:52:03.956515976-07:00","level":"INFO","msg":"writer: Do: started","stream_id":"e4gyde5j"}
|
| 6 |
+
{"time":"2025-04-22T09:52:03.956572249-07:00","level":"INFO","msg":"sender: started","stream_id":"e4gyde5j"}
|
| 7 |
+
{"time":"2025-04-22T09:52:04.277054892-07:00","level":"INFO","msg":"Starting system monitor"}
|
| 8 |
+
{"time":"2025-04-22T09:52:20.480344461-07:00","level":"INFO","msg":"stream: closing","id":"e4gyde5j"}
|
| 9 |
+
{"time":"2025-04-22T09:52:20.480372753-07:00","level":"INFO","msg":"Stopping system monitor"}
|
| 10 |
+
{"time":"2025-04-22T09:52:20.480413764-07:00","level":"INFO","msg":"Stopped system monitor"}
|
| 11 |
+
{"time":"2025-04-22T09:52:20.820531468-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 12 |
+
{"time":"2025-04-22T09:52:21.041731773-07:00","level":"INFO","msg":"handler: closed","stream_id":"e4gyde5j"}
|
| 13 |
+
{"time":"2025-04-22T09:52:21.041794947-07:00","level":"INFO","msg":"sender: closed","stream_id":"e4gyde5j"}
|
| 14 |
+
{"time":"2025-04-22T09:52:21.041797381-07:00","level":"INFO","msg":"writer: Close: closed","stream_id":"e4gyde5j"}
|
| 15 |
+
{"time":"2025-04-22T09:52:21.042007493-07:00","level":"INFO","msg":"stream: closed","id":"e4gyde5j"}
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/logs/debug.log
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-04-22 09:52:03,050 INFO MainThread:491736 [wandb_setup.py:_flush():67] Current SDK version is 0.19.9
|
| 2 |
+
2025-04-22 09:52:03,050 INFO MainThread:491736 [wandb_setup.py:_flush():67] Configure stats pid to 491736
|
| 3 |
+
2025-04-22 09:52:03,050 INFO MainThread:491736 [wandb_setup.py:_flush():67] Loading settings from /home/user1/.config/wandb/settings
|
| 4 |
+
2025-04-22 09:52:03,050 INFO MainThread:491736 [wandb_setup.py:_flush():67] Loading settings from /home/user1/arashwork/prismatic-vlms/wandb/settings
|
| 5 |
+
2025-04-22 09:52:03,050 INFO MainThread:491736 [wandb_setup.py:_flush():67] Loading settings from environment variables
|
| 6 |
+
2025-04-22 09:52:03,050 INFO MainThread:491736 [wandb_init.py:setup_run_log_directory():662] Logging user logs to runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250422_095203-e4gyde5j/logs/debug.log
|
| 7 |
+
2025-04-22 09:52:03,051 INFO MainThread:491736 [wandb_init.py:setup_run_log_directory():663] Logging internal logs to runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250422_095203-e4gyde5j/logs/debug-internal.log
|
| 8 |
+
2025-04-22 09:52:03,051 INFO MainThread:491736 [wandb_init.py:init():781] calling init triggers
|
| 9 |
+
2025-04-22 09:52:03,051 INFO MainThread:491736 [wandb_init.py:init():786] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'model': {'type': 'prism-qwen25-extra-dinosiglip-224px+1_5b', 'model_id': 'prism-qwen25-extra-dinosiglip-224px+1_5b', 'arch_specifier': 'no-align+fused-gelu-mlp', 'vision_backbone_id': 'dinosiglip-vit-so-224px', 'llm_backbone_id': 'qwen25-1_5b-extra', 'image_resize_strategy': 'resize-naive', 'llm_max_length': 32768, 'align_epochs': 1, 'align_max_steps': None, 'align_global_batch_size': 8, 'align_per_device_batch_size': 8, 'align_learning_rate': 0.001, 'align_weight_decay': 0.0, 'align_max_grad_norm': 1.0, 'align_lr_scheduler_type': 'linear-warmup+cosine-decay', 'align_warmup_ratio': 0.03, 'align_train_strategy': 'fsdp-shard-grad-op', 'finetune_epochs': 2, 'finetune_max_steps': None, 'finetune_global_batch_size': 2, 'finetune_per_device_batch_size': 2, 'finetune_learning_rate': 2e-05, 'finetune_weight_decay': 0.1, 'finetune_max_grad_norm': 1.0, 'finetune_lr_scheduler_type': 'linear-warmup+cosine-decay', 'finetune_warmup_ratio': 0.03, 'finetune_train_strategy': 'fsdp-full-shard', 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': False}, 'dataset': {'type': 'llava-v15', 'dataset_id': 'llava-v15', 'align_stage_components': ['download/llava-laion-cc-sbu-558k/chat.json', 'download/llava-laion-cc-sbu-558k'], 'finetune_stage_components': ['download/llava-v1.5-instruct/llava_v1_5_mix665k.json', 'download/llava-v1.5-instruct'], 'dataset_root_dir': 'data'}, 'stage': 'finetune', 'pretrained_checkpoint': None, 'run_id': 'prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7', 'run_root_dir': 'runs', 'seed': 7, 'hf_token': 'hf_token.txt', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'Qwen25-Extra-DINOSigLIP-224px-1_5B-full-finetune', 'wandb_entity': 'arash-akbari-stu-northeastern-university', '_wandb': {}}
|
| 11 |
+
2025-04-22 09:52:03,051 INFO MainThread:491736 [wandb_init.py:init():809] starting backend
|
| 12 |
+
2025-04-22 09:52:03,051 INFO MainThread:491736 [wandb_init.py:init():813] sending inform_init request
|
| 13 |
+
2025-04-22 09:52:03,056 INFO MainThread:491736 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 14 |
+
2025-04-22 09:52:03,056 INFO MainThread:491736 [wandb_init.py:init():823] backend started and connected
|
| 15 |
+
2025-04-22 09:52:03,058 INFO MainThread:491736 [wandb_init.py:init():915] updated telemetry
|
| 16 |
+
2025-04-22 09:52:03,064 INFO MainThread:491736 [wandb_init.py:init():939] communicating run to backend with 90.0 second timeout
|
| 17 |
+
2025-04-22 09:52:04,234 INFO MainThread:491736 [wandb_init.py:init():1014] starting run threads in backend
|
| 18 |
+
2025-04-22 09:52:04,313 INFO MainThread:491736 [wandb_run.py:_console_start():2454] atexit reg
|
| 19 |
+
2025-04-22 09:52:04,314 INFO MainThread:491736 [wandb_run.py:_redirect():2306] redirect: wrap_raw
|
| 20 |
+
2025-04-22 09:52:04,314 INFO MainThread:491736 [wandb_run.py:_redirect():2371] Wrapping output streams.
|
| 21 |
+
2025-04-22 09:52:04,314 INFO MainThread:491736 [wandb_run.py:_redirect():2394] Redirects installed.
|
| 22 |
+
2025-04-22 09:52:04,315 INFO MainThread:491736 [wandb_init.py:init():1056] run started, returning control to user process
|
| 23 |
+
2025-04-22 09:52:20,479 INFO MsgRouterThr:491736 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095203-e4gyde5j/run-e4gyde5j.wandb
ADDED
|
Binary file (21.2 kB). View file
|
|
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/files/config.yaml
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_wandb:
|
| 2 |
+
value:
|
| 3 |
+
cli_version: 0.19.9
|
| 4 |
+
m: []
|
| 5 |
+
python_version: 3.10.16
|
| 6 |
+
t:
|
| 7 |
+
"1":
|
| 8 |
+
- 1
|
| 9 |
+
- 11
|
| 10 |
+
- 41
|
| 11 |
+
- 49
|
| 12 |
+
- 55
|
| 13 |
+
- 63
|
| 14 |
+
- 71
|
| 15 |
+
- 98
|
| 16 |
+
"2":
|
| 17 |
+
- 1
|
| 18 |
+
- 11
|
| 19 |
+
- 41
|
| 20 |
+
- 49
|
| 21 |
+
- 55
|
| 22 |
+
- 63
|
| 23 |
+
- 71
|
| 24 |
+
- 98
|
| 25 |
+
"3":
|
| 26 |
+
- 13
|
| 27 |
+
- 16
|
| 28 |
+
- 23
|
| 29 |
+
- 55
|
| 30 |
+
"4": 3.10.16
|
| 31 |
+
"5": 0.19.9
|
| 32 |
+
"6": 4.51.1
|
| 33 |
+
"8":
|
| 34 |
+
- 5
|
| 35 |
+
"12": 0.19.9
|
| 36 |
+
"13": linux-x86_64
|
| 37 |
+
dataset:
|
| 38 |
+
value:
|
| 39 |
+
align_stage_components:
|
| 40 |
+
- download/llava-laion-cc-sbu-558k/chat.json
|
| 41 |
+
- download/llava-laion-cc-sbu-558k
|
| 42 |
+
dataset_id: llava-v15
|
| 43 |
+
dataset_root_dir: data
|
| 44 |
+
finetune_stage_components:
|
| 45 |
+
- download/llava-v1.5-instruct/llava_v1_5_mix665k.json
|
| 46 |
+
- download/llava-v1.5-instruct
|
| 47 |
+
type: llava-v15
|
| 48 |
+
hf_token:
|
| 49 |
+
value: hf_token.txt
|
| 50 |
+
model:
|
| 51 |
+
value:
|
| 52 |
+
align_epochs: 1
|
| 53 |
+
align_global_batch_size: 8
|
| 54 |
+
align_learning_rate: 0.001
|
| 55 |
+
align_lr_scheduler_type: linear-warmup+cosine-decay
|
| 56 |
+
align_max_grad_norm: 1
|
| 57 |
+
align_max_steps: null
|
| 58 |
+
align_per_device_batch_size: 8
|
| 59 |
+
align_train_strategy: fsdp-shard-grad-op
|
| 60 |
+
align_warmup_ratio: 0.03
|
| 61 |
+
align_weight_decay: 0
|
| 62 |
+
arch_specifier: no-align+fused-gelu-mlp
|
| 63 |
+
enable_gradient_checkpointing: true
|
| 64 |
+
enable_mixed_precision_training: true
|
| 65 |
+
finetune_epochs: 2
|
| 66 |
+
finetune_global_batch_size: 128
|
| 67 |
+
finetune_learning_rate: 2e-05
|
| 68 |
+
finetune_lr_scheduler_type: linear-warmup+cosine-decay
|
| 69 |
+
finetune_max_grad_norm: 1
|
| 70 |
+
finetune_max_steps: null
|
| 71 |
+
finetune_per_device_batch_size: 16
|
| 72 |
+
finetune_train_strategy: fsdp-full-shard
|
| 73 |
+
finetune_warmup_ratio: 0.03
|
| 74 |
+
finetune_weight_decay: 0.1
|
| 75 |
+
image_resize_strategy: resize-naive
|
| 76 |
+
llm_backbone_id: qwen25-1_5b-extra
|
| 77 |
+
llm_max_length: 32768
|
| 78 |
+
model_id: prism-qwen25-extra-dinosiglip-224px+1_5b
|
| 79 |
+
reduce_in_full_precision: false
|
| 80 |
+
type: prism-qwen25-extra-dinosiglip-224px+1_5b
|
| 81 |
+
vision_backbone_id: dinosiglip-vit-so-224px
|
| 82 |
+
pretrained_checkpoint:
|
| 83 |
+
value: null
|
| 84 |
+
run_id:
|
| 85 |
+
value: prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7
|
| 86 |
+
run_root_dir:
|
| 87 |
+
value: runs
|
| 88 |
+
seed:
|
| 89 |
+
value: 7
|
| 90 |
+
stage:
|
| 91 |
+
value: finetune
|
| 92 |
+
trackers:
|
| 93 |
+
value:
|
| 94 |
+
- jsonl
|
| 95 |
+
- wandb
|
| 96 |
+
wandb_entity:
|
| 97 |
+
value: arash-akbari-stu-northeastern-university
|
| 98 |
+
wandb_project:
|
| 99 |
+
value: Qwen25-Extra-DINOSigLIP-224px-1_5B-full-finetune
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/files/output.log
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
04/22 [09:58:06] INFO | >> [*] Starting Training Loop pretrain.py:238
|
| 2 |
+
Traceback (most recent call last):
|
| 3 |
+
File "/home/user1/arashwork/prismatic-vlms/scripts/pretrain.py", line 252, in <module>
|
| 4 |
+
pretrain()
|
| 5 |
+
File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/draccus/argparsing.py", line 225, in wrapper_inner
|
| 6 |
+
response = fn(cfg, *args, **kwargs)
|
| 7 |
+
File "/home/user1/arashwork/prismatic-vlms/scripts/pretrain.py", line 239, in pretrain
|
| 8 |
+
train_strategy.run_training(train_dataset, collator, metrics, stage=cfg.stage, seed=cfg.seed)
|
| 9 |
+
File "/home/user1/arashwork/prismatic-vlms/prismatic/training/strategies/base_strategy.py", line 183, in run_training
|
| 10 |
+
output: CausalLMOutputWithPast = self.vlm(
|
| 11 |
+
File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
|
| 12 |
+
return self._call_impl(*args, **kwargs)
|
| 13 |
+
File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
|
| 14 |
+
return forward_call(*args, **kwargs)
|
| 15 |
+
File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 849, in forward
|
| 16 |
+
output = self._fsdp_wrapped_module(*args, **kwargs)
|
| 17 |
+
File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
|
| 18 |
+
return self._call_impl(*args, **kwargs)
|
| 19 |
+
File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
|
| 20 |
+
return forward_call(*args, **kwargs)
|
| 21 |
+
File "/home/user1/arashwork/prismatic-vlms/prismatic/models/vlms/prismatic.py", line 410, in forward
|
| 22 |
+
return self.llm_backbone(
|
| 23 |
+
File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
|
| 24 |
+
return self._call_impl(*args, **kwargs)
|
| 25 |
+
File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
|
| 26 |
+
return forward_call(*args, **kwargs)
|
| 27 |
+
File "/home/user1/arashwork/prismatic-vlms/prismatic/models/backbones/llm/base_llm.py", line 229, in forward
|
| 28 |
+
output: CausalLMOutputWithPast = self.llm(
|
| 29 |
+
File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
|
| 30 |
+
return self._call_impl(*args, **kwargs)
|
| 31 |
+
File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
|
| 32 |
+
return forward_call(*args, **kwargs)
|
| 33 |
+
File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/transformers/utils/generic.py", line 965, in wrapper
|
| 34 |
+
output = func(self, *args, **kwargs)
|
| 35 |
+
File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/transformers/utils/deprecation.py", line 172, in wrapped_func
|
| 36 |
+
return func(*args, **kwargs)
|
| 37 |
+
File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 843, in forward
|
| 38 |
+
loss = self.loss_function(logits=logits, labels=labels, vocab_size=self.config.vocab_size, **kwargs)
|
| 39 |
+
File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/transformers/loss/loss_utils.py", line 63, in ForCausalLMLoss
|
| 40 |
+
loss = fixed_cross_entropy(logits, shift_labels, num_items_in_batch, ignore_index, **kwargs)
|
| 41 |
+
File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/transformers/loss/loss_utils.py", line 35, in fixed_cross_entropy
|
| 42 |
+
loss = nn.functional.cross_entropy(source, target, ignore_index=ignore_index, reduction=reduction)
|
| 43 |
+
File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/nn/functional.py", line 3059, in cross_entropy
|
| 44 |
+
return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
|
| 45 |
+
torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 49.14 GiB. GPU 0 has a total capacity of 93.00 GiB of which 6.57 GiB is free. Including non-PyTorch memory, this process has 86.41 GiB memory in use. Of the allocated memory 83.47 GiB is allocated by PyTorch, and 1.97 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/files/requirements.txt
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pyyaml-include==1.4.1
|
| 2 |
+
torchaudio==2.2.0
|
| 3 |
+
psutil==7.0.0
|
| 4 |
+
nvidia-cufft-cu12==11.0.2.54
|
| 5 |
+
nvidia-nvtx-cu12==12.1.105
|
| 6 |
+
wheel==0.45.1
|
| 7 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
| 8 |
+
pillow==11.1.0
|
| 9 |
+
draccus==0.10.0
|
| 10 |
+
Jinja2==3.1.6
|
| 11 |
+
ninja==1.11.1.4
|
| 12 |
+
MarkupSafe==3.0.2
|
| 13 |
+
nvidia-cublas-cu12==12.1.3.1
|
| 14 |
+
tqdm==4.67.1
|
| 15 |
+
fsspec==2025.3.2
|
| 16 |
+
wandb==0.19.9
|
| 17 |
+
GitPython==3.1.44
|
| 18 |
+
click==8.1.8
|
| 19 |
+
timm==0.9.10
|
| 20 |
+
gitdb==4.0.12
|
| 21 |
+
nvidia-nvjitlink-cu12==12.8.93
|
| 22 |
+
nvidia-cudnn-cu12==8.9.2.26
|
| 23 |
+
mergedeep==1.3.4
|
| 24 |
+
annotated-types==0.7.0
|
| 25 |
+
protobuf==5.29.4
|
| 26 |
+
huggingface-hub==0.30.2
|
| 27 |
+
mdurl==0.1.2
|
| 28 |
+
urllib3==2.3.0
|
| 29 |
+
typing_extensions==4.13.1
|
| 30 |
+
numpy==1.26.4
|
| 31 |
+
torchvision==0.17.0
|
| 32 |
+
nvidia-cusparse-cu12==12.1.0.106
|
| 33 |
+
networkx==3.4.2
|
| 34 |
+
regex==2024.11.6
|
| 35 |
+
mypy-extensions==1.0.0
|
| 36 |
+
pip==25.0
|
| 37 |
+
peft==0.15.1
|
| 38 |
+
sentencepiece==0.2.0
|
| 39 |
+
Pygments==2.19.1
|
| 40 |
+
smmap==5.0.2
|
| 41 |
+
sympy==1.13.3
|
| 42 |
+
setuptools==75.8.0
|
| 43 |
+
nvidia-nccl-cu12==2.19.3
|
| 44 |
+
tokenizers==0.21.1
|
| 45 |
+
charset-normalizer==3.4.1
|
| 46 |
+
typing-inspection==0.4.0
|
| 47 |
+
platformdirs==4.3.7
|
| 48 |
+
packaging==24.2
|
| 49 |
+
setproctitle==1.3.5
|
| 50 |
+
idna==3.10
|
| 51 |
+
markdown-it-py==3.0.0
|
| 52 |
+
safetensors==0.5.3
|
| 53 |
+
rich==14.0.0
|
| 54 |
+
transformers==4.51.1
|
| 55 |
+
requests==2.32.3
|
| 56 |
+
sentry-sdk==2.25.1
|
| 57 |
+
jsonlines==4.0.0
|
| 58 |
+
PyYAML==6.0.2
|
| 59 |
+
pydantic_core==2.33.1
|
| 60 |
+
flash-attn==2.5.5
|
| 61 |
+
mpmath==1.3.0
|
| 62 |
+
attrs==25.3.0
|
| 63 |
+
einops==0.8.1
|
| 64 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
| 65 |
+
nvidia-curand-cu12==10.3.2.106
|
| 66 |
+
filelock==3.18.0
|
| 67 |
+
prismatic==0.0.2
|
| 68 |
+
certifi==2025.1.31
|
| 69 |
+
accelerate==1.6.0
|
| 70 |
+
typing-inspect==0.9.0
|
| 71 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
| 72 |
+
pydantic==2.11.3
|
| 73 |
+
six==1.17.0
|
| 74 |
+
nvidia-cusolver-cu12==11.4.5.107
|
| 75 |
+
torch==2.2.0
|
| 76 |
+
docker-pycreds==0.4.0
|
| 77 |
+
toml==0.10.2
|
| 78 |
+
triton==2.2.0
|
| 79 |
+
importlib_metadata==8.0.0
|
| 80 |
+
tomli==2.0.1
|
| 81 |
+
zipp==3.19.2
|
| 82 |
+
jaraco.context==5.3.0
|
| 83 |
+
inflect==7.3.1
|
| 84 |
+
autocommand==2.2.2
|
| 85 |
+
typing_extensions==4.12.2
|
| 86 |
+
jaraco.collections==5.1.0
|
| 87 |
+
jaraco.functools==4.0.1
|
| 88 |
+
packaging==24.2
|
| 89 |
+
wheel==0.43.0
|
| 90 |
+
backports.tarfile==1.2.0
|
| 91 |
+
platformdirs==4.2.2
|
| 92 |
+
more-itertools==10.3.0
|
| 93 |
+
jaraco.text==3.12.1
|
| 94 |
+
typeguard==4.3.0
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-6.8.0-52-generic-x86_64-with-glibc2.35",
|
| 3 |
+
"python": "CPython 3.10.16",
|
| 4 |
+
"startedAt": "2025-04-22T16:58:05.167030Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--model.type",
|
| 7 |
+
"prism-qwen25-extra-dinosiglip-224px+1_5b",
|
| 8 |
+
"--wandb_project",
|
| 9 |
+
"Qwen25-Extra-DINOSigLIP-224px-1_5B-full-finetune",
|
| 10 |
+
"--wandb_entity",
|
| 11 |
+
"arash-akbari-stu-northeastern-university",
|
| 12 |
+
"--model.enable_mixed_precision_training",
|
| 13 |
+
"True"
|
| 14 |
+
],
|
| 15 |
+
"program": "/home/user1/arashwork/prismatic-vlms/scripts/pretrain.py",
|
| 16 |
+
"codePath": "scripts/pretrain.py",
|
| 17 |
+
"git": {
|
| 18 |
+
"remote": "git@github.com:arashakb/prismatic-vlms.git",
|
| 19 |
+
"commit": "8082f651fe3ba730b13899691a222fdef82d2c66"
|
| 20 |
+
},
|
| 21 |
+
"email": "arash.akbari.stu@gmail.com",
|
| 22 |
+
"root": "runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7",
|
| 23 |
+
"host": "nnmc72",
|
| 24 |
+
"executable": "/home/user1/anaconda3/envs/prism/bin/python",
|
| 25 |
+
"codePathLocal": "scripts/pretrain.py",
|
| 26 |
+
"cpu_count": 48,
|
| 27 |
+
"cpu_count_logical": 96,
|
| 28 |
+
"gpu": "NVIDIA H100 NVL",
|
| 29 |
+
"gpu_count": 8,
|
| 30 |
+
"disk": {
|
| 31 |
+
"/": {
|
| 32 |
+
"total": "30476149334016",
|
| 33 |
+
"used": "18270204944384"
|
| 34 |
+
}
|
| 35 |
+
},
|
| 36 |
+
"memory": {
|
| 37 |
+
"total": "811294752768"
|
| 38 |
+
},
|
| 39 |
+
"cpu": {
|
| 40 |
+
"count": 48,
|
| 41 |
+
"countLogical": 96
|
| 42 |
+
},
|
| 43 |
+
"gpu_nvidia": [
|
| 44 |
+
{
|
| 45 |
+
"name": "NVIDIA H100 NVL",
|
| 46 |
+
"memoryTotal": "100485038080",
|
| 47 |
+
"cudaCores": 16896,
|
| 48 |
+
"architecture": "Hopper"
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "NVIDIA H100 NVL",
|
| 52 |
+
"memoryTotal": "100485038080",
|
| 53 |
+
"cudaCores": 16896,
|
| 54 |
+
"architecture": "Hopper"
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"name": "NVIDIA H100 NVL",
|
| 58 |
+
"memoryTotal": "100485038080",
|
| 59 |
+
"cudaCores": 16896,
|
| 60 |
+
"architecture": "Hopper"
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"name": "NVIDIA H100 NVL",
|
| 64 |
+
"memoryTotal": "100485038080",
|
| 65 |
+
"cudaCores": 16896,
|
| 66 |
+
"architecture": "Hopper"
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"name": "NVIDIA H100 NVL",
|
| 70 |
+
"memoryTotal": "100485038080",
|
| 71 |
+
"cudaCores": 16896,
|
| 72 |
+
"architecture": "Hopper"
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"name": "NVIDIA H100 NVL",
|
| 76 |
+
"memoryTotal": "100485038080",
|
| 77 |
+
"cudaCores": 16896,
|
| 78 |
+
"architecture": "Hopper"
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"name": "NVIDIA H100 NVL",
|
| 82 |
+
"memoryTotal": "100485038080",
|
| 83 |
+
"cudaCores": 16896,
|
| 84 |
+
"architecture": "Hopper"
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"name": "NVIDIA H100 NVL",
|
| 88 |
+
"memoryTotal": "100485038080",
|
| 89 |
+
"cudaCores": 16896,
|
| 90 |
+
"architecture": "Hopper"
|
| 91 |
+
}
|
| 92 |
+
],
|
| 93 |
+
"cudaVersion": "12.4"
|
| 94 |
+
}
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_wandb":{"runtime":15}}
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/logs/debug-core.log
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-04-22T09:58:04.848042565-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp7evbevo7/port-494110.txt","pid":494110,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
|
| 2 |
+
{"time":"2025-04-22T09:58:04.849131413-07:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":494110}
|
| 3 |
+
{"time":"2025-04-22T09:58:04.849105444-07:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":34993,"Zone":""}}
|
| 4 |
+
{"time":"2025-04-22T09:58:05.037650239-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:42808"}
|
| 5 |
+
{"time":"2025-04-22T09:58:05.168079456-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"cpx6iuc8","id":"127.0.0.1:42808"}
|
| 6 |
+
{"time":"2025-04-22T09:58:05.747564085-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"cpx6iuc8","id":"127.0.0.1:42808"}
|
| 7 |
+
{"time":"2025-04-22T09:58:21.093776359-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:42808"}
|
| 8 |
+
{"time":"2025-04-22T09:58:21.093854295-07:00","level":"INFO","msg":"server is shutting down"}
|
| 9 |
+
{"time":"2025-04-22T09:58:21.093848236-07:00","level":"INFO","msg":"connection: closing","id":"127.0.0.1:42808"}
|
| 10 |
+
{"time":"2025-04-22T09:58:21.094006501-07:00","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:42808"}
|
| 11 |
+
{"time":"2025-04-22T09:58:21.1669656-07:00","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:34993->127.0.0.1:42808: use of closed network connection","id":"127.0.0.1:42808"}
|
| 12 |
+
{"time":"2025-04-22T09:58:21.728742764-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:42808"}
|
| 13 |
+
{"time":"2025-04-22T09:58:21.728765407-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:42808"}
|
| 14 |
+
{"time":"2025-04-22T09:58:21.728779829-07:00","level":"INFO","msg":"server is closed"}
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-04-22T09:58:05.168284731-07:00","level":"INFO","msg":"stream: starting","core version":"0.19.9","symlink path":"runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250422_095805-cpx6iuc8/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-04-22T09:58:05.74750679-07:00","level":"INFO","msg":"created new stream","id":"cpx6iuc8"}
|
| 3 |
+
{"time":"2025-04-22T09:58:05.747559949-07:00","level":"INFO","msg":"stream: started","id":"cpx6iuc8"}
|
| 4 |
+
{"time":"2025-04-22T09:58:05.74760112-07:00","level":"INFO","msg":"sender: started","stream_id":"cpx6iuc8"}
|
| 5 |
+
{"time":"2025-04-22T09:58:05.747609823-07:00","level":"INFO","msg":"writer: Do: started","stream_id":"cpx6iuc8"}
|
| 6 |
+
{"time":"2025-04-22T09:58:05.747646928-07:00","level":"INFO","msg":"handler: started","stream_id":"cpx6iuc8"}
|
| 7 |
+
{"time":"2025-04-22T09:58:06.048552657-07:00","level":"INFO","msg":"Starting system monitor"}
|
| 8 |
+
{"time":"2025-04-22T09:58:21.093879462-07:00","level":"INFO","msg":"stream: closing","id":"cpx6iuc8"}
|
| 9 |
+
{"time":"2025-04-22T09:58:21.093897119-07:00","level":"INFO","msg":"Stopping system monitor"}
|
| 10 |
+
{"time":"2025-04-22T09:58:21.093946622-07:00","level":"INFO","msg":"Stopped system monitor"}
|
| 11 |
+
{"time":"2025-04-22T09:58:21.630469097-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 12 |
+
{"time":"2025-04-22T09:58:21.728396559-07:00","level":"INFO","msg":"handler: closed","stream_id":"cpx6iuc8"}
|
| 13 |
+
{"time":"2025-04-22T09:58:21.728458532-07:00","level":"INFO","msg":"writer: Close: closed","stream_id":"cpx6iuc8"}
|
| 14 |
+
{"time":"2025-04-22T09:58:21.728467014-07:00","level":"INFO","msg":"sender: closed","stream_id":"cpx6iuc8"}
|
| 15 |
+
{"time":"2025-04-22T09:58:21.728627693-07:00","level":"INFO","msg":"stream: closed","id":"cpx6iuc8"}
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/logs/debug.log
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-04-22 09:58:05,162 INFO MainThread:494110 [wandb_setup.py:_flush():67] Current SDK version is 0.19.9
|
| 2 |
+
2025-04-22 09:58:05,162 INFO MainThread:494110 [wandb_setup.py:_flush():67] Configure stats pid to 494110
|
| 3 |
+
2025-04-22 09:58:05,162 INFO MainThread:494110 [wandb_setup.py:_flush():67] Loading settings from /home/user1/.config/wandb/settings
|
| 4 |
+
2025-04-22 09:58:05,162 INFO MainThread:494110 [wandb_setup.py:_flush():67] Loading settings from /home/user1/arashwork/prismatic-vlms/wandb/settings
|
| 5 |
+
2025-04-22 09:58:05,162 INFO MainThread:494110 [wandb_setup.py:_flush():67] Loading settings from environment variables
|
| 6 |
+
2025-04-22 09:58:05,162 INFO MainThread:494110 [wandb_init.py:setup_run_log_directory():662] Logging user logs to runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250422_095805-cpx6iuc8/logs/debug.log
|
| 7 |
+
2025-04-22 09:58:05,162 INFO MainThread:494110 [wandb_init.py:setup_run_log_directory():663] Logging internal logs to runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250422_095805-cpx6iuc8/logs/debug-internal.log
|
| 8 |
+
2025-04-22 09:58:05,162 INFO MainThread:494110 [wandb_init.py:init():781] calling init triggers
|
| 9 |
+
2025-04-22 09:58:05,162 INFO MainThread:494110 [wandb_init.py:init():786] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'model': {'type': 'prism-qwen25-extra-dinosiglip-224px+1_5b', 'model_id': 'prism-qwen25-extra-dinosiglip-224px+1_5b', 'arch_specifier': 'no-align+fused-gelu-mlp', 'vision_backbone_id': 'dinosiglip-vit-so-224px', 'llm_backbone_id': 'qwen25-1_5b-extra', 'image_resize_strategy': 'resize-naive', 'llm_max_length': 32768, 'align_epochs': 1, 'align_max_steps': None, 'align_global_batch_size': 8, 'align_per_device_batch_size': 8, 'align_learning_rate': 0.001, 'align_weight_decay': 0.0, 'align_max_grad_norm': 1.0, 'align_lr_scheduler_type': 'linear-warmup+cosine-decay', 'align_warmup_ratio': 0.03, 'align_train_strategy': 'fsdp-shard-grad-op', 'finetune_epochs': 2, 'finetune_max_steps': None, 'finetune_global_batch_size': 128, 'finetune_per_device_batch_size': 16, 'finetune_learning_rate': 2e-05, 'finetune_weight_decay': 0.1, 'finetune_max_grad_norm': 1.0, 'finetune_lr_scheduler_type': 'linear-warmup+cosine-decay', 'finetune_warmup_ratio': 0.03, 'finetune_train_strategy': 'fsdp-full-shard', 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': False}, 'dataset': {'type': 'llava-v15', 'dataset_id': 'llava-v15', 'align_stage_components': ['download/llava-laion-cc-sbu-558k/chat.json', 'download/llava-laion-cc-sbu-558k'], 'finetune_stage_components': ['download/llava-v1.5-instruct/llava_v1_5_mix665k.json', 'download/llava-v1.5-instruct'], 'dataset_root_dir': 'data'}, 'stage': 'finetune', 'pretrained_checkpoint': None, 'run_id': 'prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7', 'run_root_dir': 'runs', 'seed': 7, 'hf_token': 'hf_token.txt', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'Qwen25-Extra-DINOSigLIP-224px-1_5B-full-finetune', 'wandb_entity': 'arash-akbari-stu-northeastern-university', '_wandb': {}}
|
| 11 |
+
2025-04-22 09:58:05,162 INFO MainThread:494110 [wandb_init.py:init():809] starting backend
|
| 12 |
+
2025-04-22 09:58:05,162 INFO MainThread:494110 [wandb_init.py:init():813] sending inform_init request
|
| 13 |
+
2025-04-22 09:58:05,166 INFO MainThread:494110 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 14 |
+
2025-04-22 09:58:05,166 INFO MainThread:494110 [wandb_init.py:init():823] backend started and connected
|
| 15 |
+
2025-04-22 09:58:05,169 INFO MainThread:494110 [wandb_init.py:init():915] updated telemetry
|
| 16 |
+
2025-04-22 09:58:05,175 INFO MainThread:494110 [wandb_init.py:init():939] communicating run to backend with 90.0 second timeout
|
| 17 |
+
2025-04-22 09:58:06,046 INFO MainThread:494110 [wandb_init.py:init():1014] starting run threads in backend
|
| 18 |
+
2025-04-22 09:58:06,086 INFO MainThread:494110 [wandb_run.py:_console_start():2454] atexit reg
|
| 19 |
+
2025-04-22 09:58:06,086 INFO MainThread:494110 [wandb_run.py:_redirect():2306] redirect: wrap_raw
|
| 20 |
+
2025-04-22 09:58:06,087 INFO MainThread:494110 [wandb_run.py:_redirect():2371] Wrapping output streams.
|
| 21 |
+
2025-04-22 09:58:06,087 INFO MainThread:494110 [wandb_run.py:_redirect():2394] Redirects installed.
|
| 22 |
+
2025-04-22 09:58:06,088 INFO MainThread:494110 [wandb_init.py:init():1056] run started, returning control to user process
|
| 23 |
+
2025-04-22 09:58:21,093 INFO MsgRouterThr:494110 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 2 handles.
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_095805-cpx6iuc8/run-cpx6iuc8.wandb
ADDED
|
Binary file (13.9 kB). View file
|
|
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/files/config.yaml
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_wandb:
|
| 2 |
+
value:
|
| 3 |
+
cli_version: 0.19.9
|
| 4 |
+
m: []
|
| 5 |
+
python_version: 3.10.16
|
| 6 |
+
t:
|
| 7 |
+
"1":
|
| 8 |
+
- 1
|
| 9 |
+
- 11
|
| 10 |
+
- 41
|
| 11 |
+
- 49
|
| 12 |
+
- 55
|
| 13 |
+
- 63
|
| 14 |
+
- 71
|
| 15 |
+
- 98
|
| 16 |
+
"2":
|
| 17 |
+
- 1
|
| 18 |
+
- 11
|
| 19 |
+
- 41
|
| 20 |
+
- 49
|
| 21 |
+
- 55
|
| 22 |
+
- 63
|
| 23 |
+
- 71
|
| 24 |
+
- 98
|
| 25 |
+
"3":
|
| 26 |
+
- 13
|
| 27 |
+
- 16
|
| 28 |
+
- 23
|
| 29 |
+
- 55
|
| 30 |
+
"4": 3.10.16
|
| 31 |
+
"5": 0.19.9
|
| 32 |
+
"6": 4.51.1
|
| 33 |
+
"8":
|
| 34 |
+
- 5
|
| 35 |
+
"12": 0.19.9
|
| 36 |
+
"13": linux-x86_64
|
| 37 |
+
dataset:
|
| 38 |
+
value:
|
| 39 |
+
align_stage_components:
|
| 40 |
+
- download/llava-laion-cc-sbu-558k/chat.json
|
| 41 |
+
- download/llava-laion-cc-sbu-558k
|
| 42 |
+
dataset_id: llava-v15
|
| 43 |
+
dataset_root_dir: data
|
| 44 |
+
finetune_stage_components:
|
| 45 |
+
- download/llava-v1.5-instruct/llava_v1_5_mix665k.json
|
| 46 |
+
- download/llava-v1.5-instruct
|
| 47 |
+
type: llava-v15
|
| 48 |
+
hf_token:
|
| 49 |
+
value: hf_token.txt
|
| 50 |
+
model:
|
| 51 |
+
value:
|
| 52 |
+
align_epochs: 1
|
| 53 |
+
align_global_batch_size: 8
|
| 54 |
+
align_learning_rate: 0.001
|
| 55 |
+
align_lr_scheduler_type: linear-warmup+cosine-decay
|
| 56 |
+
align_max_grad_norm: 1
|
| 57 |
+
align_max_steps: null
|
| 58 |
+
align_per_device_batch_size: 8
|
| 59 |
+
align_train_strategy: fsdp-shard-grad-op
|
| 60 |
+
align_warmup_ratio: 0.03
|
| 61 |
+
align_weight_decay: 0
|
| 62 |
+
arch_specifier: no-align+fused-gelu-mlp
|
| 63 |
+
enable_gradient_checkpointing: true
|
| 64 |
+
enable_mixed_precision_training: true
|
| 65 |
+
finetune_epochs: 2
|
| 66 |
+
finetune_global_batch_size: 64
|
| 67 |
+
finetune_learning_rate: 2e-05
|
| 68 |
+
finetune_lr_scheduler_type: linear-warmup+cosine-decay
|
| 69 |
+
finetune_max_grad_norm: 1
|
| 70 |
+
finetune_max_steps: null
|
| 71 |
+
finetune_per_device_batch_size: 8
|
| 72 |
+
finetune_train_strategy: fsdp-full-shard
|
| 73 |
+
finetune_warmup_ratio: 0.03
|
| 74 |
+
finetune_weight_decay: 0.1
|
| 75 |
+
image_resize_strategy: resize-naive
|
| 76 |
+
llm_backbone_id: qwen25-1_5b-extra
|
| 77 |
+
llm_max_length: 32768
|
| 78 |
+
model_id: prism-qwen25-extra-dinosiglip-224px+1_5b
|
| 79 |
+
reduce_in_full_precision: false
|
| 80 |
+
type: prism-qwen25-extra-dinosiglip-224px+1_5b
|
| 81 |
+
vision_backbone_id: dinosiglip-vit-so-224px
|
| 82 |
+
pretrained_checkpoint:
|
| 83 |
+
value: null
|
| 84 |
+
run_id:
|
| 85 |
+
value: prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7
|
| 86 |
+
run_root_dir:
|
| 87 |
+
value: runs
|
| 88 |
+
seed:
|
| 89 |
+
value: 7
|
| 90 |
+
stage:
|
| 91 |
+
value: finetune
|
| 92 |
+
trackers:
|
| 93 |
+
value:
|
| 94 |
+
- jsonl
|
| 95 |
+
- wandb
|
| 96 |
+
wandb_entity:
|
| 97 |
+
value: arash-akbari-stu-northeastern-university
|
| 98 |
+
wandb_project:
|
| 99 |
+
value: Qwen25-Extra-DINOSigLIP-224px-1_5B-full-finetune
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/files/output.log
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
04/22 [10:00:10] INFO | >> [*] Starting Training Loop pretrain.py:238
|
| 2 |
+
Traceback (most recent call last):
|
| 3 |
+
File "/home/user1/arashwork/prismatic-vlms/scripts/pretrain.py", line 252, in <module>
|
| 4 |
+
pretrain()
|
| 5 |
+
File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/draccus/argparsing.py", line 225, in wrapper_inner
|
| 6 |
+
response = fn(cfg, *args, **kwargs)
|
| 7 |
+
File "/home/user1/arashwork/prismatic-vlms/scripts/pretrain.py", line 239, in pretrain
|
| 8 |
+
train_strategy.run_training(train_dataset, collator, metrics, stage=cfg.stage, seed=cfg.seed)
|
| 9 |
+
File "/home/user1/arashwork/prismatic-vlms/prismatic/training/strategies/base_strategy.py", line 208, in run_training
|
| 10 |
+
normalized_loss.backward()
|
| 11 |
+
File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/_tensor.py", line 522, in backward
|
| 12 |
+
torch.autograd.backward(
|
| 13 |
+
File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/autograd/__init__.py", line 266, in backward
|
| 14 |
+
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
|
| 15 |
+
torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 24.57 GiB. GPU 0 has a total capacity of 93.00 GiB of which 23.65 GiB is free. Including non-PyTorch memory, this process has 69.33 GiB memory in use. Of the allocated memory 66.95 GiB is allocated by PyTorch, and 1.40 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/files/requirements.txt
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pyyaml-include==1.4.1
|
| 2 |
+
torchaudio==2.2.0
|
| 3 |
+
psutil==7.0.0
|
| 4 |
+
nvidia-cufft-cu12==11.0.2.54
|
| 5 |
+
nvidia-nvtx-cu12==12.1.105
|
| 6 |
+
wheel==0.45.1
|
| 7 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
| 8 |
+
pillow==11.1.0
|
| 9 |
+
draccus==0.10.0
|
| 10 |
+
Jinja2==3.1.6
|
| 11 |
+
ninja==1.11.1.4
|
| 12 |
+
MarkupSafe==3.0.2
|
| 13 |
+
nvidia-cublas-cu12==12.1.3.1
|
| 14 |
+
tqdm==4.67.1
|
| 15 |
+
fsspec==2025.3.2
|
| 16 |
+
wandb==0.19.9
|
| 17 |
+
GitPython==3.1.44
|
| 18 |
+
click==8.1.8
|
| 19 |
+
timm==0.9.10
|
| 20 |
+
gitdb==4.0.12
|
| 21 |
+
nvidia-nvjitlink-cu12==12.8.93
|
| 22 |
+
nvidia-cudnn-cu12==8.9.2.26
|
| 23 |
+
mergedeep==1.3.4
|
| 24 |
+
annotated-types==0.7.0
|
| 25 |
+
protobuf==5.29.4
|
| 26 |
+
huggingface-hub==0.30.2
|
| 27 |
+
mdurl==0.1.2
|
| 28 |
+
urllib3==2.3.0
|
| 29 |
+
typing_extensions==4.13.1
|
| 30 |
+
numpy==1.26.4
|
| 31 |
+
torchvision==0.17.0
|
| 32 |
+
nvidia-cusparse-cu12==12.1.0.106
|
| 33 |
+
networkx==3.4.2
|
| 34 |
+
regex==2024.11.6
|
| 35 |
+
mypy-extensions==1.0.0
|
| 36 |
+
pip==25.0
|
| 37 |
+
peft==0.15.1
|
| 38 |
+
sentencepiece==0.2.0
|
| 39 |
+
Pygments==2.19.1
|
| 40 |
+
smmap==5.0.2
|
| 41 |
+
sympy==1.13.3
|
| 42 |
+
setuptools==75.8.0
|
| 43 |
+
nvidia-nccl-cu12==2.19.3
|
| 44 |
+
tokenizers==0.21.1
|
| 45 |
+
charset-normalizer==3.4.1
|
| 46 |
+
typing-inspection==0.4.0
|
| 47 |
+
platformdirs==4.3.7
|
| 48 |
+
packaging==24.2
|
| 49 |
+
setproctitle==1.3.5
|
| 50 |
+
idna==3.10
|
| 51 |
+
markdown-it-py==3.0.0
|
| 52 |
+
safetensors==0.5.3
|
| 53 |
+
rich==14.0.0
|
| 54 |
+
transformers==4.51.1
|
| 55 |
+
requests==2.32.3
|
| 56 |
+
sentry-sdk==2.25.1
|
| 57 |
+
jsonlines==4.0.0
|
| 58 |
+
PyYAML==6.0.2
|
| 59 |
+
pydantic_core==2.33.1
|
| 60 |
+
flash-attn==2.5.5
|
| 61 |
+
mpmath==1.3.0
|
| 62 |
+
attrs==25.3.0
|
| 63 |
+
einops==0.8.1
|
| 64 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
| 65 |
+
nvidia-curand-cu12==10.3.2.106
|
| 66 |
+
filelock==3.18.0
|
| 67 |
+
prismatic==0.0.2
|
| 68 |
+
certifi==2025.1.31
|
| 69 |
+
accelerate==1.6.0
|
| 70 |
+
typing-inspect==0.9.0
|
| 71 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
| 72 |
+
pydantic==2.11.3
|
| 73 |
+
six==1.17.0
|
| 74 |
+
nvidia-cusolver-cu12==11.4.5.107
|
| 75 |
+
torch==2.2.0
|
| 76 |
+
docker-pycreds==0.4.0
|
| 77 |
+
toml==0.10.2
|
| 78 |
+
triton==2.2.0
|
| 79 |
+
importlib_metadata==8.0.0
|
| 80 |
+
tomli==2.0.1
|
| 81 |
+
zipp==3.19.2
|
| 82 |
+
jaraco.context==5.3.0
|
| 83 |
+
inflect==7.3.1
|
| 84 |
+
autocommand==2.2.2
|
| 85 |
+
typing_extensions==4.12.2
|
| 86 |
+
jaraco.collections==5.1.0
|
| 87 |
+
jaraco.functools==4.0.1
|
| 88 |
+
packaging==24.2
|
| 89 |
+
wheel==0.43.0
|
| 90 |
+
backports.tarfile==1.2.0
|
| 91 |
+
platformdirs==4.2.2
|
| 92 |
+
more-itertools==10.3.0
|
| 93 |
+
jaraco.text==3.12.1
|
| 94 |
+
typeguard==4.3.0
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-6.8.0-52-generic-x86_64-with-glibc2.35",
|
| 3 |
+
"python": "CPython 3.10.16",
|
| 4 |
+
"startedAt": "2025-04-22T17:00:09.283012Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--model.type",
|
| 7 |
+
"prism-qwen25-extra-dinosiglip-224px+1_5b",
|
| 8 |
+
"--wandb_project",
|
| 9 |
+
"Qwen25-Extra-DINOSigLIP-224px-1_5B-full-finetune",
|
| 10 |
+
"--wandb_entity",
|
| 11 |
+
"arash-akbari-stu-northeastern-university",
|
| 12 |
+
"--model.enable_mixed_precision_training",
|
| 13 |
+
"True"
|
| 14 |
+
],
|
| 15 |
+
"program": "/home/user1/arashwork/prismatic-vlms/scripts/pretrain.py",
|
| 16 |
+
"codePath": "scripts/pretrain.py",
|
| 17 |
+
"git": {
|
| 18 |
+
"remote": "git@github.com:arashakb/prismatic-vlms.git",
|
| 19 |
+
"commit": "8082f651fe3ba730b13899691a222fdef82d2c66"
|
| 20 |
+
},
|
| 21 |
+
"email": "arash.akbari.stu@gmail.com",
|
| 22 |
+
"root": "runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7",
|
| 23 |
+
"host": "nnmc72",
|
| 24 |
+
"executable": "/home/user1/anaconda3/envs/prism/bin/python",
|
| 25 |
+
"codePathLocal": "scripts/pretrain.py",
|
| 26 |
+
"cpu_count": 48,
|
| 27 |
+
"cpu_count_logical": 96,
|
| 28 |
+
"gpu": "NVIDIA H100 NVL",
|
| 29 |
+
"gpu_count": 8,
|
| 30 |
+
"disk": {
|
| 31 |
+
"/": {
|
| 32 |
+
"total": "30476149334016",
|
| 33 |
+
"used": "18270205763584"
|
| 34 |
+
}
|
| 35 |
+
},
|
| 36 |
+
"memory": {
|
| 37 |
+
"total": "811294752768"
|
| 38 |
+
},
|
| 39 |
+
"cpu": {
|
| 40 |
+
"count": 48,
|
| 41 |
+
"countLogical": 96
|
| 42 |
+
},
|
| 43 |
+
"gpu_nvidia": [
|
| 44 |
+
{
|
| 45 |
+
"name": "NVIDIA H100 NVL",
|
| 46 |
+
"memoryTotal": "100485038080",
|
| 47 |
+
"cudaCores": 16896,
|
| 48 |
+
"architecture": "Hopper"
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "NVIDIA H100 NVL",
|
| 52 |
+
"memoryTotal": "100485038080",
|
| 53 |
+
"cudaCores": 16896,
|
| 54 |
+
"architecture": "Hopper"
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"name": "NVIDIA H100 NVL",
|
| 58 |
+
"memoryTotal": "100485038080",
|
| 59 |
+
"cudaCores": 16896,
|
| 60 |
+
"architecture": "Hopper"
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"name": "NVIDIA H100 NVL",
|
| 64 |
+
"memoryTotal": "100485038080",
|
| 65 |
+
"cudaCores": 16896,
|
| 66 |
+
"architecture": "Hopper"
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"name": "NVIDIA H100 NVL",
|
| 70 |
+
"memoryTotal": "100485038080",
|
| 71 |
+
"cudaCores": 16896,
|
| 72 |
+
"architecture": "Hopper"
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"name": "NVIDIA H100 NVL",
|
| 76 |
+
"memoryTotal": "100485038080",
|
| 77 |
+
"cudaCores": 16896,
|
| 78 |
+
"architecture": "Hopper"
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"name": "NVIDIA H100 NVL",
|
| 82 |
+
"memoryTotal": "100485038080",
|
| 83 |
+
"cudaCores": 16896,
|
| 84 |
+
"architecture": "Hopper"
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"name": "NVIDIA H100 NVL",
|
| 88 |
+
"memoryTotal": "100485038080",
|
| 89 |
+
"cudaCores": 16896,
|
| 90 |
+
"architecture": "Hopper"
|
| 91 |
+
}
|
| 92 |
+
],
|
| 93 |
+
"cudaVersion": "12.4"
|
| 94 |
+
}
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_wandb":{"runtime":13}}
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/logs/debug-core.log
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-04-22T10:00:08.948681376-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpki64cq_w/port-497136.txt","pid":497136,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
|
| 2 |
+
{"time":"2025-04-22T10:00:08.949621573-07:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":497136}
|
| 3 |
+
{"time":"2025-04-22T10:00:08.949626079-07:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":44241,"Zone":""}}
|
| 4 |
+
{"time":"2025-04-22T10:00:09.137709691-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:56432"}
|
| 5 |
+
{"time":"2025-04-22T10:00:09.285230582-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"722cxxmu","id":"127.0.0.1:56432"}
|
| 6 |
+
{"time":"2025-04-22T10:00:09.9820411-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"722cxxmu","id":"127.0.0.1:56432"}
|
| 7 |
+
{"time":"2025-04-22T10:00:22.283734161-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:56432"}
|
| 8 |
+
{"time":"2025-04-22T10:00:22.283869291-07:00","level":"INFO","msg":"server is shutting down"}
|
| 9 |
+
{"time":"2025-04-22T10:00:22.283862411-07:00","level":"INFO","msg":"connection: closing","id":"127.0.0.1:56432"}
|
| 10 |
+
{"time":"2025-04-22T10:00:22.28400978-07:00","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:56432"}
|
| 11 |
+
{"time":"2025-04-22T10:00:22.820172517-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:56432"}
|
| 12 |
+
{"time":"2025-04-22T10:00:22.820205436-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:56432"}
|
| 13 |
+
{"time":"2025-04-22T10:00:22.820226758-07:00","level":"INFO","msg":"server is closed"}
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-04-22T10:00:09.285532831-07:00","level":"INFO","msg":"stream: starting","core version":"0.19.9","symlink path":"runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250422_100009-722cxxmu/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-04-22T10:00:09.98198035-07:00","level":"INFO","msg":"created new stream","id":"722cxxmu"}
|
| 3 |
+
{"time":"2025-04-22T10:00:09.98203457-07:00","level":"INFO","msg":"stream: started","id":"722cxxmu"}
|
| 4 |
+
{"time":"2025-04-22T10:00:09.982109321-07:00","level":"INFO","msg":"writer: Do: started","stream_id":"722cxxmu"}
|
| 5 |
+
{"time":"2025-04-22T10:00:09.982130132-07:00","level":"INFO","msg":"handler: started","stream_id":"722cxxmu"}
|
| 6 |
+
{"time":"2025-04-22T10:00:09.982171173-07:00","level":"INFO","msg":"sender: started","stream_id":"722cxxmu"}
|
| 7 |
+
{"time":"2025-04-22T10:00:10.212439726-07:00","level":"INFO","msg":"Starting system monitor"}
|
| 8 |
+
{"time":"2025-04-22T10:00:22.283892446-07:00","level":"INFO","msg":"stream: closing","id":"722cxxmu"}
|
| 9 |
+
{"time":"2025-04-22T10:00:22.283943562-07:00","level":"INFO","msg":"Stopping system monitor"}
|
| 10 |
+
{"time":"2025-04-22T10:00:22.284020005-07:00","level":"INFO","msg":"Stopped system monitor"}
|
| 11 |
+
{"time":"2025-04-22T10:00:22.67777009-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 12 |
+
{"time":"2025-04-22T10:00:22.819743269-07:00","level":"INFO","msg":"handler: closed","stream_id":"722cxxmu"}
|
| 13 |
+
{"time":"2025-04-22T10:00:22.819852882-07:00","level":"INFO","msg":"sender: closed","stream_id":"722cxxmu"}
|
| 14 |
+
{"time":"2025-04-22T10:00:22.819847424-07:00","level":"INFO","msg":"writer: Close: closed","stream_id":"722cxxmu"}
|
| 15 |
+
{"time":"2025-04-22T10:00:22.819995704-07:00","level":"INFO","msg":"stream: closed","id":"722cxxmu"}
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/logs/debug.log
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-04-22 10:00:09,276 INFO MainThread:497136 [wandb_setup.py:_flush():67] Current SDK version is 0.19.9
|
| 2 |
+
2025-04-22 10:00:09,276 INFO MainThread:497136 [wandb_setup.py:_flush():67] Configure stats pid to 497136
|
| 3 |
+
2025-04-22 10:00:09,276 INFO MainThread:497136 [wandb_setup.py:_flush():67] Loading settings from /home/user1/.config/wandb/settings
|
| 4 |
+
2025-04-22 10:00:09,276 INFO MainThread:497136 [wandb_setup.py:_flush():67] Loading settings from /home/user1/arashwork/prismatic-vlms/wandb/settings
|
| 5 |
+
2025-04-22 10:00:09,276 INFO MainThread:497136 [wandb_setup.py:_flush():67] Loading settings from environment variables
|
| 6 |
+
2025-04-22 10:00:09,276 INFO MainThread:497136 [wandb_init.py:setup_run_log_directory():662] Logging user logs to runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250422_100009-722cxxmu/logs/debug.log
|
| 7 |
+
2025-04-22 10:00:09,276 INFO MainThread:497136 [wandb_init.py:setup_run_log_directory():663] Logging internal logs to runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7/wandb/run-20250422_100009-722cxxmu/logs/debug-internal.log
|
| 8 |
+
2025-04-22 10:00:09,276 INFO MainThread:497136 [wandb_init.py:init():781] calling init triggers
|
| 9 |
+
2025-04-22 10:00:09,277 INFO MainThread:497136 [wandb_init.py:init():786] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'model': {'type': 'prism-qwen25-extra-dinosiglip-224px+1_5b', 'model_id': 'prism-qwen25-extra-dinosiglip-224px+1_5b', 'arch_specifier': 'no-align+fused-gelu-mlp', 'vision_backbone_id': 'dinosiglip-vit-so-224px', 'llm_backbone_id': 'qwen25-1_5b-extra', 'image_resize_strategy': 'resize-naive', 'llm_max_length': 32768, 'align_epochs': 1, 'align_max_steps': None, 'align_global_batch_size': 8, 'align_per_device_batch_size': 8, 'align_learning_rate': 0.001, 'align_weight_decay': 0.0, 'align_max_grad_norm': 1.0, 'align_lr_scheduler_type': 'linear-warmup+cosine-decay', 'align_warmup_ratio': 0.03, 'align_train_strategy': 'fsdp-shard-grad-op', 'finetune_epochs': 2, 'finetune_max_steps': None, 'finetune_global_batch_size': 64, 'finetune_per_device_batch_size': 8, 'finetune_learning_rate': 2e-05, 'finetune_weight_decay': 0.1, 'finetune_max_grad_norm': 1.0, 'finetune_lr_scheduler_type': 'linear-warmup+cosine-decay', 'finetune_warmup_ratio': 0.03, 'finetune_train_strategy': 'fsdp-full-shard', 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': False}, 'dataset': {'type': 'llava-v15', 'dataset_id': 'llava-v15', 'align_stage_components': ['download/llava-laion-cc-sbu-558k/chat.json', 'download/llava-laion-cc-sbu-558k'], 'finetune_stage_components': ['download/llava-v1.5-instruct/llava_v1_5_mix665k.json', 'download/llava-v1.5-instruct'], 'dataset_root_dir': 'data'}, 'stage': 'finetune', 'pretrained_checkpoint': None, 'run_id': 'prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7', 'run_root_dir': 'runs', 'seed': 7, 'hf_token': 'hf_token.txt', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'Qwen25-Extra-DINOSigLIP-224px-1_5B-full-finetune', 'wandb_entity': 'arash-akbari-stu-northeastern-university', '_wandb': {}}
|
| 11 |
+
2025-04-22 10:00:09,277 INFO MainThread:497136 [wandb_init.py:init():809] starting backend
|
| 12 |
+
2025-04-22 10:00:09,277 INFO MainThread:497136 [wandb_init.py:init():813] sending inform_init request
|
| 13 |
+
2025-04-22 10:00:09,282 INFO MainThread:497136 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 14 |
+
2025-04-22 10:00:09,282 INFO MainThread:497136 [wandb_init.py:init():823] backend started and connected
|
| 15 |
+
2025-04-22 10:00:09,285 INFO MainThread:497136 [wandb_init.py:init():915] updated telemetry
|
| 16 |
+
2025-04-22 10:00:09,290 INFO MainThread:497136 [wandb_init.py:init():939] communicating run to backend with 90.0 second timeout
|
| 17 |
+
2025-04-22 10:00:10,209 INFO MainThread:497136 [wandb_init.py:init():1014] starting run threads in backend
|
| 18 |
+
2025-04-22 10:00:10,279 INFO MainThread:497136 [wandb_run.py:_console_start():2454] atexit reg
|
| 19 |
+
2025-04-22 10:00:10,279 INFO MainThread:497136 [wandb_run.py:_redirect():2306] redirect: wrap_raw
|
| 20 |
+
2025-04-22 10:00:10,280 INFO MainThread:497136 [wandb_run.py:_redirect():2371] Wrapping output streams.
|
| 21 |
+
2025-04-22 10:00:10,280 INFO MainThread:497136 [wandb_run.py:_redirect():2394] Redirects installed.
|
| 22 |
+
2025-04-22 10:00:10,281 INFO MainThread:497136 [wandb_init.py:init():1056] run started, returning control to user process
|
| 23 |
+
2025-04-22 10:00:22,283 INFO MsgRouterThr:497136 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100009-722cxxmu/run-722cxxmu.wandb
ADDED
|
Binary file (5.85 kB). View file
|
|
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100320-p7tc08z7/files/config.yaml
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_wandb:
|
| 2 |
+
value:
|
| 3 |
+
cli_version: 0.19.9
|
| 4 |
+
m: []
|
| 5 |
+
python_version: 3.10.16
|
| 6 |
+
t:
|
| 7 |
+
"1":
|
| 8 |
+
- 1
|
| 9 |
+
- 11
|
| 10 |
+
- 41
|
| 11 |
+
- 49
|
| 12 |
+
- 55
|
| 13 |
+
- 63
|
| 14 |
+
- 71
|
| 15 |
+
- 98
|
| 16 |
+
"2":
|
| 17 |
+
- 1
|
| 18 |
+
- 11
|
| 19 |
+
- 41
|
| 20 |
+
- 49
|
| 21 |
+
- 55
|
| 22 |
+
- 63
|
| 23 |
+
- 71
|
| 24 |
+
- 98
|
| 25 |
+
"3":
|
| 26 |
+
- 13
|
| 27 |
+
- 16
|
| 28 |
+
- 23
|
| 29 |
+
- 55
|
| 30 |
+
"4": 3.10.16
|
| 31 |
+
"5": 0.19.9
|
| 32 |
+
"6": 4.51.1
|
| 33 |
+
"8":
|
| 34 |
+
- 5
|
| 35 |
+
"12": 0.19.9
|
| 36 |
+
"13": linux-x86_64
|
| 37 |
+
dataset:
|
| 38 |
+
value:
|
| 39 |
+
align_stage_components:
|
| 40 |
+
- download/llava-laion-cc-sbu-558k/chat.json
|
| 41 |
+
- download/llava-laion-cc-sbu-558k
|
| 42 |
+
dataset_id: llava-v15
|
| 43 |
+
dataset_root_dir: data
|
| 44 |
+
finetune_stage_components:
|
| 45 |
+
- download/llava-v1.5-instruct/llava_v1_5_mix665k.json
|
| 46 |
+
- download/llava-v1.5-instruct
|
| 47 |
+
type: llava-v15
|
| 48 |
+
hf_token:
|
| 49 |
+
value: hf_token.txt
|
| 50 |
+
model:
|
| 51 |
+
value:
|
| 52 |
+
align_epochs: 1
|
| 53 |
+
align_global_batch_size: 8
|
| 54 |
+
align_learning_rate: 0.001
|
| 55 |
+
align_lr_scheduler_type: linear-warmup+cosine-decay
|
| 56 |
+
align_max_grad_norm: 1
|
| 57 |
+
align_max_steps: null
|
| 58 |
+
align_per_device_batch_size: 8
|
| 59 |
+
align_train_strategy: fsdp-shard-grad-op
|
| 60 |
+
align_warmup_ratio: 0.03
|
| 61 |
+
align_weight_decay: 0
|
| 62 |
+
arch_specifier: no-align+fused-gelu-mlp
|
| 63 |
+
enable_gradient_checkpointing: true
|
| 64 |
+
enable_mixed_precision_training: true
|
| 65 |
+
finetune_epochs: 2
|
| 66 |
+
finetune_global_batch_size: 56
|
| 67 |
+
finetune_learning_rate: 2e-05
|
| 68 |
+
finetune_lr_scheduler_type: linear-warmup+cosine-decay
|
| 69 |
+
finetune_max_grad_norm: 1
|
| 70 |
+
finetune_max_steps: null
|
| 71 |
+
finetune_per_device_batch_size: 8
|
| 72 |
+
finetune_train_strategy: fsdp-full-shard
|
| 73 |
+
finetune_warmup_ratio: 0.03
|
| 74 |
+
finetune_weight_decay: 0.1
|
| 75 |
+
image_resize_strategy: resize-naive
|
| 76 |
+
llm_backbone_id: qwen25-1_5b-extra
|
| 77 |
+
llm_max_length: 32768
|
| 78 |
+
model_id: prism-qwen25-extra-dinosiglip-224px+1_5b
|
| 79 |
+
reduce_in_full_precision: false
|
| 80 |
+
type: prism-qwen25-extra-dinosiglip-224px+1_5b
|
| 81 |
+
vision_backbone_id: dinosiglip-vit-so-224px
|
| 82 |
+
pretrained_checkpoint:
|
| 83 |
+
value: null
|
| 84 |
+
run_id:
|
| 85 |
+
value: prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7
|
| 86 |
+
run_root_dir:
|
| 87 |
+
value: runs
|
| 88 |
+
seed:
|
| 89 |
+
value: 7
|
| 90 |
+
stage:
|
| 91 |
+
value: finetune
|
| 92 |
+
trackers:
|
| 93 |
+
value:
|
| 94 |
+
- jsonl
|
| 95 |
+
- wandb
|
| 96 |
+
wandb_entity:
|
| 97 |
+
value: arash-akbari-stu-northeastern-university
|
| 98 |
+
wandb_project:
|
| 99 |
+
value: Qwen25-Extra-DINOSigLIP-224px-1_5B-full-finetune
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100320-p7tc08z7/files/output.log
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
04/22 [10:03:21] INFO | >> [*] Starting Training Loop pretrain.py:238
|
| 2 |
+
Traceback (most recent call last):
|
| 3 |
+
File "/home/user1/arashwork/prismatic-vlms/scripts/pretrain.py", line 252, in <module>
|
| 4 |
+
pretrain()
|
| 5 |
+
File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/draccus/argparsing.py", line 225, in wrapper_inner
|
| 6 |
+
response = fn(cfg, *args, **kwargs)
|
| 7 |
+
File "/home/user1/arashwork/prismatic-vlms/scripts/pretrain.py", line 239, in pretrain
|
| 8 |
+
train_strategy.run_training(train_dataset, collator, metrics, stage=cfg.stage, seed=cfg.seed)
|
| 9 |
+
File "/home/user1/arashwork/prismatic-vlms/prismatic/training/strategies/base_strategy.py", line 208, in run_training
|
| 10 |
+
normalized_loss.backward()
|
| 11 |
+
File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/_tensor.py", line 522, in backward
|
| 12 |
+
torch.autograd.backward(
|
| 13 |
+
File "/home/user1/anaconda3/envs/prism/lib/python3.10/site-packages/torch/autograd/__init__.py", line 266, in backward
|
| 14 |
+
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
|
| 15 |
+
torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 24.57 GiB. GPU 0 has a total capacity of 93.00 GiB of which 23.56 GiB is free. Including non-PyTorch memory, this process has 69.42 GiB memory in use. Of the allocated memory 67.09 GiB is allocated by PyTorch, and 1.35 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100320-p7tc08z7/files/requirements.txt
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pyyaml-include==1.4.1
|
| 2 |
+
torchaudio==2.2.0
|
| 3 |
+
psutil==7.0.0
|
| 4 |
+
nvidia-cufft-cu12==11.0.2.54
|
| 5 |
+
nvidia-nvtx-cu12==12.1.105
|
| 6 |
+
wheel==0.45.1
|
| 7 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
| 8 |
+
pillow==11.1.0
|
| 9 |
+
draccus==0.10.0
|
| 10 |
+
Jinja2==3.1.6
|
| 11 |
+
ninja==1.11.1.4
|
| 12 |
+
MarkupSafe==3.0.2
|
| 13 |
+
nvidia-cublas-cu12==12.1.3.1
|
| 14 |
+
tqdm==4.67.1
|
| 15 |
+
fsspec==2025.3.2
|
| 16 |
+
wandb==0.19.9
|
| 17 |
+
GitPython==3.1.44
|
| 18 |
+
click==8.1.8
|
| 19 |
+
timm==0.9.10
|
| 20 |
+
gitdb==4.0.12
|
| 21 |
+
nvidia-nvjitlink-cu12==12.8.93
|
| 22 |
+
nvidia-cudnn-cu12==8.9.2.26
|
| 23 |
+
mergedeep==1.3.4
|
| 24 |
+
annotated-types==0.7.0
|
| 25 |
+
protobuf==5.29.4
|
| 26 |
+
huggingface-hub==0.30.2
|
| 27 |
+
mdurl==0.1.2
|
| 28 |
+
urllib3==2.3.0
|
| 29 |
+
typing_extensions==4.13.1
|
| 30 |
+
numpy==1.26.4
|
| 31 |
+
torchvision==0.17.0
|
| 32 |
+
nvidia-cusparse-cu12==12.1.0.106
|
| 33 |
+
networkx==3.4.2
|
| 34 |
+
regex==2024.11.6
|
| 35 |
+
mypy-extensions==1.0.0
|
| 36 |
+
pip==25.0
|
| 37 |
+
peft==0.15.1
|
| 38 |
+
sentencepiece==0.2.0
|
| 39 |
+
Pygments==2.19.1
|
| 40 |
+
smmap==5.0.2
|
| 41 |
+
sympy==1.13.3
|
| 42 |
+
setuptools==75.8.0
|
| 43 |
+
nvidia-nccl-cu12==2.19.3
|
| 44 |
+
tokenizers==0.21.1
|
| 45 |
+
charset-normalizer==3.4.1
|
| 46 |
+
typing-inspection==0.4.0
|
| 47 |
+
platformdirs==4.3.7
|
| 48 |
+
packaging==24.2
|
| 49 |
+
setproctitle==1.3.5
|
| 50 |
+
idna==3.10
|
| 51 |
+
markdown-it-py==3.0.0
|
| 52 |
+
safetensors==0.5.3
|
| 53 |
+
rich==14.0.0
|
| 54 |
+
transformers==4.51.1
|
| 55 |
+
requests==2.32.3
|
| 56 |
+
sentry-sdk==2.25.1
|
| 57 |
+
jsonlines==4.0.0
|
| 58 |
+
PyYAML==6.0.2
|
| 59 |
+
pydantic_core==2.33.1
|
| 60 |
+
flash-attn==2.5.5
|
| 61 |
+
mpmath==1.3.0
|
| 62 |
+
attrs==25.3.0
|
| 63 |
+
einops==0.8.1
|
| 64 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
| 65 |
+
nvidia-curand-cu12==10.3.2.106
|
| 66 |
+
filelock==3.18.0
|
| 67 |
+
prismatic==0.0.2
|
| 68 |
+
certifi==2025.1.31
|
| 69 |
+
accelerate==1.6.0
|
| 70 |
+
typing-inspect==0.9.0
|
| 71 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
| 72 |
+
pydantic==2.11.3
|
| 73 |
+
six==1.17.0
|
| 74 |
+
nvidia-cusolver-cu12==11.4.5.107
|
| 75 |
+
torch==2.2.0
|
| 76 |
+
docker-pycreds==0.4.0
|
| 77 |
+
toml==0.10.2
|
| 78 |
+
triton==2.2.0
|
| 79 |
+
importlib_metadata==8.0.0
|
| 80 |
+
tomli==2.0.1
|
| 81 |
+
zipp==3.19.2
|
| 82 |
+
jaraco.context==5.3.0
|
| 83 |
+
inflect==7.3.1
|
| 84 |
+
autocommand==2.2.2
|
| 85 |
+
typing_extensions==4.12.2
|
| 86 |
+
jaraco.collections==5.1.0
|
| 87 |
+
jaraco.functools==4.0.1
|
| 88 |
+
packaging==24.2
|
| 89 |
+
wheel==0.43.0
|
| 90 |
+
backports.tarfile==1.2.0
|
| 91 |
+
platformdirs==4.2.2
|
| 92 |
+
more-itertools==10.3.0
|
| 93 |
+
jaraco.text==3.12.1
|
| 94 |
+
typeguard==4.3.0
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100320-p7tc08z7/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-6.8.0-52-generic-x86_64-with-glibc2.35",
|
| 3 |
+
"python": "CPython 3.10.16",
|
| 4 |
+
"startedAt": "2025-04-22T17:03:20.553675Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--model.type",
|
| 7 |
+
"prism-qwen25-extra-dinosiglip-224px+1_5b",
|
| 8 |
+
"--wandb_project",
|
| 9 |
+
"Qwen25-Extra-DINOSigLIP-224px-1_5B-full-finetune",
|
| 10 |
+
"--wandb_entity",
|
| 11 |
+
"arash-akbari-stu-northeastern-university",
|
| 12 |
+
"--model.enable_mixed_precision_training",
|
| 13 |
+
"True"
|
| 14 |
+
],
|
| 15 |
+
"program": "/home/user1/arashwork/prismatic-vlms/scripts/pretrain.py",
|
| 16 |
+
"codePath": "scripts/pretrain.py",
|
| 17 |
+
"git": {
|
| 18 |
+
"remote": "git@github.com:arashakb/prismatic-vlms.git",
|
| 19 |
+
"commit": "8082f651fe3ba730b13899691a222fdef82d2c66"
|
| 20 |
+
},
|
| 21 |
+
"email": "arash.akbari.stu@gmail.com",
|
| 22 |
+
"root": "runs/prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7",
|
| 23 |
+
"host": "nnmc72",
|
| 24 |
+
"executable": "/home/user1/anaconda3/envs/prism/bin/python",
|
| 25 |
+
"codePathLocal": "scripts/pretrain.py",
|
| 26 |
+
"cpu_count": 48,
|
| 27 |
+
"cpu_count_logical": 96,
|
| 28 |
+
"gpu": "NVIDIA H100 NVL",
|
| 29 |
+
"gpu_count": 8,
|
| 30 |
+
"disk": {
|
| 31 |
+
"/": {
|
| 32 |
+
"total": "30476149334016",
|
| 33 |
+
"used": "18270206685184"
|
| 34 |
+
}
|
| 35 |
+
},
|
| 36 |
+
"memory": {
|
| 37 |
+
"total": "811294752768"
|
| 38 |
+
},
|
| 39 |
+
"cpu": {
|
| 40 |
+
"count": 48,
|
| 41 |
+
"countLogical": 96
|
| 42 |
+
},
|
| 43 |
+
"gpu_nvidia": [
|
| 44 |
+
{
|
| 45 |
+
"name": "NVIDIA H100 NVL",
|
| 46 |
+
"memoryTotal": "100485038080",
|
| 47 |
+
"cudaCores": 16896,
|
| 48 |
+
"architecture": "Hopper"
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "NVIDIA H100 NVL",
|
| 52 |
+
"memoryTotal": "100485038080",
|
| 53 |
+
"cudaCores": 16896,
|
| 54 |
+
"architecture": "Hopper"
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"name": "NVIDIA H100 NVL",
|
| 58 |
+
"memoryTotal": "100485038080",
|
| 59 |
+
"cudaCores": 16896,
|
| 60 |
+
"architecture": "Hopper"
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"name": "NVIDIA H100 NVL",
|
| 64 |
+
"memoryTotal": "100485038080",
|
| 65 |
+
"cudaCores": 16896,
|
| 66 |
+
"architecture": "Hopper"
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"name": "NVIDIA H100 NVL",
|
| 70 |
+
"memoryTotal": "100485038080",
|
| 71 |
+
"cudaCores": 16896,
|
| 72 |
+
"architecture": "Hopper"
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"name": "NVIDIA H100 NVL",
|
| 76 |
+
"memoryTotal": "100485038080",
|
| 77 |
+
"cudaCores": 16896,
|
| 78 |
+
"architecture": "Hopper"
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"name": "NVIDIA H100 NVL",
|
| 82 |
+
"memoryTotal": "100485038080",
|
| 83 |
+
"cudaCores": 16896,
|
| 84 |
+
"architecture": "Hopper"
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"name": "NVIDIA H100 NVL",
|
| 88 |
+
"memoryTotal": "100485038080",
|
| 89 |
+
"cudaCores": 16896,
|
| 90 |
+
"architecture": "Hopper"
|
| 91 |
+
}
|
| 92 |
+
],
|
| 93 |
+
"cudaVersion": "12.4"
|
| 94 |
+
}
|
prism-qwen25-extra-dinosiglip-224px+1_5b+stage-finetune+x7--original_hyperparameters/wandb/run-20250422_100320-p7tc08z7/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_wandb":{"runtime":11}}
|