Upload folder using huggingface_hub
Browse files
- config.json +7 -7
- depth_projector/config.json +1 -1
- depth_projector/model.safetensors +1 -1
- depth_tower/config.json +1 -1
- depth_tower/model.safetensors +1 -1
- llm/config.json +1 -1
- llm/model.safetensors +1 -1
- mm_projector/config.json +1 -1
- mm_projector/model.safetensors +1 -1
- runs/Jun14_06-32-28_job-9be7e87b-6698-4594-a832-c824906e4803-master-0/events.out.tfevents.1749883238.job-9be7e87b-6698-4594-a832-c824906e4803-master-0 +3 -0
- trainer_state.json +0 -0
- vision_tower/config.json +1 -1
- vision_tower/model.safetensors +1 -1
config.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"Ubit": 100,
|
| 3 |
"_attn_implementation_autoset": true,
|
| 4 |
-
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
| 5 |
"architectures": [
|
| 6 |
"LlavaLlamaModel"
|
| 7 |
],
|
|
@@ -17,7 +17,7 @@
|
|
| 17 |
"depth_projector": "mlp_downsample_3x3_fix",
|
| 18 |
"depth_projector_cfg": {
|
| 19 |
"_attn_implementation_autoset": false,
|
| 20 |
-
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
| 21 |
"add_cross_attention": false,
|
| 22 |
"architectures": [
|
| 23 |
"MultimodalProjector"
|
|
@@ -85,7 +85,7 @@
|
|
| 85 |
"depth_tower": "/share/project/zhouenshen/hpfs/ckpt/vlm/paligemma-siglip-so400m-patch14-448",
|
| 86 |
"depth_tower_cfg": {
|
| 87 |
"_attn_implementation_autoset": false,
|
| 88 |
-
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
| 89 |
"add_cross_attention": false,
|
| 90 |
"architectures": [
|
| 91 |
"SiglipVisionModel"
|
|
@@ -185,7 +185,7 @@
|
|
| 185 |
"interpolate_mode": "linear",
|
| 186 |
"llm_cfg": {
|
| 187 |
"_attn_implementation_autoset": false,
|
| 188 |
-
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
| 189 |
"add_cross_attention": false,
|
| 190 |
"architectures": [
|
| 191 |
"Qwen2ForCausalLM"
|
|
@@ -278,7 +278,7 @@
|
|
| 278 |
"mm_projector": "mlp_downsample_3x3_fix",
|
| 279 |
"mm_projector_cfg": {
|
| 280 |
"_attn_implementation_autoset": false,
|
| 281 |
-
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
| 282 |
"add_cross_attention": false,
|
| 283 |
"architectures": [
|
| 284 |
"MultimodalProjector"
|
|
@@ -366,7 +366,7 @@
|
|
| 366 |
"refine_mlp_blocksize": false,
|
| 367 |
"refine_residual_fp": false,
|
| 368 |
"refine_row_blocksize": 4,
|
| 369 |
-
"resume_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
| 370 |
"row_blocksize": -1,
|
| 371 |
"row_blocksize_optimizer": 1,
|
| 372 |
"s2": false,
|
|
@@ -395,7 +395,7 @@
|
|
| 395 |
"vision_tower": "/share/project/zhouenshen/hpfs/ckpt/vlm/paligemma-siglip-so400m-patch14-448",
|
| 396 |
"vision_tower_cfg": {
|
| 397 |
"_attn_implementation_autoset": false,
|
| 398 |
-
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
| 399 |
"add_cross_attention": false,
|
| 400 |
"architectures": [
|
| 401 |
"SiglipVisionModel"
|
|
|
|
| 1 |
{
|
| 2 |
"Ubit": 100,
|
| 3 |
"_attn_implementation_autoset": true,
|
| 4 |
+
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model",
|
| 5 |
"architectures": [
|
| 6 |
"LlavaLlamaModel"
|
| 7 |
],
|
|
|
|
| 17 |
"depth_projector": "mlp_downsample_3x3_fix",
|
| 18 |
"depth_projector_cfg": {
|
| 19 |
"_attn_implementation_autoset": false,
|
| 20 |
+
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model/depth_projector",
|
| 21 |
"add_cross_attention": false,
|
| 22 |
"architectures": [
|
| 23 |
"MultimodalProjector"
|
|
|
|
| 85 |
"depth_tower": "/share/project/zhouenshen/hpfs/ckpt/vlm/paligemma-siglip-so400m-patch14-448",
|
| 86 |
"depth_tower_cfg": {
|
| 87 |
"_attn_implementation_autoset": false,
|
| 88 |
+
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model/depth_tower",
|
| 89 |
"add_cross_attention": false,
|
| 90 |
"architectures": [
|
| 91 |
"SiglipVisionModel"
|
|
|
|
| 185 |
"interpolate_mode": "linear",
|
| 186 |
"llm_cfg": {
|
| 187 |
"_attn_implementation_autoset": false,
|
| 188 |
+
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model/llm",
|
| 189 |
"add_cross_attention": false,
|
| 190 |
"architectures": [
|
| 191 |
"Qwen2ForCausalLM"
|
|
|
|
| 278 |
"mm_projector": "mlp_downsample_3x3_fix",
|
| 279 |
"mm_projector_cfg": {
|
| 280 |
"_attn_implementation_autoset": false,
|
| 281 |
+
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model/mm_projector",
|
| 282 |
"add_cross_attention": false,
|
| 283 |
"architectures": [
|
| 284 |
"MultimodalProjector"
|
|
|
|
| 366 |
"refine_mlp_blocksize": false,
|
| 367 |
"refine_residual_fp": false,
|
| 368 |
"refine_row_blocksize": 4,
|
| 369 |
+
"resume_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model",
|
| 370 |
"row_blocksize": -1,
|
| 371 |
"row_blocksize_optimizer": 1,
|
| 372 |
"s2": false,
|
|
|
|
| 395 |
"vision_tower": "/share/project/zhouenshen/hpfs/ckpt/vlm/paligemma-siglip-so400m-patch14-448",
|
| 396 |
"vision_tower_cfg": {
|
| 397 |
"_attn_implementation_autoset": false,
|
| 398 |
+
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model/vision_tower",
|
| 399 |
"add_cross_attention": false,
|
| 400 |
"architectures": [
|
| 401 |
"SiglipVisionModel"
|
depth_projector/config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
| 3 |
"architectures": [
|
| 4 |
"MultimodalProjector"
|
| 5 |
],
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model/depth_projector",
|
| 3 |
"architectures": [
|
| 4 |
"MultimodalProjector"
|
| 5 |
],
|
depth_projector/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 87068272
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:077b99b727dfd07aaf60280baaad16be7560c71c05f0811b58bd5b593aca73d3
|
| 3 |
size 87068272
|
depth_tower/config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
| 3 |
"architectures": [
|
| 4 |
"SiglipVisionModel"
|
| 5 |
],
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model/depth_tower",
|
| 3 |
"architectures": [
|
| 4 |
"SiglipVisionModel"
|
| 5 |
],
|
depth_tower/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 826707904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:640aecc60714538d98ecc6ab817454446075b11c86915d836d0e50e7d5cff6a2
|
| 3 |
size 826707904
|
llm/config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
| 3 |
"architectures": [
|
| 4 |
"Qwen2ForCausalLM"
|
| 5 |
],
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model/llm",
|
| 3 |
"architectures": [
|
| 4 |
"Qwen2ForCausalLM"
|
| 5 |
],
|
llm/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3086594696
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0bfb140450d88d5698d7af3ae8c87af7d7d356fdefe94d66a5ff5b319218071d
|
| 3 |
size 3086594696
|
mm_projector/config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
| 3 |
"architectures": [
|
| 4 |
"MultimodalProjector"
|
| 5 |
],
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model/mm_projector",
|
| 3 |
"architectures": [
|
| 4 |
"MultimodalProjector"
|
| 5 |
],
|
mm_projector/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 87068272
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c84546f1ff8591f43cc2862dd80837d90c83e2c4f2e9cce9f5f951793433f13
|
| 3 |
size 87068272
|
runs/Jun14_06-32-28_job-9be7e87b-6698-4594-a832-c824906e4803-master-0/events.out.tfevents.1749883238.job-9be7e87b-6698-4594-a832-c824906e4803-master-0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c41e42347a0db68c8f9b4a94eabb475bfcbde2b18e864eaf47d46c0f41755ab9
|
| 3 |
+
size 4633872
|
trainer_state.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
vision_tower/config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
| 3 |
"architectures": [
|
| 4 |
"SiglipVisionModel"
|
| 5 |
],
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model/vision_tower",
|
| 3 |
"architectures": [
|
| 4 |
"SiglipVisionModel"
|
| 5 |
],
|
vision_tower/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 826707904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:86c782eccce96224a8e0402a3676bb1dbc3d77b279b3c5392b96e99fe008443d
|
| 3 |
size 826707904
|