Jeong-su committed on
Commit
2e1fc42
·
1 Parent(s): 6d1073c

Add files

Browse files
config.json CHANGED
@@ -1,48 +1,69 @@
1
  {
2
- "model_type": "llava",
3
- "architectures": ["LLaVAForVideoClassification"],
4
- "learning_rate": 5e-05,
5
- "train_batch_size": 2,
6
- "eval_batch_size": 8,
7
- "seed": 42,
8
- "distributed_type": "multi-GPU",
9
- "num_devices": 2,
10
- "gradient_accumulation_steps": 8,
11
- "total_train_batch_size": 32,
12
- "total_eval_batch_size": 16,
13
- "optimizer": "AdamW_Torch",
14
- "betas": [0.9, 0.999],
15
- "epsilon": 1e-08,
16
- "lr_scheduler_type": "cosine",
17
- "lr_scheduler_warmup_steps": 100,
18
- "num_epochs": 1.0,
19
- "framework_versions": {
20
- "PEFT": "0.12.0",
21
- "Transformers": "4.46.1",
22
- "Pytorch": "2.3.1+cu121",
23
- "Datasets": "3.1.0",
24
- "Tokenizers": "0.20.3"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  },
26
- "description": "This model is a fine-tuned version of llava-hf/LLaVA-NeXT-Video-7B-hf on the merger, the LLM_dataset(4o) and the LLM_dataset(4mini) datasets.",
27
- "training_data": {
28
- "dataset_names": ["merger", "LLM_dataset(4o)", "LLM_dataset(4mini)"]
29
- },
30
- "training_hyperparameters": {
31
- "learning_rate": 5e-05,
32
- "train_batch_size": 2,
33
- "eval_batch_size": 8,
34
- "seed": 42,
35
- "distributed_type": "multi-GPU",
36
- "num_devices": 2,
37
- "gradient_accumulation_steps": 8,
38
- "total_train_batch_size": 32,
39
- "total_eval_batch_size": 16,
40
- "optimizer": "AdamW_Torch",
41
- "betas": [0.9, 0.999],
42
- "epsilon": 1e-08,
43
- "lr_scheduler_type": "cosine",
44
- "lr_scheduler_warmup_steps": 100,
45
- "num_epochs": 1.0
46
- }
47
- }
48
-
 
 
 
 
 
1
  {
2
+ "architectures": [
3
+ "LlavaNextVideoForConditionalGeneration"
4
+ ],
5
+ "ignore_index": -100,
6
+ "image_grid_pinpoints": [
7
+ [
8
+ 336,
9
+ 672
10
+ ],
11
+ [
12
+ 672,
13
+ 336
14
+ ],
15
+ [
16
+ 672,
17
+ 672
18
+ ],
19
+ [
20
+ 1008,
21
+ 336
22
+ ],
23
+ [
24
+ 336,
25
+ 1008
26
+ ]
27
+ ],
28
+ "image_token_index": 32001,
29
+ "model_type": "llava_next_video",
30
+ "projector_hidden_act": "gelu",
31
+ "spatial_pool_mode": "average",
32
+ "spatial_pool_out_channels": 1024,
33
+ "spatial_pool_stride": 2,
34
+ "text_config": {
35
+ "_name_or_path": "lmsys/vicuna-7b-v1.5",
36
+ "architectures": [
37
+ "LlamaForCausalLM"
38
+ ],
39
+ "rope_scaling": {
40
+ "factor": 2.5,
41
+ "type": "linear"
42
  },
43
+ "max_position_embeddings": 4096,
44
+ "model_type": "llama",
45
+ "pad_token_id": 0,
46
+ "rms_norm_eps": 1e-05,
47
+ "torch_dtype": "float16",
48
+ "vocab_size": 32064
50
+ },
51
+ "tie_word_embeddings": false,
52
+ "torch_dtype": "bfloat16",
53
+ "transformers_version": "4.42.0.dev0",
54
+ "use_image_newline_parameter": true,
55
+ "video_token_index": 32000,
56
+ "vision_config": {
57
+ "hidden_size": 1024,
58
+ "image_size": 336,
59
+ "intermediate_size": 4096,
60
+ "model_type": "clip_vision_model",
61
+ "num_attention_heads": 16,
62
+ "num_hidden_layers": 24,
63
+ "patch_size": 14,
64
+ "projection_dim": 768,
65
+ "vocab_size": 32000
66
+ },
67
+ "vision_feature_layer": -2,
68
+ "vision_feature_select_strategy": "default"
69
+ }
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc32753ce8e75e89748b95677d9349d2b47af998457b4ebe22d27dece7338a3d
3
+ size 4992938952
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c50112e9dffcb204f90a5be738d28e33b92cd14332cabde6e8f39171d999a1b
3
+ size 4957878552
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8022077b98e08236a07b999f1f51ab577bda9c8aa2dc66d12853b1b162731100
3
+ size 4176137496