.ipynb_checkpoints/README-checkpoint.md DELETED
@@ -1,45 +0,0 @@
1
- ---
2
- inference: false
3
- license: apache-2.0
4
- ---
5
-
6
- <br>
7
- <br>
8
-
9
- # LLaVA Model Card
10
-
11
- ## Model details
12
-
13
- **Model type:**
14
- LLaVA is an open-source chatbot trained by fine-tuning an LLM on multimodal instruction-following data.
15
- It is an auto-regressive language model, based on the transformer architecture.
16
- Base LLM: [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2)
17
-
18
- **Model date:**
19
- LLaVA-v1.6-Mistral-7B was trained in December 2023.
20
-
21
- **Paper or resources for more information:**
22
- https://llava-vl.github.io/
23
-
24
- ## License
25
- This model is subject to the [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) license.
26
-
27
- **Where to send questions or comments about the model:**
28
- https://github.com/haotian-liu/LLaVA/issues
29
-
30
- ## Intended use
31
- **Primary intended uses:**
32
- The primary use of LLaVA is research on large multimodal models and chatbots.
33
-
34
- **Primary intended users:**
35
- The primary intended users of the model are researchers and hobbyists in computer vision, natural language processing, machine learning, and artificial intelligence.
36
-
37
- ## Training dataset
38
- - 558K filtered image-text pairs from LAION/CC/SBU, captioned by BLIP.
39
- - 158K GPT-generated multimodal instruction-following data.
40
- - 500K academic-task-oriented VQA data mixture.
41
- - 50K GPT-4V data mixture.
42
- - 40K ShareGPT data.
43
-
44
- ## Evaluation dataset
45
- A collection of 12 benchmarks, including 5 academic VQA benchmarks and 7 recent benchmarks specifically proposed for instruction-following LMMs.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.ipynb_checkpoints/added_tokens-checkpoint.json DELETED
@@ -1,4 +0,0 @@
1
- {
2
- "<image>": 32000,
3
- "<pad>": 32001
4
- }
 
 
 
 
 
.ipynb_checkpoints/config-checkpoint.json DELETED
@@ -1,70 +0,0 @@
1
- {
2
- "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
3
- "architectures": [
4
- "LlavaLlamaForCausalLM"
5
- ],
6
- "attention_dropout": 0.0,
7
- "bos_token_id": 1,
8
- "eos_token_id": 2,
9
- "freeze_mm_mlp_adapter": false,
10
- "freeze_mm_vision_resampler": false,
11
- "hidden_act": "silu",
12
- "hidden_size": 4096,
13
- "image_aspect_ratio": "anyres",
14
- "image_crop_resolution": 224,
15
- "image_grid_pinpoints": [
16
- [
17
- 336,
18
- 672
19
- ],
20
- [
21
- 672,
22
- 336
23
- ],
24
- [
25
- 672,
26
- 672
27
- ],
28
- [
29
- 1008,
30
- 336
31
- ],
32
- [
33
- 336,
34
- 1008
35
- ]
36
- ],
37
- "image_split_resolution": 224,
38
- "initializer_range": 0.02,
39
- "intermediate_size": 14336,
40
- "max_position_embeddings": 32768,
41
- "mm_hidden_size": 1024,
42
- "mm_patch_merge_type": "spatial_unpad",
43
- "mm_projector_lr": null,
44
- "mm_projector_type": "mlp2x_gelu",
45
- "mm_resampler_type": null,
46
- "mm_use_im_patch_token": false,
47
- "mm_use_im_start_end": false,
48
- "mm_vision_select_feature": "patch",
49
- "mm_vision_select_layer": -2,
50
- "mm_vision_tower": "openai/clip-vit-large-patch14-336",
51
- "mm_vision_tower_lr": 2e-06,
52
- "model_type": "llava",
53
- "num_attention_heads": 32,
54
- "num_hidden_layers": 32,
55
- "num_key_value_heads": 8,
56
- "rms_norm_eps": 1e-05,
57
- "rope_theta": 1000000.0,
58
- "sliding_window": null,
59
- "tie_word_embeddings": false,
60
- "tokenizer_model_max_length": 4096,
61
- "tokenizer_padding_side": "left",
62
- "torch_dtype": "bfloat16",
63
- "transformers_version": "4.36.2",
64
- "tune_mm_mlp_adapter": false,
65
- "tune_mm_vision_resampler": false,
66
- "unfreeze_mm_vision_tower": true,
67
- "use_cache": true,
68
- "use_mm_proj": true,
69
- "vocab_size": 32000
70
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.ipynb_checkpoints/generation_config-checkpoint.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "bos_token_id": 1,
4
- "eos_token_id": 2,
5
- "pad_token_id": 32001,
6
- "transformers_version": "4.36.2"
7
- }
 
 
 
 
 
 
 
 
.ipynb_checkpoints/preprocessor_config-checkpoint.json DELETED
@@ -1,28 +0,0 @@
1
- {
2
- "crop_size": {
3
- "height": 336,
4
- "width": 336
5
- },
6
- "do_center_crop": true,
7
- "do_convert_rgb": true,
8
- "do_normalize": true,
9
- "do_rescale": true,
10
- "do_resize": true,
11
- "image_mean": [
12
- 0.48145466,
13
- 0.4578275,
14
- 0.40821073
15
- ],
16
- "image_processor_type": "CLIPImageProcessor",
17
- "image_std": [
18
- 0.26862954,
19
- 0.26130258,
20
- 0.27577711
21
- ],
22
- "processor_class": "LlavaProcessor",
23
- "resample": 3,
24
- "rescale_factor": 0.00392156862745098,
25
- "size": {
26
- "shortest_edge": 336
27
- }
28
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.ipynb_checkpoints/special_tokens_map-checkpoint.json DELETED
@@ -1,30 +0,0 @@
1
- {
2
- "bos_token": {
3
- "content": "<s>",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "eos_token": {
10
- "content": "</s>",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "unk_token": {
17
- "content": "<unk>",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
- "pad_token": {
24
- "content": "<pad>",
25
- "lstrip": false,
26
- "normalized": false,
27
- "rstrip": false,
28
- "single_word": false
29
- }
30
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.ipynb_checkpoints/tokenizer-checkpoint.json DELETED
The diff for this file is too large to render. See raw diff
 
.ipynb_checkpoints/tokenizer_config-checkpoint.json DELETED
@@ -1,61 +0,0 @@
1
- {
2
- "add_bos_token": true,
3
- "add_eos_token": false,
4
- "added_tokens_decoder": {
5
- "0": {
6
- "content": "<unk>",
7
- "lstrip": false,
8
- "normalized": false,
9
- "rstrip": false,
10
- "single_word": false,
11
- "special": true
12
- },
13
- "1": {
14
- "content": "<s>",
15
- "lstrip": false,
16
- "normalized": false,
17
- "rstrip": false,
18
- "single_word": false,
19
- "special": true
20
- },
21
- "2": {
22
- "content": "</s>",
23
- "lstrip": false,
24
- "normalized": false,
25
- "rstrip": false,
26
- "single_word": false,
27
- "special": true
28
- },
29
- "32000": {
30
- "content": "<image>",
31
- "lstrip": false,
32
- "normalized": false,
33
- "rstrip": false,
34
- "single_word": false,
35
- "special": true
36
- },
37
- "32001": {
38
- "content": "<pad>",
39
- "lstrip": false,
40
- "normalized": false,
41
- "rstrip": false,
42
- "single_word": false,
43
- "special": true
44
- }
45
- },
46
- "additional_special_tokens": [],
47
- "bos_token": "<s>",
48
- "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
49
- "clean_up_tokenization_spaces": false,
50
- "eos_token": "</s>",
51
- "legacy": false,
52
- "model_max_length": 4096,
53
- "pad_token": "<pad>",
54
- "padding_side": "right",
55
- "processor_class": "LlavaProcessor",
56
- "sp_model_kwargs": {},
57
- "spaces_between_special_tokens": false,
58
- "tokenizer_class": "LlamaTokenizer",
59
- "unk_token": "<unk>",
60
- "use_default_system_prompt": false
61
- }