wli1995 committed on
Commit
6f04984
·
verified ·
1 Parent(s): a043a29

Upload C++ demo

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +31 -0
  2. .infer_axmodel_650.py.swn +0 -0
  3. .infer_axmodel_650.py.swo +0 -0
  4. .infer_axmodel_650.py.swp +0 -0
  5. FastVLM_tokenizer.txt +0 -0
  6. README.md +136 -62
  7. embeds/model.embed_tokens.weight.bfloat16.bin +2 -2
  8. embeds/model.embed_tokens.weight.npy +2 -2
  9. fastvlm_C128_CTX1024_P640_ax620e/image_encoder_512x512_ax620e.axmodel +3 -0
  10. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l0_together.axmodel +3 -0
  11. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l10_together.axmodel +3 -0
  12. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l11_together.axmodel +3 -0
  13. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l12_together.axmodel +3 -0
  14. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l13_together.axmodel +3 -0
  15. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l14_together.axmodel +3 -0
  16. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l15_together.axmodel +3 -0
  17. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l16_together.axmodel +3 -0
  18. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l17_together.axmodel +3 -0
  19. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l18_together.axmodel +3 -0
  20. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l19_together.axmodel +3 -0
  21. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l1_together.axmodel +3 -0
  22. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l20_together.axmodel +3 -0
  23. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l21_together.axmodel +3 -0
  24. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l22_together.axmodel +3 -0
  25. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l23_together.axmodel +3 -0
  26. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l2_together.axmodel +3 -0
  27. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l3_together.axmodel +3 -0
  28. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l4_together.axmodel +3 -0
  29. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l5_together.axmodel +3 -0
  30. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l6_together.axmodel +3 -0
  31. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l7_together.axmodel +3 -0
  32. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l8_together.axmodel +3 -0
  33. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l9_together.axmodel +3 -0
  34. fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_post.axmodel +3 -0
  35. fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l0_together.axmodel +2 -2
  36. fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l10_together.axmodel +2 -2
  37. fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l11_together.axmodel +2 -2
  38. fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l12_together.axmodel +2 -2
  39. fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l13_together.axmodel +2 -2
  40. fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l14_together.axmodel +2 -2
  41. fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l15_together.axmodel +2 -2
  42. fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l16_together.axmodel +2 -2
  43. fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l17_together.axmodel +2 -2
  44. fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l18_together.axmodel +2 -2
  45. fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l19_together.axmodel +2 -2
  46. fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l1_together.axmodel +2 -2
  47. fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l20_together.axmodel +2 -2
  48. fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l21_together.axmodel +2 -2
  49. fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l22_together.axmodel +2 -2
  50. fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l23_together.axmodel +2 -2
.gitattributes CHANGED
@@ -87,3 +87,34 @@ fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l9_together.axmodel filter=lfs
87
  fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_post.axmodel filter=lfs diff=lfs merge=lfs -text
88
  images/image_1.jpg filter=lfs diff=lfs merge=lfs -text
89
  images/ssd_horse.jpg filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_post.axmodel filter=lfs diff=lfs merge=lfs -text
88
  images/image_1.jpg filter=lfs diff=lfs merge=lfs -text
89
  images/ssd_horse.jpg filter=lfs diff=lfs merge=lfs -text
90
+ fastvlm_C128_CTX1024_P640_ax620e/image_encoder_512x512_ax620e.axmodel filter=lfs diff=lfs merge=lfs -text
91
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l0_together.axmodel filter=lfs diff=lfs merge=lfs -text
92
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l10_together.axmodel filter=lfs diff=lfs merge=lfs -text
93
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l11_together.axmodel filter=lfs diff=lfs merge=lfs -text
94
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l12_together.axmodel filter=lfs diff=lfs merge=lfs -text
95
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l13_together.axmodel filter=lfs diff=lfs merge=lfs -text
96
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l14_together.axmodel filter=lfs diff=lfs merge=lfs -text
97
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l15_together.axmodel filter=lfs diff=lfs merge=lfs -text
98
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l16_together.axmodel filter=lfs diff=lfs merge=lfs -text
99
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l17_together.axmodel filter=lfs diff=lfs merge=lfs -text
100
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l18_together.axmodel filter=lfs diff=lfs merge=lfs -text
101
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l19_together.axmodel filter=lfs diff=lfs merge=lfs -text
102
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l1_together.axmodel filter=lfs diff=lfs merge=lfs -text
103
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l20_together.axmodel filter=lfs diff=lfs merge=lfs -text
104
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l21_together.axmodel filter=lfs diff=lfs merge=lfs -text
105
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l22_together.axmodel filter=lfs diff=lfs merge=lfs -text
106
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l23_together.axmodel filter=lfs diff=lfs merge=lfs -text
107
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l2_together.axmodel filter=lfs diff=lfs merge=lfs -text
108
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l3_together.axmodel filter=lfs diff=lfs merge=lfs -text
109
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l4_together.axmodel filter=lfs diff=lfs merge=lfs -text
110
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l5_together.axmodel filter=lfs diff=lfs merge=lfs -text
111
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l6_together.axmodel filter=lfs diff=lfs merge=lfs -text
112
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l7_together.axmodel filter=lfs diff=lfs merge=lfs -text
113
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l8_together.axmodel filter=lfs diff=lfs merge=lfs -text
114
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l9_together.axmodel filter=lfs diff=lfs merge=lfs -text
115
+ fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_post.axmodel filter=lfs diff=lfs merge=lfs -text
116
+ fastvlm_tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
117
+ main_ax650 filter=lfs diff=lfs merge=lfs -text
118
+ main_ax650_api filter=lfs diff=lfs merge=lfs -text
119
+ main_axcl_x86 filter=lfs diff=lfs merge=lfs -text
120
+ main_axcl_x86_api filter=lfs diff=lfs merge=lfs -text
.infer_axmodel_650.py.swn ADDED
Binary file (16.4 kB). View file
 
.infer_axmodel_650.py.swo ADDED
Binary file (20.5 kB). View file
 
.infer_axmodel_650.py.swp ADDED
Binary file (20.5 kB). View file
 
FastVLM_tokenizer.txt ADDED
The diff for this file is too large to render. See raw diff
 
README.md CHANGED
@@ -38,7 +38,8 @@ How to Convert LLM from Huggingface to axmodel[TODO]
38
 
39
  |Chips|image encoder|ttft|w4a16|
40
  |--|--|--|--|
41
- |AX650N| 44.572 ms (512x512)| 94.532 ms (99tokens)| 34.81 tokens/sec|
 
42
  |AX630C| 205.961 ms (512x512)| 489.013 ms (99tokens)| 11.67 tokens/sec|
43
 
44
 
@@ -49,19 +50,29 @@ Download all files from this repository to the device
49
  ```
50
  $tree -L 1
51
  .
52
- |-- README.md
53
- |-- config.json
54
- |-- embeds
55
- |-- fastvlm_C128_CTX1024_P640_ax650
56
- |-- fastvlm_C128_CTX512_P256_ax620e
57
- |-- fastvlm_tokenizer
58
- |-- images
59
- |-- infer_axmodel_620e.py
60
- |-- infer_axmodel_650.py
61
- |-- requirements.txt
62
- `-- utils
63
-
64
- 6 directories, 5 files
 
 
 
 
 
 
 
 
 
 
65
  ```
66
 
67
  #### Install transformer
@@ -70,125 +81,188 @@ $tree -L 1
70
  pip install -r requirements.txt
71
  ```
72
 
73
- #### Inference with AX630C Host
74
 
75
  Run the following command on the Axera board to start a chat conversation:
76
 
77
- ```sh
78
- python3 infer_axmodel_620e.py -v ./fastvlm_C128_CTX512_P256_ax620e/image_encoder_512x512_ax620e.axmodel -m ./fastvlm_C128_CTX512_P256_ax620e -t fastvlm_tokenizer -i 512
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  ```
80
 
81
- output:
 
82
  ```
83
- [INFO] Available providers: ['AxEngineExecutionProvider']
 
 
 
84
  Loading config, tokenizer and init model.
85
  [INFO] Using provider: AxEngineExecutionProvider
86
- [INFO] Chip type: ChipType.MC20E
87
  [INFO] VNPU type: VNPUType.DISABLED
88
- [INFO] Engine version: 2.7.2a
89
- [INFO] Model type: 1 (full core)
90
  [INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
91
  Detected prefixes: ['llava_qwen2'], chosen: llava_qwen2, layers: 24
92
  Init InferenceSession: 0%| | 0/24 [00:00<?, ?it/s][INFO] Using provider: AxEngineExecutionProvider
93
- [INFO] Model type: 1 (full core)
94
  [INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
95
- Init InferenceSession: 4%|████▊ | 1/24
96
- [00:02<00:00, 9.25it/s]
97
- ...
98
  [INFO] Using provider: AxEngineExecutionProvider
99
- [INFO] Model type: 1 (full core)
 
 
 
100
  [INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
101
- Init InferenceSession: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:02<00:00, 9.12it/s]
102
  [INFO] Using provider: AxEngineExecutionProvider
103
- [INFO] Model type: 1 (full core)
 
 
 
104
  [INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
105
  Model loaded successfully!
106
  [INFO]: 输入文本进行对话,或者输入图片路径进行图片理解, 或者输入q退出对话。
107
  prompt<<who are you
108
  slice_indices: [0]
109
  Slice prefill done: 0
110
- answer >> I'm an AI language model, I don't have personal identity or a physical body. I exist solely as a digital creation created by Apple. I don't have a name or a personal identity. I'm designed to assist and provide information to users. Is there anything else I can help you with?
111
 
112
  prompt<<./images/ssd_horse.jpg
113
  slice_indices: [0]
114
  Slice prefill done: 0
115
- answer >> The image depicts a person riding a brown horse with a white blaze on its face. The rider is wearing a blue hoodie and blue jeans, and is holding the reins of the horse. The horse is standing on a dirt ground with some grass and trees in the background. The rider is also holding a rope in their left hand, which is attached to the horse's harness.
116
 
117
- To the left of the horse, there is a brown dog standing on the ground, looking up at the rider. The dog appears to be in a begging or pleading position, with its front paws raised and its mouth open.
118
 
119
- In the background, there is a gray pickup truck parked on the grass, and a wooden fence can be seen behind the horse and rider. There are also some people visible in the background, including a person in a red shirt and another person in a blue shirt. The overall scene appears to be taking place in an outdoor setting, possibly a ranch or a farm.
 
 
120
 
121
  prompt<<./images/image_1.jpg
122
  slice_indices: [0]
123
  Slice prefill done: 0
124
- answer >> The image depicts a panda bear in a naturalistic enclosure, likely within a zoo or wildlife sanctuary. The panda is sitting on its hind legs, with its front paws resting on a wooden structure that resembles a tree stump. The panda's distinctive black and white fur is clearly visible, with the black fur covering its ears, eyes, and the area around its nose and mouth, while the white fur covers the rest of its body. The panda's black nose and the black fur around its mouth are also visible.
125
 
126
- The panda is surrounded by green foliage, including bamboo shoots and other plants, which are typical of a panda's natural habitat. The ground appears to be covered with dirt and small rocks, and there are some larger rocks and a tree stump in the background. The lighting in the image suggests that it was taken during the daytime, with natural light illuminating the scene. The overall setting appears to be a well-maintained and naturalistic enclosure designed to mimic the panda's natural environment.
 
 
127
 
128
  prompt<<q
129
  [INFO]: 对话结束,再见。
130
  ```
131
 
132
- #### Inference with AX650 Host, such as M4N-Dock(爱芯派Pro) or AX650 DEMO Board
133
 
134
  Run the following command on the Axera board to start a chat conversation:
135
 
136
  ```sh
137
- $ python3 infer_axmodel_650.py -v ./fastvlm_C128_CTX1024_P640_ax650/image_encoder_512x512_0.5b_ax650.axmodel -m ./fastvlm_C128_CTX1024_P640_ax650 -t fastvlm_tokenizer -i 512
138
  ```
139
- output:
140
 
141
- ```bash
142
- [INFO] Available providers: ['AxEngineExecutionProvider', 'AXCLRTExecutionProvider']
 
143
  Loading config, tokenizer and init model.
144
  [INFO] Using provider: AxEngineExecutionProvider
145
- [INFO] Chip type: ChipType.MC50
146
  [INFO] VNPU type: VNPUType.DISABLED
147
- [INFO] Engine version: 2.12.0s
148
- [INFO] Model type: 2 (triple core)
149
  [INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
150
  Detected prefixes: ['llava_qwen2'], chosen: llava_qwen2, layers: 24
151
  Init InferenceSession: 0%| | 0/24 [00:00<?, ?it/s][INFO] Using provider: AxEngineExecutionProvider
152
- [INFO] Model type: 2 (triple core)
153
  [INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
 
 
 
154
  [INFO] Using provider: AxEngineExecutionProvider
155
- [INFO] Model type: 2 (triple core)
156
- [INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
157
- Init InferenceSession: 8%|█████████▌ | 2/24 [00:00<00:01, 17.39it/s][INFO] Using provider: AxEngineExecutionProvider
158
- [INFO] Model type: 2 (triple core)
159
  [INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
 
160
  [INFO] Using provider: AxEngineExecutionProvider
161
- ...
162
- Init InferenceSession: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 24.30it/s]
163
- [INFO] Using provider: AxEngineExecutionProvider
164
- [INFO] Model type: 2 (triple core)
165
  [INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
166
  Model loaded successfully!
167
  [INFO]: 输入文本进行对话,或者输入图片路径进行图片理解, 或者输入q退出对话。
168
  prompt<<who are you
169
  slice_indices: [0]
170
  Slice prefill done: 0
171
- answer >> I'm an AI language model, I don't have personal identity or a physical body. I exist solely as a digital entity designed to assist and provide information to users. I don't have a name or a personal identity, but I can provide information and answer questions based on my training data and algorithms. Is there something specific you would like to know about me?
172
 
173
  prompt<<./images/ssd_horse.jpg
174
  slice_indices: [0]
175
  Slice prefill done: 0
176
- answer >> The image depicts a person riding a brown horse with a white blaze on its face. The rider is wearing a gray hoodie and blue jeans, and is holding the reins of the horse. The horse is standing on a dirt ground with some grass and trees in the background.
177
-
178
- To the left of the horse, there is a brown dog sitting on the ground. The dog is looking up at the rider with its mouth open, as if it is begging or reacting to something.
179
 
180
- In the background, there is a gray pickup truck parked on the grass, and a person wearing a red shirt and blue jeans is standing near the truck. There is also a wooden fence and some trees in the background.
181
 
182
- The overall scene appears to be taking place in a rural or outdoor setting, possibly a farm or ranch.
183
 
184
  prompt<<./images/image_1.jpg
185
  slice_indices: [0]
186
  Slice prefill done: 0
187
- answer >> The image depicts a panda bear in a naturalistic enclosure, likely within a zoo or wildlife sanctuary. The panda is lying on its stomach with its head resting on its front paws, appearing relaxed and content. The panda's distinctive black and white fur is clearly visible, with the black fur covering its ears, eyes, and limbs, while the white fur covers its face, neck, and the underside of its body. The panda's black nose and mouth are also visible.
188
-
189
- The panda is surrounded by green foliage, including bamboo shoots and other plants, which are typical of a panda's natural habitat. In the background, there is a wooden structure that resembles a tree stump or a small tree, adding to the naturalistic setting. The ground is covered with dirt and leaves, further emphasizing the natural environment.
190
 
191
- The lighting in the image is natural, suggesting that the photo was taken during the day. The overall scene conveys a sense of tranquility and the panda's comfort in its environment.
192
 
193
  prompt<<q
194
  [INFO]: 对话结束,再见。
 
38
 
39
  |Chips|image encoder|ttft|w4a16|
40
  |--|--|--|--|
41
+ |AX650N| 59.83 ms (512x512)| 76.36 ms (100tokens)| 34.81 tokens/sec|
42
+ |AXCL x86| 51.80 ms (512x512)| 145.05 ms (93tokens)| 17.40 tokens/sec|
43
  |AX630C| 205.961 ms (512x512)| 489.013 ms (99tokens)| 11.67 tokens/sec|
44
 
45
 
 
50
  ```
51
  $tree -L 1
52
  .
53
+ ├── config.json
54
+ ├── embeds
55
+ ├── fastvlm_C128_CTX1024_P640_ax620e
56
+ ├── fastvlm_C128_CTX1024_P640_ax650
57
+ ├── fastvlm_tokenizer
58
+ ├── FastVLM_tokenizer.txt
59
+ ├── images
60
+ ├── infer_axmodel_620e.py
61
+ ├── infer_axmodel_650.py
62
+ ├── main_ax650
63
+ ├── main_ax650_api
64
+ ├── main_axcl_x86
65
+ ├── main_axcl_x86_api
66
+ ├── post_config.json
67
+ ├── README.md
68
+ ├── requirements.txt
69
+ ├── run_ax650_512.sh
70
+ ├── run_ax650_api.sh
71
+ ├── run_axcl_x86_api.sh
72
+ ├── run_axcl_x86.sh
73
+ └── utils
74
+
75
+ 7 directories, 15 files
76
  ```
77
 
78
  #### Install transformer
 
81
  pip install -r requirements.txt
82
  ```
83
 
84
+ #### Inference with AX650 Host, such as M4N-Dock(爱芯派Pro) or AX650 DEMO Board
85
 
86
  Run the following command on the Axera board to start a chat conversation:
87
 
88
+ ```
89
+ root@ax650:~/FastVLM-0.5B# ./run_ax650_512.sh
90
+ [I][ Init][ 134]: LLM init start
91
+ tokenizer_type = 3
92
+ stop_tokens size: 2
93
+ 151645
94
+ 151645
95
+ 7% | ███ | 2 / 27 [1.06s<14.26s, 1.89 count/s] embed_selector init ok
96
+ 100% | ████████████████████████████████ | 27 / 27 [2.35s<2.35s, 11.51 count/s] init post axmodel ok,remain_cmm(9222 MB)[I][ Init][ 252]: IMAGE_CONTEXT_TOKEN: 151646
97
+ [I][ Init][ 284]: image encoder input nhwc@uint8
98
+ [I][ Init][ 308]: image encoder output float32
99
+
100
+ [I][ Init][ 318]: image_encoder_height : 512, image_encoder_width: 512
101
+ [I][ Init][ 320]: max_token_len : 1024
102
+ [I][ Init][ 323]: kv_cache_size : 128, kv_cache_num: 1024
103
+ [I][ Init][ 331]: prefill_token_num : 128
104
+ [I][ Init][ 335]: grp: 1, prefill_max_token_num : 1
105
+ [I][ Init][ 335]: grp: 2, prefill_max_token_num : 128
106
+ [I][ Init][ 335]: grp: 3, prefill_max_token_num : 256
107
+ [I][ Init][ 335]: grp: 4, prefill_max_token_num : 512
108
+ [I][ Init][ 335]: grp: 5, prefill_max_token_num : 640
109
+ [I][ Init][ 339]: prefill_max_token_num : 640
110
+ [I][ load_config][ 282]: load config:
111
+ {
112
+ "enable_repetition_penalty": false,
113
+ "enable_temperature": true,
114
+ "enable_top_k_sampling": true,
115
+ "enable_top_p_sampling": false,
116
+ "penalty_window": 30,
117
+ "repetition_penalty": 2,
118
+ "temperature": 0.1,
119
+ "top_k": 10,
120
+ "top_p": 0.8
121
+ }
122
+
123
+ [I][ Init][ 348]: LLM init ok
124
+ Type "q" to exit, Ctrl+c to stop current running
125
+ prompt >> who are you
126
+ image >>
127
+ [I][ Encode][ 470]: input_ids size: 33
128
+ [I][ Run][ 604]: input token num : 33, prefill_split_num : 1
129
+ [I][ Run][ 619]: prefill grpid 2
130
+ [I][ Run][ 646]: input_num_token:33
131
+ [I][ Run][ 770]: ttft: 76.40 ms
132
+ I am a language model created by Apple Inc. I am designed to assist users in generating human-like text based on the input they provide. I can understand and generate text based on the context and the input provided by the user. I am not capable of generating human-like text, but I can generate text based on the context and the input provided by the user.
133
+
134
+ [N][ Run][ 879]: hit eos,avg 31.22 token/s
135
+
136
+ prompt >> describe the image.
137
+ image >> ./images/image_1.jpg
138
+ [I][ Encode][ 442]: image encode time : 59.83 ms, size : 57344
139
+ [I][ Encode][ 496]: imgs_embed.size() : 1, media token size : 64
140
+ [I][ Run][ 604]: input token num : 100, prefill_split_num : 1
141
+ [I][ Run][ 619]: prefill grpid 2
142
+ [I][ Run][ 646]: input_num_token:100
143
+ [I][ Run][ 770]: ttft: 76.36 ms
144
+ The image depicts a panda bear in a naturalistic enclosure, likely within a zoo or wildlife sanctuary. The panda is lying on its stomach with its head resting on a log, appearing relaxed and content. The panda's distinctive black and white fur is clearly visible, with its black ears, eyes, and nose contrasting against its white face and body. The enclosure is surrounded by greenery, including bamboo and other plants, which adds to the natural habitat feel of the scene. The panda appears to be in a comfortable and secure environment, with ample space to move around and interact with its surroundings.
145
+
146
+ [N][ Run][ 879]: hit eos,avg 31.30 token/s
147
+
148
+ prompt >> q
149
  ```
150
 
151
+ ```sh
152
+ $ python3 infer_axmodel_650.py -v ./fastvlm_C128_CTX1024_P640_ax650/image_encoder_512x512_0.5b_ax650.axmodel -m ./fastvlm_C128_CTX1024_P640_ax650 -t fastvlm_tokenizer -i 512
153
  ```
154
+ output:
155
+
156
+ ```bash
157
+ [INFO] Available providers: ['AxEngineExecutionProvider', 'AXCLRTExecutionProvider']
158
  Loading config, tokenizer and init model.
159
  [INFO] Using provider: AxEngineExecutionProvider
160
+ [INFO] Chip type: ChipType.MC50
161
  [INFO] VNPU type: VNPUType.DISABLED
162
+ [INFO] Engine version: 2.12.0s
163
+ [INFO] Model type: 2 (triple core)
164
  [INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
165
  Detected prefixes: ['llava_qwen2'], chosen: llava_qwen2, layers: 24
166
  Init InferenceSession: 0%| | 0/24 [00:00<?, ?it/s][INFO] Using provider: AxEngineExecutionProvider
167
+ [INFO] Model type: 2 (triple core)
168
  [INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
 
 
 
169
  [INFO] Using provider: AxEngineExecutionProvider
170
+ [INFO] Model type: 2 (triple core)
171
+ [INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
172
+ Init InferenceSession: 8%|█████████▌ | 2/24 [00:00<00:01, 17.39it/s][INFO] Using provider: AxEngineExecutionProvider
173
+ [INFO] Model type: 2 (triple core)
174
  [INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
 
175
  [INFO] Using provider: AxEngineExecutionProvider
176
+ ...
177
+ Init InferenceSession: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 24.30it/s]
178
+ [INFO] Using provider: AxEngineExecutionProvider
179
+ [INFO] Model type: 2 (triple core)
180
  [INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
181
  Model loaded successfully!
182
  [INFO]: 输入文本进行对话,或者输入图片路径进行图片理解, 或者输入q退出对话。
183
  prompt<<who are you
184
  slice_indices: [0]
185
  Slice prefill done: 0
186
+ answer >> I'm an AI language model, I don't have personal identity or a physical body. I exist solely as a digital entity designed to assist and provide information to users. I don't have a name or a personal identity, but I can provide information and answer questions based on my training data and algorithms. Is there something specific you would like to know about me?
187
 
188
  prompt<<./images/ssd_horse.jpg
189
  slice_indices: [0]
190
  Slice prefill done: 0
191
+ answer >> The image depicts a person riding a brown horse with a white blaze on its face. The rider is wearing a gray hoodie and blue jeans, and is holding the reins of the horse. The horse is standing on a dirt ground with some grass and trees in the background.
192
 
193
+ To the left of the horse, there is a brown dog sitting on the ground. The dog is looking up at the rider with its mouth open, as if it is begging or reacting to something.
194
 
195
+ In the background, there is a gray pickup truck parked on the grass, and a person wearing a red shirt and blue jeans is standing near the truck. There is also a wooden fence and some trees in the background.
196
+
197
+ The overall scene appears to be taking place in a rural or outdoor setting, possibly a farm or ranch.
198
 
199
  prompt<<./images/image_1.jpg
200
  slice_indices: [0]
201
  Slice prefill done: 0
202
+ answer >> The image depicts a panda bear in a naturalistic enclosure, likely within a zoo or wildlife sanctuary. The panda is lying on its stomach with its head resting on its front paws, appearing relaxed and content. The panda's distinctive black and white fur is clearly visible, with the black fur covering its ears, eyes, and limbs, while the white fur covers its face, neck, and the underside of its body. The panda's black nose and mouth are also visible.
203
 
204
+ The panda is surrounded by green foliage, including bamboo shoots and other plants, which are typical of a panda's natural habitat. In the background, there is a wooden structure that resembles a tree stump or a small tree, adding to the naturalistic setting. The ground is covered with dirt and leaves, further emphasizing the natural environment.
205
+
206
+ The lighting in the image is natural, suggesting that the photo was taken during the day. The overall scene conveys a sense of tranquility and the panda's comfort in its environment.
207
 
208
  prompt<<q
209
  [INFO]: 对话结束,再见。
210
  ```
211
 
212
+ #### Inference with AX630C Host
213
 
214
  Run the following command on the Axera board to start a chat conversation:
215
 
216
  ```sh
217
+ python3 infer_axmodel_620e.py -v ./fastvlm_C128_CTX512_P256_ax620e/image_encoder_512x512_ax620e.axmodel -m ./fastvlm_C128_CTX512_P256_ax620e -t fastvlm_tokenizer -i 512
218
  ```
 
219
 
220
+ output:
221
+ ```
222
+ [INFO] Available providers: ['AxEngineExecutionProvider']
223
  Loading config, tokenizer and init model.
224
  [INFO] Using provider: AxEngineExecutionProvider
225
+ [INFO] Chip type: ChipType.MC20E
226
  [INFO] VNPU type: VNPUType.DISABLED
227
+ [INFO] Engine version: 2.7.2a
228
+ [INFO] Model type: 1 (full core)
229
  [INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
230
  Detected prefixes: ['llava_qwen2'], chosen: llava_qwen2, layers: 24
231
  Init InferenceSession: 0%| | 0/24 [00:00<?, ?it/s][INFO] Using provider: AxEngineExecutionProvider
232
+ [INFO] Model type: 1 (full core)
233
  [INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
234
+ Init InferenceSession: 4%|████▊ | 1/24
235
+ [00:02<00:00, 9.25it/s]
236
+ ...
237
  [INFO] Using provider: AxEngineExecutionProvider
238
+ [INFO] Model type: 1 (full core)
 
 
 
239
  [INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
240
+ Init InferenceSession: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:02<00:00, 9.12it/s]
241
  [INFO] Using provider: AxEngineExecutionProvider
242
+ [INFO] Model type: 1 (full core)
 
 
 
243
  [INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
244
  Model loaded successfully!
245
  [INFO]: 输入文本进行对话,或者输入图片路径进行图片理解, 或者输入q退出对话。
246
  prompt<<who are you
247
  slice_indices: [0]
248
  Slice prefill done: 0
249
+ answer >> I'm an AI language model, I don't have personal identity or a physical body. I exist solely as a digital creation created by Apple. I don't have a name or a personal identity. I'm designed to assist and provide information to users. Is there anything else I can help you with?
250
 
251
  prompt<<./images/ssd_horse.jpg
252
  slice_indices: [0]
253
  Slice prefill done: 0
254
+ answer >> The image depicts a person riding a brown horse with a white blaze on its face. The rider is wearing a blue hoodie and blue jeans, and is holding the reins of the horse. The horse is standing on a dirt ground with some grass and trees in the background. The rider is also holding a rope in their left hand, which is attached to the horse's harness.
 
 
255
 
256
+ To the left of the horse, there is a brown dog standing on the ground, looking up at the rider. The dog appears to be in a begging or pleading position, with its front paws raised and its mouth open.
257
 
258
+ In the background, there is a gray pickup truck parked on the grass, and a wooden fence can be seen behind the horse and rider. There are also some people visible in the background, including a person in a red shirt and another person in a blue shirt. The overall scene appears to be taking place in an outdoor setting, possibly a ranch or a farm.
259
 
260
  prompt<<./images/image_1.jpg
261
  slice_indices: [0]
262
  Slice prefill done: 0
263
+ answer >> The image depicts a panda bear in a naturalistic enclosure, likely within a zoo or wildlife sanctuary. The panda is sitting on its hind legs, with its front paws resting on a wooden structure that resembles a tree stump. The panda's distinctive black and white fur is clearly visible, with the black fur covering its ears, eyes, and the area around its nose and mouth, while the white fur covers the rest of its body. The panda's black nose and the black fur around its mouth are also visible.
 
 
264
 
265
+ The panda is surrounded by green foliage, including bamboo shoots and other plants, which are typical of a panda's natural habitat. The ground appears to be covered with dirt and small rocks, and there are some larger rocks and a tree stump in the background. The lighting in the image suggests that it was taken during the daytime, with natural light illuminating the scene. The overall setting appears to be a well-maintained and naturalistic enclosure designed to mimic the panda's natural environment.
266
 
267
  prompt<<q
268
  [INFO]: 对话结束,再见。
embeds/model.embed_tokens.weight.bfloat16.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0403b365004db461375fe5d5daebcc61bab76b884844d84cbc0ba5820085e0e9
3
- size 272269312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bfa5dd1b4f87a9a1b42ffd26d17532d9631e0f299eee75f6d9a1ef5360ed7fb
3
+ size 271751424
embeds/model.embed_tokens.weight.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69ebc4dd0a736c34db773c86bc708eda4dbcc626dc7bee4d363c103a9bb6d100
3
- size 544538752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba20f7ac021ad9bea734ba51ee01de5331611ae6b45fc35562d0cb79bf0c810c
3
+ size 543502976
fastvlm_C128_CTX1024_P640_ax620e/image_encoder_512x512_ax620e.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f914c7bd4b237095742e87fbd1a0dc0974e3184bf23cd13f7194558e952effef
3
+ size 160413696
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l0_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92c64b87c5a790bfdbc1ed227c6e30c040a354fdf856f1d257b88a1d09ad4571
3
+ size 16712241
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l10_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cab7dc4d97e6a84957cf9d0989e50dc46fcdd4560df01014b46693c2109b81c
3
+ size 16712217
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l11_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11450a57a33a8e02ac3eefa60d06818ac8ed11f2bbcca25cf61d5d5557be8613
3
+ size 16712249
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l12_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbe3ec3a94528846e34c1ee61ec3b16eabbdc8ae2b437dd89a157a389e648bc8
3
+ size 16712225
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l13_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c2519cfefd3be7c7376b349a545a7b4c04a414df1a5cbddd89c2bded52f013e
3
+ size 16712321
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l14_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:642de73dd94e59615ef64994844eec30496dcd0ad66561124225a9fd7567ca3e
3
+ size 16712217
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l15_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89f5211bac18e4d8ddf825c9fd0f807dfbe498624c762193b605ddaec5bd6204
3
+ size 16712185
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l16_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05b12ad7080afefd87378d7191bdc47e92bd886f17657a62a8c5caac1adcc3e2
3
+ size 16712201
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l17_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:417da914dd2b59daca236e0c68210387eae42deba91835735d8937a3d8efa072
3
+ size 16712313
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l18_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e35e10cd6a0cea16e1afe3fc16a47a05864c991bb0ded00797cdd5a9f87e84ff
3
+ size 16712225
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l19_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32799a8281e5b64600a31445a703250d4beecb91a69188c19e64254cb3e4074b
3
+ size 16712209
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l1_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ff1aa08b5f05457633936fce8650400fb7ad74ad9aba5711003ded4d27ee4c4
3
+ size 16712225
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l20_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2e60874879ae0c7ab60ee12eb72caaab4b4c04579d0d465374e2f3dd03af80
3
+ size 16712233
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l21_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:590964a4aa9fa629fad9708756909964c92071e91bcc481521be26e7ff77a037
3
+ size 16712241
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l22_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fcb9deefbab7821ed686403ac328d6a3176864b38b5edc7e128a25d0836b49b
3
+ size 16712265
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l23_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98d90727215c176e12d9d939975ffeb01c5b8463ca658083dfedae36d43eba6d
3
+ size 16712209
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l2_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c460ec06d43433bc8c2481fe6294dd7d02533744c5bd89383b25641d2c50840
3
+ size 16712305
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l3_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36e16af33f7936e81030130c796584f9863849d4fb2686aae96b8412f321050d
3
+ size 16712241
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l4_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a1f020f4187d283ff134a7cbfdb2ff1cd066eef2030e8fe09def8b36d655f8b
3
+ size 16712233
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l5_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d2f1967ed1305f6aa0e1ffc9fc9ed6d900d5446dede465a4de0d111519dccf1
3
+ size 16712193
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l6_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c54336601b83dcda22185799fff0459e8c4a2d43eadde4cfcc8093537c7c43e6
3
+ size 16712233
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l7_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a926793428c6cca65a2878071570fdbad3710bcc3e6a4b12560797d4131bdcc
3
+ size 16712217
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l8_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81137ad98e6d26b39059705e4e3bf5380b612e579f4c41452211f34c317f7899
3
+ size 16712241
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l9_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82c2af786a8cbf7b3e1eeebd6127e91b0aff705c3080e2aa3774f05a0810f700
3
+ size 16712289
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_post.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:391aff25ad252109d286cb1b393429ae0738ec1107c2fb997897351cb2e4bd0c
3
+ size 139682851
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l0_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11a3483817e1ce66cc95c612619a1a1e60e4bbc77a012617b0b845cde0268a11
3
- size 17508925
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e23b62154e6d3d0a1312666632dd25cbf79c0dceafbf27795dd09aaa1d57d36
3
+ size 18243005
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l10_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7a88c0a139e09a37ed079166a152c6208707c84645745f2c7fed31a77c74d29
3
- size 17508933
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0bbf3a03b783592fa8f20747acacfd144db977ac04ca5731871a37213e2ff4f
3
+ size 18243013
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l11_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e1060f24145f4c2f773c5ed9cbcfa04e9dd2b59f62be3772287e9f515ad2a43
3
- size 17508933
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0aa90dedd23c615109c92409eca7ee7217faa4e2814c35fd99be735fe0965cb5
3
+ size 18243013
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l12_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17147af14d42c305172ee511c69c33281c354dd746261a2cb6d1df326eff9868
3
- size 17508933
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db480a73fa9093f469ac1f71388f7ec7232e1f5de1956b209c2eed692200de5b
3
+ size 18243013
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l13_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd76a3f9d764935b7f94439762ffda93a92911dc813522076eea2c850942632b
3
- size 17508933
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93ff168d4042051532fbb52263eb7d6ca47fee2188cec02c7f433d5226c268c2
3
+ size 18243013
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l14_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:404e37e37f43285f04d55e3b68586dd2cab0b675c5658c62ce16ad677b920cf1
3
- size 17508933
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71abc53e667625e33feff9742576bcf8d18bf060e148f39d7a8aae48de8f44c6
3
+ size 18243013
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l15_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8e1fd1fb7de4aa9c859543c7f07fd410bbdcd0deb6a20375f3f47024ed44cef
3
- size 17508933
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ed18ae52df489551b0419ea855e46445e1723b5a3c089c278c193de9d0cac3a
3
+ size 18243013
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l16_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3020712849ef594d4104aa1402442cbeb7f334f56ef9b4b8b3e063f345cbfe3
3
- size 17508933
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4ede5b86577c975ae12d6860b9943a0c0970a108dfd5b9d357eb34073356693
3
+ size 18243013
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l17_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:128d8e4ea65ebedd56123097d4cc635dd0958840084b33625d06ec7b21e13355
3
- size 17508933
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45b870d4485abfd0c57350e5b87eb4b215025c2d69a3e5620061aa7e3e1e871a
3
+ size 18243013
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l18_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f0e61614437be65baff7c20c430eb9fde34cf85f91ef8dc7a489715cf2ae70f
3
- size 17508933
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fa5072204d85a50f1115e3d8de609f33d9b9e792a4ea2a0c9e572b02b986242
3
+ size 18243013
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l19_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e92084c4a8117981ba9b7f5da0086c1bad38c4a7e8b87d4ba9193883f329b4b5
3
- size 17508933
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07b011ce1b1c94dc2f03643d7fc086736f9ad76b9755081b43eb2b7532526e83
3
+ size 18243013
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l1_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2867a766cdba85a8e925d96d67757193be73e38267a653d8c78ebc7a3d1af5dd
3
- size 17508925
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd07df17677123fe11b3ede1409791c86b21f3ed6fb088fcf7e1981583172ee8
3
+ size 18243005
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l20_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c846e6bb008b448a4922297eed660f111780e3a0719f695100dec1564b4e23b2
3
- size 17508933
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20648131068b6fd7be58699f6b34191ba7f6b0bf2b5622234e09fc1e713ca955
3
+ size 18243013
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l21_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc450fbd546cffe53250b9e74454faf9e452ec4412a11ec07e3b1857e7d64c70
3
- size 17508933
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9806631129785f025cff5147abae93ae11b02fee39d987c4e3e387ff438e4238
3
+ size 18243013
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l22_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3595909c86c5706da8d49c9b87616d033ea1070b1ac08ab7802b63409fbeda99
3
- size 17508933
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:473eccc3e07a4f3bcfe79a2fc55f17650472e09c2b22f3bd60d3700e30bb4340
3
+ size 18243013
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l23_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f91903d8a7313f1ce17cbc43cec71e4d72d0e358d893ce8da6f99b71622b641a
3
- size 17508933
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd4ad9014b4bda50eb37d7481871d6c1a0dadf3e677a52514d368f33d56b31f7
3
+ size 18243013