Upload c++ demo
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +31 -0
- .infer_axmodel_650.py.swn +0 -0
- .infer_axmodel_650.py.swo +0 -0
- .infer_axmodel_650.py.swp +0 -0
- FastVLM_tokenizer.txt +0 -0
- README.md +136 -62
- embeds/model.embed_tokens.weight.bfloat16.bin +2 -2
- embeds/model.embed_tokens.weight.npy +2 -2
- fastvlm_C128_CTX1024_P640_ax620e/image_encoder_512x512_ax620e.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l0_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l10_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l11_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l12_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l13_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l14_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l15_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l16_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l17_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l18_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l19_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l1_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l20_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l21_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l22_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l23_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l2_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l3_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l4_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l5_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l6_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l7_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l8_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l9_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_post.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l0_together.axmodel +2 -2
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l10_together.axmodel +2 -2
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l11_together.axmodel +2 -2
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l12_together.axmodel +2 -2
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l13_together.axmodel +2 -2
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l14_together.axmodel +2 -2
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l15_together.axmodel +2 -2
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l16_together.axmodel +2 -2
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l17_together.axmodel +2 -2
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l18_together.axmodel +2 -2
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l19_together.axmodel +2 -2
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l1_together.axmodel +2 -2
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l20_together.axmodel +2 -2
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l21_together.axmodel +2 -2
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l22_together.axmodel +2 -2
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l23_together.axmodel +2 -2
.gitattributes
CHANGED
|
@@ -87,3 +87,34 @@ fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l9_together.axmodel filter=lfs
|
|
| 87 |
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_post.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 88 |
images/image_1.jpg filter=lfs diff=lfs merge=lfs -text
|
| 89 |
images/ssd_horse.jpg filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_post.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 88 |
images/image_1.jpg filter=lfs diff=lfs merge=lfs -text
|
| 89 |
images/ssd_horse.jpg filter=lfs diff=lfs merge=lfs -text
|
| 90 |
+
fastvlm_C128_CTX1024_P640_ax620e/image_encoder_512x512_ax620e.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l0_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l10_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l11_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 94 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l12_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 95 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l13_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 96 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l14_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 97 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l15_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 98 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l16_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 99 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l17_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 100 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l18_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 101 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l19_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 102 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l1_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 103 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l20_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 104 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l21_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 105 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l22_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 106 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l23_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 107 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l2_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 108 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l3_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 109 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l4_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 110 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l5_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 111 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l6_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 112 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l7_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 113 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l8_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 114 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l9_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 115 |
+
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_post.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 116 |
+
fastvlm_tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 117 |
+
main_ax650 filter=lfs diff=lfs merge=lfs -text
|
| 118 |
+
main_ax650_api filter=lfs diff=lfs merge=lfs -text
|
| 119 |
+
main_axcl_x86 filter=lfs diff=lfs merge=lfs -text
|
| 120 |
+
main_axcl_x86_api filter=lfs diff=lfs merge=lfs -text
|
.infer_axmodel_650.py.swn
ADDED
|
Binary file (16.4 kB). View file
|
|
|
.infer_axmodel_650.py.swo
ADDED
|
Binary file (20.5 kB). View file
|
|
|
.infer_axmodel_650.py.swp
ADDED
|
Binary file (20.5 kB). View file
|
|
|
FastVLM_tokenizer.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
README.md
CHANGED
|
@@ -38,7 +38,8 @@ How to Convert LLM from Huggingface to axmodel[TODO]
|
|
| 38 |
|
| 39 |
|Chips|image encoder|ttft|w4a16|
|
| 40 |
|--|--|--|--|
|
| 41 |
-
|AX650N|
|
|
|
|
| 42 |
|AX630C| 205.961 ms (512x512)| 489.013 ms (99tokens)| 11.67 tokens/sec|
|
| 43 |
|
| 44 |
|
|
@@ -49,19 +50,29 @@ Download all files from this repository to the device
|
|
| 49 |
```
|
| 50 |
$tree -L 1
|
| 51 |
.
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
```
|
| 66 |
|
| 67 |
#### Install transformer
|
|
@@ -70,125 +81,188 @@ $tree -L 1
|
|
| 70 |
pip install -r requirements.txt
|
| 71 |
```
|
| 72 |
|
| 73 |
-
#### Inference with
|
| 74 |
|
| 75 |
Run the following command on the Axera board to start a chat conversation:
|
| 76 |
|
| 77 |
-
```
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
```
|
| 80 |
|
| 81 |
-
|
|
|
|
| 82 |
```
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
| 84 |
Loading config, tokenizer and init model.
|
| 85 |
[INFO] Using provider: AxEngineExecutionProvider
|
| 86 |
-
[INFO] Chip type: ChipType.
|
| 87 |
[INFO] VNPU type: VNPUType.DISABLED
|
| 88 |
-
[INFO] Engine version: 2.
|
| 89 |
-
[INFO] Model type:
|
| 90 |
[INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
|
| 91 |
Detected prefixes: ['llava_qwen2'], chosen: llava_qwen2, layers: 24
|
| 92 |
Init InferenceSession: 0%| | 0/24 [00:00<?, ?it/s][INFO] Using provider: AxEngineExecutionProvider
|
| 93 |
-
[INFO] Model type:
|
| 94 |
[INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
|
| 95 |
-
Init InferenceSession: 4%|████▊ | 1/24
|
| 96 |
-
[00:02<00:00, 9.25it/s]
|
| 97 |
-
...
|
| 98 |
[INFO] Using provider: AxEngineExecutionProvider
|
| 99 |
-
[INFO] Model type:
|
|
|
|
|
|
|
|
|
|
| 100 |
[INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
|
| 101 |
-
Init InferenceSession: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:02<00:00, 9.12it/s]
|
| 102 |
[INFO] Using provider: AxEngineExecutionProvider
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
| 104 |
[INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
|
| 105 |
Model loaded successfully!
|
| 106 |
[INFO]: 输入文本进行对话,或者输入图片路径进行图片理解, 或者输入q退出对话。
|
| 107 |
prompt<<who are you
|
| 108 |
slice_indices: [0]
|
| 109 |
Slice prefill done: 0
|
| 110 |
-
answer >> I'm an AI language model, I don't have personal identity or a physical body. I exist solely as a digital
|
| 111 |
|
| 112 |
prompt<<./images/ssd_horse.jpg
|
| 113 |
slice_indices: [0]
|
| 114 |
Slice prefill done: 0
|
| 115 |
-
answer >> The image depicts a person riding a brown horse with a white blaze on its face. The rider is wearing a
|
| 116 |
|
| 117 |
-
To the left of the horse, there is a brown dog
|
| 118 |
|
| 119 |
-
In the background, there is a gray pickup truck parked on the grass, and a
|
|
|
|
|
|
|
| 120 |
|
| 121 |
prompt<<./images/image_1.jpg
|
| 122 |
slice_indices: [0]
|
| 123 |
Slice prefill done: 0
|
| 124 |
-
answer >> The image depicts a panda bear in a naturalistic enclosure, likely within a zoo or wildlife sanctuary. The panda is
|
| 125 |
|
| 126 |
-
The panda is surrounded by green foliage, including bamboo shoots and other plants, which are typical of a panda's natural habitat.
|
|
|
|
|
|
|
| 127 |
|
| 128 |
prompt<<q
|
| 129 |
[INFO]: 对话结束,再见。
|
| 130 |
```
|
| 131 |
|
| 132 |
-
#### Inference with
|
| 133 |
|
| 134 |
Run the following command on the Axera board to start a chat conversation:
|
| 135 |
|
| 136 |
```sh
|
| 137 |
-
|
| 138 |
```
|
| 139 |
-
output:
|
| 140 |
|
| 141 |
-
|
| 142 |
-
|
|
|
|
| 143 |
Loading config, tokenizer and init model.
|
| 144 |
[INFO] Using provider: AxEngineExecutionProvider
|
| 145 |
-
[INFO] Chip type: ChipType.
|
| 146 |
[INFO] VNPU type: VNPUType.DISABLED
|
| 147 |
-
[INFO] Engine version: 2.
|
| 148 |
-
[INFO] Model type:
|
| 149 |
[INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
|
| 150 |
Detected prefixes: ['llava_qwen2'], chosen: llava_qwen2, layers: 24
|
| 151 |
Init InferenceSession: 0%| | 0/24 [00:00<?, ?it/s][INFO] Using provider: AxEngineExecutionProvider
|
| 152 |
-
[INFO] Model type:
|
| 153 |
[INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
|
|
|
|
|
|
|
|
|
|
| 154 |
[INFO] Using provider: AxEngineExecutionProvider
|
| 155 |
-
[INFO] Model type:
|
| 156 |
-
[INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
|
| 157 |
-
Init InferenceSession: 8%|█████████▌ | 2/24 [00:00<00:01, 17.39it/s][INFO] Using provider: AxEngineExecutionProvider
|
| 158 |
-
[INFO] Model type: 2 (triple core)
|
| 159 |
[INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
|
|
|
|
| 160 |
[INFO] Using provider: AxEngineExecutionProvider
|
| 161 |
-
|
| 162 |
-
Init InferenceSession: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 24.30it/s]
|
| 163 |
-
[INFO] Using provider: AxEngineExecutionProvider
|
| 164 |
-
[INFO] Model type: 2 (triple core)
|
| 165 |
[INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
|
| 166 |
Model loaded successfully!
|
| 167 |
[INFO]: 输入文本进行对话,或者输入图片路径进行图片理解, 或者输入q退出对话。
|
| 168 |
prompt<<who are you
|
| 169 |
slice_indices: [0]
|
| 170 |
Slice prefill done: 0
|
| 171 |
-
answer >> I'm an AI language model, I don't have personal identity or a physical body. I exist solely as a digital
|
| 172 |
|
| 173 |
prompt<<./images/ssd_horse.jpg
|
| 174 |
slice_indices: [0]
|
| 175 |
Slice prefill done: 0
|
| 176 |
-
answer >> The image depicts a person riding a brown horse with a white blaze on its face. The rider is wearing a
|
| 177 |
-
|
| 178 |
-
To the left of the horse, there is a brown dog sitting on the ground. The dog is looking up at the rider with its mouth open, as if it is begging or reacting to something.
|
| 179 |
|
| 180 |
-
|
| 181 |
|
| 182 |
-
The overall scene appears to be taking place in
|
| 183 |
|
| 184 |
prompt<<./images/image_1.jpg
|
| 185 |
slice_indices: [0]
|
| 186 |
Slice prefill done: 0
|
| 187 |
-
answer >> The image depicts a panda bear in a naturalistic enclosure, likely within a zoo or wildlife sanctuary. The panda is
|
| 188 |
-
|
| 189 |
-
The panda is surrounded by green foliage, including bamboo shoots and other plants, which are typical of a panda's natural habitat. In the background, there is a wooden structure that resembles a tree stump or a small tree, adding to the naturalistic setting. The ground is covered with dirt and leaves, further emphasizing the natural environment.
|
| 190 |
|
| 191 |
-
The
|
| 192 |
|
| 193 |
prompt<<q
|
| 194 |
[INFO]: 对话结束,再见。
|
|
|
|
| 38 |
|
| 39 |
|Chips|image encoder|ttft|w4a16|
|
| 40 |
|--|--|--|--|
|
| 41 |
+
|AX650N| 59.83 ms (512x512)| 76.36 ms (100tokens)| 34.81 tokens/sec|
|
| 42 |
+
|AXCL x86| 51.80 ms (512x512)| 145.05 ms (93tokens)| 17.40 tokens/sec|
|
| 43 |
|AX630C| 205.961 ms (512x512)| 489.013 ms (99tokens)| 11.67 tokens/sec|
|
| 44 |
|
| 45 |
|
|
|
|
| 50 |
```
|
| 51 |
$tree -L 1
|
| 52 |
.
|
| 53 |
+
├── config.json
|
| 54 |
+
├── embeds
|
| 55 |
+
├── fastvlm_C128_CTX1024_P640_ax620e
|
| 56 |
+
├── fastvlm_C128_CTX1024_P640_ax650
|
| 57 |
+
├── fastvlm_tokenizer
|
| 58 |
+
├── FastVLM_tokenizer.txt
|
| 59 |
+
├── images
|
| 60 |
+
├── infer_axmodel_620e.py
|
| 61 |
+
├── infer_axmodel_650.py
|
| 62 |
+
├── main_ax650
|
| 63 |
+
├── main_ax650_api
|
| 64 |
+
├── main_axcl_x86
|
| 65 |
+
├── main_axcl_x86_api
|
| 66 |
+
├── post_config.json
|
| 67 |
+
├── README.md
|
| 68 |
+
├── requirements.txt
|
| 69 |
+
├── run_ax650_512.sh
|
| 70 |
+
├── run_ax650_api.sh
|
| 71 |
+
├── run_axcl_x86_api.sh
|
| 72 |
+
├── run_axcl_x86.sh
|
| 73 |
+
└── utils
|
| 74 |
+
|
| 75 |
+
7 directories, 15 files
|
| 76 |
```
|
| 77 |
|
| 78 |
#### Install transformer
|
|
|
|
| 81 |
pip install -r requirements.txt
|
| 82 |
```
|
| 83 |
|
| 84 |
+
#### Inference with an AX650 host, such as the M4N-Dock (爱芯派Pro) or the AX650 demo board
|
| 85 |
|
| 86 |
Run the following command on the Axera board to start a chat conversation:
|
| 87 |
|
| 88 |
+
```
|
| 89 |
+
root@ax650:~/FastVLM-0.5B# ./run_ax650_512.sh
|
| 90 |
+
[I][ Init][ 134]: LLM init start
|
| 91 |
+
tokenizer_type = 3
|
| 92 |
+
stop_tokens size: 2
|
| 93 |
+
151645
|
| 94 |
+
151645
|
| 95 |
+
7% | ███ | 2 / 27 [1.06s<14.26s, 1.89 count/s] embed_selector init ok
|
| 96 |
+
100% | ████████████████████████████████ | 27 / 27 [2.35s<2.35s, 11.51 count/s] init post axmodel ok,remain_cmm(9222 MB)[I][ Init][ 252]: IMAGE_CONTEXT_TOKEN: 151646
|
| 97 |
+
[I][ Init][ 284]: image encoder input nhwc@uint8
|
| 98 |
+
[I][ Init][ 308]: image encoder output float32
|
| 99 |
+
|
| 100 |
+
[I][ Init][ 318]: image_encoder_height : 512, image_encoder_width: 512
|
| 101 |
+
[I][ Init][ 320]: max_token_len : 1024
|
| 102 |
+
[I][ Init][ 323]: kv_cache_size : 128, kv_cache_num: 1024
|
| 103 |
+
[I][ Init][ 331]: prefill_token_num : 128
|
| 104 |
+
[I][ Init][ 335]: grp: 1, prefill_max_token_num : 1
|
| 105 |
+
[I][ Init][ 335]: grp: 2, prefill_max_token_num : 128
|
| 106 |
+
[I][ Init][ 335]: grp: 3, prefill_max_token_num : 256
|
| 107 |
+
[I][ Init][ 335]: grp: 4, prefill_max_token_num : 512
|
| 108 |
+
[I][ Init][ 335]: grp: 5, prefill_max_token_num : 640
|
| 109 |
+
[I][ Init][ 339]: prefill_max_token_num : 640
|
| 110 |
+
[I][ load_config][ 282]: load config:
|
| 111 |
+
{
|
| 112 |
+
"enable_repetition_penalty": false,
|
| 113 |
+
"enable_temperature": true,
|
| 114 |
+
"enable_top_k_sampling": true,
|
| 115 |
+
"enable_top_p_sampling": false,
|
| 116 |
+
"penalty_window": 30,
|
| 117 |
+
"repetition_penalty": 2,
|
| 118 |
+
"temperature": 0.1,
|
| 119 |
+
"top_k": 10,
|
| 120 |
+
"top_p": 0.8
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
[I][ Init][ 348]: LLM init ok
|
| 124 |
+
Type "q" to exit, Ctrl+c to stop current running
|
| 125 |
+
prompt >> who are you
|
| 126 |
+
image >>
|
| 127 |
+
[I][ Encode][ 470]: input_ids size: 33
|
| 128 |
+
[I][ Run][ 604]: input token num : 33, prefill_split_num : 1
|
| 129 |
+
[I][ Run][ 619]: prefill grpid 2
|
| 130 |
+
[I][ Run][ 646]: input_num_token:33
|
| 131 |
+
[I][ Run][ 770]: ttft: 76.40 ms
|
| 132 |
+
I am a language model created by Apple Inc. I am designed to assist users in generating human-like text based on the input they provide. I can understand and generate text based on the context and the input provided by the user. I am not capable of generating human-like text, but I can generate text based on the context and the input provided by the user.
|
| 133 |
+
|
| 134 |
+
[N][ Run][ 879]: hit eos,avg 31.22 token/s
|
| 135 |
+
|
| 136 |
+
prompt >> describe the image.
|
| 137 |
+
image >> ./images/image_1.jpg
|
| 138 |
+
[I][ Encode][ 442]: image encode time : 59.83 ms, size : 57344
|
| 139 |
+
[I][ Encode][ 496]: imgs_embed.size() : 1, media token size : 64
|
| 140 |
+
[I][ Run][ 604]: input token num : 100, prefill_split_num : 1
|
| 141 |
+
[I][ Run][ 619]: prefill grpid 2
|
| 142 |
+
[I][ Run][ 646]: input_num_token:100
|
| 143 |
+
[I][ Run][ 770]: ttft: 76.36 ms
|
| 144 |
+
The image depicts a panda bear in a naturalistic enclosure, likely within a zoo or wildlife sanctuary. The panda is lying on its stomach with its head resting on a log, appearing relaxed and content. The panda's distinctive black and white fur is clearly visible, with its black ears, eyes, and nose contrasting against its white face and body. The enclosure is surrounded by greenery, including bamboo and other plants, which adds to the natural habitat feel of the scene. The panda appears to be in a comfortable and secure environment, with ample space to move around and interact with its surroundings.
|
| 145 |
+
|
| 146 |
+
[N][ Run][ 879]: hit eos,avg 31.30 token/s
|
| 147 |
+
|
| 148 |
+
prompt >> q
|
| 149 |
```
|
| 150 |
|
| 151 |
+
```sh
|
| 152 |
+
$ python3 infer_axmodel_650.py -v ./fastvlm_C128_CTX1024_P640_ax650/image_encoder_512x512_0.5b_ax650.axmodel -m ./fastvlm_C128_CTX1024_P640_ax650 -t fastvlm_tokenizer -i 512
|
| 153 |
```
|
| 154 |
+
output:
|
| 155 |
+
|
| 156 |
+
```bash
|
| 157 |
+
[INFO] Available providers: ['AxEngineExecutionProvider', 'AXCLRTExecutionProvider']
|
| 158 |
Loading config, tokenizer and init model.
|
| 159 |
[INFO] Using provider: AxEngineExecutionProvider
|
| 160 |
+
[INFO] Chip type: ChipType.MC50
|
| 161 |
[INFO] VNPU type: VNPUType.DISABLED
|
| 162 |
+
[INFO] Engine version: 2.12.0s
|
| 163 |
+
[INFO] Model type: 2 (triple core)
|
| 164 |
[INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
|
| 165 |
Detected prefixes: ['llava_qwen2'], chosen: llava_qwen2, layers: 24
|
| 166 |
Init InferenceSession: 0%| | 0/24 [00:00<?, ?it/s][INFO] Using provider: AxEngineExecutionProvider
|
| 167 |
+
[INFO] Model type: 2 (triple core)
|
| 168 |
[INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
|
|
|
|
|
|
|
|
|
|
| 169 |
[INFO] Using provider: AxEngineExecutionProvider
|
| 170 |
+
[INFO] Model type: 2 (triple core)
|
| 171 |
+
[INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
|
| 172 |
+
Init InferenceSession: 8%|█████████▌ | 2/24 [00:00<00:01, 17.39it/s][INFO] Using provider: AxEngineExecutionProvider
|
| 173 |
+
[INFO] Model type: 2 (triple core)
|
| 174 |
[INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
|
|
|
|
| 175 |
[INFO] Using provider: AxEngineExecutionProvider
|
| 176 |
+
...
|
| 177 |
+
Init InferenceSession: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 24.30it/s]
|
| 178 |
+
[INFO] Using provider: AxEngineExecutionProvider
|
| 179 |
+
[INFO] Model type: 2 (triple core)
|
| 180 |
[INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
|
| 181 |
Model loaded successfully!
|
| 182 |
[INFO]: 输入文本进行对话,或者输入图片路径进行图片理解, 或者输入q退出对话。
|
| 183 |
prompt<<who are you
|
| 184 |
slice_indices: [0]
|
| 185 |
Slice prefill done: 0
|
| 186 |
+
answer >> I'm an AI language model, I don't have personal identity or a physical body. I exist solely as a digital entity designed to assist and provide information to users. I don't have a name or a personal identity, but I can provide information and answer questions based on my training data and algorithms. Is there something specific you would like to know about me?
|
| 187 |
|
| 188 |
prompt<<./images/ssd_horse.jpg
|
| 189 |
slice_indices: [0]
|
| 190 |
Slice prefill done: 0
|
| 191 |
+
answer >> The image depicts a person riding a brown horse with a white blaze on its face. The rider is wearing a gray hoodie and blue jeans, and is holding the reins of the horse. The horse is standing on a dirt ground with some grass and trees in the background.
|
| 192 |
|
| 193 |
+
To the left of the horse, there is a brown dog sitting on the ground. The dog is looking up at the rider with its mouth open, as if it is begging or reacting to something.
|
| 194 |
|
| 195 |
+
In the background, there is a gray pickup truck parked on the grass, and a person wearing a red shirt and blue jeans is standing near the truck. There is also a wooden fence and some trees in the background.
|
| 196 |
+
|
| 197 |
+
The overall scene appears to be taking place in a rural or outdoor setting, possibly a farm or ranch.
|
| 198 |
|
| 199 |
prompt<<./images/image_1.jpg
|
| 200 |
slice_indices: [0]
|
| 201 |
Slice prefill done: 0
|
| 202 |
+
answer >> The image depicts a panda bear in a naturalistic enclosure, likely within a zoo or wildlife sanctuary. The panda is lying on its stomach with its head resting on its front paws, appearing relaxed and content. The panda's distinctive black and white fur is clearly visible, with the black fur covering its ears, eyes, and limbs, while the white fur covers its face, neck, and the underside of its body. The panda's black nose and mouth are also visible.
|
| 203 |
|
| 204 |
+
The panda is surrounded by green foliage, including bamboo shoots and other plants, which are typical of a panda's natural habitat. In the background, there is a wooden structure that resembles a tree stump or a small tree, adding to the naturalistic setting. The ground is covered with dirt and leaves, further emphasizing the natural environment.
|
| 205 |
+
|
| 206 |
+
The lighting in the image is natural, suggesting that the photo was taken during the day. The overall scene conveys a sense of tranquility and the panda's comfort in its environment.
|
| 207 |
|
| 208 |
prompt<<q
|
| 209 |
[INFO]: 对话结束,再见。
|
| 210 |
```
|
| 211 |
|
| 212 |
+
#### Inference with AX630C Host
|
| 213 |
|
| 214 |
Run the following command on the Axera board to start a chat conversation:
|
| 215 |
|
| 216 |
```sh
|
| 217 |
+
python3 infer_axmodel_620e.py -v ./fastvlm_C128_CTX512_P256_ax620e/image_encoder_512x512_ax620e.axmodel -m ./fastvlm_C128_CTX512_P256_ax620e -t fastvlm_tokenizer -i 512
|
| 218 |
```
|
|
|
|
| 219 |
|
| 220 |
+
output:
|
| 221 |
+
```
|
| 222 |
+
[INFO] Available providers: ['AxEngineExecutionProvider']
|
| 223 |
Loading config, tokenizer and init model.
|
| 224 |
[INFO] Using provider: AxEngineExecutionProvider
|
| 225 |
+
[INFO] Chip type: ChipType.MC20E
|
| 226 |
[INFO] VNPU type: VNPUType.DISABLED
|
| 227 |
+
[INFO] Engine version: 2.7.2a
|
| 228 |
+
[INFO] Model type: 1 (full core)
|
| 229 |
[INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
|
| 230 |
Detected prefixes: ['llava_qwen2'], chosen: llava_qwen2, layers: 24
|
| 231 |
Init InferenceSession: 0%| | 0/24 [00:00<?, ?it/s][INFO] Using provider: AxEngineExecutionProvider
|
| 232 |
+
[INFO] Model type: 1 (full core)
|
| 233 |
[INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
|
| 234 |
+
Init InferenceSession: 4%|████▊ | 1/24
|
| 235 |
+
[00:02<00:00, 9.25it/s]
|
| 236 |
+
...
|
| 237 |
[INFO] Using provider: AxEngineExecutionProvider
|
| 238 |
+
[INFO] Model type: 1 (full core)
|
|
|
|
|
|
|
|
|
|
| 239 |
[INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
|
| 240 |
+
Init InferenceSession: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:02<00:00, 9.12it/s]
|
| 241 |
[INFO] Using provider: AxEngineExecutionProvider
|
| 242 |
+
[INFO] Model type: 1 (full core)
|
|
|
|
|
|
|
|
|
|
| 243 |
[INFO] Compiler version: 5.1-patch1-dirty 0a5b164f-dirty
|
| 244 |
Model loaded successfully!
|
| 245 |
[INFO]: 输入文本进行对话,或者输入图片路径进行图片理解, 或者输入q退出对话。
|
| 246 |
prompt<<who are you
|
| 247 |
slice_indices: [0]
|
| 248 |
Slice prefill done: 0
|
| 249 |
+
answer >> I'm an AI language model, I don't have personal identity or a physical body. I exist solely as a digital creation created by Apple. I don't have a name or a personal identity. I'm designed to assist and provide information to users. Is there anything else I can help you with?
|
| 250 |
|
| 251 |
prompt<<./images/ssd_horse.jpg
|
| 252 |
slice_indices: [0]
|
| 253 |
Slice prefill done: 0
|
| 254 |
+
answer >> The image depicts a person riding a brown horse with a white blaze on its face. The rider is wearing a blue hoodie and blue jeans, and is holding the reins of the horse. The horse is standing on a dirt ground with some grass and trees in the background. The rider is also holding a rope in their left hand, which is attached to the horse's harness.
|
|
|
|
|
|
|
| 255 |
|
| 256 |
+
To the left of the horse, there is a brown dog standing on the ground, looking up at the rider. The dog appears to be in a begging or pleading position, with its front paws raised and its mouth open.
|
| 257 |
|
| 258 |
+
In the background, there is a gray pickup truck parked on the grass, and a wooden fence can be seen behind the horse and rider. There are also some people visible in the background, including a person in a red shirt and another person in a blue shirt. The overall scene appears to be taking place in an outdoor setting, possibly a ranch or a farm.
|
| 259 |
|
| 260 |
prompt<<./images/image_1.jpg
|
| 261 |
slice_indices: [0]
|
| 262 |
Slice prefill done: 0
|
| 263 |
+
answer >> The image depicts a panda bear in a naturalistic enclosure, likely within a zoo or wildlife sanctuary. The panda is sitting on its hind legs, with its front paws resting on a wooden structure that resembles a tree stump. The panda's distinctive black and white fur is clearly visible, with the black fur covering its ears, eyes, and the area around its nose and mouth, while the white fur covers the rest of its body. The panda's black nose and the black fur around its mouth are also visible.
|
|
|
|
|
|
|
| 264 |
|
| 265 |
+
The panda is surrounded by green foliage, including bamboo shoots and other plants, which are typical of a panda's natural habitat. The ground appears to be covered with dirt and small rocks, and there are some larger rocks and a tree stump in the background. The lighting in the image suggests that it was taken during the daytime, with natural light illuminating the scene. The overall setting appears to be a well-maintained and naturalistic enclosure designed to mimic the panda's natural environment.
|
| 266 |
|
| 267 |
prompt<<q
|
| 268 |
[INFO]: 对话结束,再见。
|
embeds/model.embed_tokens.weight.bfloat16.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4bfa5dd1b4f87a9a1b42ffd26d17532d9631e0f299eee75f6d9a1ef5360ed7fb
|
| 3 |
+
size 271751424
|
embeds/model.embed_tokens.weight.npy
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba20f7ac021ad9bea734ba51ee01de5331611ae6b45fc35562d0cb79bf0c810c
|
| 3 |
+
size 543502976
|
fastvlm_C128_CTX1024_P640_ax620e/image_encoder_512x512_ax620e.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f914c7bd4b237095742e87fbd1a0dc0974e3184bf23cd13f7194558e952effef
|
| 3 |
+
size 160413696
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l0_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92c64b87c5a790bfdbc1ed227c6e30c040a354fdf856f1d257b88a1d09ad4571
|
| 3 |
+
size 16712241
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l10_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9cab7dc4d97e6a84957cf9d0989e50dc46fcdd4560df01014b46693c2109b81c
|
| 3 |
+
size 16712217
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l11_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11450a57a33a8e02ac3eefa60d06818ac8ed11f2bbcca25cf61d5d5557be8613
|
| 3 |
+
size 16712249
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l12_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dbe3ec3a94528846e34c1ee61ec3b16eabbdc8ae2b437dd89a157a389e648bc8
|
| 3 |
+
size 16712225
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l13_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c2519cfefd3be7c7376b349a545a7b4c04a414df1a5cbddd89c2bded52f013e
|
| 3 |
+
size 16712321
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l14_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:642de73dd94e59615ef64994844eec30496dcd0ad66561124225a9fd7567ca3e
|
| 3 |
+
size 16712217
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l15_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89f5211bac18e4d8ddf825c9fd0f807dfbe498624c762193b605ddaec5bd6204
|
| 3 |
+
size 16712185
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l16_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05b12ad7080afefd87378d7191bdc47e92bd886f17657a62a8c5caac1adcc3e2
|
| 3 |
+
size 16712201
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l17_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:417da914dd2b59daca236e0c68210387eae42deba91835735d8937a3d8efa072
|
| 3 |
+
size 16712313
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l18_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e35e10cd6a0cea16e1afe3fc16a47a05864c991bb0ded00797cdd5a9f87e84ff
|
| 3 |
+
size 16712225
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l19_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:32799a8281e5b64600a31445a703250d4beecb91a69188c19e64254cb3e4074b
|
| 3 |
+
size 16712209
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l1_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ff1aa08b5f05457633936fce8650400fb7ad74ad9aba5711003ded4d27ee4c4
|
| 3 |
+
size 16712225
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l20_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a2e60874879ae0c7ab60ee12eb72caaab4b4c04579d0d465374e2f3dd03af80
|
| 3 |
+
size 16712233
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l21_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:590964a4aa9fa629fad9708756909964c92071e91bcc481521be26e7ff77a037
|
| 3 |
+
size 16712241
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l22_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2fcb9deefbab7821ed686403ac328d6a3176864b38b5edc7e128a25d0836b49b
|
| 3 |
+
size 16712265
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l23_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:98d90727215c176e12d9d939975ffeb01c5b8463ca658083dfedae36d43eba6d
|
| 3 |
+
size 16712209
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l2_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c460ec06d43433bc8c2481fe6294dd7d02533744c5bd89383b25641d2c50840
|
| 3 |
+
size 16712305
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l3_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:36e16af33f7936e81030130c796584f9863849d4fb2686aae96b8412f321050d
|
| 3 |
+
size 16712241
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l4_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a1f020f4187d283ff134a7cbfdb2ff1cd066eef2030e8fe09def8b36d655f8b
|
| 3 |
+
size 16712233
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l5_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d2f1967ed1305f6aa0e1ffc9fc9ed6d900d5446dede465a4de0d111519dccf1
|
| 3 |
+
size 16712193
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l6_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c54336601b83dcda22185799fff0459e8c4a2d43eadde4cfcc8093537c7c43e6
|
| 3 |
+
size 16712233
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l7_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a926793428c6cca65a2878071570fdbad3710bcc3e6a4b12560797d4131bdcc
|
| 3 |
+
size 16712217
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l8_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81137ad98e6d26b39059705e4e3bf5380b612e579f4c41452211f34c317f7899
|
| 3 |
+
size 16712241
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_p128_l9_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:82c2af786a8cbf7b3e1eeebd6127e91b0aff705c3080e2aa3774f05a0810f700
|
| 3 |
+
size 16712289
|
fastvlm_C128_CTX1024_P640_ax620e/llava_qwen2_post.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:391aff25ad252109d286cb1b393429ae0738ec1107c2fb997897351cb2e4bd0c
|
| 3 |
+
size 139682851
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l0_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e23b62154e6d3d0a1312666632dd25cbf79c0dceafbf27795dd09aaa1d57d36
|
| 3 |
+
size 18243005
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l10_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f0bbf3a03b783592fa8f20747acacfd144db977ac04ca5731871a37213e2ff4f
|
| 3 |
+
size 18243013
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l11_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0aa90dedd23c615109c92409eca7ee7217faa4e2814c35fd99be735fe0965cb5
|
| 3 |
+
size 18243013
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l12_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db480a73fa9093f469ac1f71388f7ec7232e1f5de1956b209c2eed692200de5b
|
| 3 |
+
size 18243013
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l13_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:93ff168d4042051532fbb52263eb7d6ca47fee2188cec02c7f433d5226c268c2
|
| 3 |
+
size 18243013
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l14_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71abc53e667625e33feff9742576bcf8d18bf060e148f39d7a8aae48de8f44c6
|
| 3 |
+
size 18243013
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l15_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ed18ae52df489551b0419ea855e46445e1723b5a3c089c278c193de9d0cac3a
|
| 3 |
+
size 18243013
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l16_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4ede5b86577c975ae12d6860b9943a0c0970a108dfd5b9d357eb34073356693
|
| 3 |
+
size 18243013
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l17_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45b870d4485abfd0c57350e5b87eb4b215025c2d69a3e5620061aa7e3e1e871a
|
| 3 |
+
size 18243013
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l18_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3fa5072204d85a50f1115e3d8de609f33d9b9e792a4ea2a0c9e572b02b986242
|
| 3 |
+
size 18243013
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l19_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:07b011ce1b1c94dc2f03643d7fc086736f9ad76b9755081b43eb2b7532526e83
|
| 3 |
+
size 18243013
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l1_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd07df17677123fe11b3ede1409791c86b21f3ed6fb088fcf7e1981583172ee8
|
| 3 |
+
size 18243005
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l20_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20648131068b6fd7be58699f6b34191ba7f6b0bf2b5622234e09fc1e713ca955
|
| 3 |
+
size 18243013
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l21_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9806631129785f025cff5147abae93ae11b02fee39d987c4e3e387ff438e4238
|
| 3 |
+
size 18243013
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l22_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:473eccc3e07a4f3bcfe79a2fc55f17650472e09c2b22f3bd60d3700e30bb4340
|
| 3 |
+
size 18243013
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l23_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd4ad9014b4bda50eb37d7481871d6c1a0dadf3e677a52514d368f33d56b31f7
|
| 3 |
+
size 18243013
|