Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +54 -0
- README.md +122 -3
- config.json +0 -0
- embeds/model.embed_tokens.weight.bfloat16.bin +3 -0
- embeds/model.embed_tokens.weight.npy +3 -0
- fastvlm_C128_CTX1024_P640_ax650/image_encoder_512x512_0.5b_ax650.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l0_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l10_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l11_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l12_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l13_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l14_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l15_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l16_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l17_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l18_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l19_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l1_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l20_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l21_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l22_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l23_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l2_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l3_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l4_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l5_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l6_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l7_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l8_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l9_together.axmodel +3 -0
- fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_post.axmodel +3 -0
- fastvlm_C128_CTX512_P256_ax620e/image_encoder_512x512_ax620e.axmodel +3 -0
- fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l0_together.axmodel +3 -0
- fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l10_together.axmodel +3 -0
- fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l11_together.axmodel +3 -0
- fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l12_together.axmodel +3 -0
- fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l13_together.axmodel +3 -0
- fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l14_together.axmodel +3 -0
- fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l15_together.axmodel +3 -0
- fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l16_together.axmodel +3 -0
- fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l17_together.axmodel +3 -0
- fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l18_together.axmodel +3 -0
- fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l19_together.axmodel +3 -0
- fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l1_together.axmodel +3 -0
- fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l20_together.axmodel +3 -0
- fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l21_together.axmodel +3 -0
- fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l22_together.axmodel +3 -0
- fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l23_together.axmodel +3 -0
- fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l2_together.axmodel +3 -0
- fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l3_together.axmodel +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,57 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
fastvlm_C128_CTX1024_P640_ax650/image_encoder_512x512_0.5b_ax650.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l0_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l10_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l11_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l12_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l13_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l14_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l15_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l16_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l17_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l18_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l19_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l1_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l20_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l21_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l22_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l23_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l2_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l3_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l4_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l5_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l6_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l7_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l8_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l9_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_post.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
fastvlm_C128_CTX512_P256_ax620e/image_encoder_512x512_ax620e.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l0_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l10_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l11_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l12_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l13_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l14_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l15_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l16_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l17_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l18_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l19_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l1_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l20_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l21_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l22_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l23_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l2_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l3_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l4_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l5_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l6_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l7_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l8_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l9_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 87 |
+
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_post.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 88 |
+
images/image_1.jpg filter=lfs diff=lfs merge=lfs -text
|
| 89 |
+
images/ssd_horse.jpg filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
|
@@ -1,3 +1,122 @@
|
|
| 1 |
-
---
|
| 2 |
-
license:
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
language:
|
| 4 |
+
- en
|
| 5 |
+
base_model:
|
| 6 |
+
- apple/FastVLM-1.5B
|
| 7 |
+
pipeline_tag: image-text-to-text
|
| 8 |
+
tags:
|
| 9 |
+
- vlm
|
| 10 |
+
- en
|
| 11 |
+
---
|
| 12 |
+
# FastVLM-1.5B-GPTQ-Int4
|
| 13 |
+
|
| 14 |
+
This version of FastVLM-1.5B-GPTQ-Int4 has been converted to run on the Axera NPU using **w4a16** quantization.
|
| 15 |
+
|
| 16 |
+
This model has been optimized with the following LoRA:
|
| 17 |
+
|
| 18 |
+
Compatible with Pulsar2 version: 5.1-patch1.
|
| 19 |
+
|
| 20 |
+
Please note that the model's context length is 1k tokens and the maximum prefill length is 640 tokens.
|
| 21 |
+
|
| 22 |
+
## Conversion tool links
|
| 23 |
+
|
| 24 |
+
If you are interested in model conversion, you can quantize the model and export the axmodel files yourself, starting from the original repo:
|
| 25 |
+
|
| 26 |
+
https://huggingface.co/apple/FastVLM-1.5B
|
| 27 |
+
|
| 28 |
+
How to convert an LLM from Hugging Face to axmodel [TODO]
|
| 29 |
+
|
| 30 |
+
## Supported Platforms
|
| 31 |
+
|
| 32 |
+
- AX650
|
| 33 |
+
- AX650N DEMO Board
|
| 34 |
+
- [M4N-Dock(爱芯派Pro)](https://wiki.sipeed.com/hardware/zh/maixIV/m4ndock/m4ndock.html)
|
| 35 |
+
- [M.2 Accelerator card](https://axcl-docs.readthedocs.io/zh-cn/latest/doc_guide_hardware.html)
|
| 36 |
+
|
| 37 |
+
|Chip|Image encoder latency|TTFT|Decode speed (w4a16)|
|
| 38 |
+
|--|--|--|--|
|
| 39 |
+
|AX650| 216.257 ms (1024x1024)| 709.455 ms (291 tokens)| 21.38 tokens/sec|
|
| 40 |
+
|AX650| 44.747 ms (512x512)| 167.543 ms (99 tokens)| 21.38 tokens/sec|
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
## How to use
|
| 44 |
+
|
| 45 |
+
Download all files from this repository to the device.
|
| 46 |
+
|
| 47 |
+
```
|
| 48 |
+
$ tree -L 1
|
| 49 |
+
.
|
| 50 |
+
├── config.json
|
| 51 |
+
├── fastvlm_ax650_context_1k_prefill_640_int4
|
| 52 |
+
├── fastvlm_tokenizer
|
| 53 |
+
├── images
|
| 54 |
+
├── infer_axmodel.py
|
| 55 |
+
├── README.md
|
| 56 |
+
├── requirements.txt
|
| 57 |
+
└── utils
|
| 58 |
+
|
| 59 |
+
5 directories, 4 files
|
| 60 |
+
```
|
| 61 |
+
|
| 62 |
+
#### Install the Python dependencies
|
| 63 |
+
|
| 64 |
+
```
|
| 65 |
+
pip install -r requirements.txt
|
| 66 |
+
```
|
| 67 |
+
|
| 68 |
+
#### Inference on an AX650 host, such as the M4N-Dock (爱芯派Pro) or the AX650 DEMO Board
|
| 69 |
+
|
| 70 |
+
Run the following command on the Axera board to start a chat conversation:
|
| 71 |
+
|
| 72 |
+
```sh
|
| 73 |
+
$ python infer_axmodel.py -v ./fastvlm_C128_CTX1024_P640_ax650/image_encoder_512x512_0.5b_ax650.axmodel -m ./fastvlm_C128_CTX1024_P640_ax650 -t ./fastvlm_tokenizer/ -i 512
|
| 74 |
+
```
|
| 75 |
+
output:
|
| 76 |
+
|
| 77 |
+
```bash
|
| 78 |
+
[INFO] Available providers: ['AXCLRTExecutionProvider']
|
| 79 |
+
Loading config, tokenizer and init model.
|
| 80 |
+
Detected prefixes: ['llava_qwen2'], chosen: llava_qwen2, layers: 28
|
| 81 |
+
Init InferenceSession: 0%| | 0/28 [00:00<?, ?it/s][INFO] Using provider: AXCLRTExecutionProvider
|
| 82 |
+
[INFO] SOC Name: AX650N
|
| 83 |
+
[INFO] VNPU type: VNPUType.DISABLED
|
| 84 |
+
[INFO] Compiler version: 5.1-patch1-dirty 140e8d4a-dirty
|
| 85 |
+
Init InferenceSession: 4%|████ | 1/28 [00:00<00:20, 1.31it/s][INFO] Using provider: AXCLRTExecutionProvider
|
| 86 |
+
[INFO] SOC Name: AX650N
|
| 87 |
+
[INFO] VNPU type: VNPUType.DISABLED
|
| 88 |
+
[INFO] Compiler version: 5.1-patch1-dirty 140e8d4a-dirty
|
| 89 |
+
Init InferenceSession: 7%|████████▏ | 2/28 [00:01<00:14, 1.85it/s][INFO] Using provider: AXCLRTExecutionProvider
|
| 90 |
+
...
|
| 91 |
+
[INFO] SOC Name: AX650N
|
| 92 |
+
[INFO] VNPU type: VNPUType.DISABLED
|
| 93 |
+
[INFO] Compiler version: 5.1-patch1-dirty 140e8d4a-dirty
|
| 94 |
+
Init InferenceSession: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:11<00:00, 2.48it/s]
|
| 95 |
+
[INFO] Using provider: AXCLRTExecutionProvider
|
| 96 |
+
[INFO] SOC Name: AX650N
|
| 97 |
+
[INFO] VNPU type: VNPUType.DISABLED
|
| 98 |
+
[INFO] Compiler version: 5.1-patch1-dirty 140e8d4a-dirty
|
| 99 |
+
Model loaded successfully!
|
| 100 |
+
[INFO] Using provider: AXCLRTExecutionProvider
|
| 101 |
+
[INFO] SOC Name: AX650N
|
| 102 |
+
[INFO] VNPU type: VNPUType.DISABLED
|
| 103 |
+
[INFO] Compiler version: 5.1-patch1-dirty 140e8d4a-dirty
|
| 104 |
+
[INFO]: 输入文本进行对话,或者输入图片路径进行图片理解, 或者输入q退出对话。
|
| 105 |
+
prompt<<who are you
|
| 106 |
+
slice_indices: [0]
|
| 107 |
+
Slice prefill done: 0
|
| 108 |
+
answer >> I am an artificial intelligence language model developed by Apple Inc.
|
| 109 |
+
|
| 110 |
+
prompt<<./images/ssd_horse.jpg
|
| 111 |
+
slice_indices: [0]
|
| 112 |
+
Slice prefill done: 0
|
| 113 |
+
answer >> The image depicts a young man riding a brown horse in an outdoor setting. The horse is standing on a dirt ground, and the rider is wearing a blue jacket and jeans. The horse has a white blaze on its face and white markings on its legs. The rider is holding the reins and appears to be looking down at a brown dog standing on the ground next to the horse. The dog is wearing a collar and is looking up at the rider. In the background, there is a silver pickup truck parked on the grass, and a fence can be seen further back. The scene appears to be set in a rural or farm-like environment.
|
| 114 |
+
|
| 115 |
+
prompt<<./images/image_1.jpg
|
| 116 |
+
slice_indices: [0]
|
| 117 |
+
Slice prefill done: 0
|
| 118 |
+
answer >> The image depicts a panda bear in a natural setting. The panda is sitting on the ground, surrounded by greenery, including bamboo and other plants. The panda has a distinctive black and white fur pattern, with black fur around its eyes, ears, and limbs, and white fur on its face, chest, and legs. The panda is sitting on its hind legs, with its front paws resting on its chest. The background shows a forested area with trees and rocks, suggesting that the panda is in its natural habitat. The panda appears to be looking directly at the camera, giving the impression that it is aware of the photographer's presence. The overall scene is peaceful and serene, capturing the beauty of the panda in its natural environment.
|
| 119 |
+
|
| 120 |
+
prompt<<q
|
| 121 |
+
[INFO]: 对话结束,再见。
|
| 122 |
+
```
|
config.json
ADDED
|
File without changes
|
embeds/model.embed_tokens.weight.bfloat16.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0403b365004db461375fe5d5daebcc61bab76b884844d84cbc0ba5820085e0e9
|
| 3 |
+
size 272269312
|
embeds/model.embed_tokens.weight.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69ebc4dd0a736c34db773c86bc708eda4dbcc626dc7bee4d363c103a9bb6d100
|
| 3 |
+
size 544538752
|
fastvlm_C128_CTX1024_P640_ax650/image_encoder_512x512_0.5b_ax650.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f34c58c5a72ed871c066fdf3f756d0ff7451b347a389fe65fb95dc3b855f78cc
|
| 3 |
+
size 166944692
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l0_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11a3483817e1ce66cc95c612619a1a1e60e4bbc77a012617b0b845cde0268a11
|
| 3 |
+
size 17508925
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l10_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7a88c0a139e09a37ed079166a152c6208707c84645745f2c7fed31a77c74d29
|
| 3 |
+
size 17508933
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l11_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e1060f24145f4c2f773c5ed9cbcfa04e9dd2b59f62be3772287e9f515ad2a43
|
| 3 |
+
size 17508933
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l12_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:17147af14d42c305172ee511c69c33281c354dd746261a2cb6d1df326eff9868
|
| 3 |
+
size 17508933
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l13_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd76a3f9d764935b7f94439762ffda93a92911dc813522076eea2c850942632b
|
| 3 |
+
size 17508933
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l14_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:404e37e37f43285f04d55e3b68586dd2cab0b675c5658c62ce16ad677b920cf1
|
| 3 |
+
size 17508933
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l15_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c8e1fd1fb7de4aa9c859543c7f07fd410bbdcd0deb6a20375f3f47024ed44cef
|
| 3 |
+
size 17508933
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l16_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d3020712849ef594d4104aa1402442cbeb7f334f56ef9b4b8b3e063f345cbfe3
|
| 3 |
+
size 17508933
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l17_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:128d8e4ea65ebedd56123097d4cc635dd0958840084b33625d06ec7b21e13355
|
| 3 |
+
size 17508933
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l18_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f0e61614437be65baff7c20c430eb9fde34cf85f91ef8dc7a489715cf2ae70f
|
| 3 |
+
size 17508933
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l19_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e92084c4a8117981ba9b7f5da0086c1bad38c4a7e8b87d4ba9193883f329b4b5
|
| 3 |
+
size 17508933
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l1_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2867a766cdba85a8e925d96d67757193be73e38267a653d8c78ebc7a3d1af5dd
|
| 3 |
+
size 17508925
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l20_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c846e6bb008b448a4922297eed660f111780e3a0719f695100dec1564b4e23b2
|
| 3 |
+
size 17508933
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l21_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc450fbd546cffe53250b9e74454faf9e452ec4412a11ec07e3b1857e7d64c70
|
| 3 |
+
size 17508933
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l22_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3595909c86c5706da8d49c9b87616d033ea1070b1ac08ab7802b63409fbeda99
|
| 3 |
+
size 17508933
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l23_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f91903d8a7313f1ce17cbc43cec71e4d72d0e358d893ce8da6f99b71622b641a
|
| 3 |
+
size 17508933
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l2_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f378b553323c4db55df629f68ad11791c40de63c199f3ea6143dd8ca1f292e3e
|
| 3 |
+
size 17508925
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l3_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9f3dbf1f8a761c344c1a070cf62993e49863adc3c8caae728e8d785f956a736
|
| 3 |
+
size 17508925
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l4_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3ed78969a0e0c4ebe22d2facb1fba3e283e42e8a65f74a6fb9dbbe79ca4f79e
|
| 3 |
+
size 17508925
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l5_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c1026c141630bd0f78bc720e571d4539af8127c8bbfc529fc47c86c0c7b4741a
|
| 3 |
+
size 17508925
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l6_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66839f48faa65fc63031077dc48ae517df47a11f4c9710f6d13ddf270ddbd362
|
| 3 |
+
size 17508925
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l7_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e1e27d371a089c2096ad46cb3c1c2ea16f9e0acfd39803d67ced7e5aa0b49b2
|
| 3 |
+
size 17508925
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l8_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de8f6a89047054f53284f7b5d676911595419e1b342331daac0078a73cd8ac48
|
| 3 |
+
size 17508925
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_p128_l9_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bfd6b609c48c5fbe5b26a18cd5d317130ed0acaacc368a8b7976ef5ddfb66163
|
| 3 |
+
size 17508925
|
fastvlm_C128_CTX1024_P640_ax650/llava_qwen2_post.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b44eaddaac62ea186e7d937d4e6fb7e016b401d5591a02c01251c315863a52a6
|
| 3 |
+
size 147954866
|
fastvlm_C128_CTX512_P256_ax620e/image_encoder_512x512_ax620e.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f914c7bd4b237095742e87fbd1a0dc0974e3184bf23cd13f7194558e952effef
|
| 3 |
+
size 160413696
|
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l0_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03dfabd7b77ba8e97726f5b12142c3442a678d4d7e57d1bd75f336b526a8c497
|
| 3 |
+
size 16118251
|
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l10_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc3f65cd014fda072a40bc61a23058b1dd8febf8895bdde2a934390490793123
|
| 3 |
+
size 16118227
|
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l11_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0242b2f125da46c4d71436e8aeccc3f68e86cf8a1aa448209c3e7f70ce786d2
|
| 3 |
+
size 16118211
|
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l12_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1d8611aedf218536d9c0b975c93619a24ba2a2805c5bc15ee255c4a0e89fedc
|
| 3 |
+
size 16118251
|
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l13_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f604cdcd2786455d25a3e8d650c967ce7d3437c6d03880d805e461e7fd2286c
|
| 3 |
+
size 16118243
|
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l14_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a63a2973ad20d234590e668e33b7553f46166130ce577eebe1d41e3efc9dabd7
|
| 3 |
+
size 16118227
|
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l15_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:821127902db3a2e3d81d347d8be3dc4032e28d42422a61a3264184b932e169df
|
| 3 |
+
size 16118219
|
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l16_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed9b12094a6da5a0dbef64b61938572ff8704791d74288ff6805f729cbd545e1
|
| 3 |
+
size 16118219
|
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l17_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1b6dacf37bb79f0a3149dc12514663659d1f32125b193b939f1df146fc91646
|
| 3 |
+
size 16118259
|
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l18_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5cd38397fadcc19a334b96d2d76ba88c7e4a26ddef27f81b5c75781402bcc1c6
|
| 3 |
+
size 16118259
|
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l19_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b0973654205d0f682a8f933bdefd6fbebaa492901e6348cba6ac25dfad8cbe6
|
| 3 |
+
size 16118211
|
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l1_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4db15f236f59bf9ff8e25d25c191a006473a583a94f4023f7c5f9f6692c11e82
|
| 3 |
+
size 16118219
|
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l20_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7242a5c70574237c769777e25a503c391424de68bb858e2961e177c71fccec64
|
| 3 |
+
size 16118251
|
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l21_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b32e5f52374869a2c339c950b0e38bc03a0ce0e7f781b77f2eb4b7b6b6965168
|
| 3 |
+
size 16118243
|
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l22_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51a15fe270a2fd924b7a17b49fb1e9706965bdcbd46fb62816d0fe5b89dd9c6a
|
| 3 |
+
size 16118235
|
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l23_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b37a589e9532d1c4546201b53d41335797a1741de9f7da956ad011f28ed1f7c
|
| 3 |
+
size 16118259
|
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l2_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b885eaed8e0afe0c7c03c8337c96c6e15919c221ece8a4feb1fef69aba518161
|
| 3 |
+
size 16118267
|
fastvlm_C128_CTX512_P256_ax620e/llava_qwen2_p128_l3_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c95c7d82ad09172f1f1763217773519fbfcf0e289502cedbd93da65623d970d9
|
| 3 |
+
size 16118243
|