Upload 13 files
- .gitattributes +5 -0
- convert_gen_head.py +52 -0
- convert_gen_img_embeds.py +52 -0
- convert_image_decode.py +52 -0
- convert_lm_head.py +52 -0
- convert_vision_encoder.py +53 -0
- embed_tokens.onnx +3 -0
- gen_head.rknn +3 -0
- gen_img_embeds.rknn +3 -0
- image_decode.rknn +3 -0
- lm_head.rknn +3 -0
- rkllm-convert.py +23 -0
- run_rkllm.py +308 -0
- vision_encoder.rknn +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+gen_head.rknn filter=lfs diff=lfs merge=lfs -text
+gen_img_embeds.rknn filter=lfs diff=lfs merge=lfs -text
+image_decode.rknn filter=lfs diff=lfs merge=lfs -text
+lm_head.rknn filter=lfs diff=lfs merge=lfs -text
+vision_encoder.rknn filter=lfs diff=lfs merge=lfs -text
convert_gen_head.py
ADDED
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+# ztu_somemodelruntime_rknn2: gen_head
+
+from rknn.api import RKNN
+import os
+import numpy as np
+
+def main():
+    # Create an RKNN instance
+    rknn = RKNN(verbose=True)
+
+    # Path of the source ONNX model
+    ONNX_MODEL = "gen_head.onnx"
+    # Path for the exported RKNN model
+    RKNN_MODEL = "gen_head.rknn"
+
+    # Configure conversion parameters
+    print("--> Config model")
+    ret = rknn.config(target_platform="rk3588",
+                      dynamic_input=None)
+    if ret != 0:
+        print('Config model failed!')
+        exit(ret)
+
+    # Load the ONNX model
+    print("--> Loading model")
+    ret = rknn.load_onnx(model=ONNX_MODEL,
+                         inputs=['hidden_states'],
+                         input_size_list=[[1, 1, 2048]])
+    if ret != 0:
+        print('Load model failed!')
+        exit(ret)
+
+    # Build the model
+    print("--> Building model")
+    ret = rknn.build(do_quantization=False)
+    if ret != 0:
+        print('Build model failed!')
+        exit(ret)
+
+    # Export the RKNN model
+    print("--> Export RKNN model")
+    ret = rknn.export_rknn(RKNN_MODEL)
+    if ret != 0:
+        print('Export RKNN model failed!')
+        exit(ret)
+
+    print(f'Done! The converted RKNN model has been saved to: {RKNN_MODEL}')
+    rknn.release()
+
+if __name__ == '__main__':
+    main()
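All five convert_*.py scripts share this config / load_onnx / build / export_rknn pattern; only the file names, input names, and fixed input shapes differ. As a quick sanity check after conversion, the exported model can be exercised on the board with rknn-toolkit-lite2. A minimal smoke-test sketch (assumptions: rknn-toolkit-lite2 is installed on the RK3588; the input matches the [1, 1, 2048] shape fixed above; the 16384-wide logits come from the gen_head interface notes in run_rkllm.py):

from rknnlite.api import RKNNLite
import numpy as np

# Hypothetical smoke test for the freshly exported gen_head.rknn.
rknn_lite = RKNNLite()
assert rknn_lite.load_rknn("gen_head.rknn") == 0
assert rknn_lite.init_runtime() == 0

hidden = np.random.randn(1, 1, 2048).astype(np.float32)  # shape fixed at conversion time
logits = rknn_lite.inference(inputs=[hidden])[0]
print(logits.shape)  # expect (1, 1, 16384) per the interface notes in run_rkllm.py
rknn_lite.release()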
convert_gen_img_embeds.py
ADDED
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+# ztu_somemodelruntime_rknn2: gen_img_embeds
+
+from rknn.api import RKNN
+import os
+import numpy as np
+
+def main():
+    # Create an RKNN instance
+    rknn = RKNN(verbose=True)
+
+    # Path of the source ONNX model
+    ONNX_MODEL = "gen_img_embeds.onnx"
+    # Path for the exported RKNN model
+    RKNN_MODEL = "gen_img_embeds.rknn"
+
+    # Configure conversion parameters
+    print("--> Config model")
+    ret = rknn.config(target_platform="rk3588",
+                      dynamic_input=None)
+    if ret != 0:
+        print('Config model failed!')
+        exit(ret)
+
+    # Load the ONNX model
+    print("--> Loading model")
+    ret = rknn.load_onnx(model=ONNX_MODEL,
+                         inputs=['image_ids'],
+                         input_size_list=[[1, 1]])
+    if ret != 0:
+        print('Load model failed!')
+        exit(ret)
+
+    # Build the model
+    print("--> Building model")
+    ret = rknn.build(do_quantization=False)
+    if ret != 0:
+        print('Build model failed!')
+        exit(ret)
+
+    # Export the RKNN model
+    print("--> Export RKNN model")
+    ret = rknn.export_rknn(RKNN_MODEL)
+    if ret != 0:
+        print('Export RKNN model failed!')
+        exit(ret)
+
+    print(f'Done! The converted RKNN model has been saved to: {RKNN_MODEL}')
+    rknn.release()
+
+if __name__ == '__main__':
+    main()
convert_image_decode.py
ADDED
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+# ztu_somemodelruntime_rknn2: image_decode
+
+from rknn.api import RKNN
+import os
+import numpy as np
+
+def main():
+    # Create an RKNN instance
+    rknn = RKNN(verbose=True)
+
+    # Path of the source ONNX model
+    ONNX_MODEL = "image_decode.onnx"
+    # Path for the exported RKNN model
+    RKNN_MODEL = "image_decode.rknn"
+
+    # Configure conversion parameters
+    print("--> Config model")
+    ret = rknn.config(target_platform="rk3588",
+                      dynamic_input=None)
+    if ret != 0:
+        print('Config model failed!')
+        exit(ret)
+
+    # Load the ONNX model
+    print("--> Loading model")
+    ret = rknn.load_onnx(model=ONNX_MODEL,
+                         inputs=['generated_tokens'],
+                         input_size_list=[[1, 576]])
+    if ret != 0:
+        print('Load model failed!')
+        exit(ret)
+
+    # Build the model
+    print("--> Building model")
+    ret = rknn.build(do_quantization=False)
+    if ret != 0:
+        print('Build model failed!')
+        exit(ret)
+
+    # Export the RKNN model
+    print("--> Export RKNN model")
+    ret = rknn.export_rknn(RKNN_MODEL)
+    if ret != 0:
+        print('Export RKNN model failed!')
+        exit(ret)
+
+    print(f'Done! The converted RKNN model has been saved to: {RKNN_MODEL}')
+    rknn.release()
+
+if __name__ == '__main__':
+    main()
convert_lm_head.py
ADDED
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+# ztu_somemodelruntime_rknn2: lm_head
+
+from rknn.api import RKNN
+import os
+import numpy as np
+
+def main():
+    # Create an RKNN instance
+    rknn = RKNN(verbose=True)
+
+    # Path of the source ONNX model
+    ONNX_MODEL = "lm_head.onnx"
+    # Path for the exported RKNN model
+    RKNN_MODEL = "lm_head.rknn"
+
+    # Configure conversion parameters
+    print("--> Config model")
+    ret = rknn.config(target_platform="rk3588",
+                      dynamic_input=None)
+    if ret != 0:
+        print('Config model failed!')
+        exit(ret)
+
+    # Load the ONNX model
+    print("--> Loading model")
+    ret = rknn.load_onnx(model=ONNX_MODEL,
+                         inputs=['hidden_states'],
+                         input_size_list=[[1, 1, 2048]])
+    if ret != 0:
+        print('Load model failed!')
+        exit(ret)
+
+    # Build the model
+    print("--> Building model")
+    ret = rknn.build(do_quantization=False)
+    if ret != 0:
+        print('Build model failed!')
+        exit(ret)
+
+    # Export the RKNN model
+    print("--> Export RKNN model")
+    ret = rknn.export_rknn(RKNN_MODEL)
+    if ret != 0:
+        print('Export RKNN model failed!')
+        exit(ret)
+
+    print(f'Done! The converted RKNN model has been saved to: {RKNN_MODEL}')
+    rknn.release()
+
+if __name__ == '__main__':
+    main()
convert_vision_encoder.py
ADDED
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+# ztu_somemodelruntime_rknn2: prepare_inputs_embeds
+
+from rknn.api import RKNN
+import os
+import numpy as np
+
+def main():
+    # Create an RKNN instance
+    rknn = RKNN(verbose=True)
+
+    # Path of the source ONNX model
+    ONNX_MODEL = "prepare_inputs_embeds.onnx"
+    # Path for the exported RKNN model
+    RKNN_MODEL = "vision_encoder.rknn"
+
+    # Configure conversion parameters
+    print("--> Config model")
+    ret = rknn.config(target_platform="rk3588",
+                      dynamic_input=None)
+    if ret != 0:
+        print('Config model failed!')
+        exit(ret)
+
+    # Load the ONNX model
+    print("--> Loading model")
+    ret = rknn.load_onnx(model=ONNX_MODEL,
+                         inputs=['pixel_values'],
+                         input_size_list=[[1, 1, 3, 384, 384]],
+                         outputs=['/aligner/layers/layers.2/Add_output_0'])
+    if ret != 0:
+        print('Load model failed!')
+        exit(ret)
+
+    # Build the model
+    print("--> Building model")
+    ret = rknn.build(do_quantization=False)
+    if ret != 0:
+        print('Build model failed!')
+        exit(ret)
+
+    # Export the RKNN model
+    print("--> Export RKNN model")
+    ret = rknn.export_rknn(RKNN_MODEL)
+    if ret != 0:
+        print('Export RKNN model failed!')
+        exit(ret)
+
+    print(f'Done! The converted RKNN model has been saved to: {RKNN_MODEL}')
+    rknn.release()
+
+if __name__ == '__main__':
+    main()
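Unlike the other four converters, this one exports only a sub-graph: load_onnx is given an explicit outputs list, so the graph is cut at the aligner's final Add node and the resulting vision_encoder.rknn emits the [576, 2048] embeddings directly (per the interface notes in run_rkllm.py). A tensor name such as '/aligner/layers/layers.2/Add_output_0' can be recovered by walking the ONNX graph; a sketch using the onnx package (Netron works just as well for visual inspection):

import onnx

# List candidate aligner output tensors in prepare_inputs_embeds.onnx.
model = onnx.load("prepare_inputs_embeds.onnx")
for node in model.graph.node:
    if node.op_type == "Add" and "aligner" in node.name:
        print(node.name, "->", list(node.output))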
embed_tokens.onnx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68b65c83b08bd82cfb4ef6009755244316374f593970625761957278f12920c6
+size 838861039
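This file and the .rknn entries below are Git LFS pointer files: the version/oid/size triple stands in for the binary payload, which is fetched on checkout (or with git lfs pull) rather than stored in the git history, matching the .gitattributes rules added above.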
gen_head.rknn
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97742dd3a76eb491cc6311b80b05ecb9d9547963223c095539993064e0605b64
+size 75616889
gen_img_embeds.rknn
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99b8e29ad96ee5cb254a5cfe3401f73df6c872b9a1f4138d4bf5209a9b5d860d
+size 8741750
image_decode.rknn
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f8d30304794961107bd904db7953103da7d2535aad3534f51d0722ad87e1ec1
+size 247330434
lm_head.rknn
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af8830d6b3d889abfd2a1ba2801a98cce88eb49ab637a1fb84bd989a34071aaa
+size 419496186
rkllm-convert.py
ADDED
@@ -0,0 +1,23 @@
+from rkllm.api import RKLLM
+
+modelpath = '.'
+llm = RKLLM()
+
+ret = llm.load_huggingface(model=modelpath, model_lora=None, device='cpu')
+if ret != 0:
+    print('Load model failed!')
+    exit(ret)
+
+qparams = None
+ret = llm.build(do_quantization=False, optimization_level=1, quantized_dtype='w8a8',
+                quantized_algorithm='normal', target_platform='rk3588', num_npu_core=3, extra_qparams=qparams)
+
+if ret != 0:
+    print('Build model failed!')
+    exit(ret)
+
+# Export the rkllm model
+ret = llm.export_rkllm("./language_model.rkllm")
+if ret != 0:
+    print('Export model failed!')
+    exit(ret)
run_rkllm.py
ADDED
@@ -0,0 +1,308 @@
+import faulthandler
+faulthandler.enable()
+
+import os
+os.environ["RKLLM_LOG_LEVEL"] = "1"
+import numpy as np
+import onnxruntime as real_ort
+import ztu_somemodelruntime_rknn2 as ort
+from tokenizers import Tokenizer
+import cv2
+import tqdm
+import time
+import ctypes
+
+from rkllm_binding import *
+
+model_path = "."
+onnx_model_path = f"{model_path}"
+tokenizer = Tokenizer.from_file(f"{model_path}/tokenizer.json")
+# np.random.seed(0)
+
+# image = None
+# prompt = "A stunning princess from kabul in red, white traditional clothing, blue eyes, brown hair"
+# mode = "t2i"  # text-to-image; the other direction is it2t -> image/text-to-text
+
+image = "./test.jpg"
+prompt = "仔细描述这张图片。"  # "Describe this image in detail."
+mode = "it2t"
+
+temperature = 0.7
+
+# Global holder for rkllm inference results
+rkllm_result_data = {
+    'hidden_states': None,
+    'finished': False,
+    'error': False
+}
+
+def rkllm_callback(result_ptr, userdata_ptr, state_enum):
+    """RKLLM inference callback."""
+    global rkllm_result_data
+
+    try:
+        state = LLMCallState(state_enum)
+        # print(f"Callback state: {state.name}")
+
+        if state == LLMCallState.RKLLM_RUN_FINISH:
+            rkllm_result_data['finished'] = True
+            print("RKLLM inference finished")
+            return
+        elif state == LLMCallState.RKLLM_RUN_ERROR:
+            rkllm_result_data['error'] = True
+            rkllm_result_data['error_msg'] = "RKLLM inference error"
+            rkllm_result_data['finished'] = True
+            print("Error: RKLLM inference failed")
+
+        # Check that result_ptr is not null
+        if not result_ptr:
+            print("Warning: result_ptr is a null pointer")
+            return
+
+        result = result_ptr.contents
+        # print(result.perf)
+        if state == LLMCallState.RKLLM_RUN_NORMAL:
+            # Grab the last-hidden-layer result
+            if result.last_hidden_layer.hidden_states and result.last_hidden_layer.embd_size > 0:
+                # Convert the C array into a numpy array
+                hidden_size = result.last_hidden_layer.embd_size
+                num_tokens = result.last_hidden_layer.num_tokens
+
+                # print(f"Hidden layer info: num_tokens={num_tokens}, embd_size={hidden_size}")
+
+                # Build a numpy array from the C pointer
+                hidden_array = np.ctypeslib.as_array(
+                    result.last_hidden_layer.hidden_states,
+                    shape=(num_tokens, hidden_size)
+                ).copy()  # copy the data to avoid memory issues
+
+                rkllm_result_data['hidden_states'] = hidden_array
+                # print(f"Got hidden states, shape: {hidden_array.shape}")
+                rkllm_result_data['finished'] = True
+                return 1
+            else:
+                print("Warning: no valid hidden states received")
+
+        return 1
+    except Exception as e:
+        print(f"Callback exception: {e}")
+        rkllm_result_data['error'] = True
+        rkllm_result_data['error_msg'] = str(e)
+        rkllm_result_data['finished'] = True
+
+# 1. Load the models
+
+# Vision encoder
+# <- pixel_values: float32[batch_size,num_images,3,384,384]
+# -> inputs_embeds: float32[batch_size*num_images,576,2048]
+vision_encoder = ort.InferenceSession(f"{onnx_model_path}/vision_encoder.rknn")
+
+# Initialize the RKLLM language model
+print("Initializing the RKLLM language model...")
+rkllm_runtime = RKLLMRuntime()
+rkllm_params = rkllm_runtime.create_default_param()
+rkllm_params.model_path = f"{model_path}/language_model.rkllm".encode('utf-8')
+rkllm_params.max_context_len = 1024
+rkllm_params.max_new_tokens = 5
+# rkllm_params.temperature = temperature
+rkllm_params.skip_special_token = 0
+rkllm_params.extend_param.base_domain_id = 1
+rkllm_runtime.init(rkllm_params, rkllm_callback)
+
+# LM head
+# <- hidden_states: float32[batch_size,sequence_length,2048]
+# -> logits: float32[batch_size,sequence_length,102400]
+lm_head = ort.InferenceSession(f"{onnx_model_path}/lm_head.onnx")
+# Image-generation head
+# <- hidden_states: float32[batch_size,sequence_length,2048]
+# -> logits: float32[batch_size,sequence_length,16384]
+gen_head = ort.InferenceSession(f"{onnx_model_path}/gen_head.onnx")
+# Image-generation embedding
+# <- image_ids: int64[batch_size,sequence_length]
+# -> inputs_embeds: float32[batch_size,sequence_length,2048]
+gen_img_embeds = ort.InferenceSession(f"{onnx_model_path}/gen_img_embeds.onnx")
+# Text embedding
+# <- input_ids: int64[batch_size,sequence_length]
+# -> inputs_embeds: float32[batch_size,sequence_length,2048]
+text_embeds = real_ort.InferenceSession(f"{onnx_model_path}/embed_tokens.onnx")
+# VQVAE decoder (turns 576 tokens into one 384x384 image)
+# <- generated_tokens: int64[batch_size,sequence_length]
+# -> decoded_image: float32[batch_size,3,384,384]
+image_decode = ort.InferenceSession(f"{onnx_model_path}/image_decode.onnx")
+
+# 2. Preprocess the input
+# The tokenizer already prepends <|begin▁of▁sentence|>; do not add it here!
+if mode == "t2i":
+    input_str = f"""<|User|>: {prompt}
+
+<|Assistant|>:<begin_of_image>"""
+else:
+    input_str = f"""You are a helpful language and vision assistant. You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language.
+
+<|User|>: <image_placeholder>
+{prompt}
+
+<|Assistant|>:"""
+
+# 3. Build the embeddings
+
+# Expand <image_placeholder> into 576 copies
+input_str = input_str.replace("<image_placeholder>", "<image_placeholder>" * 576)
+input = tokenizer.encode(input_str)
+input_ids = np.array([input.ids], dtype=np.int64)
+input_len = len(input.ids)
+attention_mask = np.array([input.attention_mask], dtype=np.int64)
+images_seq_mask = np.array([[1 if id == 100581 else 0 for id in input.ids]], dtype=np.bool_)  # why does <image_placeholder> have two ids?
+position_ids = np.expand_dims(np.arange(input_len), axis=0)
+# Image preprocessing
+if image:
+    img = cv2.imread(image)
+    if img is None:
+        raise ValueError(f"Cannot read image: {image}")
+    # Convert BGR to RGB
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    # Resize to the target size: 384x384
+    target_size = 384
+    img = cv2.resize(img, (target_size, target_size), interpolation=cv2.INTER_LINEAR)
+    # Cast to float32 and rescale pixel values into [0, 1]
+    img = img.astype(np.float32) * 0.00392156862745098  # 0.00392156... = 1/255
+    # Normalize per the config: (img - image_mean) / image_std, with image_mean = [0.5, 0.5, 0.5] and image_std = [0.5, 0.5, 0.5]
+    img = (img - np.array([0.5, 0.5, 0.5], dtype=np.float32)) / np.array([0.5, 0.5, 0.5], dtype=np.float32)
+    # Non-square images could also be padded with background_color; skipped here since we resize straight to a square
+    # Reshape to [batch_size, num_images, channels, height, width]
+    # First convert HWC to CHW
+    img = img.transpose(2, 0, 1)  # -> [3, 384, 384]
+    pixel_values = np.expand_dims(np.expand_dims(img, axis=0), axis=1)  # [1, 1, 3, 384, 384]
+    images_emb_mask = np.ones((1, 1, 576), dtype=np.bool_)
+else:
+    pixel_values = np.zeros((0, 0, 3, 384, 384), dtype=np.float32)
+    images_emb_mask = np.zeros((1, 0, 576), dtype=np.bool_)
+
+# Assemble the input embeddings by hand
+# 1. Get the text embeddings first
+text_inputs_embeds = text_embeds.run(None, {"input_ids": input_ids})[0]  # [1, input_len, 2048]
+
+# 2. If there is an image, compute the vision embeddings and splice them into the text
+if image:
+    # Run the vision encoder
+    vision_embeds = vision_encoder.run(None, {"pixel_values": pixel_values})[0]  # [1, 576, 2048]
+
+    # Find every <image_placeholder> token position (entries of images_seq_mask that are True)
+    image_token_positions = np.where(images_seq_mask[0])[0]  # indices of all True entries
+
+    # Insert the vision embeddings at those positions
+    # Replace each image-token position with the corresponding vision embedding
+    for idx, pos in enumerate(image_token_positions):
+        if idx < vision_embeds.shape[1]:  # stay within the bounds of vision_embeds
+            text_inputs_embeds[0, pos, :] = vision_embeds[0, idx, :]
+
+inputs_embeds = text_inputs_embeds
+
+# 4. Language-model inference (via RKLLM)
+# Buffer for the generated tokens (image tokens in t2i mode, text tokens otherwise)
+generated_tokens = []
+
+# Initialize reusable objects
+rkllm_input = RKLLMInput()
+rkllm_input.input_type = RKLLMInputType.RKLLM_INPUT_EMBED
+embed_input = RKLLMEmbedInput()
+infer_params = RKLLMInferParam()
+infer_params.mode = RKLLMInferMode.RKLLM_INFER_GET_LAST_HIDDEN_LAYER
+infer_params.keep_history = 1
+
+def run_rkllm_inference(inputs_embeds):
+    """Run RKLLM inference: embeddings in, hidden states out."""
+    global rkllm_result_data
+
+    # Reset the result holder
+    rkllm_result_data = {
+        'hidden_states': None,
+        'finished': False,
+        'error': False
+    }
+
+    # Refresh the embedding input data
+    embed_flat = inputs_embeds.flatten().astype(np.float32)
+    embed_c_array = (ctypes.c_float * len(embed_flat))(*embed_flat)
+    embed_input.embed = embed_c_array
+    embed_input.n_tokens = inputs_embeds.shape[1]  # sequence length
+
+    rkllm_input._union_data.embed_input = embed_input
+
+    # Run inference
+    rkllm_runtime.run(rkllm_input, infer_params)
+
+    # Wait for the result
+    while not rkllm_result_data['finished']:
+        time.sleep(0.001)  # brief wait
+
+    if rkllm_result_data['error']:
+        raise RuntimeError("RKLLM inference failed")
+
+    return rkllm_result_data['hidden_states']
+
+# Loop to generate up to 576 image tokens
+with tqdm.tqdm(range(576)) as pbar:
+    for i in pbar:
+        # Run inference through RKLLM
+        hidden_states = run_rkllm_inference(inputs_embeds)
+
+        if hidden_states is None:
+            raise RuntimeError("RKLLM returned no valid hidden states")
+
+        # Reshape to the expected [batch_size, sequence_length, hidden_size]
+        if len(hidden_states.shape) == 2:
+            # If it is [num_tokens, hidden_size], add a batch dimension
+            hidden_states = hidden_states.reshape(1, hidden_states.shape[0], hidden_states.shape[1])
+
+        # Take the hidden state of the last token
+        hs = hidden_states[:, -1:, :]  # shape: [1, 1, 2048]
+
+        # Run the head to get this step's logits
+        logits = (gen_head if mode == "t2i" else lm_head).run(None, {"hidden_states": hs})[0]
+        logits = logits[:, -1, :]  # shape: [1, vocab_size]
+
+        # Temperature sampling: rescale the logits and sample stochastically (greedy decoding must not be used here)
+        logits = logits / temperature
+        # Compute the softmax
+        exp_logits = np.exp(logits - np.max(logits, axis=-1, keepdims=True))[0]
+        probs = exp_logits / np.sum(exp_logits, axis=-1, keepdims=True)
+        # Multinomial sampling
+        probs = probs.astype(np.float64)
+        probs /= probs.sum()
+        next_token = int(np.random.multinomial(1, probs).argmax())
+        pbar.set_postfix(next_token=tokenizer.decode([next_token]))
+        generated_tokens.append(next_token)
+        if next_token == 100001:  # eos
+            break
+
+        # Convert the generated token into an embedding for the next step
+        if mode == "t2i":
+            new_embed = gen_img_embeds.run(None, {"image_ids": np.array([[next_token]], dtype=np.int64)})[0]
+        else:
+            new_embed = text_embeds.run(None, {"input_ids": np.array([[next_token]], dtype=np.int64)})[0]
+
+        # Feed only the new embedding; keep_history leaves the earlier context in the KV cache
+        # inputs_embeds = np.concatenate([inputs_embeds, new_embed], axis=1)
+        inputs_embeds = new_embed
+
+rkllm_runtime.clear_kv_cache(False)
+
+# 5. Decode to an image or to text
+if mode == "t2i":
+    # Stack the 576 generated image tokens and feed them to the VQVAE decoder
+    generated_tokens_array = np.array([generated_tokens], dtype=np.int64)  # shape: [1, 576]
+    decoded_image = image_decode.run(None, {"generated_tokens": generated_tokens_array})[0]  # output shape: [1, 3, 384, 384]
+    decoded_image = np.clip((decoded_image + 1) / 2 * 255, 0, 255).astype(np.uint8)
+    # Post-process: convert CHW to HWC and save as PNG with cv2
+    decoded_image = np.squeeze(decoded_image, axis=0)  # [3, 384, 384]
+    decoded_image = np.transpose(decoded_image, (1, 2, 0))  # [384, 384, 3]
+    cv2.imwrite("generated.png", cv2.cvtColor(decoded_image, cv2.COLOR_RGB2BGR))
+    print("(generated.png)")
+else:
+    decoded_text = tokenizer.decode(generated_tokens)
+    print(f"{decoded_text}")
+
+# Clean up
+print("Releasing RKLLM resources...")
+rkllm_runtime.destroy()
vision_encoder.rknn
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fad8869233fe5c212fd035a51dc3dc21b32b08822f8cf28a23426288eb278c9
+size 642104989