yongqiang committed on
Commit 1757cc5 · 1 Parent(s): 04f3e31
update model&script
This view is limited to 50 files because it contains too many changes. See raw diff.
- README.md +2 -2
- infer_axmodel.py +56 -141
- smolvlm2_axmodel/llama_p1024_l13_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l14_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l15_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l16_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l17_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l18_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l19_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l1_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l20_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l21_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l22_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l23_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l24_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l25_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l26_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l27_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l28_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l29_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l2_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l30_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l31_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l3_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l4_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l5_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l6_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l7_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l8_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l9_together.axmodel +0 -3
- smolvlm2_axmodel/{llama_p1024_l0_together.axmodel → llama_p128_l0_together.axmodel} +2 -2
- smolvlm2_axmodel/{llama_p1024_l10_together.axmodel → llama_p128_l10_together.axmodel} +2 -2
- smolvlm2_axmodel/{llama_p1024_l11_together.axmodel → llama_p128_l11_together.axmodel} +2 -2
- smolvlm2_axmodel/{llama_p1024_l12_together.axmodel → llama_p128_l12_together.axmodel} +2 -2
- smolvlm2_axmodel/llama_p128_l13_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l14_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l15_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l16_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l17_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l18_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l19_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l1_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l20_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l21_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l22_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l23_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l24_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l25_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l26_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l27_together.axmodel +3 -0
README.md
CHANGED
@@ -72,5 +72,5 @@ ai@ai-bj ~/yongqiang/SmolVLM2-500M-Video-Instruct $ python3 infer_axmodel.py
 
 input prompt: Can you describe this image?
 
-answer >> The image
-```
+answer >> The image depicts a close-up view of a pink flower with a bee on it. The bee, which appears to be a bumblebee, is perched on the flower's center, which is surrounded by a cluster of other flowers. The bee is in the process of collecting nectar from the flower, which is a common behavior for bees. The flower itself has a yellow center with a cluster of yellow stamens surrounding it. The petals of the flower are a vibrant shade of pink, and the bee is positioned very close to the camera, making it the focal point of the image. The background of the image is slightly blurred, but it appears to be a garden or a field with other flowers and plants, contributing to the overall natural setting of the image.
+```
infer_axmodel.py
CHANGED
@@ -9,16 +9,15 @@ from transformers import AutoConfig
 from typing import List, Tuple
 from axengine import InferenceSession
 from ml_dtypes import bfloat16
-
-
-
-
-
-# connector = torch.load("SmolVLMConnector.pkl", map_location=device, weights_only=False)
-encoder = ort.InferenceSession(f'./vit_mdoel/vision_model.onnx', providers=["CPUExecutionProvider"])
+from utils.infer_func import InferManager
+import argparse
+from PIL import Image
+from torchvision.transforms import Resize, ToTensor, Normalize, Compose
+from transformers.image_utils import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD
 
 
 def run_vision_model(
+    encoder,
     pixel_values,
     patch_attention_mask=None,
 ):
@@ -45,13 +44,15 @@
     elif not self._use_flash_attention_2:
         patch_attention_mask = _prepare_4d_attention_mask(patch_attention_mask, hidden_states.dtype)
 
+    # save the quantization calibration set for the vit-encoder
+    # np.save("../model_convert/vit_encoder_calibrations/hidden_states_5.npy", hidden_states.detach().cpu().to(dtype=torch.float32).numpy())
     encoder_outputs = encoder.run(None, {"input": hidden_states.detach().cpu().to(dtype=torch.float32).numpy()})[0]
     encoder_outputs = torch.from_numpy(encoder_outputs).to(device, dtype=hidden_states.dtype)
 
     return encoder_outputs
 
 
-def get_image_features(pixel_values: torch.FloatTensor, pixel_attention_mask: torch.LongTensor = None):
+def get_image_features(encoder, pixel_values: torch.FloatTensor, pixel_attention_mask: torch.LongTensor = None):
     """
     Encodes images into continuous embeddings that can be forwarded to the language model.
 
@@ -90,7 +91,7 @@ def get_image_features(pixel_values: torch.FloatTensor, pixel_attention_mask: to
     patch_attention_mask = (patches_subgrid.sum(dim=(-1, -2)) > 0).bool()
 
     # Get sequence from the vision encoder
-    image_hidden_states = run_vision_model(pixel_values, patch_attention_mask)
+    image_hidden_states = run_vision_model(encoder, pixel_values, patch_attention_mask)
 
     # Modality projection & resampling
     # image_hidden_states = connector(image_hidden_states)  # already fused into the onnx model
@@ -132,51 +133,59 @@ def inputs_merger(
     return merged_embeds
 
 
-def post_process(data, topk=1, topp=0.9, temperature=0.6):
-    def top_p(l: np.ndarray, p: float) -> np.ndarray:
-        index = np.argsort(l)
-        res = l.copy()
-        sum_p = 0
-        for i in index[::-1]:
-            if sum_p >= p:
-                res[i] = 0
-            sum_p += res[i]
-        return res / sum_p
-
-    def softmax(l: np.ndarray) -> np.ndarray:
-        l_max = l - l.max()
-        l_exp = np.exp(l_max)
-        res = l_exp / np.sum(l_exp)
-        return res.astype(np.float64)
-
-    r = data.astype(np.float32)
-    r = r.flatten()
-    candidate_index = np.argpartition(r, -topk)[-topk:]
-    candidate_value = r[candidate_index]
-    candidate_value /= temperature
-    candidate_soft = softmax(candidate_value)
-    candidate_soft = top_p(candidate_soft, topp)
-    candidate_soft = candidate_soft.astype(np.float64) / candidate_soft.sum()
-    pos = np.random.multinomial(1, candidate_soft).argmax()
-    next_token = candidate_index[pos]
-    return next_token, candidate_index, candidate_soft
-
-
 if __name__ == "__main__":
 
-
-
-
+    """
+    python3 infer_axmodel.py -i ../assets/panda.jpg --vit_model ./vit-models/vision_model.axmodel
+    """
+
+    prompt = None
+    parser = argparse.ArgumentParser(description="Model configuration parameters")
+    parser.add_argument("--hf_model", type=str, default="./SmolVLM2-500M-Video-Instruct/",
+                        help="Path to HuggingFace model")
+    parser.add_argument("--axmodel_path", type=str, default="./SmolVLM2-500M-Video-Instruct_axmodel/",
+                        help="Path to save compiled axmodel of llama model")
+    parser.add_argument("--vit_model", type=str, default='./vit-models/vision_model.axmodel',
+                        help="Path to save compiled axmodel of llama model")
+    parser.add_argument("-i", "--images", type=str, default="../assets/bee.jpg",
+                        help="Path to the test image.")
+    parser.add_argument("-q", "--question", type=str, default="Can you describe this image?",
+                        help="Your question that you want to ask the model.")
+    args = parser.parse_args()
+
+    hf_model_path = args.hf_model
+    axmodel_path = args.axmodel_path
+    images = args.images
+    prompt = args.question
+
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    embeddings = torch.load("./embeds/SmolVLMVisionEmbeddings.pkl", map_location=device, weights_only=False)
+    embeds = np.load(os.path.join(axmodel_path, "model.embed_tokens.weight.npy"))
+
+    encoder = InferenceSession(args.vit_model)
 
     processor = AutoProcessor.from_pretrained(hf_model_path)
     config = AutoConfig.from_pretrained(hf_model_path, trust_remote_code=True)
     tokenizer = processor.tokenizer
 
+    TARGET_IMAGE_SIZE = (512, 512)
+    image = Image.open(images).convert('RGB')
+
+    # fix the input image size: 512x512
+    preprocess = Compose([
+        Resize(TARGET_IMAGE_SIZE),
+        # ToTensor(),
+        # Normalize(mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD),
+    ])
+
+    preprocessed_image = preprocess(image)
+
     messages = [
         {
             "role": "user",
             "content": [
-                {"type": "image", "
+                {"type": "image", "image": preprocessed_image},  # a PIL Image object can be used here directly
+                # {"type": "image", "url": images},  # a url also works
                 {"type": "text", "text": prompt},
             ]
         },
@@ -201,7 +210,7 @@ if __name__ == "__main__":
     """
    miniforge-pypy3/envs/lerobot/lib/python3.10/site-packages/transformers/models/smolvlm/modeling_smolvlm.py(681)get_image_features()
     """
-    image_hidden_states = get_image_features(pixel_values, pixel_attention_mask)
+    image_hidden_states = get_image_features(encoder, pixel_values, pixel_attention_mask)
 
     inputs_embeds = inputs_merger(
         input_ids=input_ids,
@@ -213,104 +222,10 @@ if __name__ == "__main__":
     prefill_data = prefill_data.astype(bfloat16)
     token_ids = input_ids[0].cpu().numpy().tolist()
     token_len = len(token_ids)
-
-    lastN = 2048
     cfg = config.text_config
 
-
-    k_caches = [
-        np.zeros((1, lastN, kv_dim), dtype=bfloat16)
-        for _ in range(cfg.num_hidden_layers)
-    ]
-    v_caches = [
-        np.zeros((1, lastN, kv_dim), dtype=bfloat16)
-        for _ in range(cfg.num_hidden_layers)
-    ]
-
-    prefill_decoder_sessins = []
-    for i in tqdm(range(cfg.num_hidden_layers), desc="Init InferenceSession"):
-        session = InferenceSession(
-            f"{axmodel_path}/llama_p1024_l{i}_together.axmodel"
-        )
-        prefill_decoder_sessins.append(session)
-    post_process_session = InferenceSession(
-        f"{axmodel_path}/llama_post.axmodel"
-    )
-    print("model load done!")
 
-
-
-    """
-    prefill_len = 1024
-
-    if prefill_len > 0:
-        indices = np.array(list(range(prefill_len)), np.uint32).reshape(
-            (1, prefill_len)
-        )
-        indices[:, token_len:] = 0
-        mask = np.zeros((1, prefill_len, prefill_len)) - 65536
-        data = np.zeros((1, prefill_len, cfg.hidden_size)).astype(bfloat16)
-        data[:, 0:token_len] = prefill_data
-        for i, t in enumerate(token_ids):
-            mask[:, i, : i + 1] = 0
-        mask = mask.astype(bfloat16)
-        for i in range(cfg.num_hidden_layers):
-            input_feed = {
-                "K_cache": np.zeros((1, 1, cfg.hidden_size), dtype=bfloat16),
-                "V_cache": np.zeros((1, 1, cfg.hidden_size), dtype=bfloat16),
-                "indices": indices,
-                "input": data,
-                "mask": mask,
-            }
-            outputs = prefill_decoder_sessins[i].run(None, input_feed, shape_group=1)
-            k_caches[i][:, :token_len, :] = outputs[0][:, :token_len, :]
-            v_caches[i][:, :token_len, :] = outputs[1][:, :token_len, :]
-            data[:, :token_len] = outputs[2][:, :token_len, :]
-
-        post_out = post_process_session.run(None, {"input": data[:, token_len - 1, :][None, ...]})[0]
-        next_token, posssible_tokens, possible_soft = post_process(post_out, topk=1)
-        posibles = [tokenizer.decode([t]) for t in posssible_tokens]
-        posible_soft = [str((t, s)) for t, s in zip(posibles, possible_soft)]
-        token_ids.append(next_token)
-        # print("prefill done!")
-        print(f"input prompt: {prompt}\n")
-        print("answer >>", tokenizer.decode(token_ids[token_len], skip_special_tokens=True), end='', flush=True)
-
-    """
-    decode
-    """
-    mask = np.zeros((1, 1, lastN + 1), dtype=np.float32).astype(bfloat16)
-    mask[:, :, :lastN] -= 65536
-    mask[:, :, :token_len] = 0
-    for start_indice in range(lastN + 1):
-        if prefill_len > 0 and start_indice < token_len:
-            continue
-        next_token = token_ids[start_indice]
-        indices = np.array([start_indice], np.uint32).reshape((1, 1))
-        data = embeds[next_token, :].reshape((1, 1, cfg.hidden_size)).astype(bfloat16)
-
-        for i in range(cfg.num_hidden_layers):
-            input_feed = {
-                "K_cache": k_caches[i],
-                "V_cache": v_caches[i],
-                "indices": indices,
-                "input": data,
-                "mask": mask,
-            }
-            outputs = prefill_decoder_sessins[i].run(None, input_feed, shape_group=0)
-            k_caches[i][:, start_indice, :] = outputs[0][:, :, :]
-            v_caches[i][:, start_indice, :] = outputs[1][:, :, :]
-            data = outputs[2]
-
-        mask[..., start_indice] = 0
-        if start_indice < token_len - 1:
-            pass
-        else:
-            post_out = post_process_session.run(None, {"input": data})[0]
-            next_token, posssible_tokens, possible_soft = post_process(post_out)
-            token_ids.append(next_token)
-            print(tokenizer.decode(next_token, skip_special_tokens=True), end='', flush=True)
-
-        if next_token == tokenizer.eos_token_id:
-            break
+    imer = InferManager(cfg, axmodel_path)
+
+    token_ids = imer.prefill(tokenizer, token_ids, prefill_data[0], slice_len=128)
+    imer.decode(tokenizer, token_ids, embeds, slice_len=128)
     print("\n")
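The script change above removes the inline 1024-token prefill and the per-token decode loop in favor of utils.infer_func.InferManager, while the model files below are renamed from llama_p1024_* to llama_p128_*: the per-layer prefill graphs are now compiled for 128-token windows and driven with slice_len=128. InferManager itself is not part of this diff, so the following is only a minimal sketch of how a sliced prefill along the lines of the deleted inline loop could work; the function name sliced_prefill, the mask layout, and the cache shapes are assumptions reconstructed from the deleted code (its K_cache/V_cache/indices/input/mask feeds and shape_group switch), not the repo's actual API.

```
import numpy as np
from ml_dtypes import bfloat16

def sliced_prefill(sessions, prompt_embeds, token_len, hidden_size, kv_dim,
                   max_len=2048, slice_len=128):
    """Prefill the prompt in fixed 128-token windows (hypothetical sketch)."""
    n_layers = len(sessions)
    k_caches = [np.zeros((1, max_len, kv_dim), dtype=bfloat16) for _ in range(n_layers)]
    v_caches = [np.zeros((1, max_len, kv_dim), dtype=bfloat16) for _ in range(n_layers)]

    # Pad the prompt embeddings up to a whole number of slices.
    n_slices = -(-token_len // slice_len)  # ceil division
    data = np.zeros((1, n_slices * slice_len, hidden_size), dtype=bfloat16)
    data[:, :token_len] = prompt_embeds[:token_len]

    for lo in range(0, n_slices * slice_len, slice_len):
        window = data[:, lo:lo + slice_len]
        indices = np.arange(lo, lo + slice_len, dtype=np.uint32).reshape(1, slice_len)
        indices[indices >= token_len] = 0  # park padding rows at position 0, as the old code did

        # Mask layout mirrors the old decode mask: cached positions first,
        # then the current window; -65536 masks a position out.
        mask = np.full((1, slice_len, max_len + slice_len), -65536, dtype=np.float32)
        mask[:, :, :lo] = 0                        # attend to the already-cached prefix
        for j in range(slice_len):                 # causal attention within the window
            mask[:, j, max_len:max_len + j + 1] = 0
        mask = mask.astype(bfloat16)

        for i in range(n_layers):
            outputs = sessions[i].run(None, {
                "K_cache": k_caches[i],
                "V_cache": v_caches[i],
                "indices": indices,
                "input": window,
                "mask": mask,
            }, shape_group=1)
            k_caches[i][:, lo:lo + slice_len] = outputs[0]
            v_caches[i][:, lo:lo + slice_len] = outputs[1]
            window = outputs[2]
        data[:, lo:lo + slice_len] = window

    return data[:, :token_len], k_caches, v_caches
```

Under a scheme like this, a 300-token prompt costs three 128-token passes instead of one fixed 1024-token pass, so short prompts no longer pay for the full 1024-token graph; this would be consistent with the prefill(..., slice_len=128) call and the p128 graph names in the file renames below.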
smolvlm2_axmodel/llama_p1024_l13_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9420f15bb5b591f258212242bc5fa5566ba45f4d697d0599999114961152d1fd
-size 12002005
smolvlm2_axmodel/llama_p1024_l14_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:397511107011f700388029e604c2f5ec6d092f9cb6e09ab890a198932173193c
-size 12002005
smolvlm2_axmodel/llama_p1024_l15_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:689d9286ad7cf81345352f85bfbb8387934fe7ccb76d3f56563ded5f1d7cdb7b
-size 12002005
smolvlm2_axmodel/llama_p1024_l16_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b91fecc232c92c9faa5fca4ca1bff0802abc8351457f9b34ef55327ccdcbc85a
-size 12002005
smolvlm2_axmodel/llama_p1024_l17_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9404c81f4a02fe332ae1f4ed5361d2f68eea66a9550233cc4c1d4455afc95797
-size 12002005
smolvlm2_axmodel/llama_p1024_l18_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ffa8d959498bd479d2bbb2c42e883a21bb173fbcb73f5d1bbdebe6c8365e8e21
-size 12002005
smolvlm2_axmodel/llama_p1024_l19_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:66265cbf7cd8571f949c23ca6a5918f8c95fb3413e4349cb9c9f3ac18231ca21
-size 12002005
smolvlm2_axmodel/llama_p1024_l1_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9addcae5bad93adaf9f8df49d4cbfa82024be2d2e0b2e815537121a7417ecb88
-size 12002005
smolvlm2_axmodel/llama_p1024_l20_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:69430a836a9eb0d46242419a999e761d61a0c4cc4d17eafbe373641551ac0a8b
-size 12002005
smolvlm2_axmodel/llama_p1024_l21_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6a19009fd1a1d28c9414cb9421af4c66473088a0b3caea9157bde6aac071e1ce
-size 12002005
smolvlm2_axmodel/llama_p1024_l22_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ec30ac9fd2a52f281b76a037d0aa146b8144277aed3408a6c281e5a7df8ba62a
-size 12002005
smolvlm2_axmodel/llama_p1024_l23_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1093d36fa84d6248b1a4728d8ae2aadb1143894eaf3d960e12fd3753d3ab4da2
-size 12002005
smolvlm2_axmodel/llama_p1024_l24_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ff63d4efb6dd75433205ce87e4d69d7850dad86555b2919864f04c5df3a8a844
-size 12002005
smolvlm2_axmodel/llama_p1024_l25_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:83d8b772f3aef6356234912a371baebcb6c0897faf3d524091b7ea2fc56f77bc
-size 12002005
smolvlm2_axmodel/llama_p1024_l26_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:033f9deb6fe2288347d1af507d7a31deb0633614dfb0efe9a3a9c962afbe44eb
-size 12002005
smolvlm2_axmodel/llama_p1024_l27_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c0c8c035eb371dd31d53844534c4d321efc933e1097ad3e9d87afd52dba74214
-size 12002005
smolvlm2_axmodel/llama_p1024_l28_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8d33cae03279cab06a856cfacc3e84414c615082a4a358bd09c4a5996c17c575
-size 12002005
smolvlm2_axmodel/llama_p1024_l29_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:84583f5ef60b629b34d47c7deeb3200c096d6d6bf3de3f6bec4da6ae005b5a1e
-size 12002005
smolvlm2_axmodel/llama_p1024_l2_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4514475633a7317118fe4486200bbed73929bd4210c6da4041591797ad93fb3a
-size 12002005
smolvlm2_axmodel/llama_p1024_l30_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:39e1612aac9b1604146b61b4fc37eaada2299f62078260689bf03812c256c75b
-size 12002005
smolvlm2_axmodel/llama_p1024_l31_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5f2f54bcb7d01ea69a3177b72d49e3bdab2d0e0403e86085903389cc6839b5fd
-size 12002005
smolvlm2_axmodel/llama_p1024_l3_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a991d67e4c1dc4bf58689ce4a58362f6bcc73a87257bcb2982774a0b056ca720
-size 12002005
smolvlm2_axmodel/llama_p1024_l4_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9a43e6886989c31dfffeae70177fc9464322bded5bb69515e31aaade31b431b5
-size 12002005
smolvlm2_axmodel/llama_p1024_l5_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ed59bef655c1eae8eb7af4566ef21fd874cfac72b67bbfd1a7279e1a1cffd2c8
-size 12002005
smolvlm2_axmodel/llama_p1024_l6_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:848640700c17925475ef9f9edeaa0fccf235e90a5ad159430682ac389910d86b
-size 12002005
smolvlm2_axmodel/llama_p1024_l7_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:46e4bce8f94d80d12e3b1a5ceae7ba62cbaa06f0ddf11f13999b1936a98bc0a1
-size 12002005
smolvlm2_axmodel/llama_p1024_l8_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b3ba57d8f2cd4d932445600d161a04b0a1160f452425c5abd08f94bece56f23f
-size 12002005
smolvlm2_axmodel/llama_p1024_l9_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0464cccfdfb0566069bad977d98f70b9e15e8e0b642a6e01ca2b16b5f7eb170a
-size 12002005
smolvlm2_axmodel/{llama_p1024_l0_together.axmodel → llama_p128_l0_together.axmodel}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:fe798dad363285aa06db28f00cabf919db772d17d7bb842a48f5b76c4bb31f17
+size 14502053
smolvlm2_axmodel/{llama_p1024_l10_together.axmodel → llama_p128_l10_together.axmodel}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:730853a20a5ff783ccc8b97568ebd7bb4320922bd2e28383005ebca8389d40df
+size 14502053
smolvlm2_axmodel/{llama_p1024_l11_together.axmodel → llama_p128_l11_together.axmodel}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:a9114384bfed1547e72099ed94e5f0d509170ac7872c8727e00b4d9e0a9c26a6
+size 14502053
smolvlm2_axmodel/{llama_p1024_l12_together.axmodel → llama_p128_l12_together.axmodel}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:fd4b6c59aac2909279181165c81cfd6aaa7e9765b2eb5d7eab6f28b15b638c47
+size 14502053
smolvlm2_axmodel/llama_p128_l13_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f21a64d4d6c47ee8c3e9784caa82037b00e380846809dda9d5f45463d6c9e259
+size 14502053
smolvlm2_axmodel/llama_p128_l14_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d762756ab6cf454f60238687bdb49e48f74de405a190ce3f8baea2d63fd77e15
+size 14502053
smolvlm2_axmodel/llama_p128_l15_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:137147c5d1a536e31ccbd01814a4e058c3a700f88bc73fab2417724c047d1c8a
+size 14502053
smolvlm2_axmodel/llama_p128_l16_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6cff0f01c402c967927451782f6483830a35b9d5f247ec002c0531080f58a583
+size 14502053
smolvlm2_axmodel/llama_p128_l17_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7544a41af808812decbf454b484adc5d01317b7044616a3a6b921f81d2a07904
+size 14502053
smolvlm2_axmodel/llama_p128_l18_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94a891f3a8427520be964d31f2e1323b63f9d0a942cb015f2b1712339feedee9
+size 14502053
smolvlm2_axmodel/llama_p128_l19_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c7f15489ea06f05b5a04a7437cb3da35bbd85a4dc92b41e235f5287c182cbbc
+size 14502053
smolvlm2_axmodel/llama_p128_l1_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7548a3c40a85f10022123f21654e88534fc4041cc36f7e12f15812675d2d693
+size 14502053
smolvlm2_axmodel/llama_p128_l20_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72c500fdf67380a5d0e9bb81098b48cb172f8ee178ba5256c951a2334f079302
+size 14502053
smolvlm2_axmodel/llama_p128_l21_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:960d618d2bf5b4d5cb78a782da41c0e3ceacf6e50684f7fdf1ccc3492c4b5044
+size 14502053
smolvlm2_axmodel/llama_p128_l22_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87f52c18d8599e280e3479fb5d41f8a5efa9aeb44967b8a45e301af1f7dfc4bc
+size 14502053
smolvlm2_axmodel/llama_p128_l23_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95c57d34b7fcba5be2f3be9402fcf2f0819ac9c711033a8a975e84e80d8112d6
+size 14502053
smolvlm2_axmodel/llama_p128_l24_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fb89bdbd126e5b6b053eb2a8c0e253eb05ef46c49cfe612a9f7926c168e1b37
+size 14502053
smolvlm2_axmodel/llama_p128_l25_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7278b00282db9f988b095a42edb131b2c364f831fa90edda0a82457a2c519729
+size 14502053
smolvlm2_axmodel/llama_p128_l26_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5074683de5b5141e400af1648bb7d3b4e2f7d090643883457eecfac2c58030f
+size 14502053
smolvlm2_axmodel/llama_p128_l27_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06bf197bfe241e75bb24136dcb77590bbf427d0b8c90d0f70149dde4dfba5297
+size 14502053