yongqiang committed on
Commit 1757cc5 · 1 Parent(s): 04f3e31
update model&script
This view is limited to 50 files because it contains too many changes. See raw diff.
- README.md +2 -2
- infer_axmodel.py +56 -141
- smolvlm2_axmodel/llama_p1024_l13_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l14_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l15_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l16_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l17_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l18_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l19_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l1_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l20_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l21_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l22_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l23_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l24_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l25_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l26_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l27_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l28_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l29_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l2_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l30_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l31_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l3_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l4_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l5_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l6_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l7_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l8_together.axmodel +0 -3
- smolvlm2_axmodel/llama_p1024_l9_together.axmodel +0 -3
- smolvlm2_axmodel/{llama_p1024_l0_together.axmodel → llama_p128_l0_together.axmodel} +2 -2
- smolvlm2_axmodel/{llama_p1024_l10_together.axmodel → llama_p128_l10_together.axmodel} +2 -2
- smolvlm2_axmodel/{llama_p1024_l11_together.axmodel → llama_p128_l11_together.axmodel} +2 -2
- smolvlm2_axmodel/{llama_p1024_l12_together.axmodel → llama_p128_l12_together.axmodel} +2 -2
- smolvlm2_axmodel/llama_p128_l13_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l14_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l15_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l16_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l17_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l18_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l19_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l1_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l20_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l21_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l22_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l23_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l24_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l25_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l26_together.axmodel +3 -0
- smolvlm2_axmodel/llama_p128_l27_together.axmodel +3 -0
README.md
CHANGED
@@ -72,5 +72,5 @@ ai@ai-bj ~/yongqiang/SmolVLM2-500M-Video-Instruct $ python3 infer_axmodel.py
 
 input prompt: Can you describe this image?
 
-answer >> The image
-```
+answer >> The image depicts a close-up view of a pink flower with a bee on it. The bee, which appears to be a bumblebee, is perched on the flower's center, which is surrounded by a cluster of other flowers. The bee is in the process of collecting nectar from the flower, which is a common behavior for bees. The flower itself has a yellow center with a cluster of yellow stamens surrounding it. The petals of the flower are a vibrant shade of pink, and the bee is positioned very close to the camera, making it the focal point of the image. The background of the image is slightly blurred, but it appears to be a garden or a field with other flowers and plants, contributing to the overall natural setting of the image.
+```
infer_axmodel.py
CHANGED
@@ -9,16 +9,15 @@ from transformers import AutoConfig
 from typing import List, Tuple
 from axengine import InferenceSession
 from ml_dtypes import bfloat16
-
-
-
-
-
-# connector = torch.load("SmolVLMConnector.pkl", map_location=device, weights_only=False)
-encoder = ort.InferenceSession(f'./vit_mdoel/vision_model.onnx', providers=["CPUExecutionProvider"])
+from utils.infer_func import InferManager
+import argparse
+from PIL import Image
+from torchvision.transforms import Resize, ToTensor, Normalize, Compose
+from transformers.image_utils import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD
 
 
 def run_vision_model(
+    encoder,
     pixel_values,
     patch_attention_mask=None,
 ):
@@ -45,13 +44,15 @@
     elif not self._use_flash_attention_2:
         patch_attention_mask = _prepare_4d_attention_mask(patch_attention_mask, hidden_states.dtype)
 
+    # save the quantization calibration set for the vit-encoder
+    # np.save("../model_convert/vit_encoder_calibrations/hidden_states_5.npy", hidden_states.detach().cpu().to(dtype=torch.float32).numpy())
     encoder_outputs = encoder.run(None, {"input": hidden_states.detach().cpu().to(dtype=torch.float32).numpy()})[0]
     encoder_outputs = torch.from_numpy(encoder_outputs).to(device, dtype=hidden_states.dtype)
 
     return encoder_outputs
 
 
-def get_image_features(pixel_values: torch.FloatTensor, pixel_attention_mask: torch.LongTensor = None):
+def get_image_features(encoder, pixel_values: torch.FloatTensor, pixel_attention_mask: torch.LongTensor = None):
     """
     Encodes images into continuous embeddings that can be forwarded to the language model.
 
@@ -90,7 +91,7 @@ def get_image_features(pixel_values: torch.FloatTensor, pixel_attention_mask: to
     patch_attention_mask = (patches_subgrid.sum(dim=(-1, -2)) > 0).bool()
 
     # Get sequence from the vision encoder
-    image_hidden_states = run_vision_model(pixel_values, patch_attention_mask)
+    image_hidden_states = run_vision_model(encoder, pixel_values, patch_attention_mask)
 
     # Modality projection & resampling
     # image_hidden_states = connector(image_hidden_states)  # already fused into the onnx model
@@ -132,51 +133,59 @@ def inputs_merger(
     return merged_embeds
 
 
-def post_process(data, topk=1, topp=0.9, temperature=0.6):
-    def top_p(l: np.ndarray, p: float) -> np.ndarray:
-        index = np.argsort(l)
-        res = l.copy()
-        sum_p = 0
-        for i in index[::-1]:
-            if sum_p >= p:
-                res[i] = 0
-            sum_p += res[i]
-        return res / sum_p
-
-    def softmax(l: np.ndarray) -> np.ndarray:
-        l_max = l - l.max()
-        l_exp = np.exp(l_max)
-        res = l_exp / np.sum(l_exp)
-        return res.astype(np.float64)
-
-    r = data.astype(np.float32)
-    r = r.flatten()
-    candidate_index = np.argpartition(r, -topk)[-topk:]
-    candidate_value = r[candidate_index]
-    candidate_value /= temperature
-    candidate_soft = softmax(candidate_value)
-    candidate_soft = top_p(candidate_soft, topp)
-    candidate_soft = candidate_soft.astype(np.float64) / candidate_soft.sum()
-    pos = np.random.multinomial(1, candidate_soft).argmax()
-    next_token = candidate_index[pos]
-    return next_token, candidate_index, candidate_soft
-
-
 if __name__ == "__main__":
 
-
-
-
+    """
+    python3 infer_axmodel.py -i ../assets/panda.jpg --vit_model ./vit-models/vision_model.axmodel
+    """
+
+    prompt = None
+    parser = argparse.ArgumentParser(description="Model configuration parameters")
+    parser.add_argument("--hf_model", type=str, default="./SmolVLM2-500M-Video-Instruct/",
+                        help="Path to HuggingFace model")
+    parser.add_argument("--axmodel_path", type=str, default="./SmolVLM2-500M-Video-Instruct_axmodel/",
+                        help="Path to save compiled axmodel of llama model")
+    parser.add_argument("--vit_model", type=str, default='./vit-models/vision_model.axmodel',
+                        help="Path to save compiled axmodel of llama model")
+    parser.add_argument("-i", "--images", type=str, default="../assets/bee.jpg",
+                        help="Path to the test image.")
+    parser.add_argument("-q", "--question", type=str, default="Can you describe this image?",
+                        help="Your question that you want to ask the model.")
+    args = parser.parse_args()
+
+    hf_model_path = args.hf_model
+    axmodel_path = args.axmodel_path
+    images = args.images
+    prompt = args.question
+
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    embeddings = torch.load("./embeds/SmolVLMVisionEmbeddings.pkl", map_location=device, weights_only=False)
+    embeds = np.load(os.path.join(axmodel_path, "model.embed_tokens.weight.npy"))
+
+    encoder = InferenceSession(args.vit_model)
 
     processor = AutoProcessor.from_pretrained(hf_model_path)
     config = AutoConfig.from_pretrained(hf_model_path, trust_remote_code=True)
     tokenizer = processor.tokenizer
 
+    TARGET_IMAGE_SIZE = (512, 512)
+    image = Image.open(images).convert('RGB')
+
+    # fix the input image size: 512x512
+    preprocess = Compose([
+        Resize(TARGET_IMAGE_SIZE),
+        # ToTensor(),
+        # Normalize(mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD),
+    ])
+
+    preprocessed_image = preprocess(image)
+
     messages = [
         {
             "role": "user",
             "content": [
-                {"type": "image", "
+                {"type": "image", "image": preprocessed_image},  # a PIL Image object can be used here directly
+                # {"type": "image", "url": images},  # a url also works
                 {"type": "text", "text": prompt},
             ]
         },
@@ -201,7 +210,7 @@ if __name__ == "__main__":
     """
    miniforge-pypy3/envs/lerobot/lib/python3.10/site-packages/transformers/models/smolvlm/modeling_smolvlm.py(681)get_image_features()
     """
-    image_hidden_states = get_image_features(pixel_values, pixel_attention_mask)
+    image_hidden_states = get_image_features(encoder, pixel_values, pixel_attention_mask)
 
     inputs_embeds = inputs_merger(
         input_ids=input_ids,
@@ -213,104 +222,10 @@ if __name__ == "__main__":
     prefill_data = prefill_data.astype(bfloat16)
     token_ids = input_ids[0].cpu().numpy().tolist()
     token_len = len(token_ids)
-
-    lastN = 2048
     cfg = config.text_config
 
-
-    k_caches = [
-        np.zeros((1, lastN, kv_dim), dtype=bfloat16)
-        for _ in range(cfg.num_hidden_layers)
-    ]
-    v_caches = [
-        np.zeros((1, lastN, kv_dim), dtype=bfloat16)
-        for _ in range(cfg.num_hidden_layers)
-    ]
-
-    prefill_decoder_sessins = []
-    for i in tqdm(range(cfg.num_hidden_layers), desc="Init InferenceSession"):
-        session = InferenceSession(
-            f"{axmodel_path}/llama_p1024_l{i}_together.axmodel"
-        )
-        prefill_decoder_sessins.append(session)
-    post_process_session = InferenceSession(
-        f"{axmodel_path}/llama_post.axmodel"
-    )
-    print("model load done!")
 
-
-
-    """
-    prefill_len = 1024
-
-    if prefill_len > 0:
-        indices = np.array(list(range(prefill_len)), np.uint32).reshape(
-            (1, prefill_len)
-        )
-        indices[:, token_len:] = 0
-        mask = np.zeros((1, prefill_len, prefill_len)) - 65536
-        data = np.zeros((1, prefill_len, cfg.hidden_size)).astype(bfloat16)
-        data[:, 0:token_len] = prefill_data
-        for i, t in enumerate(token_ids):
-            mask[:, i, : i + 1] = 0
-        mask = mask.astype(bfloat16)
-        for i in range(cfg.num_hidden_layers):
-            input_feed = {
-                "K_cache": np.zeros((1, 1, cfg.hidden_size), dtype=bfloat16),
-                "V_cache": np.zeros((1, 1, cfg.hidden_size), dtype=bfloat16),
-                "indices": indices,
-                "input": data,
-                "mask": mask,
-            }
-            outputs = prefill_decoder_sessins[i].run(None, input_feed, shape_group=1)
-            k_caches[i][:, :token_len, :] = outputs[0][:, :token_len, :]
-            v_caches[i][:, :token_len, :] = outputs[1][:, :token_len, :]
-            data[:, :token_len] = outputs[2][:, :token_len, :]
-
-        post_out = post_process_session.run(None, {"input": data[:, token_len - 1, :][None, ...]})[0]
-        next_token, posssible_tokens, possible_soft = post_process(post_out, topk=1)
-        posibles = [tokenizer.decode([t]) for t in posssible_tokens]
-        posible_soft = [str((t, s)) for t, s in zip(posibles, possible_soft)]
-        token_ids.append(next_token)
-        # print("prefill done!")
-        print(f"input prompt: {prompt}\n")
-        print("answer >>", tokenizer.decode(token_ids[token_len], skip_special_tokens=True), end='', flush=True)
-
-    """
-    decode
-    """
-    mask = np.zeros((1, 1, lastN + 1), dtype=np.float32).astype(bfloat16)
-    mask[:, :, :lastN] -= 65536
-    mask[:, :, :token_len] = 0
-    for start_indice in range(lastN + 1):
-        if prefill_len > 0 and start_indice < token_len:
-            continue
-        next_token = token_ids[start_indice]
-        indices = np.array([start_indice], np.uint32).reshape((1, 1))
-        data = embeds[next_token, :].reshape((1, 1, cfg.hidden_size)).astype(bfloat16)
-
-        for i in range(cfg.num_hidden_layers):
-            input_feed = {
-                "K_cache": k_caches[i],
-                "V_cache": v_caches[i],
-                "indices": indices,
-                "input": data,
-                "mask": mask,
-            }
-            outputs = prefill_decoder_sessins[i].run(None, input_feed, shape_group=0)
-            k_caches[i][:, start_indice, :] = outputs[0][:, :, :]
-            v_caches[i][:, start_indice, :] = outputs[1][:, :, :]
-            data = outputs[2]
-
-        mask[..., start_indice] = 0
-        if start_indice < token_len - 1:
-            pass
-        else:
-            post_out = post_process_session.run(None, {"input": data})[0]
-            next_token, posssible_tokens, possible_soft = post_process(post_out)
-            token_ids.append(next_token)
-            print(tokenizer.decode(next_token, skip_special_tokens=True), end='', flush=True)
-
-        if next_token == tokenizer.eos_token_id:
-            break
+    imer = InferManager(cfg, axmodel_path)
+
+    token_ids = imer.prefill(tokenizer, token_ids, prefill_data[0], slice_len=128)
+    imer.decode(tokenizer, token_ids, embeds, slice_len=128)
     print("\n")
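The script change above removes the inline 1024-token prefill and the per-token decode loop in favor of utils.infer_func.InferManager, while the model files below are renamed from llama_p1024_* to llama_p128_*: the per-layer prefill graphs are now compiled for 128-token windows and driven with slice_len=128. InferManager itself is not part of this diff, so the following is only a minimal sketch of how a sliced prefill along the lines of the deleted inline loop could work; the function name sliced_prefill, the mask layout, and the cache shapes are assumptions reconstructed from the deleted code (its K_cache/V_cache/indices/input/mask feeds and shape_group switch), not the repo's actual API.

```
import numpy as np
from ml_dtypes import bfloat16

def sliced_prefill(sessions, prompt_embeds, token_len, hidden_size, kv_dim,
                   max_len=2048, slice_len=128):
    """Prefill the prompt in fixed 128-token windows (hypothetical sketch)."""
    n_layers = len(sessions)
    k_caches = [np.zeros((1, max_len, kv_dim), dtype=bfloat16) for _ in range(n_layers)]
    v_caches = [np.zeros((1, max_len, kv_dim), dtype=bfloat16) for _ in range(n_layers)]

    # Pad the prompt embeddings up to a whole number of slices.
    n_slices = -(-token_len // slice_len)  # ceil division
    data = np.zeros((1, n_slices * slice_len, hidden_size), dtype=bfloat16)
    data[:, :token_len] = prompt_embeds[:token_len]

    for lo in range(0, n_slices * slice_len, slice_len):
        window = data[:, lo:lo + slice_len]
        indices = np.arange(lo, lo + slice_len, dtype=np.uint32).reshape(1, slice_len)
        indices[indices >= token_len] = 0  # park padding rows at position 0, as the old code did

        # Mask layout mirrors the old decode mask: cached positions first,
        # then the current window; -65536 masks a position out.
        mask = np.full((1, slice_len, max_len + slice_len), -65536, dtype=np.float32)
        mask[:, :, :lo] = 0                        # attend to the already-cached prefix
        for j in range(slice_len):                 # causal attention within the window
            mask[:, j, max_len:max_len + j + 1] = 0
        mask = mask.astype(bfloat16)

        for i in range(n_layers):
            outputs = sessions[i].run(None, {
                "K_cache": k_caches[i],
                "V_cache": v_caches[i],
                "indices": indices,
                "input": window,
                "mask": mask,
            }, shape_group=1)
            k_caches[i][:, lo:lo + slice_len] = outputs[0]
            v_caches[i][:, lo:lo + slice_len] = outputs[1]
            window = outputs[2]
        data[:, lo:lo + slice_len] = window

    return data[:, :token_len], k_caches, v_caches
```

Under a scheme like this, a 300-token prompt costs three 128-token passes instead of one fixed 1024-token pass, so short prompts no longer pay for the full 1024-token graph; this would be consistent with the prefill(..., slice_len=128) call and the p128 graph names in the file renames below.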
smolvlm2_axmodel/llama_p1024_l13_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9420f15bb5b591f258212242bc5fa5566ba45f4d697d0599999114961152d1fd
-size 12002005
smolvlm2_axmodel/llama_p1024_l14_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:397511107011f700388029e604c2f5ec6d092f9cb6e09ab890a198932173193c
-size 12002005
smolvlm2_axmodel/llama_p1024_l15_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:689d9286ad7cf81345352f85bfbb8387934fe7ccb76d3f56563ded5f1d7cdb7b
-size 12002005
smolvlm2_axmodel/llama_p1024_l16_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b91fecc232c92c9faa5fca4ca1bff0802abc8351457f9b34ef55327ccdcbc85a
-size 12002005
smolvlm2_axmodel/llama_p1024_l17_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9404c81f4a02fe332ae1f4ed5361d2f68eea66a9550233cc4c1d4455afc95797
-size 12002005
smolvlm2_axmodel/llama_p1024_l18_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ffa8d959498bd479d2bbb2c42e883a21bb173fbcb73f5d1bbdebe6c8365e8e21
-size 12002005
smolvlm2_axmodel/llama_p1024_l19_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:66265cbf7cd8571f949c23ca6a5918f8c95fb3413e4349cb9c9f3ac18231ca21
-size 12002005
smolvlm2_axmodel/llama_p1024_l1_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9addcae5bad93adaf9f8df49d4cbfa82024be2d2e0b2e815537121a7417ecb88
-size 12002005
smolvlm2_axmodel/llama_p1024_l20_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:69430a836a9eb0d46242419a999e761d61a0c4cc4d17eafbe373641551ac0a8b
-size 12002005
smolvlm2_axmodel/llama_p1024_l21_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6a19009fd1a1d28c9414cb9421af4c66473088a0b3caea9157bde6aac071e1ce
-size 12002005
smolvlm2_axmodel/llama_p1024_l22_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ec30ac9fd2a52f281b76a037d0aa146b8144277aed3408a6c281e5a7df8ba62a
-size 12002005
smolvlm2_axmodel/llama_p1024_l23_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1093d36fa84d6248b1a4728d8ae2aadb1143894eaf3d960e12fd3753d3ab4da2
-size 12002005
smolvlm2_axmodel/llama_p1024_l24_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ff63d4efb6dd75433205ce87e4d69d7850dad86555b2919864f04c5df3a8a844
-size 12002005
smolvlm2_axmodel/llama_p1024_l25_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:83d8b772f3aef6356234912a371baebcb6c0897faf3d524091b7ea2fc56f77bc
-size 12002005
smolvlm2_axmodel/llama_p1024_l26_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:033f9deb6fe2288347d1af507d7a31deb0633614dfb0efe9a3a9c962afbe44eb
-size 12002005
smolvlm2_axmodel/llama_p1024_l27_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c0c8c035eb371dd31d53844534c4d321efc933e1097ad3e9d87afd52dba74214
-size 12002005
smolvlm2_axmodel/llama_p1024_l28_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8d33cae03279cab06a856cfacc3e84414c615082a4a358bd09c4a5996c17c575
-size 12002005
smolvlm2_axmodel/llama_p1024_l29_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:84583f5ef60b629b34d47c7deeb3200c096d6d6bf3de3f6bec4da6ae005b5a1e
-size 12002005
smolvlm2_axmodel/llama_p1024_l2_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4514475633a7317118fe4486200bbed73929bd4210c6da4041591797ad93fb3a
-size 12002005
smolvlm2_axmodel/llama_p1024_l30_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:39e1612aac9b1604146b61b4fc37eaada2299f62078260689bf03812c256c75b
-size 12002005
smolvlm2_axmodel/llama_p1024_l31_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5f2f54bcb7d01ea69a3177b72d49e3bdab2d0e0403e86085903389cc6839b5fd
-size 12002005
smolvlm2_axmodel/llama_p1024_l3_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a991d67e4c1dc4bf58689ce4a58362f6bcc73a87257bcb2982774a0b056ca720
-size 12002005
smolvlm2_axmodel/llama_p1024_l4_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9a43e6886989c31dfffeae70177fc9464322bded5bb69515e31aaade31b431b5
-size 12002005
smolvlm2_axmodel/llama_p1024_l5_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ed59bef655c1eae8eb7af4566ef21fd874cfac72b67bbfd1a7279e1a1cffd2c8
-size 12002005
smolvlm2_axmodel/llama_p1024_l6_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:848640700c17925475ef9f9edeaa0fccf235e90a5ad159430682ac389910d86b
-size 12002005
smolvlm2_axmodel/llama_p1024_l7_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:46e4bce8f94d80d12e3b1a5ceae7ba62cbaa06f0ddf11f13999b1936a98bc0a1
-size 12002005
smolvlm2_axmodel/llama_p1024_l8_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b3ba57d8f2cd4d932445600d161a04b0a1160f452425c5abd08f94bece56f23f
-size 12002005
smolvlm2_axmodel/llama_p1024_l9_together.axmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0464cccfdfb0566069bad977d98f70b9e15e8e0b642a6e01ca2b16b5f7eb170a
-size 12002005
smolvlm2_axmodel/{llama_p1024_l0_together.axmodel → llama_p128_l0_together.axmodel}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:fe798dad363285aa06db28f00cabf919db772d17d7bb842a48f5b76c4bb31f17
+size 14502053
smolvlm2_axmodel/{llama_p1024_l10_together.axmodel → llama_p128_l10_together.axmodel}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:730853a20a5ff783ccc8b97568ebd7bb4320922bd2e28383005ebca8389d40df
+size 14502053
smolvlm2_axmodel/{llama_p1024_l11_together.axmodel → llama_p128_l11_together.axmodel}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:a9114384bfed1547e72099ed94e5f0d509170ac7872c8727e00b4d9e0a9c26a6
+size 14502053
smolvlm2_axmodel/{llama_p1024_l12_together.axmodel → llama_p128_l12_together.axmodel}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:fd4b6c59aac2909279181165c81cfd6aaa7e9765b2eb5d7eab6f28b15b638c47
+size 14502053
smolvlm2_axmodel/llama_p128_l13_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f21a64d4d6c47ee8c3e9784caa82037b00e380846809dda9d5f45463d6c9e259
+size 14502053
smolvlm2_axmodel/llama_p128_l14_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d762756ab6cf454f60238687bdb49e48f74de405a190ce3f8baea2d63fd77e15
+size 14502053
smolvlm2_axmodel/llama_p128_l15_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:137147c5d1a536e31ccbd01814a4e058c3a700f88bc73fab2417724c047d1c8a
+size 14502053
smolvlm2_axmodel/llama_p128_l16_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6cff0f01c402c967927451782f6483830a35b9d5f247ec002c0531080f58a583
+size 14502053
smolvlm2_axmodel/llama_p128_l17_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7544a41af808812decbf454b484adc5d01317b7044616a3a6b921f81d2a07904
+size 14502053
smolvlm2_axmodel/llama_p128_l18_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94a891f3a8427520be964d31f2e1323b63f9d0a942cb015f2b1712339feedee9
+size 14502053
smolvlm2_axmodel/llama_p128_l19_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c7f15489ea06f05b5a04a7437cb3da35bbd85a4dc92b41e235f5287c182cbbc
+size 14502053
smolvlm2_axmodel/llama_p128_l1_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7548a3c40a85f10022123f21654e88534fc4041cc36f7e12f15812675d2d693
+size 14502053
smolvlm2_axmodel/llama_p128_l20_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72c500fdf67380a5d0e9bb81098b48cb172f8ee178ba5256c951a2334f079302
+size 14502053
smolvlm2_axmodel/llama_p128_l21_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:960d618d2bf5b4d5cb78a782da41c0e3ceacf6e50684f7fdf1ccc3492c4b5044
+size 14502053
smolvlm2_axmodel/llama_p128_l22_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87f52c18d8599e280e3479fb5d41f8a5efa9aeb44967b8a45e301af1f7dfc4bc
+size 14502053
smolvlm2_axmodel/llama_p128_l23_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95c57d34b7fcba5be2f3be9402fcf2f0819ac9c711033a8a975e84e80d8112d6
+size 14502053
smolvlm2_axmodel/llama_p128_l24_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fb89bdbd126e5b6b053eb2a8c0e253eb05ef46c49cfe612a9f7926c168e1b37
+size 14502053
smolvlm2_axmodel/llama_p128_l25_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7278b00282db9f988b095a42edb131b2c364f831fa90edda0a82457a2c519729
+size 14502053
smolvlm2_axmodel/llama_p128_l26_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5074683de5b5141e400af1648bb7d3b4e2f7d090643883457eecfac2c58030f
+size 14502053
smolvlm2_axmodel/llama_p128_l27_together.axmodel
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06bf197bfe241e75bb24136dcb77590bbf427d0b8c90d0f70149dde4dfba5297
+size 14502053