# Prem-1B-SQL-ctx1024 / meta.yaml
# Uploaded by sp00ktober via huggingface_hub (commit 087fc6f, verified)
---
model_info:
  name: anemll-Prem-1B-SQL-ctx1024
  version: 0.3.5
  description: |
    Demonstrates running Prem-1B-SQL on Apple Neural Engine
    Context length: 1024
    Batch size: 64
    Chunks: 1
  license: MIT
  author: Anemll
  framework: Core ML
  language: Python
  architecture: llama
  parameters:
    context_length: 1024
    batch_size: 64
    lut_embeddings: none
    lut_ffn: 6
    lut_lmhead: 6
    num_chunks: 1
    model_prefix: llama
    embeddings: llama_embeddings.mlmodelc
    lm_head: llama_lm_head_lut6.mlmodelc
    ffn: llama_FFN_PF_lut6_chunk_01of01.mlmodelc
    split_lm_head: 8
    vocab_size: 32256
    lm_head_chunk_sizes: [4032, 4032, 4032, 4032, 4032, 4032, 4032, 4032]
    prefill_dynamic_slice: true
# =============================================================================
# Conversion Parameters (for troubleshooting)
# =============================================================================
# Generated: 2026-03-17 16:49:22
#
# model_path: /tmp/ios_models/downloads/Prem-1B-SQL
# output_dir: /tmp/ios_models/Prem-1B-SQL-ctx1024
# command_line: ./anemll/utils/convert_model.sh --model /tmp/ios_models/downloads/Prem-1B-SQL --output /tmp/ios_models/Prem-1B-SQL-ctx1024 --context 1024 --batch 64 --chunk 1 --lut2 6 --lut3 6 --prefix llama --force-mlprogram-compile
# context_length: 1024
# batch_size: 64
# num_chunks: 1
# lut_part1: none
# lut_part2: 6
# lut_part3: 6
# prefix: llama
# architecture: llama
# argmax_in_model: false
# split_rotate: false
# single_cache: false
# dynamic_prefill_slice: true
# monolithic: false
# anemll_version: 0.3.5
# vocab_size: 32256
# lm_head_chunk_sizes: "[4032, 4032, 4032, 4032, 4032, 4032, 4032, 4032]"
# =============================================================================