---
# anemll model metadata for Prem-1B-SQL converted to run on the
# Apple Neural Engine via Core ML (context 1024, batch 64, single chunk).
model_info:
  name: anemll-Prem-1B-SQL-ctx1024
  version: 0.3.5
  description: |
    Demonstrates running Prem-1B-SQL on Apple Neural Engine
    Context length: 1024
    Batch size: 64
    Chunks: 1
  license: MIT
  author: Anemll
  framework: Core ML
  language: Python
  architecture: llama
  parameters:
    context_length: 1024
    batch_size: 64
    # LUT quantization bits per model part; "none" = unquantized embeddings.
    lut_embeddings: none
    lut_ffn: 6
    lut_lmhead: 6
    num_chunks: 1
    model_prefix: llama
    # Compiled Core ML artifacts for each pipeline stage.
    embeddings: llama_embeddings.mlmodelc
    lm_head: llama_lm_head_lut6.mlmodelc
    ffn: llama_FFN_PF_lut6_chunk_01of01.mlmodelc
    # LM head output is split into 8 slices of 4032 logits each
    # (8 x 4032 = 32256 = vocab_size).
    split_lm_head: 8
    vocab_size: 32256
    lm_head_chunk_sizes: [4032, 4032, 4032, 4032, 4032, 4032, 4032, 4032]
    prefill_dynamic_slice: true

# =============================================================================
# Conversion Parameters (for troubleshooting)
# =============================================================================
# Generated: 2026-03-17 16:49:22
#
# model_path: /tmp/ios_models/downloads/Prem-1B-SQL
# output_dir: /tmp/ios_models/Prem-1B-SQL-ctx1024
# command_line: ./anemll/utils/convert_model.sh --model /tmp/ios_models/downloads/Prem-1B-SQL --output /tmp/ios_models/Prem-1B-SQL-ctx1024 --context 1024 --batch 64 --chunk 1 --lut2 6 --lut3 6 --prefix llama --force-mlprogram-compile
# context_length: 1024
# batch_size: 64
# num_chunks: 1
# lut_part1: none
# lut_part2: 6
# lut_part3: 6
# prefix: llama
# architecture: llama
# argmax_in_model: false
# split_rotate: false
# single_cache: false
# dynamic_prefill_slice: true
# monolithic: false
# anemll_version: 0.3.5
# vocab_size: 32256
# lm_head_chunk_sizes: "[4032, 4032, 4032, 4032, 4032, 4032, 4032, 4032]"
# =============================================================================