| model_info: | |
| name: anemll-Prem-1B-SQL-ctx1024 | |
| version: 0.3.5 | |
| description: | | |
| Demonstrates running Prem-1B-SQL on Apple Neural Engine | |
| Context length: 1024 | |
| Batch size: 64 | |
| Chunks: 1 | |
| license: MIT | |
| author: Anemll | |
| framework: Core ML | |
| language: Python | |
| architecture: llama | |
| parameters: | |
| context_length: 1024 | |
| batch_size: 64 | |
| lut_embeddings: none | |
| lut_ffn: 6 | |
| lut_lmhead: 6 | |
| num_chunks: 1 | |
| model_prefix: llama | |
| embeddings: llama_embeddings.mlmodelc | |
| lm_head: llama_lm_head_lut6.mlmodelc | |
| ffn: llama_FFN_PF_lut6_chunk_01of01.mlmodelc | |
| split_lm_head: 8 | |
| vocab_size: 32256 | |
| lm_head_chunk_sizes: [4032, 4032, 4032, 4032, 4032, 4032, 4032, 4032] | |
| prefill_dynamic_slice: true | |
| # ============================================================================= | |
| # Conversion Parameters (for troubleshooting) | |
| # ============================================================================= | |
| # Generated: 2026-03-17 16:49:22 | |
| # | |
| # model_path: /tmp/ios_models/downloads/Prem-1B-SQL | |
| # output_dir: /tmp/ios_models/Prem-1B-SQL-ctx1024 | |
| # command_line: ./anemll/utils/convert_model.sh --model /tmp/ios_models/downloads/Prem-1B-SQL --output /tmp/ios_models/Prem-1B-SQL-ctx1024 --context 1024 --batch 64 --chunk 1 --lut2 6 --lut3 6 --prefix llama --force-mlprogram-compile | |
| # context_length: 1024 | |
| # batch_size: 64 | |
| # num_chunks: 1 | |
| # lut_part1: none | |
| # lut_part2: 6 | |
| # lut_part3: 6 | |
| # prefix: llama | |
| # architecture: llama | |
| # argmax_in_model: false | |
| # split_rotate: false | |
| # single_cache: false | |
| # dynamic_prefill_slice: true | |
| # monolithic: false | |
| # anemll_version: 0.3.5 | |
| # vocab_size: 32256 | |
| # lm_head_chunk_sizes: "[4032, 4032, 4032, 4032, 4032, 4032, 4032, 4032]" | |
| # ============================================================================= | |