LiquidAI
/

LFM2.5-1.2B-Instruct-MLX-8bit

@@ -44,10 +44,19 @@ MLX export of [LFM2.5-1.2B-Instruct](https://huggingface.co/LiquidAI/LFM2.5-1.2B
 |----------|-------|
 | Parameters | 1.2B |
 | Precision | 8-bit |
-| Group Size | 64 |
-| Size | 1.2 GB |
 | Context Length | 128K |
 ## Use with mlx
 ```bash
@@ -56,6 +65,7 @@ pip install mlx-lm
 ```python
 from mlx_lm import load, generate
 model, tokenizer = load("LiquidAI/LFM2.5-1.2B-Instruct-8bit")
@@ -67,7 +77,18 @@ if tokenizer.chat_template is not None:
         messages, tokenize=False, add_generation_prompt=True
     )
-response = generate(model, tokenizer, prompt=prompt, verbose=True)
 ```
 ## License

 |----------|-------|
 | Parameters | 1.2B |
 | Precision | 8-bit |
+| Group Size | 64 |
+| Size | 1.2 GB |
 | Context Length | 128K |
+## Recommended Sampling Parameters
+| Parameter | Value |
+|-----------|-------|
+| temperature | 0.1 |
+| top_k | 50 |
+| top_p | 0.1 |
+| repetition_penalty | 1.05 |
+| max_tokens | 512 |
 ## Use with mlx
 ```bash
 ```python
 from mlx_lm import load, generate
+from mlx_lm.sample_utils import make_sampler, make_logits_processors
 model, tokenizer = load("LiquidAI/LFM2.5-1.2B-Instruct-8bit")
         messages, tokenize=False, add_generation_prompt=True
     )
+sampler = make_sampler(temp=0.1, top_k=50, top_p=0.1)
+logits_processors = make_logits_processors(repetition_penalty=1.05)
+response = generate(
+    model,
+    tokenizer,
+    prompt=prompt,
+    max_tokens=512,
+    sampler=sampler,
+    logits_processors=logits_processors,
+    verbose=True,
+)
 ```
 ## License