Instructions for using FrontiersMind/Nandi-Mini-150M with libraries, inference providers, notebooks, and local apps. Follow the links below to get started.
- Libraries
- Transformers
How to use FrontiersMind/Nandi-Mini-150M with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="FrontiersMind/Nandi-Mini-150M", trust_remote_code=True)
```

```python
# Load model directly
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("FrontiersMind/Nandi-Mini-150M", trust_remote_code=True, dtype="auto")
```
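For a quick smoke test, here is a minimal generation sketch building on the pipeline above; the prompt and sampling settings are illustrative, not prescribed by the model card:

```python
# Minimal generation sketch; prompt and sampling parameters are illustrative.
output = pipe("Once upon a time,", max_new_tokens=64, do_sample=True, temperature=0.5)
print(output[0]["generated_text"])
```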
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use FrontiersMind/Nandi-Mini-150M with vLLM:
Install from pip and serve the model:

```shell
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "FrontiersMind/Nandi-Mini-150M"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "FrontiersMind/Nandi-Mini-150M",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
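The served endpoint can also be called from Python. Below is a minimal client sketch, assuming the vLLM server started above is listening on localhost:8000; it uses the `openai` package, but any OpenAI-compatible client works:

```python
# Minimal client sketch (assumes the vLLM server above runs on localhost:8000).
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
response = client.completions.create(
    model="FrontiersMind/Nandi-Mini-150M",
    prompt="Once upon a time,",
    max_tokens=512,
    temperature=0.5,
)
print(response.choices[0].text)
```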
Use Docker:

```shell
docker model run hf.co/FrontiersMind/Nandi-Mini-150M
```
- SGLang
How to use FrontiersMind/Nandi-Mini-150M with SGLang:
Install from pip and serve the model:

```shell
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "FrontiersMind/Nandi-Mini-150M" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "FrontiersMind/Nandi-Mini-150M",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
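The same endpoint can be called from Python. A minimal sketch using `requests`, assuming the SGLang server above is listening on localhost:30000:

```python
# Minimal client sketch (assumes the SGLang server above runs on localhost:30000).
import requests

response = requests.post(
    "http://localhost:30000/v1/completions",
    json={
        "model": "FrontiersMind/Nandi-Mini-150M",
        "prompt": "Once upon a time,",
        "max_tokens": 512,
        "temperature": 0.5,
    },
)
print(response.json()["choices"][0]["text"])
```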
Use Docker images:

```shell
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "FrontiersMind/Nandi-Mini-150M" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "FrontiersMind/Nandi-Mini-150M",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use FrontiersMind/Nandi-Mini-150M with Docker Model Runner:
```shell
docker model run hf.co/FrontiersMind/Nandi-Mini-150M
```
```python
# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tokenization classes for the Nandi family."""

from tokenizers import Regex, Tokenizer, decoders, normalizers, pre_tokenizers
from tokenizers.models import BPE

from transformers.tokenization_utils_tokenizers import TokenizersBackend
from transformers.utils import logging


logger = logging.get_logger(__name__)

PRETOKENIZE_REGEX = r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?(?:\p{L}\p{M}*)+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"""
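
# Illustrative note (added; not part of the original file): the split pattern
# can be previewed with the third-party `regex` package, which understands the
# \p{...} Unicode classes used above:
#
#     import regex
#     regex.findall(PRETOKENIZE_REGEX, "Nandi's 150M")
#     # -> ['Nandi', "'s", ' ', '1', '5', '0', 'M']
#
# Since \p{N} matches a single numeral, digit runs split one digit per piece,
# as in several recent byte-level BPE tokenizers.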


class NandiTokenizer(TokenizersBackend):
    model_input_names = ["input_ids", "attention_mask"]
    model = BPE

    def __init__(
        self,
        vocab: str | dict[str, int] | None = None,
        merges: str | list[str] | None = None,
        vocab_file=None,
        merges_file=None,
        unk_token: str = "<|endoftext|>",
        bos_token: str = "<|im_start|>",
        eos_token: str = "<|endoftext|>",
        pad_token: str = "<|pad|>",
        add_prefix_space: bool | None = None,
        **kwargs,
    ):
        self._vocab = (
            vocab
            if vocab is not None
            else {
                "<|endoftext|>": 0,
            }
        )
        self._merges = merges or []
        self._tokenizer = Tokenizer(
            BPE(
                vocab=self._vocab,
                merges=self._merges,
                dropout=None,
                unk_token=None,
                continuing_subword_prefix="",
                end_of_word_suffix="",
                fuse_unk=False,
                byte_fallback=False,
            )
        )
        self._tokenizer.decoder = decoders.ByteLevel()
        self._tokenizer.normalizer = normalizers.NFC()
        self._tokenizer.pre_tokenizer = pre_tokenizers.Sequence(
            [
                pre_tokenizers.Split(
                    Regex(PRETOKENIZE_REGEX),
                    behavior="isolated",
                    invert=False,
                ),
                pre_tokenizers.ByteLevel(
                    add_prefix_space=False,
                    trim_offsets=True,
                    use_regex=False,
                ),
            ]
        )
        super().__init__(
            vocab_file=vocab_file,
            merges_file=merges_file,
            unk_token=unk_token,
            bos_token=bos_token,
            eos_token=eos_token,
            pad_token=pad_token,
            add_prefix_space=add_prefix_space,
            **kwargs,
        )
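
    # Note (added for clarity): the pipeline configured above is GPT-2-style
    # byte-level BPE: NFC normalization, regex pre-splitting, then a ByteLevel
    # byte mapping with no prefix space. Splitting is delegated to
    # PRETOKENIZE_REGEX rather than ByteLevel's built-in regex (use_regex=False).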

    def __call__(self, text, *args, **kwargs):
        add_special_tokens = kwargs.get("add_special_tokens", False)

        def add_prefix(t):
            if isinstance(t, str):
                return "<|im_start|> " + t
            return t

        # Only inject when special tokens are disabled
        if not add_special_tokens:
            if isinstance(text, list):
                text = [add_prefix(t) for t in text]
            else:
                text = add_prefix(text)
        return super().__call__(text, *args, **kwargs)
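
    # Note (added for clarity): __call__ above injects the "<|im_start|> " prefix
    # only when add_special_tokens is falsy, whereas encode() below prepends it
    # to every string input regardless of that flag; the two entry points
    # therefore behave differently.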

    def encode(
        self,
        text,
        text_pair=None,
        add_special_tokens: bool = True,
        padding=False,
        truncation=None,
        max_length=None,
        stride: int = 0,
        padding_side=None,
        return_tensors=None,
        **kwargs,
    ):
        if isinstance(text, str):
            # This is a temporary fix to match the behaviour of the training pipeline
            text = "<|im_start|>" + " " + text
        return super().encode(
            text,
            text_pair=text_pair,
            add_special_tokens=add_special_tokens,
            padding=padding,
            truncation=truncation,
            max_length=max_length,
            stride=stride,
            padding_side=padding_side,
            return_tensors=return_tensors,
            **kwargs,
        )


__all__ = ["NandiTokenizer"]
```
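
For reference, a hedged usage sketch of the class above. It assumes the checkpoint registers NandiTokenizer for auto-loading via trust_remote_code; that registration is not shown in this file:

```python
from transformers import AutoTokenizer

# Assumption: the repo's auto_map points at NandiTokenizer, so
# trust_remote_code=True loads the class defined above.
tok = AutoTokenizer.from_pretrained("FrontiersMind/Nandi-Mini-150M", trust_remote_code=True)

ids = tok.encode("Once upon a time,")  # encode() prepends "<|im_start|> " to strings
print(tok.decode(ids))

batch = tok(["hello", "world"])  # __call__ injects the prefix when
                                 # add_special_tokens is falsy (the default here)
print(batch["input_ids"])
```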