Text Generation
LiteRT-LM
English
custom
hermes-edge
mobile-ai
on-device
ios
iphone-16
apple-neural-engine
deepseek
dspark
speculative-decoding
hermes-agent
tool-calling
raven-ecosystem
Instructions to use bclermo/hermes-edge with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- LiteRT-LM
How to use bclermo/hermes-edge with LiteRT-LM:
# LiteRT-LM runs on various platforms (Android, iOS, Windows, Linux, macOS, IoT, Web/WASM)
# and supports many APIs (C++, Python, Kotlin, Swift, JavaScript, Flutter).
# For platform-specific integration guides, please refer to the official developer website:
# https://ai.google.dev/edge/litert-lm

# To try LiteRT-LM, the easiest way is to use our CLI tool.
# 1. Install the LiteRT-LM CLI tool:
pip install litert-lm

# 2. Download and run this model locally:
# See: https://ai.google.dev/edge/litert-lm/cli
litert-lm run \
  --from-huggingface-repo=bclermo/hermes-edge \
  model.litertlm \
  --prompt="Write me a poem"
- Notebooks
- Google Colab
- Kaggle
File size: 7,011 Bytes
#!/usr/bin/env python3
"""Lightweight training / fine-tuning for the Hermes mobile transformer.
Designed for the mobile model sizes (270M / 1B), this script does supervised
fine-tuning on agentic chat data formatted with the Hermes tool-calling
template (see :mod:`hermes.chat_template`). It is deliberately framework-light
(plain PyTorch + an optional JSONL dataset) so it runs on a single GPU or even
CPU for smoke tests, and produces a checkpoint that
``scripts/convert_to_litertlm.py`` can consume directly.
Dataset format (JSONL), one conversation per line::
{"messages": [
{"role": "user", "content": "What is 12*9?"},
{"role": "assistant", "content": "<tool_call>{\"name\":\"calculator\",\"arguments\":{\"expression\":\"12*9\"}}</tool_call>"},
{"role": "tool", "content": "108"},
{"role": "assistant", "content": "12 * 9 = 108."}
],
"tools": [{"name": "calculator", "description": "...", "parameters": {...}}]}
Example::
python scripts/train.py \
--preset hermes-1b \
--data data/agentic_sft.jsonl \
--tokenizer tokenizer/hermes.model \
--output checkpoints/hermes-1b.pt \
--epochs 1 --batch-size 4 --lr 2e-4
"""
from __future__ import annotations
import argparse
import json
import logging
import os
import sys
from typing import Any, Dict, List
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from hermes.chat_template import Message, build_prompt # noqa: E402
from hermes.config import get_config # noqa: E402
from hermes.model import build_model # noqa: E402
# Root logging setup: INFO level, each record prefixed with timestamp,
# level and logger name.
_LOG_FORMAT = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)

# Logger shared by every function in this script.
logger = logging.getLogger("hermes.train")
def load_tokenizer(path: str):
    """Load the SentencePiece model stored at ``path``.

    The ``sentencepiece`` import is attempted before the path is checked, so a
    missing package is reported even when the path is also wrong.

    Args:
        path: Filesystem path of the SentencePiece ``.model`` file.

    Returns:
        A ``SentencePieceProcessor`` on which ``load(path)`` has been called.

    Raises:
        ImportError: If ``sentencepiece`` cannot be imported.
        FileNotFoundError: If nothing exists at ``path``.
    """
    try:
        import sentencepiece
    except ImportError as exc:
        message = "sentencepiece is required for training. `pip install sentencepiece`."
        raise ImportError(message) from exc

    if os.path.exists(path):
        processor = sentencepiece.SentencePieceProcessor()
        processor.load(path)
        return processor

    raise FileNotFoundError(
        f"Tokenizer not found at {path}. Train one with scripts/train_tokenizer.py first."
    )
def encode_example(example: Dict[str, Any], sp, max_len: int) -> List[int]:
    """Render one conversation through the chat template and tokenize it.

    Args:
        example: Parsed JSONL record. Must contain ``"messages"``, a list of
            dicts each having ``"role"`` and ``"content"``; may contain
            ``"tools"``, which is passed to the template unchanged (``None``
            when absent).
        sp: Tokenizer exposing ``encode(text, out_type=int)``.
        max_len: Maximum number of token ids to keep.

    Returns:
        The token ids of the rendered prompt, cut to the first ``max_len``.
    """
    conversation = []
    for turn in example["messages"]:
        conversation.append(Message(turn["role"], turn["content"]))

    # No generation prompt: the template is rendered over the messages as given.
    rendered = build_prompt(
        conversation,
        tools=example.get("tools"),
        add_generation_prompt=False,
    )
    token_ids = sp.encode(rendered, out_type=int)
    return token_ids[:max_len]
class JsonlDataset:
    """In-memory, index-addressable dataset of tokenized JSONL conversations."""

    def __init__(self, path: str, sp, max_len: int) -> None:
        """Read ``path`` and tokenize every non-blank line up front.

        Each line is parsed as JSON and passed to ``encode_example``; results
        shorter than two tokens are discarded. The number of kept examples is
        logged once the file has been read.

        Args:
            path: UTF-8 JSONL file, one conversation per line.
            sp: Tokenizer forwarded to ``encode_example``.
            max_len: Per-example token limit forwarded to ``encode_example``.
        """
        self.examples: List[List[int]] = []
        with open(path, "r", encoding="utf-8") as handle:
            for raw in handle:
                record = raw.strip()
                if record:
                    token_ids = encode_example(json.loads(record), sp, max_len)
                    # Keep only sequences with at least two tokens.
                    if len(token_ids) > 1:
                        self.examples.append(token_ids)
        logger.info("Loaded %d training examples from %s", len(self.examples), path)

    def __len__(self) -> int:
        """Return how many tokenized examples are held."""
        return len(self.examples)

    def __getitem__(self, idx: int) -> List[int]:
        """Return the token ids of the example at position ``idx``."""
        return self.examples[idx]
def collate(batch: List[List[int]], pad_id: int):
    """Right-pad variable-length id lists into one ``torch.long`` tensor.

    Args:
        batch: Non-empty list of token-id lists.
        pad_id: Value written into positions past the end of each list.

    Returns:
        A tensor of shape ``(len(batch), longest_list_length)``.
    """
    import torch

    width = max(map(len, batch))
    # Pad each row to the common width in Python, then convert once.
    padded_rows = [list(ids) + [pad_id] * (width - len(ids)) for ids in batch]
    return torch.tensor(padded_rows, dtype=torch.long)
def _select_device() -> str:
    """Return ``"cuda"``, ``"mps"`` or ``"cpu"``, preferred in that order."""
    import torch

    if torch.cuda.is_available():
        return "cuda"
    # Some PyTorch builds do not expose ``torch.backends.mps`` at all; treat a
    # missing attribute the same as "MPS unavailable" instead of crashing.
    mps = getattr(torch.backends, "mps", None)
    if mps is not None and mps.is_available():
        return "mps"
    return "cpu"


def _warm_start(model, checkpoint_path) -> None:
    """Load weights from ``checkpoint_path`` into ``model`` when one is given.

    A missing file is reported with a warning and training continues from the
    model's initial weights. Keys that did not line up are logged, because
    ``strict=False`` would otherwise hide them.
    """
    import torch

    if not checkpoint_path:
        return
    if not os.path.exists(checkpoint_path):
        logger.warning(
            "Init checkpoint %s not found; continuing without warm start.",
            checkpoint_path,
        )
        return

    # NOTE(review): torch.load unpickles the file; only pass checkpoints from a
    # trusted source (consider weights_only=True where the installed PyTorch
    # supports it).
    ckpt = torch.load(checkpoint_path, map_location="cpu")
    # Accept either {"model": state_dict, ...} or a bare state dict.
    result = model.load_state_dict(ckpt.get("model", ckpt), strict=False)
    missing = list(getattr(result, "missing_keys", None) or [])
    unexpected = list(getattr(result, "unexpected_keys", None) or [])
    if missing:
        logger.warning("Checkpoint is missing %d model keys: %s", len(missing), missing)
    if unexpected:
        logger.warning(
            "Checkpoint has %d keys the model does not use: %s",
            len(unexpected), unexpected,
        )
    logger.info("Initialized from %s", checkpoint_path)


def train(args: argparse.Namespace) -> int:
    """Run supervised fine-tuning and write a checkpoint to ``args.output``.

    Steps: resolve the preset config, pick a device, load the tokenizer (its
    vocabulary size overrides the config's when they differ), tokenize the
    JSONL dataset, optionally warm-start from ``args.init_checkpoint``, run
    ``args.epochs`` passes of AdamW with gradient-norm clipping, then save
    ``{"model": state_dict, "config": config.__dict__}``.

    Args:
        args: Namespace with ``preset``, ``data``, ``tokenizer``, ``output``,
            ``init_checkpoint``, ``epochs``, ``batch_size``, ``lr``,
            ``grad_clip`` and ``log_every``. A ``log_every`` of zero or less
            disables the periodic loss log.

    Returns:
        ``0`` after the checkpoint is saved, ``1`` when the dataset yields no
        usable examples.
    """
    import torch
    from torch.utils.data import DataLoader

    config = get_config(args.preset)
    device = _select_device()
    logger.info("Training %s on %s (~%.0fM params)", args.preset, device,
                config.estimated_parameters() / 1e6)

    sp = load_tokenizer(args.tokenizer)
    tokenizer_vocab = sp.get_piece_size()
    if tokenizer_vocab != config.vocab_size:
        logger.warning(
            "Tokenizer vocab (%d) != config vocab (%d); using tokenizer size.",
            tokenizer_vocab, config.vocab_size,
        )
        config.vocab_size = tokenizer_vocab

    dataset = JsonlDataset(args.data, sp, config.max_seq_len)
    if len(dataset) == 0:
        logger.error("No usable training examples; aborting.")
        return 1

    loader = DataLoader(
        dataset,
        batch_size=args.batch_size,
        shuffle=True,
        collate_fn=lambda b: collate(b, config.pad_token_id),
    )

    model = build_model(config).to(device)
    _warm_start(model, args.init_checkpoint)

    optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=0.01)
    model.train()
    step = 0
    for epoch in range(args.epochs):
        for input_ids in loader:
            input_ids = input_ids.to(device)
            # NOTE(review): labels are the unmasked inputs, padding included.
            # Whether pad positions are excluded from the loss depends on how
            # the model handles ``labels`` - confirm.
            out = model(input_ids, labels=input_ids)
            loss = out["loss"]
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
            optimizer.step()
            optimizer.zero_grad()
            step += 1
            # Guard the modulo so --log-every 0 means "no periodic logging"
            # rather than a ZeroDivisionError.
            if args.log_every > 0 and step % args.log_every == 0:
                logger.info("epoch=%d step=%d loss=%.4f", epoch, step, loss.item())

    # ``dirname`` is empty for a bare filename; fall back to the current dir.
    os.makedirs(os.path.dirname(args.output) or ".", exist_ok=True)
    torch.save(
        {"model": model.state_dict(), "config": config.__dict__},
        args.output,
    )
    logger.info("Saved checkpoint to %s", args.output)
    logger.info(
        "Next: python scripts/convert_to_litertlm.py --checkpoint %s "
        "--tokenizer %s --preset %s",
        args.output, args.tokenizer, args.preset,
    )
    return 0
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the training script.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        Namespace with ``preset``, ``data``, ``tokenizer``, ``output``,
        ``init_checkpoint``, ``epochs``, ``batch_size``, ``lr``, ``grad_clip``
        and ``log_every``. When ``--output`` is omitted it is derived from the
        preset as ``checkpoints/<preset>.pt``, so a ``hermes-270m`` run does
        not default to a file named after the 1B model.
    """
    p = argparse.ArgumentParser(description="Train/fine-tune Hermes mobile model")
    p.add_argument("--preset", default="hermes-1b", choices=["hermes-1b", "hermes-270m"])
    p.add_argument("--data", required=True, help="Path to agentic-chat JSONL dataset")
    p.add_argument("--tokenizer", required=True, help="SentencePiece .model path")
    p.add_argument(
        "--output",
        default=None,
        help="Checkpoint path to write (default: checkpoints/<preset>.pt)",
    )
    p.add_argument("--init-checkpoint", default=None, help="Optional warm-start checkpoint")
    p.add_argument("--epochs", type=int, default=1)
    p.add_argument("--batch-size", type=int, default=4)
    p.add_argument("--lr", type=float, default=2e-4)
    p.add_argument("--grad-clip", type=float, default=1.0)
    p.add_argument("--log-every", type=int, default=10)

    args = p.parse_args(argv)
    if args.output is None:
        # For the default preset this yields the previous hard-coded default.
        args.output = f"checkpoints/{args.preset}.pt"
    return args
# Script entry point: parse the CLI, run training, and use the value that
# train() returns as the process exit status.
if __name__ == "__main__":
    exit_code = train(parse_args())
    sys.exit(exit_code)
|