Text Generation
Transformers
PyTorch
English
taonet_mini_t2
taonet
taotern
ssm
state-space-model
dplr
custom_code
experimental
Instructions to use TaoTern/TaoNet-mini-T2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use TaoTern/TaoNet-mini-T2 with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline
pipe = pipeline("text-generation", model="TaoTern/TaoNet-mini-T2", trust_remote_code=True)
# Load model directly
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained("TaoTern/TaoNet-mini-T2", trust_remote_code=True, dtype="auto")
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use TaoTern/TaoNet-mini-T2 with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "TaoTern/TaoNet-mini-T2"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{ "model": "TaoTern/TaoNet-mini-T2", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }'
Use Docker
docker model run hf.co/TaoTern/TaoNet-mini-T2
- SGLang
How to use TaoTern/TaoNet-mini-T2 with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "TaoTern/TaoNet-mini-T2" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{ "model": "TaoTern/TaoNet-mini-T2", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
  --model-path "TaoTern/TaoNet-mini-T2" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{ "model": "TaoTern/TaoNet-mini-T2", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }'
- Docker Model Runner
How to use TaoTern/TaoNet-mini-T2 with Docker Model Runner:
docker model run hf.co/TaoTern/TaoNet-mini-T2
| """Checkpoint management utilities. | |
| Canonical Checkpoint Format (new): | |
| { | |
| 'step': int, # Training step number | |
| 'model_state': Dict[str, Tensor], # Model state dict | |
| 'optimizer_state': Dict, # Optimizer state dict (optional) | |
| 'config': Dict, # TrainingConfig as dict | |
| 'metrics': Dict[str, float], # Training metrics | |
| 'global_step': int, # (deprecated, kept for compat) same as step | |
| 'current_epoch': int, # (optional) current epoch number | |
| 'best_loss': float, # (optional) best validation loss | |
| } | |
| Legacy Checkpoint Format (old, from BaseTrainer): | |
| { | |
| 'global_step': int, | |
| 'current_epoch': int, | |
| 'best_loss': float, | |
| 'model_state_dict': Dict[str, Tensor], # ← Note: uses '_dict' suffix | |
| 'optimizer_state_dict': Dict, | |
| 'config': Dict, | |
| } | |
| CheckpointManager.load() auto-detects the legacy format and migrates it to the canonical format in memory; the checkpoint file itself is not modified. | |
| """ | |
| from pathlib import Path | |
| from typing import Dict, Any, Optional | |
| import torch | |
| from taoTrain.config import TrainingConfig | |
| class CheckpointManager: | |
| """Manage model checkpoints with versioning.""" | |
| def __init__( | |
| self, | |
| checkpoint_dir: str | Path, | |
| keep_last_n: int = 3, | |
| track_best: bool = True, | |
| ): | |
| """ | |
| Initialize checkpoint manager. | |
| Args: | |
| checkpoint_dir: Directory to save checkpoints | |
| keep_last_n: Number of recent checkpoints to keep | |
| track_best: Whether to track best model | |
| """ | |
| self.checkpoint_dir = Path(checkpoint_dir) | |
| self.checkpoint_dir.mkdir(parents=True, exist_ok=True) | |
| self.keep_last_n = keep_last_n | |
| self.track_best = track_best | |
| self.best_metric = None | |
| self.best_metric_name = None | |
| self.saved_checkpoints = [] | |
| def save( | |
| self, | |
| step: int, | |
| model_state: Dict[str, Any], | |
| optimizer_state: Optional[Dict[str, Any]] = None, | |
| config: Optional[TrainingConfig] = None, | |
| metrics: Optional[Dict[str, float]] = None, | |
| is_best: bool = False, | |
| ) -> Path: | |
| """ | |
| Save a checkpoint. | |
| Args: | |
| step: Training step | |
| model_state: Model state dict | |
| optimizer_state: Optimizer state dict | |
| config: Training config | |
| metrics: Metrics dict | |
| is_best: Whether this is the best model so far | |
| Returns: | |
| Path to saved checkpoint | |
| """ | |
| checkpoint = { | |
| "step": step, | |
| "model_state": model_state, | |
| "optimizer_state": optimizer_state, | |
| "config": config.to_dict() if config else None, | |
| "metrics": metrics or {}, | |
| } | |
| filename = f"checkpoint_step_{step:06d}.pt" | |
| if is_best: | |
| filename = "best_model.pt" | |
| path = self.checkpoint_dir / filename | |
| torch.save(checkpoint, path) | |
| # Track saved checkpoints | |
| if not is_best: | |
| self.saved_checkpoints.append((step, path)) | |
| # Clean up old checkpoints | |
| if len(self.saved_checkpoints) > self.keep_last_n: | |
| _, old_path = self.saved_checkpoints.pop(0) | |
| if old_path.exists(): | |
| old_path.unlink() | |
| return path | |
| def load( | |
| self, | |
| checkpoint_path: str | Path, | |
| device: Optional[torch.device] = None, | |
| ) -> Dict[str, Any]: | |
| """ | |
| Load a checkpoint with backward-compatible format handling. | |
| Auto-detects checkpoint format (canonical or legacy) and normalizes | |
| to canonical format in-memory. Legacy checkpoints are migrated without | |
| modifying the file. | |
| Args: | |
| checkpoint_path: Path to checkpoint | |
| device: Device to load to | |
| Returns: | |
| Checkpoint dict in canonical format with 'model_state' key | |
| """ | |
| if device is None: | |
| device = torch.device("cpu") | |
| checkpoint = torch.load(checkpoint_path, map_location=device) | |
| # Auto-detect and migrate legacy format to canonical format | |
| checkpoint = self._normalize_checkpoint_format(checkpoint) | |
| return checkpoint | |
| def _normalize_checkpoint_format(self, checkpoint: Dict[str, Any]) -> Dict[str, Any]: | |
| """ | |
| Normalize checkpoint to canonical format. | |
| Detects if checkpoint is in legacy format (from BaseTrainer with 'model_state_dict') | |
| and migrates it to canonical format (with 'model_state'). | |
| Args: | |
| checkpoint: Raw checkpoint dict | |
| Returns: | |
| Normalized checkpoint dict with canonical keys | |
| """ | |
| # Check if this is a legacy checkpoint (has 'model_state_dict' but not 'model_state') | |
| if "model_state_dict" in checkpoint and "model_state" not in checkpoint: | |
| # Migrate legacy format to canonical | |
| migrated = { | |
| "step": checkpoint.get("global_step", 0), | |
| "model_state": checkpoint["model_state_dict"], | |
| "optimizer_state": checkpoint.get("optimizer_state_dict"), | |
| "config": checkpoint.get("config"), | |
| "metrics": {}, | |
| # Keep legacy keys for backward compatibility in code that uses them | |
| "global_step": checkpoint.get("global_step", 0), | |
| "current_epoch": checkpoint.get("current_epoch", 0), | |
| "best_loss": checkpoint.get("best_loss", float('inf')), | |
| } | |
| print(f"\n✓ [CheckpointManager] Detected legacy checkpoint format. Auto-migrated to canonical format.") | |
| return migrated | |
| # Already in canonical format or unknown format | |
| if "model_state" not in checkpoint: | |
| # If neither format detected, ensure model_state is accessible | |
| # (might be a raw state_dict) | |
| print(f"\n⚠ [CheckpointManager] Checkpoint format unclear. Assuming raw state_dict format.") | |
| checkpoint["model_state"] = checkpoint | |
| return checkpoint | |
| def get_latest(self) -> Optional[Path]: | |
| """Get path to latest checkpoint.""" | |
| if not self.saved_checkpoints: | |
| return None | |
| return self.saved_checkpoints[-1][1] | |
| def get_best(self) -> Optional[Path]: | |
| """Get path to best checkpoint.""" | |
| best_path = self.checkpoint_dir / "best_model.pt" | |
| if best_path.exists(): | |
| return best_path | |
| return None | |
| def list_checkpoints(self) -> list[Path]: | |
| """List all saved checkpoints.""" | |
| return sorted(self.checkpoint_dir.glob("checkpoint_step_*.pt")) | |