Text Generation
Transformers
PyTorch
English
taonet_mini_t2
taonet
taotern
ssm
state-space-model
dplr
custom_code
experimental
Instructions to use TaoTern/TaoNet-mini-T2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use TaoTern/TaoNet-mini-T2 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="TaoTern/TaoNet-mini-T2", trust_remote_code=True)

# Load model directly
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("TaoTern/TaoNet-mini-T2", trust_remote_code=True, dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use TaoTern/TaoNet-mini-T2 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "TaoTern/TaoNet-mini-T2"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "TaoTern/TaoNet-mini-T2",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker
docker model run hf.co/TaoTern/TaoNet-mini-T2
- SGLang
How to use TaoTern/TaoNet-mini-T2 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "TaoTern/TaoNet-mini-T2" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "TaoTern/TaoNet-mini-T2",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "TaoTern/TaoNet-mini-T2" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "TaoTern/TaoNet-mini-T2",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use TaoTern/TaoNet-mini-T2 with Docker Model Runner:
docker model run hf.co/TaoTern/TaoNet-mini-T2
| """Base classes for models, trainers, and datasets.""" | |
| from abc import ABC, abstractmethod | |
| from pathlib import Path | |
| from typing import Optional, Any, Iterator | |
| import torch | |
| import torch.nn as nn | |
| from torch.utils.data import Dataset as TorchDataset | |
| from taoTrain.config import TrainingConfig, ModelConfig | |
| # ============================================================================ | |
| # Base Model | |
| # ============================================================================ | |
class BaseModel(nn.Module, ABC):
    """Abstract base class for language models.

    Stores the model configuration and provides small introspection helpers.
    Concrete models must implement :meth:`forward`; the class cannot be
    instantiated until they do.
    """

    def __init__(self, config: "ModelConfig"):
        """Initialize the module and keep a reference to ``config``."""
        super().__init__()
        self.config = config

    @abstractmethod
    def forward(
        self,
        input_ids: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
    ) -> dict[str, torch.Tensor]:
        """
        Forward pass.

        Args:
            input_ids: Shape (batch_size, seq_length)
            attention_mask: Shape (batch_size, seq_length), optional
            labels: Shape (batch_size, seq_length), optional (for loss computation)

        Returns:
            Dict with keys:
                - 'logits': Shape (batch_size, seq_length, vocab_size)
                - 'loss': Scalar (if labels provided)

        Raises:
            NotImplementedError: If a subclass delegates to this base
                implementation instead of providing its own.
        """
        # Fail loudly rather than silently returning None, which would only
        # surface later as a confusing error in the caller.
        raise NotImplementedError(
            f"{type(self).__name__} must implement forward()"
        )

    def count_parameters(self) -> int:
        """Count total trainable parameters (those with ``requires_grad``)."""
        return sum(p.numel() for p in self.parameters() if p.requires_grad)

    def get_num_layers(self) -> int:
        """Get number of layers, as recorded in ``config.num_layers``."""
        return self.config.num_layers
| # ============================================================================ | |
| # Base Dataset | |
| # ============================================================================ | |
class BaseDataset(TorchDataset, ABC):
    """Abstract base class for datasets.

    Subclasses must implement ``__len__`` and ``__getitem__``.
    ``load_dataset`` and ``preprocess`` are optional hooks whose base
    implementations do nothing.
    """

    def __init__(self, config: "TrainingConfig"):
        """Initialize dataset; ``data`` starts out as ``None``."""
        self.config = config
        self.data = None

    @abstractmethod
    def __len__(self) -> int:
        """Return dataset size."""
        # Fail loudly instead of returning None, which makes len() raise an
        # unrelated-looking TypeError.
        raise NotImplementedError(
            f"{type(self).__name__} must implement __len__()"
        )

    @abstractmethod
    def __getitem__(self, idx: int) -> dict[str, torch.Tensor]:
        """
        Get a single sample.

        Returns:
            Dict with keys:
                - 'input_ids': 1D tensor of token IDs
                - 'attention_mask': 1D tensor of attention mask
                - 'labels': 1D tensor of labels (optional)
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement __getitem__()"
        )

    def load_dataset(self) -> None:
        """Load dataset from HuggingFace or other source (no-op by default)."""

    def preprocess(self) -> None:
        """Preprocess dataset (tokenization, etc); no-op by default."""
| # ============================================================================ | |
| # Base Trainer | |
| # ============================================================================ | |
| class BaseTrainer(ABC): | |
| """Abstract base class for trainers.""" | |
| def __init__( | |
| self, | |
| model: BaseModel, | |
| train_dataset: BaseDataset, | |
| val_dataset: Optional[BaseDataset], | |
| config: TrainingConfig, | |
| device: torch.device, | |
| ): | |
| """Initialize trainer.""" | |
| self.model = model.to(device) | |
| self.train_dataset = train_dataset | |
| self.val_dataset = val_dataset | |
| self.config = config | |
| self.device = device | |
| # Training state | |
| self.global_step = 0 | |
| self.current_epoch = 0 | |
| self.best_loss = float('inf') | |
| # Logging | |
| self.logger = None | |
| # Optimizer and scheduler (to be set up by subclass) | |
| self.optimizer = None | |
| self.scheduler = None | |
| def training_step(self, batch: dict[str, torch.Tensor]) -> dict[str, float]: | |
| """ | |
| Single training step. | |
| Args: | |
| batch: Training batch with input_ids, attention_mask, labels, etc. | |
| Returns: | |
| Dict with metrics (e.g., {'loss': 0.5, 'accuracy': 0.8}) | |
| """ | |
| pass | |
| def validation_step(self, batch: dict[str, torch.Tensor]) -> dict[str, float]: | |
| """ | |
| Single validation step. | |
| Args: | |
| batch: Validation batch | |
| Returns: | |
| Dict with validation metrics | |
| """ | |
| pass | |
| def train_epoch(self) -> dict[str, float]: | |
| """ | |
| Train for one epoch. | |
| Returns: | |
| Dict with epoch-level metrics | |
| """ | |
| pass | |
| def validate(self) -> dict[str, float]: | |
| """ | |
| Run validation on the entire validation set. | |
| Returns: | |
| Dict with validation metrics | |
| """ | |
| pass | |
| def save_checkpoint(self, path: str | Path) -> None: | |
| """ | |
| Save checkpoint in canonical format. | |
| Uses canonical checkpoint format: | |
| { | |
| 'step': int, | |
| 'model_state': state_dict, | |
| 'optimizer_state': state_dict, | |
| 'config': dict, | |
| 'metrics': dict, | |
| 'global_step': int, # Legacy compat | |
| 'current_epoch': int, # Legacy compat | |
| 'best_loss': float, # Legacy compat | |
| } | |
| Args: | |
| path: Path to save checkpoint | |
| """ | |
| path = Path(path) | |
| path.parent.mkdir(parents=True, exist_ok=True) | |
| # Save in canonical format | |
| checkpoint = { | |
| # Canonical format keys | |
| 'step': self.global_step, | |
| 'model_state': self.model.state_dict(), | |
| 'optimizer_state': self.optimizer.state_dict() if self.optimizer else None, | |
| 'config': self.config.to_dict(), | |
| 'metrics': {}, | |
| # Legacy format keys (for backward compatibility with code that reads them) | |
| 'global_step': self.global_step, | |
| 'current_epoch': self.current_epoch, | |
| 'best_loss': self.best_loss, | |
| } | |
| torch.save(checkpoint, path) | |
| def load_checkpoint(self, path: str | Path) -> None: | |
| """ | |
| Load checkpoint (handles both canonical and legacy formats). | |
| Args: | |
| path: Path to checkpoint | |
| """ | |
| path = Path(path) | |
| checkpoint = torch.load(path, map_location=self.device) | |
| # Try canonical keys first, fall back to legacy keys | |
| model_state_key = 'model_state' if 'model_state' in checkpoint else 'model_state_dict' | |
| optimizer_state_key = 'optimizer_state' if 'optimizer_state' in checkpoint else 'optimizer_state_dict' | |
| self.model.load_state_dict(checkpoint[model_state_key]) | |
| if self.optimizer and checkpoint.get(optimizer_state_key): | |
| self.optimizer.load_state_dict(checkpoint[optimizer_state_key]) | |
| # Try canonical 'step' first, fall back to legacy 'global_step' | |
| self.global_step = checkpoint.get('step', checkpoint.get('global_step', 0)) | |
| self.current_epoch = checkpoint.get('current_epoch', 0) | |
| self.best_loss = checkpoint.get('best_loss', float('inf')) | |
| def _get_lr(self) -> float: | |
| """Get current learning rate from optimizer.""" | |
| for param_group in self.optimizer.param_groups: | |
| return param_group['lr'] | |
| return 0.0 | |
| # ============================================================================ | |
| # Utility functions | |
| # ============================================================================ | |
def create_model(config: TrainingConfig, device: torch.device) -> BaseModel:
    """Build the model described by ``config.model`` via the model registry."""
    # Imported lazily, at call time rather than module import time.
    from taoTrain.models import get_model

    model_config = config.model
    model = get_model(model_config, device=device)
    return model
def create_datasets(
    config: TrainingConfig,
) -> tuple[BaseDataset, Optional[BaseDataset]]:
    """Build the training dataset and, when applicable, a validation dataset.

    The validation dataset is only created when ``config.dataset.local`` is
    falsy and ``config.dataset`` has a ``validation_split`` attribute;
    otherwise the second element of the returned tuple is ``None``.
    """
    # Deferred import: avoids a circular import at module load time.
    from taoTrain.data import DatasetFactory

    training_set = DatasetFactory.create_dataset(config, split="train")

    # NOTE(review): hasattr() is true even if validation_split is None —
    # confirm that attribute presence alone is the intended trigger.
    if config.dataset.local or not hasattr(config.dataset, "validation_split"):
        return training_set, None

    validation_set = DatasetFactory.create_dataset(config, split="validation")
    return training_set, validation_set