Edwin Jose Palathinkal committed
Commit: 6ac124b
Parent(s): 72c9685

Add HuggingFace Transformers compatibility with AutoModel and Pipeline support

- Add modeling_namer.py with NamerModel (PreTrainedModel + GenerationMixin)
- Add NamerPipeline for easy inference: pipe.generate(42) -> 'forty two'
- Add config.json and generation_config.json for HF integration
- Add convert_checkpoint.py for converting old checkpoints
- Update README with HF usage examples
- Update namer/__init__.py to export new HF-compatible classes

Files changed:
- .gitignore (+1, -0)
- README.md (+62, -36)
- config.json (+17, -0)
- convert_checkpoint.py (+29, -0)
- generation_config.json (+8, -0)
- modeling_namer.py (+342, -0)
- namer/__init__.py (+24, -1)
- namer/modeling_namer.py (+342, -0)
.gitignore CHANGED

@@ -39,3 +39,4 @@ Thumbs.db
 # Project specific
 namer_model.pt
 .pip-tmp/
+pip-tmp/
README.md CHANGED

@@ -33,63 +33,62 @@ Namer is a sequence-to-sequence transformer trained to read digits of a number a
 
 ## Usage
 
+### 🚀 HuggingFace Transformers (Recommended)
+
+Load and use the model with HuggingFace's `AutoModel` API:
 
 ```python
+from transformers import AutoModel
+from namer import NamerPipeline
 
+# Load model from HuggingFace
+model = AutoModel.from_pretrained(
+    "edwinhere/namer",
+    trust_remote_code=True
+)
 
+# Create pipeline
+pipe = NamerPipeline(model)
 
+# Generate number names
+result = pipe.generate(42)       # "forty two"
+result = pipe.generate(1234567)  # "one million two hundred thirty four thousand five hundred sixty seven"
 
+# Or use callable interface (HF compatible)
+result = pipe(42)  # {"generated_text": "forty two"}
 ```
 
+Alternatively, use the convenience function:
 
 ```python
+from namer import load_namer_pipeline
 
+pipe = load_namer_pipeline("edwinhere/namer")
+print(pipe.generate(42))  # "forty two"
 ```
 
+### 🔄 Original API (Local)
 
+```python
+import torch
+from namer import load_namer_model, predict_number_name
 
+# Load model
+model = load_namer_model("namer_model.pt")
 
+# Convert number to name
+name = predict_number_name(model, 42)
+print(f"42 -> '{name}'")
+```
 
+### 💻 Interactive Mode
 
 ```bash
+python -m namer infer
 ```
 
+Then enter numbers to convert interactively.
+
 ## Installation
 
 Choose either repository — both have identical code:

@@ -113,6 +112,33 @@ pip install -e .
 pip install git+https://github.com/edwinhere/namer.git
 ```
 
+## Model Architecture
+
+- **Type**: Sequence-to-sequence transformer
+- **Input**: Digits of the integer (as token indices)
+- **Output**: English words representing the number
+- **Vocabulary**: English number words (zero-nineteen, twenty-ninety, hundred, thousand, million, billion, etc.)
+- **Max Output Length**: 20 tokens
+
+## Files
+
+| File | Description |
+|------|-------------|
+| `pytorch_model.bin` | HuggingFace model weights |
+| `config.json` | Model configuration |
+| `generation_config.json` | Generation parameters |
+| `modeling_namer.py` | HF-compatible model implementation |
+| `namer_model.pt` | Original PyTorch checkpoint |
+| `namer/` | Source code package |
+
+## Training
+
+To train from scratch:
+
+```bash
+python -m namer train
+```
+
 ## Citation
 
 If you use this model, please cite:
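To make the input side of this architecture concrete, here is a hedged sketch of the digit encoding the model consumes, mirroring `NamerPipeline._int_to_digits` plus the padding step in `modeling_namer.py` below: digits 0-9 become token indices, index 10 is padding, and inputs are padded to the fixed length of 20.

```python
# Minimal sketch of the input encoding, assuming the conventions in
# modeling_namer.py: digit tokens 0-9, pad token 10, fixed length 20.
def encode_number(n: int, max_len: int = 20, pad_id: int = 10) -> list[int]:
    digits = [int(d) for d in str(n)] if n > 0 else [0]
    return digits + [pad_id] * (max_len - len(digits))

print(encode_number(42))  # [4, 2, 10, 10, ..., 10] (20 entries)
```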
config.json ADDED

{
  "architectures": [
    "NamerModel"
  ],
  "d_model": 128,
  "dim_feedforward": 512,
  "dropout": 0.0,
  "dtype": "float32",
  "eos_token_id": 40,
  "max_output_len": 20,
  "model_type": "custom",
  "nhead": 4,
  "num_encoder_layers": 4,
  "pad_token_id": 10,
  "transformers_version": "5.8.0",
  "vocab_size": 41
}
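These fields map one-to-one onto the `NamerConfig` class added in `modeling_namer.py` below. One detail worth noting: `config.json` pins `dropout` to 0.0 (the value used when the checkpoint was converted), while the class default is 0.1. A hedged sketch of the correspondence:

```python
# Minimal sketch, assuming modeling_namer.py is importable from the repo root.
from modeling_namer import NamerConfig

config = NamerConfig(dropout=0.0)  # other defaults mirror config.json
assert config.vocab_size == 41 and config.max_output_len == 20
assert config.eos_token_id == 40 and config.pad_token_id == 10
```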
convert_checkpoint.py ADDED

"""Convert old checkpoint format to HuggingFace format."""

import torch
from modeling_namer import NamerModel, NamerConfig

# Load old checkpoint
checkpoint = torch.load("namer_model.pt", map_location="cpu")

# Create config from checkpoint
config = NamerConfig(
    vocab_size=checkpoint["vocab_size"],
    max_output_len=checkpoint["max_output_len"],
    d_model=checkpoint.get("d_model", 128),
    nhead=4,
    num_encoder_layers=4,
    dim_feedforward=512,
    dropout=0.0,
)

# Create new model
model = NamerModel(config)

# Load old weights into new model
model.load_state_dict(checkpoint["model_state_dict"], strict=False)

# Save in HF format
model.save_pretrained(".")
print("Model converted and saved to current directory")
print("Files saved: pytorch_model.bin, config.json")
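A quick sanity check after running the script (a hedged sketch; it assumes the conversion above has been run in the repository root so the saved config and weights are present):

```python
# Reload the converted model via the standard from_pretrained path.
from modeling_namer import NamerModel

model = NamerModel.from_pretrained(".")
print(model.config.vocab_size, model.config.max_output_len)  # 41 20
```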
generation_config.json ADDED

{
  "_from_model_config": true,
  "eos_token_id": 40,
  "output_attentions": false,
  "output_hidden_states": false,
  "pad_token_id": 10,
  "transformers_version": "5.8.0"
}
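The token ids here agree with config.json: 40 is the `<EOS>` entry at the end of the output vocabulary and 10 is the digit pad. A hedged sketch of loading them through the standard transformers API:

```python
# Assumes the repo (a local dir or "edwinhere/namer" on the Hub) contains the
# generation_config.json shown above.
from transformers import GenerationConfig

gen_cfg = GenerationConfig.from_pretrained("edwinhere/namer")
assert gen_cfg.eos_token_id == 40 and gen_cfg.pad_token_id == 10
```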
modeling_namer.py ADDED

"""HuggingFace compatible Namer model."""

from __future__ import annotations

import math
from typing import Optional, Union

import torch
import torch.nn as nn
from transformers import PreTrainedModel, PretrainedConfig
from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions
from transformers.generation import GenerationMixin


class NamerConfig(PretrainedConfig):
    """Configuration class for NamerModel."""

    model_type = "custom"

    def __init__(
        self,
        vocab_size: int = 41,
        max_output_len: int = 20,
        d_model: int = 128,
        nhead: int = 4,
        num_encoder_layers: int = 4,
        dim_feedforward: int = 512,
        dropout: float = 0.1,
        pad_token_id: int = 10,
        eos_token_id: int = 40,  # <EOS> token index
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.max_output_len = max_output_len
        self.d_model = d_model
        self.nhead = nhead
        self.num_encoder_layers = num_encoder_layers
        self.dim_feedforward = dim_feedforward
        self.dropout = dropout

        super().__init__(
            pad_token_id=pad_token_id,
            eos_token_id=eos_token_id,
            **kwargs,
        )


class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding for transformer."""

    def __init__(self, d_model: int, max_len: int = 5000) -> None:
        super().__init__()

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float()
            * (-math.log(10000.0) / d_model)
        )

        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        self.register_buffer("pe", pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Add positional encoding to input."""
        return x + self.pe[: x.size(1)]


class NamerModel(PreTrainedModel, GenerationMixin):
    """HuggingFace compatible Namer transformer model.

    Converts integer digit sequences to English number names.
    """

    config_class = NamerConfig
    base_model_prefix = "namer"

    def __init__(self, config: NamerConfig):
        super().__init__(config)

        self.vocab_size = config.vocab_size
        self.max_output_len = config.max_output_len
        self.d_model = config.d_model

        # Digit embedding (10 digits + 1 padding token = 11)
        self.digit_embedding = nn.Embedding(11, config.d_model, padding_idx=config.pad_token_id)

        # Positional encoding
        self.pos_encoder = PositionalEncoding(config.d_model, max_len=100)

        # Transformer encoder
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=config.d_model,
            nhead=config.nhead,
            dim_feedforward=config.dim_feedforward,
            dropout=config.dropout,
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer, num_layers=config.num_encoder_layers
        )

        # Output projection
        self.output_projection = nn.Linear(config.d_model, config.vocab_size)

        # Learned queries for each output position
        self.output_queries = nn.Parameter(torch.randn(config.max_output_len, config.d_model))

        # Cross-attention from output positions to encoded input
        self.cross_attention = nn.MultiheadAttention(
            config.d_model, config.nhead, dropout=config.dropout, batch_first=True
        )

        # Final output layers
        self.output_norm = nn.LayerNorm(config.d_model)

        self.post_init()

    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        **kwargs,
    ) -> CausalLMOutputWithCrossAttentions:
        """Forward pass for HF compatibility.

        Args:
            input_ids: (batch_size, seq_len) tensor of digit indices (0-9), padding=10
            attention_mask: Optional mask for padding
            labels: Optional target labels for training

        Returns:
            CausalLMOutputWithCrossAttentions with logits
        """
        if input_ids is None:
            raise ValueError("input_ids must be provided")

        batch_size, seq_len = input_ids.shape

        # Handle padding: convert -1 padding to 10 (our padding index)
        digits = input_ids.clone()
        digits[digits == -1] = self.config.pad_token_id

        # Create padding mask for transformer (True = padding)
        if attention_mask is None:
            src_key_padding_mask = digits == self.config.pad_token_id
        else:
            src_key_padding_mask = ~attention_mask.bool()

        # Embed digits: (batch, seq_len, d_model)
        embedded = self.digit_embedding(digits)

        # Add positional encoding
        embedded = self.pos_encoder(embedded)

        # Transformer encoder: (batch, seq_len, d_model)
        memory = self.transformer_encoder(
            embedded, src_key_padding_mask=src_key_padding_mask
        )

        # Expand queries for batch: (batch, max_output_len, d_model)
        queries = self.output_queries.unsqueeze(0).expand(batch_size, -1, -1)

        # Cross-attention from queries to encoded input
        attn_output, _ = self.cross_attention(
            queries, memory, memory, key_padding_mask=src_key_padding_mask
        )

        # Normalize and project to vocab
        output = self.output_norm(attn_output)
        logits = self.output_projection(output)

        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss(ignore_index=-100)
            loss = loss_fct(logits.view(-1, self.vocab_size), labels.view(-1))

        return CausalLMOutputWithCrossAttentions(
            loss=loss,
            logits=logits,
            hidden_states=None,
            attentions=None,
            cross_attentions=None,
        )

    def prepare_inputs_for_generation(self, input_ids, **kwargs):
        """Prepare inputs for text generation."""
        return {"input_ids": input_ids}

    def _reorder_cache(self, past_key_values, beam_idx):
        """Reorder cache for beam search."""
        return past_key_values


class NamerPipeline:
    """Simple pipeline for Namer model inference.

    Usage:
        from transformers import AutoModel

        # Load model
        model = AutoModel.from_pretrained(
            "edwinhere/namer",
            trust_remote_code=True
        )

        # Create pipeline
        pipe = NamerPipeline(model)

        # Generate
        result = pipe.generate(42)  # "forty two"
        result = pipe(42)  # {"generated_text": "forty two"}
    """

    def __init__(self, model: NamerModel, tokenizer=None, device: str = None):
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = model.to(device)
        self.model.eval()
        self.device = device
        self.tokenizer = tokenizer  # Placeholder if we add a tokenizer later

        # Vocabulary mapping (index -> word)
        # Must match utils.py vocabulary exactly
        self.id2word = {
            0: "zero", 1: "one", 2: "two", 3: "three", 4: "four",
            5: "five", 6: "six", 7: "seven", 8: "eight", 9: "nine",
            10: "ten", 11: "eleven", 12: "twelve", 13: "thirteen", 14: "fourteen",
            15: "fifteen", 16: "sixteen", 17: "seventeen", 18: "eighteen", 19: "nineteen",
            20: "twenty", 21: "thirty", 22: "forty", 23: "fifty",
            24: "sixty", 25: "seventy", 26: "eighty", 27: "ninety",
            28: "hundred",
            29: "thousand", 30: "million", 31: "billion", 32: "trillion",
            33: "quadrillion", 34: "quintillion", 35: "sextillion",
            36: "septillion", 37: "octillion", 38: "nonillion", 39: "decillion",
            40: "<EOS>"
        }

        # Reverse mapping
        self.word2id = {v: k for k, v in self.id2word.items()}

    def _int_to_digits(self, n: int) -> list[int]:
        """Convert integer to list of digit indices."""
        if n == 0:
            return [0]
        digits = []
        while n > 0:
            digits.append(n % 10)
            n //= 10
        return digits[::-1]  # Reverse to get most significant digit first

    def _decode(self, token_ids: list[int]) -> str:
        """Decode token IDs to text, stopping at first EOS."""
        words = []
        eos_idx = self.model.config.eos_token_id  # Should be 40

        for idx in token_ids:
            if idx == eos_idx:  # Stop at EOS
                break
            if idx in self.id2word:
                word = self.id2word[idx]
                if word != "<EOS>":  # Skip EOS token itself
                    words.append(word)

        return " ".join(words) if words else "zero"

    def generate(self, text: Union[str, int], **kwargs) -> str:
        """Generate English name for a number.

        Args:
            text: Integer or string representation of integer

        Returns:
            English name of the number
        """
        # Parse input
        if isinstance(text, str):
            n = int(text.strip())
        else:
            n = int(text)

        # Convert to digits
        digits = self._int_to_digits(n)

        # Pad to max length (20)
        while len(digits) < 20:
            digits.append(10)  # padding token

        # Create tensor
        input_ids = torch.tensor([digits], dtype=torch.long).to(self.device)

        # Forward pass
        with torch.no_grad():
            outputs = self.model(input_ids)
            logits = outputs.logits
            predictions = logits.argmax(dim=-1)[0].cpu().tolist()

        # Decode
        return self._decode(predictions)

    def __call__(self, text: Union[str, int], **kwargs) -> dict:
        """Callable interface for pipeline.

        Returns dict with 'generated_text' key for HF pipeline compatibility.
        """
        result = self.generate(text, **kwargs)
        return {"generated_text": result}


def load_namer_pipeline(model_name_or_path: str = "edwinhere/namer", device: str = None, **kwargs):
    """Load a Namer pipeline with model.

    This is a convenience function that loads both the model and creates
    a pipeline for easy inference.

    Args:
        model_name_or_path: HuggingFace model ID or local path
        device: Device to run on ('cuda', 'cpu', or None for auto)
        **kwargs: Additional args passed to from_pretrained

    Returns:
        NamerPipeline instance ready for inference

    Example:
        >>> pipe = load_namer_pipeline("edwinhere/namer")
        >>> pipe.generate(42)
        'forty two'
        >>> pipe(123)
        {'generated_text': 'one hundred twenty three'}
    """
    from transformers import AutoModel

    model = AutoModel.from_pretrained(
        model_name_or_path,
        trust_remote_code=True,
        **kwargs
    )

    return NamerPipeline(model, device=device)
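A note on how decoding works here: the forward pass is non-autoregressive. The learned `output_queries` cross-attend to the encoded digits and the model emits logits for all 20 output positions in one shot, which is why `NamerPipeline.generate` only needs a per-position argmax plus EOS truncation rather than a token-by-token loop. A hedged sketch of a raw forward pass outside the pipeline (random weights here, so the output is meaningless until trained or converted weights are loaded):

```python
import torch
from modeling_namer import NamerConfig, NamerModel

# Digits of 42, padded with the pad token (10) to the fixed input length of 20.
model = NamerModel(NamerConfig(dropout=0.0)).eval()
input_ids = torch.tensor([[4, 2] + [10] * 18])  # shape (1, 20)
with torch.no_grad():
    logits = model(input_ids).logits            # shape (1, 20, 41)
print(logits.argmax(dim=-1)[0].tolist())        # one token id per output position
```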
namer/__init__.py CHANGED

@@ -1,7 +1,8 @@
 """Namer - A PyTorch transformer model for converting numbers to English names."""
 
+__version__ = "0.3.0"
 
+# Original API
 from namer.models import NamerTransformer, load_namer_model
 from namer.inference import predict_number_name
 from namer.utils import (
@@ -15,7 +16,20 @@ from namer.utils import (
     read_double,
 )
 
+# HuggingFace compatible API
+try:
+    from .modeling_namer import (
+        NamerModel,
+        NamerConfig,
+        NamerPipeline,
+        load_namer_pipeline,
+    )
+    HF_AVAILABLE = True
+except ImportError:
+    HF_AVAILABLE = False
+
 __all__ = [
+    # Original API
     "NamerTransformer",
     "load_namer_model",
     "predict_number_name",
@@ -28,3 +42,12 @@ __all__ = [
     "read_triplet",
     "read_double",
 ]
+
+if HF_AVAILABLE:
+    __all__.extend([
+        # HuggingFace API
+        "NamerModel",
+        "NamerConfig",
+        "NamerPipeline",
+        "load_namer_pipeline",
+    ])
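The try/except guard above keeps the package importable when transformers is not installed; `HF_AVAILABLE` then signals which API is usable. A hedged usage sketch:

```python
import namer

if namer.HF_AVAILABLE:
    pipe = namer.load_namer_pipeline("edwinhere/namer")
    print(pipe.generate(42))  # "forty two"
else:
    model = namer.load_namer_model("namer_model.pt")
    print(namer.predict_number_name(model, 42))
```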
namer/modeling_namer.py ADDED

Identical in content to modeling_namer.py above (+342 lines); the same module is added both at the repository root and inside the namer/ package.