Edwin Jose Palathinkal committed 17 days ago

Commit

2730fd2

0 Parent(s):

Initial commit

Files changed (17) hide show

.gitignore +41 -0
Makefile +100 -0
README.md +158 -0
namer/__init__.py +30 -0
namer/__main__.py +8 -0
namer/data.py +137 -0
namer/inference.py +97 -0
namer/main.py +211 -0
namer/models.py +169 -0
namer/training.py +192 -0
namer/utils.py +267 -0
pyproject.toml +100 -0
tests/__init__.py +1 -0
tests/test_data.py +86 -0
tests/test_inference.py +71 -0
tests/test_models.py +91 -0
tests/test_utils.py +193 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,41 @@

+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+*.egg-info/
+dist/
+build/
+*.egg
+# Virtual environments
+.venv/
+venv/
+ENV/
+env/
+# PyTorch
+*.pt
+*.pth
+# Testing
+.pytest_cache/
+.coverage
+htmlcov/
+.tox/
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+# OS
+.DS_Store
+Thumbs.db
+# Project specific
+namer_model.pt
+.pip-tmp/

Makefile ADDED Viewed

	@@ -0,0 +1,100 @@

+# Every target below except $(VENV) is a command, not a file, so all of them
+# are declared phony; otherwise a stray file named e.g. "check" would silently
+# disable the target.
+.PHONY: help install dev train infer test test-cov lint lint-fix format typecheck check clean distclean
+# Python environment
+PYTHON := python3
+VENV := .venv
+VENV_PYTHON := $(VENV)/bin/python
+# Default target (first rule in the file)
+help:
+	@echo "Namer - Number to Name Transformer"
+	@echo ""
+	@echo "Available targets:"
+	@echo "  make install    - Install package in development mode"
+	@echo "  make dev        - Install with dev dependencies"
+	@echo "  make train      - Train the model"
+	@echo "  make infer      - Run interactive inference"
+	@echo "  make test       - Run test suite"
+	@echo "  make test-cov   - Run test suite with coverage report"
+	@echo "  make lint       - Run linting (ruff)"
+	@echo "  make lint-fix   - Auto-fix linting issues (ruff)"
+	@echo "  make format     - Format code (ruff)"
+	@echo "  make typecheck  - Run type checking (mypy)"
+	@echo "  make check      - Run lint, typecheck and tests"
+	@echo "  make clean      - Remove generated files and caches"
+	@echo "  make distclean  - Deep clean including venv"
+	@echo ""
+# Create virtual environment and upgrade pip inside it
+$(VENV):
+	@echo "Creating virtual environment..."
+	$(PYTHON) -m venv $(VENV)
+	$(VENV_PYTHON) -m pip install --upgrade pip
+# The venv directory is an order-only prerequisite (after "|") everywhere
+# below: it only has to exist, its timestamp must never matter.
+# Install package
+install: | $(VENV)
+	@echo "Installing package..."
+	$(VENV_PYTHON) -m pip install -e .
+# Install with dev dependencies
+dev: | $(VENV)
+	@echo "Installing with dev dependencies..."
+	$(VENV_PYTHON) -m pip install -e ".[dev]"
+# Run training
+train: | $(VENV)
+	@echo "Starting training..."
+	$(VENV_PYTHON) -m namer train
+# Run interactive inference
+infer: | $(VENV)
+	@echo "Starting inference..."
+	$(VENV_PYTHON) -m namer infer
+# Run tests
+test: | $(VENV)
+	@echo "Running tests..."
+	$(VENV_PYTHON) -m pytest -v
+# Run tests with coverage
+test-cov: | $(VENV)
+	@echo "Running tests with coverage..."
+	$(VENV_PYTHON) -m pytest --cov=namer --cov-report=html --cov-report=term
+# Run linting
+lint: | $(VENV)
+	@echo "Running ruff linter..."
+	$(VENV_PYTHON) -m ruff check namer tests
+# Fix linting issues
+lint-fix: | $(VENV)
+	@echo "Fixing linting issues..."
+	$(VENV_PYTHON) -m ruff check --fix namer tests
+# Format code
+format: | $(VENV)
+	@echo "Formatting code..."
+	$(VENV_PYTHON) -m ruff format namer tests
+# Run type checking
+typecheck: | $(VENV)
+	@echo "Running mypy..."
+	$(VENV_PYTHON) -m mypy namer
+# Run all checks
+check: lint typecheck test
+	@echo "All checks passed!"
+# Clean generated files.
+# The venv is pruned from every find so that "clean" never touches installed
+# packages (their *.egg-info metadata in particular); "distclean" removes the
+# venv wholesale. Matched directories are pruned before removal so find does
+# not try to descend into something that is about to be deleted, which makes
+# error suppression unnecessary. "-exec rm" is used instead of "-delete"
+# because "-delete" implies depth-first traversal, which disables "-prune".
+clean:
+	@echo "Cleaning generated files..."
+	rm -f namer_model.pt
+	rm -rf htmlcov .pytest_cache .coverage
+	find . -path "./$(VENV)" -prune -o -type d -name "__pycache__" -prune -exec rm -rf {} +
+	find . -path "./$(VENV)" -prune -o -type f \( -name "*.pyc" -o -name "*.pyo" \) -exec rm -f {} +
+	find . -path "./$(VENV)" -prune -o -type d -name "*.egg-info" -prune -exec rm -rf {} +
+	@echo "Clean complete!"
+# Deep clean
+distclean: clean
+	@echo "Removing virtual environment..."
+	rm -rf $(VENV)
+	@echo "All clean! Run 'make dev' to start fresh."

README.md ADDED Viewed

	@@ -0,0 +1,158 @@

+# Namer
+A PyTorch transformer model that converts numbers to their English names.
+## Features
+- **Transformer architecture** with cross-attention mechanism
+- **Infinite dataset** training with early stopping
+- **Modular design** following Python best practices
+- **Type hints** throughout for better IDE support
+- **Comprehensive test suite** with pytest
+- **Modern tooling**: ruff (linting/formatting), mypy (type checking)
+## Installation
+```bash
+# Clone the repository
+git clone https://github.com/example/namer.git
+cd namer
+# Create virtual environment
+python -m venv .venv
+source .venv/bin/activate  # On Windows: .venv\Scripts\activate
+# Install in development mode
+pip install -e ".[dev]"
+```
+## Usage
+### Command Line Interface
+```bash
+# Show help
+namer --help
+# Run demonstrations
+namer demo
+# Train the model
+namer train
+# Train with custom settings
+namer train --epochs 50 --steps 2000 --batch-size 64 --lr 0.0005
+# Run interactive inference
+namer infer
+# Run quick test
+namer test
+```
+### Python API
+```python
+from namer import NamerTransformer, load_namer_model, predict_number_name
+# Load a trained model
+model = load_namer_model("namer_model.pt")
+# Predict number names
+name = predict_number_name(model, 123456)
+print(name)  # "one hundred twenty three thousand four hundred fifty six"
+```
+## Project Structure
+```
+namer/
+├── namer/                  # Main package
+│   ├── __init__.py        # Package exports
+│   ├── main.py            # CLI entry point
+│   ├── models.py          # Transformer model definitions
+│   ├── data.py            # Dataset classes
+│   ├── training.py        # Training loop
+│   ├── inference.py       # Inference utilities
+│   └── utils.py           # Number-to-name conversion utilities
+├── tests/                 # Test suite
+│   ├── test_utils.py
+│   ├── test_models.py
+│   ├── test_data.py
+│   └── test_inference.py
+├── pyproject.toml         # Project configuration
+├── README.md
+└── Makefile              # Convenience commands
+```
+## Development
+### Running Tests
+```bash
+# Run all tests
+pytest
+# Run with coverage
+pytest --cov=namer --cov-report=html
+# Run specific test file
+pytest tests/test_utils.py
+```
+### Linting and Formatting
+```bash
+# Check code style
+ruff check .
+# Fix auto-fixable issues
+ruff check --fix .
+# Format code
+ruff format .
+# Type checking
+mypy namer
+```
+### Makefile Commands
+```bash
+make help       # Show available commands
+make install    # Install dependencies
+make train      # Train the model
+make infer      # Run interactive inference
+make test       # Run tests
+make clean      # Clean generated files
+make distclean  # Deep clean including venv
+```
+## Model Architecture
+The `NamerTransformer` uses an encoder-only architecture:
+1. **Digit Embedding** - Embeds digits 0-9 (plus padding token)
+2. **Positional Encoding** - Sinusoidal positional embeddings
+3. **Transformer Encoder** - Multi-layer encoder with self-attention
+4. **Cross-Attention** - Learned output queries attend to encoded digits
+5. **Output Projection** - Projects to vocabulary for each output position
+## Training
+The model trains on an infinite dataset that generates random number-to-name mappings on-the-fly:
+- Numbers up to 999,999 (configurable)
+- Early stopping with patience (default: 10 epochs)
+- Cross-entropy loss with -1 padding ignored
+- Adam optimizer with configurable learning rate
+## Requirements
+- Python 3.10+
+- PyTorch 2.0+
+- CUDA-capable GPU (optional, falls back to CPU)
+## License
+MIT License - see LICENSE file for details.

namer/__init__.py ADDED Viewed

	@@ -0,0 +1,30 @@

+"""Namer - A PyTorch transformer model for converting numbers to English names."""
+__version__ = "0.2.0"
+from namer.models import NamerTransformer, load_namer_model
+from namer.inference import predict_number_name
+from namer.utils import (
+    VOCABULARY,
+    encode,
+    decode,
+    int_to_digits,
+    digits_to_int,
+    read_digits,
+    read_triplet,
+    read_double,
+)
+__all__ = [
+    "NamerTransformer",
+    "load_namer_model",
+    "predict_number_name",
+    "VOCABULARY",
+    "encode",
+    "decode",
+    "int_to_digits",
+    "digits_to_int",
+    "read_digits",
+    "read_triplet",
+    "read_double",
+]

namer/__main__.py ADDED Viewed

	@@ -0,0 +1,8 @@

+"""Make namer package executable."""
+import sys
+from namer.main import main
+if __name__ == "__main__":
+    sys.exit(main())

namer/data.py ADDED Viewed

	@@ -0,0 +1,137 @@

+"""Dataset classes for Namer."""
+from __future__ import annotations
+import random
+import torch
+from torch.utils.data import IterableDataset, TensorDataset
+from namer.utils import EOS_IDX, encode, int_to_digits, read_digits
+class NamerDataset(TensorDataset):
+    """Finite dataset mapping random integers to encoded number names."""
+    def __init__(
+        self,
+        num_samples: int = 1000,
+        max_int: int = 999999,
+        max_seq_len: int = 20,
+        seed: int = 42,
+    ) -> None:
+        """Create a PyTorch TensorDataset mapping random integers to encoded number names.
+        Args:
+            num_samples: Number of samples to generate
+            max_int: Maximum random integer value
+            max_seq_len: Maximum sequence length for padding
+            seed: Random seed for reproducibility
+        """
+        rng = random.Random(seed)
+        digit_sequences: list[list[int]] = []
+        encoded_names: list[list[int]] = []
+        for _ in range(num_samples):
+            n = rng.randint(0, max_int)
+            digits = int_to_digits(n)
+            name = read_digits(digits)
+            encoded = encode(name)
+            digit_sequences.append(digits)
+            encoded_names.append(encoded)
+        # Pad sequences
+        padded_digits: list[list[int]] = []
+        padded_encoded: list[list[int]] = []
+        for digits, encoded in zip(digit_sequences, encoded_names):
+            # Pad digits with 10 to indicate padding
+            digits_padded = digits + [10] * (max_seq_len - len(digits))
+            digits_padded = digits_padded[:max_seq_len]
+            # Append EOS token to encoded, then pad with -1
+            encoded_with_eos = encoded + [EOS_IDX]
+            encoded_padded = encoded_with_eos + [-1] * (max_seq_len - len(encoded_with_eos))
+            encoded_padded = encoded_padded[:max_seq_len]
+            padded_digits.append(digits_padded)
+            padded_encoded.append(encoded_padded)
+        # Convert to tensors
+        digits_tensor = torch.tensor(padded_digits, dtype=torch.long)
+        encoded_tensor = torch.tensor(padded_encoded, dtype=torch.long)
+        super().__init__(digits_tensor, encoded_tensor)
+class InfiniteNamerDataset(IterableDataset):
+    """Infinite dataset that generates random number-to-name mappings on-the-fly.
+    Uses Python generators to produce an endless stream of training samples.
+    Each iteration yields fresh random samples.
+    """
+    def __init__(
+        self,
+        max_int: int = 999999,
+        max_seq_len: int = 20,
+        seed: int | None = None,
+    ) -> None:
+        """Initialize the infinite dataset.
+        Args:
+            max_int: Maximum random integer value
+            max_seq_len: Maximum sequence length for padding
+            seed: Random seed (optional, for reproducibility)
+        """
+        self.max_int = max_int
+        self.max_seq_len = max_seq_len
+        self.seed = seed
+        self.rng = random.Random(seed)
+    def _generate_sample(self) -> tuple[torch.Tensor, torch.Tensor]:
+        """Generate a single (digits, encoded_name) sample."""
+        n = self.rng.randint(0, self.max_int)
+        digits = int_to_digits(n)
+        name = read_digits(digits)
+        encoded = encode(name)
+        # Pad digits with 10 (padding index)
+        digits_padded = digits + [10] * (self.max_seq_len - len(digits))
+        digits_padded = digits_padded[: self.max_seq_len]
+        # Append EOS and pad with -1
+        encoded_with_eos = encoded + [EOS_IDX]
+        encoded_padded = encoded_with_eos + [-1] * (self.max_seq_len - len(encoded_with_eos))
+        encoded_padded = encoded_padded[: self.max_seq_len]
+        return (
+            torch.tensor(digits_padded, dtype=torch.long),
+            torch.tensor(encoded_padded, dtype=torch.long),
+        )
+    def __iter__(self) -> InfiniteNamerDataset:
+        """Yield samples infinitely.
+        Each worker in multi-worker DataLoader gets its own iterator
+        with a unique seed based on worker_id.
+        """
+        worker_info = torch.utils.data.get_worker_info()
+        if worker_info is None:
+            # Single-process loading
+            rng_seed = self.seed if self.seed else random.randint(0, 2**32)
+            self.rng = random.Random(rng_seed)
+        else:
+            # Multi-worker: each worker gets unique seed
+            worker_id = worker_info.id
+            base_seed = self.seed if self.seed else random.randint(0, 2**32)
+            self.rng = random.Random(base_seed + worker_id * 1000)
+        return self
+    def __next__(self) -> tuple[torch.Tensor, torch.Tensor]:
+        """Generate the next sample."""
+        return self._generate_sample()

namer/inference.py ADDED Viewed

	@@ -0,0 +1,97 @@

+"""Inference utilities for Namer models."""
+from __future__ import annotations
+import torch
+from namer.models import NamerTransformer, load_namer_model
+from namer.utils import EOS_IDX, decode, int_to_digits
+def predict_number_name(
+    model: NamerTransformer,
+    n: int,
+    device: str | torch.device | None = None,
+) -> str:
+    """Predict the English name of a number using the trained model.
+    Stops generation when <EOS> token is predicted.
+    Args:
+        model: Trained model
+        n: Integer to convert to name
+        device: Device to run inference on (auto-detected if None)
+    Returns:
+        Predicted English name of the number
+    """
+    if device is None:
+        device = next(model.parameters()).device
+    model.eval()
+    with torch.no_grad():
+        digits = int_to_digits(n)
+        padded = digits + [10] * (model.max_output_len - len(digits))
+        input_tensor = torch.tensor([padded], dtype=torch.long).to(device)
+        logits = model(input_tensor)
+        predictions = logits.argmax(dim=-1)[0].cpu().tolist()
+        # Collect tokens until EOS is predicted or max length reached
+        pred_indices: list[int] = []
+        for idx in predictions:
+            if idx == EOS_IDX:
+                break
+            pred_indices.append(idx)
+        # Try to decode
+        try:
+            return decode(pred_indices)
+        except ValueError:
+            # If decoding fails, try progressively shorter sequences
+            for length in range(len(pred_indices), 0, -1):
+                try:
+                    return decode(pred_indices[:length])
+                except ValueError:
+                    continue
+            return f"<decode error: {pred_indices}>"
+def interactive_inference(model_path: str = "namer_model.pt") -> None:
+    """Run interactive inference session.
+    Args:
+        model_path: Path to the saved model file
+    """
+    import sys
+    print("Loading model...")
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    print(f"Using device: {device}")
+    try:
+        model = load_namer_model(model_path, device)
+        print("Model loaded successfully!\n")
+    except FileNotFoundError:
+        print(f"Error: Model file '{model_path}' not found.")
+        print("Please run training first: python -m namer train")
+        sys.exit(1)
+    print("Enter a number to convert (or 'quit' to exit):")
+    while True:
+        try:
+            user_input = input("> ").strip()
+            if user_input.lower() in ("quit", "exit", "q"):
+                break
+            n = int(user_input)
+            name = predict_number_name(model, n, device)
+            print(f"  {n} -> '{name}'\n")
+        except ValueError:
+            print("  Please enter a valid integer\n")
+        except KeyboardInterrupt:
+            print("\nGoodbye!")
+            break

namer/main.py ADDED Viewed

	@@ -0,0 +1,211 @@

+"""Main entry point for namer CLI."""
+from __future__ import annotations
+import argparse
+import sys
+import torch
+from namer.data import InfiniteNamerDataset
+from namer.inference import interactive_inference, predict_number_name
+from namer.models import NamerTransformer, load_namer_model
+from namer.training import save_model, train_namer_model
+from namer.utils import VOCABULARY, encode, int_to_digits, read_digits
+def demo_command(args: argparse.Namespace) -> None:
+    """Run number name demonstration."""
+    print("--- Number Names Demo ---")
+    print("\nread_double (two digits):")
+    double_cases = [(0, 7), (1, 1), (2, 3), (3, 0), (0, 0), (5, 9)]
+    for a, b in double_cases:
+        from namer.utils import read_double
+        print(f"  read_double({a}, {b}) = '{read_double(a, b)}'")
+    print("\nread_triplet (three digits):")
+    triplet_cases = [(1, 0, 6), (0, 0, 0), (9, 1, 9), (2, 0, 0), (0, 5, 5), (4, 2, 0)]
+    for a, b, c in triplet_cases:
+        from namer.utils import read_triplet
+        print(f"  read_triplet({a}, {b}, {c}) = '{read_triplet(a, b, c)}'")
+    print(f"\nVOCABULARY ({len(VOCABULARY)} words):")
+    print(f"  {VOCABULARY}")
+    print("\nencode (text to vocabulary indices):")
+    encode_cases = [
+        "one million",
+        "twenty three",
+        "one hundred twenty three",
+        "nine hundred nineteen",
+        "zero",
+    ]
+    for text in encode_cases:
+        print(f"  encode('{text}') = {encode(text)}")
+    print("\nencode/decode round-trip:")
+    for text in ["one million", "twenty three", "zero"]:
+        encoded = encode(text)
+        from namer.utils import decode
+        decoded = decode(encoded)
+        print(f"  '{text}' -> {encoded} -> '{decoded}'")
+    print("\nint_to_digits (integer to digit list):")
+    int_cases = [0, 7, 123, -456, 1002003, 9876543210]
+    for n in int_cases:
+        print(f"  int_to_digits({n}) = {int_to_digits(n)}")
+def train_command(
+    num_epochs: int = 30,
+    steps_per_epoch: int = 1000,
+    batch_size: int = 128,
+    learning_rate: float = 0.001,
+) -> None:
+    """Train the Namer model.
+    Args:
+        num_epochs: Number of training epochs
+        steps_per_epoch: Number of steps per epoch
+        batch_size: Batch size for training
+        learning_rate: Learning rate for optimizer
+    """
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    if device.type == "cuda":
+        print(f"Using GPU: {torch.cuda.get_device_name(device)}")
+    else:
+        print("Warning: CUDA not available, using CPU")
+    # Create infinite dataset for training
+    infinite_dataset = InfiniteNamerDataset(
+        max_int=999999,
+        max_seq_len=20,
+        seed=42,
+    )
+    # Create model
+    model = NamerTransformer(
+        vocab_size=len(VOCABULARY),
+        max_output_len=20,
+        d_model=128,
+        nhead=4,
+        num_encoder_layers=4,
+        dim_feedforward=512,
+        dropout=0.1,
+    )
+    print(f"\nTransformer Model parameters: {sum(p.numel() for p in model.parameters()):,}")
+    # Train model
+    trained_model = train_namer_model(
+        model=model,
+        infinite_dataset=infinite_dataset,
+        num_epochs=num_epochs,
+        steps_per_epoch=steps_per_epoch,
+        val_steps=100,
+        batch_size=batch_size,
+        learning_rate=learning_rate,
+    )
+    # Save model
+    save_model(trained_model)
+    # Test predictions
+    print("\n--- Model Predictions ---")
+    trained_model.eval()
+    test_numbers = [123, 4567, 89012, 555555, 999999, 42, 0, 1000]
+    device_obj = next(trained_model.parameters()).device
+    with torch.no_grad():
+        for n in test_numbers:
+            pred = predict_number_name(trained_model, n, device_obj)
+            actual = read_digits(int_to_digits(n))
+            match = "✓" if pred == actual else "✗"
+            print(f"  {n}: pred='{pred}', actual='{actual}' {match}")
+def test_command() -> None:
+    """Run quick inference test on saved model."""
+    try:
+        model = load_namer_model("namer_model.pt")
+    except FileNotFoundError:
+        print("Error: Model file 'namer_model.pt' not found.")
+        print("Please train the model first: python -m namer train")
+        sys.exit(1)
+    print("Running inference on loaded model:")
+    test_nums = [42, 123, 1000, 999999]
+    for n in test_nums:
+        pred = predict_number_name(model, n)
+        actual = read_digits(int_to_digits(n))
+        match = "✓" if pred == actual else "✗"
+        print(f"  {n} -> '{pred}' (actual: '{actual}') {match}")
+def main(argv: list[str] | None = None) -> int:
+    """Main CLI entry point.
+    Args:
+        argv: Command line arguments (defaults to sys.argv)
+    Returns:
+        Exit code
+    """
+    parser = argparse.ArgumentParser(
+        prog="namer",
+        description="A PyTorch transformer model for converting numbers to their English names.",
+    )
+    subparsers = parser.add_subparsers(dest="command", help="Available commands")
+    # Demo command
+    demo_parser = subparsers.add_parser("demo", help="Run number name demonstrations")
+    demo_parser.set_defaults(func=demo_command)
+    # Train command
+    train_parser = subparsers.add_parser("train", help="Train the model")
+    train_parser.add_argument(
+        "--epochs", type=int, default=30, help="Number of training epochs (default: 30)"
+    )
+    train_parser.add_argument(
+        "--steps", type=int, default=1000, help="Steps per epoch (default: 1000)"
+    )
+    train_parser.add_argument(
+        "--batch-size", type=int, default=128, help="Batch size (default: 128)"
+    )
+    train_parser.add_argument(
+        "--lr", type=float, default=0.001, help="Learning rate (default: 0.001)"
+    )
+    train_parser.set_defaults(
+        func=lambda args: train_command(
+            num_epochs=args.epochs,
+            steps_per_epoch=args.steps,
+            batch_size=args.batch_size,
+            learning_rate=args.lr,
+        )
+    )
+    # Inference command
+    infer_parser = subparsers.add_parser("infer", help="Run interactive inference")
+    infer_parser.set_defaults(func=lambda args: interactive_inference())
+    # Test command
+    test_parser = subparsers.add_parser("test", help="Run quick inference test")
+    test_parser.set_defaults(func=lambda args: test_command())
+    args = parser.parse_args(argv)
+    if args.command is None:
+        parser.print_help()
+        return 0
+    args.func(args)
+    return 0
+if __name__ == "__main__":
+    sys.exit(main())

namer/models.py ADDED Viewed

	@@ -0,0 +1,169 @@

+"""Model definitions for Namer."""
+from __future__ import annotations
+import torch
+import torch.nn as nn
+class PositionalEncoding(nn.Module):
+    """Sinusoidal positional encoding for transformer."""
+    def __init__(self, d_model: int, max_len: int = 5000) -> None:
+        super().__init__()
+        pe = torch.zeros(max_len, d_model)
+        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
+        div_term = torch.exp(
+            torch.arange(0, d_model, 2).float()
+            * (-torch.log(torch.tensor(10000.0)) / d_model)
+        )
+        pe[:, 0::2] = torch.sin(position * div_term)
+        pe[:, 1::2] = torch.cos(position * div_term)
+        self.register_buffer("pe", pe)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Add positional encoding to input.
+        Args:
+            x: (batch_size, seq_len, d_model)
+        Returns:
+            Tensor with positional encoding added
+        """
+        return x + self.pe[: x.size(1)]
+class NamerTransformer(nn.Module):
+    """Transformer model for mapping digit sequences to number name tokens.
+    Architecture:
+    - Embedding layer for digits (11 values: 0-9 + padding)
+    - Positional encoding
+    - Transformer encoder layers
+    - Learned per-position output queries that cross-attend to the encoder output
+    - Layer norm, then output projection to vocabulary for each position
+    All output positions are produced in a single forward pass.
+    """
+    def __init__(
+        self,
+        vocab_size: int = 40,
+        max_output_len: int = 20,
+        d_model: int = 128,
+        nhead: int = 4,
+        num_encoder_layers: int = 4,
+        dim_feedforward: int = 512,
+        dropout: float = 0.1,
+    ) -> None:
+        """Create the model's layers.
+        Args:
+            vocab_size: Size of the output vocabulary (width of the logits)
+            max_output_len: Number of output positions (learned queries)
+            d_model: Embedding / hidden dimension
+            nhead: Attention heads in the encoder layers and the cross-attention
+            num_encoder_layers: Number of stacked encoder layers
+            dim_feedforward: Feed-forward width inside each encoder layer
+            dropout: Dropout used by the encoder layers and the cross-attention
+        """
+        super().__init__()
+        self.vocab_size = vocab_size
+        self.max_output_len = max_output_len
+        self.d_model = d_model
+        # Digit embedding (10 digits + 1 padding token = 11)
+        self.digit_embedding = nn.Embedding(11, d_model, padding_idx=10)
+        # Positional encoding
+        self.pos_encoder = PositionalEncoding(d_model, max_len=100)
+        # Transformer encoder
+        encoder_layer = nn.TransformerEncoderLayer(
+            d_model=d_model,
+            nhead=nhead,
+            dim_feedforward=dim_feedforward,
+            dropout=dropout,
+            batch_first=True,
+        )
+        self.transformer_encoder = nn.TransformerEncoder(
+            encoder_layer, num_layers=num_encoder_layers
+        )
+        # Output projection
+        self.output_projection = nn.Linear(d_model, vocab_size)
+        # Learned queries for each output position
+        self.output_queries = nn.Parameter(torch.randn(max_output_len, d_model))
+        # Cross-attention from output positions to encoded input
+        self.cross_attention = nn.MultiheadAttention(
+            d_model, nhead, dropout=dropout, batch_first=True
+        )
+        # Final output layers
+        self.output_norm = nn.LayerNorm(d_model)
+    def forward(self, digits: torch.Tensor) -> torch.Tensor:
+        """Forward pass.
+        Args:
+            digits: (batch_size, seq_len) tensor of digit indices (0-9), padding=10
+                (-1 is also accepted as padding and is mapped to 10)
+        Returns:
+            (batch_size, max_output_len, vocab_size) logits
+        """
+        # Unpacking also enforces that the input is 2-D; seq_len itself is not used below
+        batch_size, seq_len = digits.shape
+        # Handle padding: convert -1 padding to 10 (our padding index)
+        # (clone first so the caller's tensor is not modified in place)
+        digits = digits.clone()
+        digits[digits == -1] = 10
+        # Create padding mask for transformer (True = padding)
+        src_key_padding_mask = digits == 10
+        # Embed digits: (batch, seq_len, d_model)
+        embedded = self.digit_embedding(digits)
+        # Add positional encoding
+        embedded = self.pos_encoder(embedded)
+        # Transformer encoder: (batch, seq_len, d_model)
+        memory = self.transformer_encoder(
+            embedded, src_key_padding_mask=src_key_padding_mask
+        )
+        # Expand queries for batch: (batch, max_output_len, d_model)
+        queries = self.output_queries.unsqueeze(0).expand(batch_size, -1, -1)
+        # Cross-attention from queries to encoded input
+        # (padded input positions are masked out as keys)
+        attn_output, _ = self.cross_attention(
+            queries, memory, memory, key_padding_mask=src_key_padding_mask
+        )
+        # Normalize and project to vocab
+        output = self.output_norm(attn_output)
+        logits = self.output_projection(output)
+        return logits
+def load_namer_model(
+    model_path: str = "namer_model.pt",
+    device: str | torch.device = "cuda" if torch.cuda.is_available() else "cpu",
+) -> NamerTransformer:
+    """Load a trained Namer model for inference.
+    Args:
+        model_path: Path to the saved model file
+        device: Device to load the model on
+    Returns:
+        Loaded model in eval mode
+    """
+    checkpoint = torch.load(model_path, map_location=device)
+    model = NamerTransformer(
+        vocab_size=checkpoint["vocab_size"],
+        max_output_len=checkpoint["max_output_len"],
+        d_model=checkpoint.get("d_model", 128),
+        nhead=4,
+        num_encoder_layers=4,
+        dim_feedforward=512,
+        dropout=0.0,  # No dropout for inference
+    )
+    model.load_state_dict(checkpoint["model_state_dict"])
+    model.to(device)
+    model.eval()
+    return model

namer/training.py ADDED Viewed

	@@ -0,0 +1,192 @@

+"""Training utilities for Namer models."""
+from __future__ import annotations
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, TensorDataset
+from namer.models import NamerTransformer
+from namer.data import InfiniteNamerDataset
+def train_namer_model(
+    model: NamerTransformer,
+    dataset: TensorDataset | None = None,
+    infinite_dataset: InfiniteNamerDataset | None = None,
+    num_epochs: int = 50,
+    steps_per_epoch: int = 1000,
+    val_steps: int = 100,
+    batch_size: int = 64,
+    learning_rate: float = 0.001,
+    patience: int = 10,
+    device: str | torch.device = "cuda" if torch.cuda.is_available() else "cpu",
+) -> NamerTransformer:
+    """Train the model on a finite dataset or infinite iterable dataset.
+    Args:
+        model: The model to train
+        dataset: Finite TensorDataset with (digits, encoded_names) pairs
+        infinite_dataset: Infinite IterableDataset for infinite training
+        num_epochs: Number of training epochs
+        steps_per_epoch: Number of steps per epoch (for infinite dataset)
+        val_steps: Number of validation steps per epoch
+        batch_size: Batch size for training
+        learning_rate: Learning rate for optimizer
+        patience: Early stopping patience
+        device: Device to train on ('cuda' or 'cpu')
+    Returns:
+        Trained model
+    """
+    model = model.to(device)
+    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
+    criterion = nn.CrossEntropyLoss(ignore_index=-1)
+    print(f"Training on {device}")
+    print(f"Early stopping patience: {patience} epochs")
+    # Setup data loaders
+    if infinite_dataset is not None:
+        print(f"Using INFINITE dataset (max_int={infinite_dataset.max_int})")
+        print(f"Steps per epoch: {steps_per_epoch}, Val steps: {val_steps}")
+        train_loader = DataLoader(
+            infinite_dataset,
+            batch_size=batch_size,
+            num_workers=0,
+        )
+        val_loader = DataLoader(
+            infinite_dataset,
+            batch_size=batch_size,
+            num_workers=0,
+        )
+    else:
+        if dataset is None:
+            raise ValueError("Either dataset or infinite_dataset must be provided")
+        train_size = int(0.9 * len(dataset))
+        val_size = len(dataset) - train_size
+        train_dataset, val_dataset = torch.utils.data.random_split(
+            dataset, [train_size, val_size], generator=torch.Generator().manual_seed(42)
+        )
+        train_loader = DataLoader(
+            train_dataset, batch_size=batch_size, shuffle=True
+        )
+        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
+        steps_per_epoch = len(train_loader)
+        val_steps = len(val_loader)
+        print(f"Train samples: {len(train_dataset)}, Val samples: {len(val_dataset)}")
+    best_val_loss = float("inf")
+    epochs_without_improvement = 0
+    best_model_state: dict | None = None
+    for epoch in range(num_epochs):
+        # Training
+        model.train()
+        train_loss = 0.0
+        train_correct = 0
+        train_total = 0
+        train_iter = iter(train_loader)
+        for _ in range(steps_per_epoch):
+            digits_batch, target_batch = next(train_iter)
+            digits_batch = digits_batch.to(device)
+            target_batch = target_batch.to(device)
+            optimizer.zero_grad()
+            logits = model(digits_batch)
+            loss = criterion(
+                logits.view(-1, model.vocab_size), target_batch.view(-1)
+            )
+            loss.backward()
+            optimizer.step()
+            train_loss += loss.item()
+            mask = target_batch != -1
+            predictions = logits.argmax(dim=-1)
+            train_correct += ((predictions == target_batch) & mask).sum().item()
+            train_total += mask.sum().item()
+        train_loss /= steps_per_epoch
+        train_acc = train_correct / train_total if train_total > 0 else 0
+        # Validation
+        model.eval()
+        val_loss = 0.0
+        val_correct = 0
+        val_total = 0
+        with torch.no_grad():
+            val_iter = iter(val_loader)
+            for _ in range(val_steps):
+                digits_batch, target_batch = next(val_iter)
+                digits_batch = digits_batch.to(device)
+                target_batch = target_batch.to(device)
+                logits = model(digits_batch)
+                loss = criterion(
+                    logits.view(-1, model.vocab_size), target_batch.view(-1)
+                )
+                val_loss += loss.item()
+                mask = target_batch != -1
+                predictions = logits.argmax(dim=-1)
+                val_correct += ((predictions == target_batch) & mask).sum().item()
+                val_total += mask.sum().item()
+        val_loss /= val_steps
+        val_acc = val_correct / val_total if val_total > 0 else 0
+        if val_loss < best_val_loss:
+            best_val_loss = val_loss
+            epochs_without_improvement = 0
+            best_model_state = model.state_dict().copy()
+        else:
+            epochs_without_improvement += 1
+        if (epoch + 1) % 10 == 0 or epoch == 0:
+            print(
+                f"Epoch {epoch+1}/{num_epochs}: "
+                f"train_loss={train_loss:.4f}, train_acc={train_acc:.4f}, "
+                f"val_loss={val_loss:.4f}, val_acc={val_acc:.4f}, "
+                f"patience={epochs_without_improvement}/{patience}"
+            )
+        if epochs_without_improvement >= patience:
+            print(f"\nEarly stopping triggered! No improvement for {patience} epochs.")
+            break
+    print(f"\nBest validation loss: {best_val_loss:.4f}")
+    if best_model_state is not None:
+        model.load_state_dict(best_model_state)
+        print("Restored best model from checkpoint.")
+    return model
+def save_model(model: NamerTransformer, model_path: str = "namer_model.pt") -> None:
+    """Save a trained model to disk.
+    Args:
+        model: The model to save
+        model_path: Path where to save the model
+    """
+    checkpoint = {
+        "model_type": "transformer",
+        "model_state_dict": model.state_dict(),
+        "vocab_size": model.vocab_size,
+        "max_output_len": model.max_output_len,
+        "d_model": model.d_model,
+    }
+    torch.save(checkpoint, model_path)
+    print(f"Model saved to {model_path}")

namer/utils.py ADDED Viewed

	@@ -0,0 +1,267 @@

+"""Utility functions for number-to-name conversion."""
+from __future__ import annotations
+# Global constants for number names
+ONES: tuple[str, ...] = (
+    "zero", "one", "two", "three", "four",
+    "five", "six", "seven", "eight", "nine"
+)
+TEENS: tuple[str, ...] = (
+    "ten", "eleven", "twelve", "thirteen", "fourteen",
+    "fifteen", "sixteen", "seventeen", "eighteen", "nineteen"
+)
+TENS: tuple[str, ...] = (
+    "", "", "twenty", "thirty", "forty",
+    "fifty", "sixty", "seventy", "eighty", "ninety"
+)
+# Scale words for powers of 1000
+SCALES: tuple[str, ...] = (
+    "", "thousand", "million", "billion", "trillion",
+    "quadrillion", "quintillion", "sextillion", "septillion",
+    "octillion", "nonillion", "decillion"
+)
+# Combined vocabulary of all number words
+VOCABULARY: list[str] = []
+VOCABULARY.extend(ONES)
+VOCABULARY.extend(TEENS)
+VOCABULARY.extend([t for t in TENS if t])  # Exclude empty strings
+VOCABULARY.append("hundred")
+VOCABULARY.extend([s for s in SCALES if s])  # Exclude empty string
+VOCABULARY.append("<EOS>")  # End of sequence token
+# Create a word-to-index lookup for efficient encoding
+WORD_TO_INDEX: dict[str, int] = {word: idx for idx, word in enumerate(VOCABULARY)}
+# Special token indices
+EOS_IDX: int = VOCABULARY.index("<EOS>")
+def int_to_digits(n: int) -> list[int]:
+    """Convert an integer to a list of its decimal digits.
+    Args:
+        n: An integer (can be any size, positive, negative, or zero)
+    Returns:
+        List of digits (0-9). Returns [0] for zero.
+        Negative numbers return digits without the sign.
+    Example:
+        >>> int_to_digits(123)
+        [1, 2, 3]
+        >>> int_to_digits(0)
+        [0]
+        >>> int_to_digits(-456)
+        [4, 5, 6]
+    """
+    if n == 0:
+        return [0]
+    n = abs(n)
+    digits: list[int] = []
+    while n > 0:
+        digits.append(n % 10)
+        n //= 10
+    return digits[::-1]
+def digits_to_int(digits: list[int]) -> int:
+    """Convert a list of decimal digits to an integer.
+    This is the inverse of int_to_digits().
+    Args:
+        digits: List of digits (0-9)
+    Returns:
+        The integer value represented by the digits
+    Raises:
+        ValueError: If any digit is not 0-9
+    Example:
+        >>> digits_to_int([1, 2, 3])
+        123
+        >>> digits_to_int([0])
+        0
+    """
+    if not digits:
+        return 0
+    result = 0
+    for d in digits:
+        if not (0 <= d <= 9):
+            raise ValueError(f"Invalid digit {d}, must be 0-9")
+        result = result * 10 + d
+    return result
+def encode(text: str) -> list[int]:
+    """Encode a string of number words into a list of vocabulary indices.
+    Args:
+        text: String containing space-separated number words (e.g., "one million")
+    Returns:
+        List of indices corresponding to each word in VOCABULARY
+    Raises:
+        ValueError: If a word is not found in VOCABULARY
+    Example:
+        >>> encode("one million")
+        [1, 29]
+    """
+    if not text or not text.strip():
+        return []
+    words = text.strip().lower().split()
+    indices: list[int] = []
+    for word in words:
+        if word not in WORD_TO_INDEX:
+            raise ValueError(f"Unknown word '{word}' not in VOCABULARY")
+        indices.append(WORD_TO_INDEX[word])
+    return indices
+def decode(indices: list[int]) -> str:
+    """Decode a list of vocabulary indices into a string of number words.
+    This is the inverse of encode(). <EOS> tokens are ignored.
+    Args:
+        indices: List of indices into VOCABULARY (e.g., [1, 30])
+    Returns:
+        String of space-separated number words (e.g., "one million")
+    Raises:
+        ValueError: If an index is out of range
+    Example:
+        >>> decode([1, 30])
+        'one million'
+    """
+    if not indices:
+        return ""
+    words: list[str] = []
+    for idx in indices:
+        if not (0 <= idx < len(VOCABULARY)):
+            raise ValueError(f"Index {idx} out of range for VOCABULARY (size {len(VOCABULARY)})")
+        word = VOCABULARY[idx]
+        if word != "<EOS>":
+            words.append(word)
+    return " ".join(words)
+def read_double(a: int, b: int) -> str:
+    """Convert two digits (a, b) into the English name of the number they form.
+    Args:
+        a: Tens digit (0-9)
+        b: Ones digit (0-9)
+    Returns:
+        English name of the number (e.g., "twenty three", "eleven", "seven")
+    """
+    if not (0 <= a <= 9 and 0 <= b <= 9):
+        raise ValueError("Digits must be between 0 and 9")
+    number = a * 10 + b
+    if number < 10:
+        return ONES[number]
+    elif number < 20:
+        return TEENS[number - 10]
+    elif b == 0:
+        return TENS[a]
+    else:
+        return f"{TENS[a]} {ONES[b]}"
+def read_triplet(a: int, b: int, c: int) -> str:
+    """Convert three digits (a, b, c) into the English name of the number they form.
+    Args:
+        a: Hundreds digit (0-9)
+        b: Tens digit (0-9)
+        c: Ones digit (0-9)
+    Returns:
+        English name of the number (e.g., "one hundred six", "zero", "nine hundred nineteen")
+    """
+    if not (0 <= a <= 9 and 0 <= b <= 9 and 0 <= c <= 9):
+        raise ValueError("Digits must be between 0 and 9")
+    if a == 0:
+        return read_double(b, c)
+    remainder = read_double(b, c)
+    if b == 0 and c == 0:
+        return f"{ONES[a]} hundred"
+    else:
+        return f"{ONES[a]} hundred {remainder}"
+def read_digits(lst: list[int]) -> str:
+    """Convert a list of digits into the English name of the number they form.
+    Groups digits into triplets and combines with scale words (thousand, million, etc.)
+    Args:
+        lst: List of digits (0-9)
+    Returns:
+        English name of the number
+    """
+    if not lst:
+        return "zero"
+    for d in lst:
+        if not (0 <= d <= 9):
+            raise ValueError("All elements must be digits between 0 and 9")
+    if all(d == 0 for d in lst):
+        return "zero"
+    # Pad with leading zeros to make length a multiple of 3
+    padded = lst[:]
+    while len(padded) % 3 != 0:
+        padded = [0] + padded
+    # Group into triplets
+    triplets: list[tuple[int, int, int]] = []
+    for i in range(0, len(padded), 3):
+        triplets.append((padded[i], padded[i+1], padded[i+2]))
+    # Build the result by processing each triplet with its scale
+    parts: list[str] = []
+    num_triplets = len(triplets)
+    for i, (a, b, c) in enumerate(triplets):
+        if a == 0 and b == 0 and c == 0:
+            continue
+        triplet_name = read_triplet(a, b, c)
+        scale_index = num_triplets - 1 - i
+        scale = SCALES[scale_index] if scale_index < len(SCALES) else ""
+        if scale:
+            parts.append(f"{triplet_name} {scale}")
+        else:
+            parts.append(triplet_name)
+    return " ".join(parts)

pyproject.toml ADDED Viewed

	@@ -0,0 +1,100 @@

+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[project]
+name = "namer"
+version = "0.2.0"
+description = "A PyTorch transformer model for converting numbers to English names"
+readme = "README.md"
+license = {text = "MIT"}
+requires-python = ">=3.10"
+authors = [
+    {name = "Developer", email = "dev@example.com"}
+]
+keywords = ["pytorch", "transformer", "nlp", "numbers", "ml"]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+]
+dependencies = [
+    "torch>=2.0.0",
+]
+[project.optional-dependencies]
+dev = [
+    "pytest>=7.0",
+    "pytest-cov>=4.0",
+    "ruff>=0.1.0",
+    "mypy>=1.0",
+]
+[project.scripts]
+namer = "namer.main:main"
+[project.urls]
+Homepage = "https://github.com/example/namer"
+Repository = "https://github.com/example/namer"
+Issues = "https://github.com/example/namer/issues"
+[tool.ruff]
+target-version = "py310"
+line-length = 88
+[tool.ruff.lint]
+select = [
+    "E",   # pycodestyle errors
+    "W",   # pycodestyle warnings
+    "F",   # Pyflakes
+    "I",   # isort
+    "N",   # pep8-naming
+    "D",   # pydocstyle
+    "UP",  # pyupgrade
+    "B",   # flake8-bugbear
+    "C4",  # flake8-comprehensions
+    "SIM", # flake8-simplify
+]
+ignore = ["D100", "D104"]  # Missing docstrings in public packages/modules
+[tool.ruff.lint.pydocstyle]
+convention = "google"
+[tool.mypy]
+python_version = "3.10"
+warn_return_any = true
+warn_unused_configs = true
+disallow_untyped_defs = true
+disallow_incomplete_defs = true
+check_untyped_defs = true
+warn_redundant_casts = true
+warn_unused_ignores = true
+show_error_codes = true
+[[tool.mypy.overrides]]
+module = ["torch.*"]
+ignore_missing_imports = true
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+python_files = ["test_*.py"]
+python_functions = ["test_*"]
+addopts = "-v --tb=short"
+[tool.coverage.run]
+source = ["namer"]
+omit = ["*/tests/*"]
+[tool.coverage.report]
+exclude_lines = [
+    "pragma: no cover",
+    "def __repr__",
+    "raise AssertionError",
+    "raise NotImplementedError",
+]

tests/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


+"""Tests for Namer package."""

tests/test_data.py ADDED Viewed

	@@ -0,0 +1,86 @@

+"""Tests for dataset classes."""
+import torch
+from torch.utils.data import DataLoader
+from namer.data import InfiniteNamerDataset, NamerDataset
+from namer.utils import EOS_IDX, VOCABULARY
+class TestNamerDataset:
+    """Tests for NamerDataset class."""
+    def test_length(self) -> None:
+        """len() of the dataset equals the requested num_samples."""
+        dataset = NamerDataset(num_samples=50, seed=42)
+        assert len(dataset) == 50
+    def test_sample_shape(self) -> None:
+        """A sample is a pair of torch.long tensors of shape (max_seq_len,)."""
+        dataset = NamerDataset(num_samples=10, max_seq_len=20, seed=42)
+        digits, encoded = dataset[0]
+        assert digits.shape == (20,)
+        assert encoded.shape == (20,)
+        assert digits.dtype == torch.long
+        assert encoded.dtype == torch.long
+    def test_padding_value(self) -> None:
+        """The digit tensor of the first sample contains the value 10."""
+        dataset = NamerDataset(num_samples=10, max_seq_len=20, seed=42)
+        digits, _ = dataset[0]
+        # Padding should be 10
+        # NOTE(review): assuming digits has 20 entries (max_seq_len above), the
+        # `or` branch can only hold when at least 14 of them equal 10, so this
+        # assertion reduces to "some entry equals 10" -- confirm that is intended.
+        assert (digits == 10).any() or len([d for d in digits if d != 10]) <= 6
+    def test_eos_present(self) -> None:
+        """The encoded target of the first sample contains EOS_IDX."""
+        dataset = NamerDataset(num_samples=10, seed=42)
+        _, encoded = dataset[0]
+        # EOS token should be present
+        assert EOS_IDX in encoded.tolist()
+class TestInfiniteNamerDataset:
+    """Tests for InfiniteNamerDataset class."""
+    def test_iteration(self) -> None:
+        """Ten consecutive samples can be drawn, each a pair of shape-(20,) tensors."""
+        dataset = InfiniteNamerDataset(seed=42)
+        iterator = iter(dataset)
+        # Can get multiple samples
+        for _ in range(10):
+            digits, encoded = next(iterator)
+            assert digits.shape == (20,)
+            assert encoded.shape == (20,)
+    def test_data_loader(self) -> None:
+        """A DataLoader with batch_size=4 yields batches of shape (4, 20)."""
+        dataset = InfiniteNamerDataset(seed=42)
+        loader = DataLoader(dataset, batch_size=4, num_workers=0)
+        iterator = iter(loader)
+        digits_batch, encoded_batch = next(iterator)
+        assert digits_batch.shape == (4, 20)
+        assert encoded_batch.shape == (4, 20)
+    def test_reproducibility(self) -> None:
+        """Two datasets built with the same seed yield identical first five samples."""
+        dataset1 = InfiniteNamerDataset(seed=42)
+        dataset2 = InfiniteNamerDataset(seed=42)
+        iter1 = iter(dataset1)
+        iter2 = iter(dataset2)
+        for _ in range(5):
+            d1, e1 = next(iter1)
+            d2, e2 = next(iter2)
+            assert torch.equal(d1, d2)
+            assert torch.equal(e1, e2)
+    def test_vocab_range(self) -> None:
+        """Target entries other than -1 lie in [0, len(VOCABULARY))."""
+        dataset = InfiniteNamerDataset(seed=42)
+        iterator = iter(dataset)
+        for _ in range(20):
+            _, encoded = next(iterator)
+            # Valid tokens should be within vocab range (excluding -1 padding)
+            valid_tokens = encoded[encoded != -1]
+            assert (valid_tokens >= 0).all()
+            assert (valid_tokens < len(VOCABULARY)).all()

tests/test_inference.py ADDED Viewed

	@@ -0,0 +1,71 @@

+"""Tests for inference utilities."""
+from unittest.mock import MagicMock, patch
+import pytest
+import torch
+from namer.inference import predict_number_name
+from namer.models import NamerTransformer
+from namer.utils import VOCABULARY, read_digits, int_to_digits
+class TestPredictNumberName:
+    """Tests for predict_number_name function."""
+    @pytest.fixture
+    def mock_model(self) -> MagicMock:
+        """MagicMock specced on NamerTransformer with one CPU parameter."""
+        model = MagicMock(spec=NamerTransformer)
+        model.max_output_len = 20
+        model.vocab_size = len(VOCABULARY)
+        # Mock the device property
+        param = MagicMock()
+        param.device = torch.device("cpu")
+        # NOTE(review): a single iterator is returned, so only the first
+        # parameters() call sees the parameter -- confirm one call is enough.
+        model.parameters.return_value = iter([param])
+        return model
+    def test_basic_prediction(self, mock_model: MagicMock) -> None:
+        """Logits favouring "one" then <EOS> give a result containing "one"."""
+        # Create fake logits that will select known tokens
+        # "one" is index 1 in VOCABULARY
+        fake_logits = torch.zeros(1, 20, len(VOCABULARY))
+        fake_logits[0, 0, 1] = 10.0  # "one"
+        fake_logits[0, 1, VOCABULARY.index("<EOS>")] = 10.0  # EOS
+        mock_model.return_value = fake_logits
+        mock_model.eval = MagicMock()
+        # torch.no_grad is replaced by a mock for the duration of the call.
+        with patch("namer.inference.torch.no_grad"):
+            result = predict_number_name(mock_model, 1)
+        # Should decode to "one"
+        # NOTE(review): the startswith("<") alternative presumably tolerates a
+        # placeholder/error string from predict_number_name -- TODO confirm.
+        assert "one" in result.lower() or result.startswith("<")
+    def test_eos_stops_generation(self, mock_model: MagicMock) -> None:
+        """Logits favouring <EOS> at position 0 give an empty result."""
+        # Logits that predict EOS immediately
+        fake_logits = torch.zeros(1, 20, len(VOCABULARY))
+        fake_logits[0, 0, VOCABULARY.index("<EOS>")] = 10.0
+        mock_model.return_value = fake_logits
+        mock_model.eval = MagicMock()
+        with patch("namer.inference.torch.no_grad"):
+            result = predict_number_name(mock_model, 0)
+        # Empty result when EOS is first
+        assert result == "" or result.startswith("<")
+    def test_device_override(self, mock_model: MagicMock) -> None:
+        """Passing device="cpu" explicitly still returns a string."""
+        fake_logits = torch.zeros(1, 20, len(VOCABULARY))
+        fake_logits[0, 0, 1] = 10.0
+        fake_logits[0, 1, VOCABULARY.index("<EOS>")] = 10.0
+        mock_model.return_value = fake_logits
+        mock_model.eval = MagicMock()
+        with patch("namer.inference.torch.no_grad"):
+            # Should not raise when device is specified
+            result = predict_number_name(mock_model, 1, device="cpu")
+        assert isinstance(result, str)

tests/test_models.py ADDED Viewed

	@@ -0,0 +1,91 @@

+"""Tests for model classes."""
+import pytest
+import torch
+from namer.models import NamerTransformer, PositionalEncoding
+from namer.utils import VOCABULARY
+class TestPositionalEncoding:
+    """Tests for PositionalEncoding module."""
+    def test_shape(self) -> None:
+        """The module returns a tensor with the same shape as its input."""
+        pe = PositionalEncoding(d_model=128)
+        x = torch.randn(2, 10, 128)  # batch=2, seq=10, dim=128
+        out = pe(x)
+        assert out.shape == (2, 10, 128)
+    def test_adds_position(self) -> None:
+        """An all-zero input does not come back unchanged."""
+        pe = PositionalEncoding(d_model=64)
+        x = torch.zeros(1, 5, 64)
+        out = pe(x)
+        # Output should be non-zero due to positional encoding
+        assert not torch.allclose(out, x)
+class TestNamerTransformer:
+    """Tests for NamerTransformer model."""
+    @pytest.fixture
+    def model(self) -> NamerTransformer:
+        """Small NamerTransformer (d_model=64, two encoder layers, dropout 0.0)."""
+        return NamerTransformer(
+            vocab_size=len(VOCABULARY),
+            max_output_len=20,
+            d_model=64,
+            nhead=4,
+            num_encoder_layers=2,
+            dim_feedforward=128,
+            dropout=0.0,
+        )
+    def test_forward_shape(self, model: NamerTransformer) -> None:
+        """Forward pass returns (batch, max_output_len, vocab_size) logits."""
+        batch_size = 4
+        seq_len = 10
+        digits = torch.randint(0, 10, (batch_size, seq_len))
+        logits = model(digits)
+        assert logits.shape == (batch_size, model.max_output_len, model.vocab_size)
+    def test_forward_with_padding(self, model: NamerTransformer) -> None:
+        """Inputs whose tail is filled with the value 10 keep the output shape."""
+        batch_size = 2
+        seq_len = 10
+        digits = torch.full((batch_size, seq_len), 10)  # All padding
+        # Overwrite the first five positions with real digits.
+        digits[:, :5] = torch.randint(0, 10, (batch_size, 5))
+        logits = model(digits)
+        assert logits.shape == (batch_size, model.max_output_len, model.vocab_size)
+    def test_forward_with_negative_padding(self, model: NamerTransformer) -> None:
+        """Inputs whose tail is filled with -1 keep the output shape."""
+        batch_size = 2
+        seq_len = 10
+        digits = torch.full((batch_size, seq_len), -1)  # -1 padding
+        digits[:, :5] = torch.randint(0, 10, (batch_size, 5))
+        logits = model(digits)
+        assert logits.shape == (batch_size, model.max_output_len, model.vocab_size)
+    def test_output_is_logits(self, model: NamerTransformer) -> None:
+        """At least one output value falls outside [0, 1]."""
+        digits = torch.randint(0, 10, (1, 5))
+        logits = model(digits)
+        # Logits should not be probabilities (no softmax applied)
+        assert not torch.all((logits >= 0) & (logits <= 1))
+    def test_gradient_flow(self, model: NamerTransformer) -> None:
+        """A backward pass leaves a gradient on every model parameter."""
+        digits = torch.randint(0, 10, (2, 5))
+        target = torch.randint(0, len(VOCABULARY), (2, model.max_output_len))
+        logits = model(digits)
+        # Flatten batch and position so cross_entropy sees one row per token.
+        loss = torch.nn.functional.cross_entropy(
+            logits.view(-1, model.vocab_size),
+            target.view(-1)
+        )
+        loss.backward()
+        # Check that gradients exist
+        for param in model.parameters():
+            assert param.grad is not None

tests/test_utils.py ADDED Viewed

	@@ -0,0 +1,193 @@

+"""Tests for utility functions."""
+import pytest
+from namer.utils import (
+    EOS_IDX,
+    VOCABULARY,
+    decode,
+    digits_to_int,
+    encode,
+    int_to_digits,
+    read_digits,
+    read_double,
+    read_triplet,
+)
+class TestIntToDigits:
+    """Tests for int_to_digits function."""
+    def test_zero(self) -> None:
+        assert int_to_digits(0) == [0]
+    def test_positive(self) -> None:
+        assert int_to_digits(123) == [1, 2, 3]
+        assert int_to_digits(7) == [7]
+    def test_negative(self) -> None:
+        assert int_to_digits(-456) == [4, 5, 6]
+    def test_large_number(self) -> None:
+        assert int_to_digits(1002003) == [1, 0, 0, 2, 0, 0, 3]
+class TestDigitsToInt:
+    """Tests for digits_to_int function."""
+    def test_empty(self) -> None:
+        assert digits_to_int([]) == 0
+    def test_single_digit(self) -> None:
+        assert digits_to_int([5]) == 5
+    def test_multiple_digits(self) -> None:
+        assert digits_to_int([1, 2, 3]) == 123
+    def test_with_zeros(self) -> None:
+        assert digits_to_int([1, 0, 0, 2]) == 1002
+    def test_invalid_digit(self) -> None:
+        with pytest.raises(ValueError, match="Invalid digit"):
+            digits_to_int([10])
+class TestRoundTrip:
+    """Tests for int_to_digits <-> digits_to_int round-trip."""
+    def test_round_trip(self) -> None:
+        for n in [0, 42, 123, 1000, 999999, 1000000]:
+            assert digits_to_int(int_to_digits(n)) == abs(n)
+class TestReadDouble:
+    """Tests for read_double function."""
+    def test_single_digit(self) -> None:
+        assert read_double(0, 7) == "seven"
+        assert read_double(0, 0) == "zero"
+    def test_teens(self) -> None:
+        assert read_double(1, 1) == "eleven"
+        assert read_double(1, 9) == "nineteen"
+    def test_tens(self) -> None:
+        assert read_double(3, 0) == "thirty"
+        assert read_double(5, 0) == "fifty"
+    def test_tens_and_ones(self) -> None:
+        assert read_double(2, 3) == "twenty three"
+        assert read_double(5, 9) == "fifty nine"
+    def test_invalid_digits(self) -> None:
+        with pytest.raises(ValueError, match="must be between 0 and 9"):
+            read_double(10, 5)
+class TestReadTriplet:
+    """Tests for read_triplet function."""
+    def test_hundreds(self) -> None:
+        assert read_triplet(1, 0, 6) == "one hundred six"
+        assert read_triplet(2, 0, 0) == "two hundred"
+    def test_zero_hundreds(self) -> None:
+        assert read_triplet(0, 5, 5) == "fifty five"
+    def test_all_zeros(self) -> None:
+        assert read_triplet(0, 0, 0) == "zero"
+class TestReadDigits:
+    """Tests for read_digits function."""
+    def test_empty(self) -> None:
+        assert read_digits([]) == "zero"
+    def test_zero(self) -> None:
+        assert read_digits([0]) == "zero"
+        assert read_digits([0, 0, 0]) == "zero"
+    def test_single_digit(self) -> None:
+        assert read_digits([5]) == "five"
+    def test_double_digit(self) -> None:
+        assert read_digits([4, 2]) == "forty two"
+    def test_triple_digit(self) -> None:
+        assert read_digits([1, 2, 3]) == "one hundred twenty three"
+    def test_thousands(self) -> None:
+        assert read_digits([1, 0, 0, 0]) == "one thousand"
+        assert read_digits([1, 2, 3, 4]) == "one thousand two hundred thirty four"
+    def test_millions(self) -> None:
+        assert read_digits([1, 0, 0, 0, 0, 0, 0]) == "one million"
+    def test_complex(self) -> None:
+        # 1,234,567
+        digits = [1, 2, 3, 4, 5, 6, 7]
+        result = read_digits(digits)
+        assert "one million" in result
+        assert "two hundred thirty four thousand" in result
+        assert "five hundred sixty seven" in result
+    def test_invalid_digit(self) -> None:
+        with pytest.raises(ValueError, match="must be digits"):
+            read_digits([1, 10, 3])
+class TestEncode:
+    """Tests for encode function."""
+    def test_simple(self) -> None:
+        indices = encode("one million")
+        assert len(indices) == 2
+        assert all(0 <= i < len(VOCABULARY) for i in indices)
+    def test_multi_word(self) -> None:
+        indices = encode("twenty three")
+        assert len(indices) == 2
+    def test_empty(self) -> None:
+        assert encode("") == []
+        assert encode("   ") == []
+    def test_unknown_word(self) -> None:
+        with pytest.raises(ValueError, match="Unknown word"):
+            encode("unknown")
+class TestDecode:
+    """Tests for decode function."""
+    def test_simple(self) -> None:
+        encoded = encode("one million")
+        assert decode(encoded) == "one million"
+    def test_with_eos(self) -> None:
+        encoded = encode("one million") + [EOS_IDX]
+        assert decode(encoded) == "one million"
+    def test_empty(self) -> None:
+        assert decode([]) == ""
+    def test_invalid_index(self) -> None:
+        with pytest.raises(ValueError, match="out of range"):
+            decode([9999])
+class TestEncodeDecodeRoundTrip:
+    """Tests for encode/decode round-trip."""
+    def test_round_trip(self) -> None:
+        test_cases = [
+            "one million",
+            "twenty three",
+            "one hundred twenty three",
+            "zero",
+            "nine hundred nineteen",
+        ]
+        for text in test_cases:
+            encoded = encode(text)
+            assert decode(encoded) == text