"""
Configuration for DevOps-Engineer-SLM: A Role-Based SLM for DevOps Engineer.
~1B params, LLaMA-style architecture with RoPE — supports up to 1M token context.
"""
| |
|
| | from dataclasses import dataclass, field |
| | from pathlib import Path |
| | from typing import Optional |
| |
|
| |
|
@dataclass
class SLMConfig:
    """All hyperparameters and paths in one place.

    The three working directories default to ``None`` and are resolved to
    subdirectories of ``project_dir`` (and created on disk) in
    ``__post_init__``, which also resolves ``device="auto"`` to a concrete
    torch device string.
    """

    # --- Paths -------------------------------------------------------------
    # Root of the project: the directory containing this config file.
    project_dir: Path = Path(__file__).resolve().parent
    data_dir: Optional[Path] = None        # resolved to project_dir / "data"
    tokenizer_dir: Optional[Path] = None   # resolved to project_dir / "tokenizer"
    checkpoint_dir: Optional[Path] = None  # resolved to project_dir / "checkpoints"

    # --- Domain identity ---------------------------------------------------
    domain_name: str = "DevOps Engineer"
    domain_slug: str = "devops_engineer"
    tokenizer_filename: str = "devops_engineer_tokenizer.json"

    # --- Tokenizer training ------------------------------------------------
    vocab_size: int = 32_768
    min_frequency: int = 2
    # Mutable default supplied via default_factory so instances don't share
    # one list; includes chat-template role markers.
    special_tokens: list = field(
        default_factory=lambda: [
            "<pad>", "<unk>", "<bos>", "<eos>",
            "<|system|>", "<|user|>", "<|assistant|>",
        ]
    )

    # --- Model architecture ------------------------------------------------
    n_layer: int = 32
    n_head: int = 20
    n_embd: int = 1600
    block_size: int = 512            # training/attention sequence length
    dropout: float = 0.05
    bias: bool = False
    ffn_multiplier: float = 2.667    # ≈ 8/3; presumably the LLaMA-style FFN ratio — confirm

    # --- Long-context / RoPE -----------------------------------------------
    # NOTE(review): the module docstring advertises a 1M-token context, but
    # these defaults are 1e11 positions and 5e10 theta. Confirm they are
    # intentional and not typos for 1_000_000 / 50_000_000.
    max_position_embeddings: int = 100_000_000_000
    rope_theta: float = 50_000_000_000.0

    # --- Attention windowing ------------------------------------------------
    # None means full (non-sliding) attention.
    sliding_window: Optional[int] = None

    # --- Memory -------------------------------------------------------------
    gradient_checkpointing: bool = True

    # --- Training -----------------------------------------------------------
    batch_size: int = 1
    gradient_accumulation_steps: int = 16  # effective batch = batch_size * 16
    learning_rate: float = 2e-4
    weight_decay: float = 0.1
    max_epochs: int = 3
    dataset_stride: int = 512
    warmup_steps: int = 100
    grad_clip: float = 1.0
    eval_interval: int = 50
    eval_samples: int = 10
    log_interval: int = 10
    # "auto" is replaced with "cuda" / "mps" / "cpu" in __post_init__.
    device: str = "auto"

    # --- Generation ---------------------------------------------------------
    # NOTE(review): max_new_tokens (1M) far exceeds block_size (512) — confirm
    # the generation loop handles contexts longer than block_size.
    max_new_tokens: int = 1_000_000
    temperature: float = 0.8
    top_k: int = 50
    top_p: float = 0.9

    # --- Hugging Face publishing --------------------------------------------
    hf_repo_name: str = "devops-engineer-slm-1m"
    hf_model_card_tags: list = field(
        default_factory=lambda: [
            'devops', 'cicd', 'docker', 'kubernetes', 'infrastructure',
            'slm', 'llama-style', 'rope', '1m-context', 'from-scratch',
            '1b-params',
        ]
    )

    def __post_init__(self):
        """Resolve unset paths, create directories, and pick a device.

        Side effects: creates data/tokenizer/checkpoint directories on disk
        and, when device == "auto", imports torch to probe for CUDA/MPS.
        """
        if self.data_dir is None:
            self.data_dir = self.project_dir / "data"
        if self.tokenizer_dir is None:
            self.tokenizer_dir = self.project_dir / "tokenizer"
        if self.checkpoint_dir is None:
            self.checkpoint_dir = self.project_dir / "checkpoints"

        self.data_dir.mkdir(parents=True, exist_ok=True)
        self.tokenizer_dir.mkdir(parents=True, exist_ok=True)
        self.checkpoint_dir.mkdir(parents=True, exist_ok=True)

        if self.device == "auto":
            # Imported lazily so merely loading the config doesn't require torch
            # when the caller passes an explicit device.
            import torch
            if torch.cuda.is_available():
                self.device = "cuda"
            elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
                self.device = "mps"
            else:
                self.device = "cpu"
| |
|
| |
|
# Module-level singleton; instantiation runs __post_init__, which creates the
# data/tokenizer/checkpoint directories and resolves the compute device
# (importing torch when device == "auto") as import-time side effects.
cfg: SLMConfig = SLMConfig()
| |
|