"""
Global configuration for the LLM Pipeline.
Edit these values to match your environment.
"""
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional
# ─────────────────────────────────────────────
# Paths
# ─────────────────────────────────────────────
# Directory two levels above this file; every other path below is built from it.
ROOT = Path(__file__).parent.parent

# Artifact directories, one per kind of output, all under ROOT/artifacts.
MODELS_DIR = ROOT / "artifacts" / "models"
MERGES_DIR = ROOT / "artifacts" / "merges"
ADAPTERS_DIR = ROOT / "artifacts" / "adapters"
EVAL_DIR = ROOT / "artifacts" / "evaluations"
DATA_DIR = ROOT / "artifacts" / "data"

# Log directory sits directly under ROOT rather than under artifacts.
LOGS_DIR = ROOT / "logs"

# Import-time side effect: create each directory together with any missing
# parents. exist_ok=True leaves directories that already exist untouched.
for _d in [MODELS_DIR, MERGES_DIR, ADAPTERS_DIR, EVAL_DIR, DATA_DIR, LOGS_DIR]:
    _d.mkdir(parents=True, exist_ok=True)
# ─────────────────────────────────────────────
# Scale: medium (7B, single A100/H100)
# ─────────────────────────────────────────────
# Name of the active scale profile; it must be a key of SCALE_DEFAULTS,
# because CFG below is looked up with it.
SCALE = "medium"

# Settings for each scale profile, keyed by profile name.
SCALE_DEFAULTS = {
    "medium": dict(
        max_model_params_b=10,  # max billions of params to consider
        dtype="bfloat16",
        device_map="auto",
        lora_r=16,
        lora_alpha=32,
        lora_dropout=0.05,
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        max_seq_length=2048,
        load_in_4bit=True,
    ),
}

# Settings of the selected profile. This is the same dict object that is
# stored in SCALE_DEFAULTS, not a copy.
CFG = SCALE_DEFAULTS[SCALE]
# ─────────────────────────────────────────────
# Discovery
# ─────────────────────────────────────────────
# Search terms grouped by category.
# NOTE(review): presumably matched against Hub model names — confirm in the
# discovery code that consumes this mapping.
HF_MODEL_CATEGORIES = dict(
    code=["starcoder", "codellama", "deepseek-coder", "qwen2.5-coder"],
    reasoning=["mistral", "llama", "qwen", "gemma"],
    chat=["zephyr", "openchat", "neural-chat"],
    medical=["meditron", "biomedlm", "llama-med"],
    multilingual=["aya", "bloomz", "xglm"],
)

TOP_K_CANDIDATES = 5  # per category
MIN_DOWNLOADS = 1000  # filter noise
MIN_LIKES = 10
# ─────────────────────────────────────────────
# Merging
# ─────────────────────────────────────────────
# Names of the merge strategies, in the order listed here.
MERGE_STRATEGIES = [
    "slerp",
    "ties",
    "dare_ties",
    "task_arithmetic",
    "breadcrumbs",
]
# ─────────────────────────────────────────────
# Evaluation
# ─────────────────────────────────────────────
# Names of the evaluation metrics.
EVAL_METRICS = [
    "rouge",
    "bertscore",
    "faithfulness",
    "llm_judge",
]

# Model identifier for the judge; swap for a stronger model.
JUDGE_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"

# ROUGE variants.
ROUGE_TYPES = [
    "rouge1",
    "rouge2",
    "rougeL",
]
# ─────────────────────────────────────────────
# Fine-tuning
# ─────────────────────────────────────────────
# Fine-tuning settings.
# NOTE(review): the names suggest standard trainer hyperparameters (epochs,
# learning rate, warmup ratio, checkpoint interval) — confirm against the
# training code that reads them.
FT_BASE_MODEL = "mistralai/Mistral-7B-v0.3"

FT_EPOCHS = 3
FT_SAVE_STEPS = 100

FT_LR = 0.0002
FT_WARMUP_RATIO = 0.03
# ─────────────────────────────────────────────
# Inference (vLLM)
# ─────────────────────────────────────────────
# vLLM inference settings.
# NOTE(review): the last two look like a GPU-memory fraction and a maximum
# context length in tokens — confirm against the inference code.
VLLM_MAX_MODEL_LEN = 4096
VLLM_GPU_MEMORY_UTIL = 0.9
VLLM_TENSOR_PARALLEL = 1  # GPUs
# ─────────────────────────────────────────────
# MLOps
# ─────────────────────────────────────────────
# Your HF username/org for pushing models; empty by default.
HF_ORG = ""

# Experiment-tracking settings: a project name and a SQLite URI.
MLFLOW_URI = "sqlite:///mlflow.db"
WANDB_PROJECT = "llm-pipeline"
# ─────────────────────────────────────────────
# Tokens (set via env vars, never hardcode)
# ─────────────────────────────────────────────
import os

# Both tokens are read from the environment once, at import time.
# Each falls back to an empty string when its variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN", "")
WANDB_TOKEN = os.getenv("WANDB_API_KEY", "")