| """ | |
| Global configuration for the LLM Pipeline. | |
| Edit these values to match your environment. | |
| """ | |
| from dataclasses import dataclass, field | |
| from pathlib import Path | |
| from typing import Optional | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| # Paths | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| ROOT = Path(__file__).parent.parent | |
| MODELS_DIR = ROOT / "artifacts" / "models" | |
| MERGES_DIR = ROOT / "artifacts" / "merges" | |
| ADAPTERS_DIR = ROOT / "artifacts" / "adapters" | |
| EVAL_DIR = ROOT / "artifacts" / "evaluations" | |
| DATA_DIR = ROOT / "artifacts" / "data" | |
| LOGS_DIR = ROOT / "logs" | |
| for _d in [MODELS_DIR, MERGES_DIR, ADAPTERS_DIR, EVAL_DIR, DATA_DIR, LOGS_DIR]: | |
| _d.mkdir(parents=True, exist_ok=True) | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| # Scale: medium (7B, single A100/H100) | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| SCALE = "medium" | |
| SCALE_DEFAULTS = { | |
| "medium": { | |
| "max_model_params_b": 10, # max billions of params to consider | |
| "dtype": "bfloat16", | |
| "device_map": "auto", | |
| "lora_r": 16, | |
| "lora_alpha": 32, | |
| "lora_dropout": 0.05, | |
| "per_device_train_batch_size": 4, | |
| "gradient_accumulation_steps": 4, | |
| "max_seq_length": 2048, | |
| "load_in_4bit": True, | |
| } | |
| } | |
| CFG = SCALE_DEFAULTS[SCALE] | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| # Discovery | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| HF_MODEL_CATEGORIES = { | |
| "code": ["starcoder", "codellama", "deepseek-coder", "qwen2.5-coder"], | |
| "reasoning": ["mistral", "llama", "qwen", "gemma"], | |
| "chat": ["zephyr", "openchat", "neural-chat"], | |
| "medical": ["meditron", "biomedlm", "llama-med"], | |
| "multilingual":["aya", "bloomz", "xglm"], | |
| } | |
| TOP_K_CANDIDATES = 5 # per category | |
| MIN_DOWNLOADS = 1_000 # filter noise | |
| MIN_LIKES = 10 | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| # Merging | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| MERGE_STRATEGIES = ["slerp", "ties", "dare_ties", "task_arithmetic", "breadcrumbs"] | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| # Evaluation | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| EVAL_METRICS = ["rouge", "bertscore", "faithfulness", "llm_judge"] | |
| JUDGE_MODEL = "mistralai/Mistral-7B-Instruct-v0.3" # swap for a stronger model | |
| ROUGE_TYPES = ["rouge1", "rouge2", "rougeL"] | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| # Fine-tuning | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| FT_BASE_MODEL = "mistralai/Mistral-7B-v0.3" | |
| FT_EPOCHS = 3 | |
| FT_LR = 2e-4 | |
| FT_WARMUP_RATIO = 0.03 | |
| FT_SAVE_STEPS = 100 | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| # Inference (vLLM) | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| VLLM_TENSOR_PARALLEL = 1 # GPUs | |
| VLLM_GPU_MEMORY_UTIL = 0.90 | |
| VLLM_MAX_MODEL_LEN = 4096 | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| # MLOps | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| WANDB_PROJECT = "llm-pipeline" | |
| MLFLOW_URI = "sqlite:///mlflow.db" | |
| HF_ORG = "" # your HF username/org for pushing models | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| # Tokens (set via env vars, never hardcode) | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| import os | |
| HF_TOKEN = os.getenv("HF_TOKEN", "") | |
| WANDB_TOKEN = os.getenv("WANDB_API_KEY", "") | |