PROJECT "FineTunedLLM" DESCRIPTION "Fine-tuning a small LLM with checkpoint resume and custom callbacks" VERSION "1.0" AUTHOR "OktoSeek" DATASET { train: "dataset/instruction_train.jsonl" validation: "dataset/instruction_val.jsonl" test: "dataset/instruction_test.jsonl" format: "instruction" type: "generation" language: "en" } MODEL { base: "oktoseek/base-llm-7b" architecture: "transformer" parameters: 7B context_window: 4096 precision: "fp16" } TRAIN { epochs: 5 batch_size: 4 gradient_accumulation: 8 learning_rate: 0.0001 optimizer: "adamw" scheduler: "cosine_with_restarts" loss: "cross_entropy" device: "cuda" gpu: true mixed_precision: true early_stopping: true checkpoint_steps: 100 checkpoint_path: "./checkpoints" weight_decay: 0.01 gradient_clip: 1.0 warmup_steps: 100 save_strategy: "steps" } METRICS { loss perplexity bleu rouge_l token_efficiency response_coherence } VALIDATE { on_train: false on_validation: true frequency: 1 save_best_model: true metric_to_monitor: "loss" } INFERENCE { max_tokens: 512 temperature: 0.7 top_p: 0.9 top_k: 50 repetition_penalty: 1.1 stop_sequences: ["\n\nHuman:", "\n\nAssistant:"] } EXPORT { format: ["gguf", "onnx", "okm", "safetensors"] path: "export/" quantization: "int8" optimize_for: "speed" } DEPLOY { target: "api" endpoint: "http://localhost:9000/llm" requires_auth: true port: 9000 max_concurrent_requests: 50 } HOOKS { before_train: "scripts/preprocess_data.py" after_epoch: "scripts/custom_early_stop.py" on_checkpoint: "scripts/backup_checkpoint.sh" } LOGGING { save_logs: true metrics_file: "runs/finetuned-llm/metrics.json" training_file: "runs/finetuned-llm/training_logs.json" log_level: "info" log_every: 10 }