File size: 1,536 Bytes
33d8063
 
 
 
 
 
 
ada690e
 
 
 
 
33d8063
ada690e
 
 
33d8063
ada690e
 
 
 
 
 
 
 
 
33d8063
ada690e
 
 
 
 
 
 
 
 
 
 
33d8063
 
 
ada690e
 
 
 
 
 
 
 
 
33d8063
 
ada690e
 
33d8063
 
ada690e
33d8063
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
{
  "base_model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
  "dataset": "Josephgflowers/Finance-Instruct-500k",
  "training_config": {
    "model_name": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "dataset_name": "Josephgflowers/Finance-Instruct-500k",
    "dataset_config": "default",
    "max_length": 2048,
    "train_batch_size": 16,
    "eval_batch_size": 8,
    "gradient_accumulation_steps": 4,
    "learning_rate": 0.0001,
    "num_epochs": 2,
    "lora_r": 64,
    "lora_alpha": 128,
    "lora_dropout": 0.1,
    "warmup_ratio": 0.05,
    "weight_decay": 0.01,
    "max_grad_norm": 1.0,
    "save_steps": 100,
    "eval_steps": 50,
    "logging_steps": 25,
    "output_dir": "/content/drive/MyDrive/financial_llama_models/checkpoints",
    "save_dir": "/content/drive/MyDrive/financial_llama_models/final_model",
    "fp16": true,
    "gradient_checkpointing": true,
    "dataloader_num_workers": 4,
    "quantization": null,
    "precision": "fp16",
    "length_bucket_boundaries": [
      512,
      1024,
      1536,
      2048
    ],
    "length_stats_sample_size": 4000,
    "length_stats_percentile": 0.98,
    "align_save_with_eval": true,
    "optim": "paged_adamw_8bit",
    "max_train_samples": 25000,
    "max_val_samples": 2500,
    "length_stats": {
      "p50": 168,
      "p75": 289,
      "p90": 552,
      "p95": 814,
      "p98": 1131,
      "p99": 1535,
      "p100": 1537
    }
  },
  "lora_config": {
    "r": 64,
    "alpha": 128,
    "dropout": 0.1
  },
  "training_date": "2025-11-15T04:03:37.180688"
}