{
  "model_name": "Qwen/Qwen3-1.7B",
  "hf_repo_id": "farpluto/doc-to-lora-niah",
  "lora_r": 8,
  "lora_alpha": 8.0,
  "target_module": "down_proj",
  "latent_dim": 512,
  "n_percv_blocks": 8,
  "lr": 4e-05,
  "weight_decay": 0.01,
  "grad_accum": 8,
  "max_steps": 8000,
  "warmup_steps": 200,
  "max_grad_norm": 1.0,
  "l1_coef": 0.05,
  "eval_every": 500,
  "save_every": 2000,
  "ctx_min_len": 32,
  "ctx_max_len": 256,
  "n_train": 80000,
  "n_eval": 300,
  "seed": 42,
  "out_dir": "d2l_ckpts"
}
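
A minimal sketch of how a training script could load this JSON into a typed object. The TrainConfig dataclass and the "config.json" path below are illustrative assumptions, not part of the original file; the field names and types simply mirror the keys and values shown above.

import json
from dataclasses import dataclass

@dataclass
class TrainConfig:
    # Fields mirror the keys in the JSON config above; types inferred from the values shown.
    model_name: str
    hf_repo_id: str
    lora_r: int
    lora_alpha: float
    target_module: str
    latent_dim: int
    n_percv_blocks: int
    lr: float
    weight_decay: float
    grad_accum: int
    max_steps: int
    warmup_steps: int
    max_grad_norm: float
    l1_coef: float
    eval_every: int
    save_every: int
    ctx_min_len: int
    ctx_max_len: int
    n_train: int
    n_eval: int
    seed: int
    out_dir: str

# Hypothetical path; point this at wherever the config file is saved locally.
with open("config.json") as f:
    cfg = TrainConfig(**json.load(f))

print(cfg.model_name, cfg.lora_r, cfg.lr)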