File size: 1,607 Bytes
e63569d
359ff82
 
be6a29a
 
 
359ff82
 
be6a29a
359ff82
be6a29a
 
 
359ff82
 
 
 
 
 
be6a29a
 
 
359ff82
be6a29a
 
e63569d
359ff82
e63569d
 
 
 
 
 
359ff82
 
 
 
 
be6a29a
 
359ff82
 
 
f712f4b
be6a29a
e63569d
be6a29a
 
 
 
 
 
 
 
 
 
 
 
e63569d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
{
  "experiment_name": "dmhy-char-virtual-sps32-10epoch-lightfocus",
  "data_file": "data/generated/focus_after_virtual_sps32_char.jsonl",
  "data_sources": [
    {
      "role": "primary",
      "path": "data/generated/focus_after_virtual_sps32_char.jsonl",
      "samples": 140660,
      "repeat": 1,
      "effective_samples": 140660
    }
  ],
  "augmentation": {
    "partial_requested": 0,
    "partial_written": 0,
    "permutation_requested": 0,
    "permutation_written": 0,
    "special_requested": 0,
    "special_written": 0,
    "max_chars": 160
  },
  "dataset_mode": "encoded",
  "virtual_dataset_dir": null,
  "apply_label_repairs": false,
  "keep_raw_dataset": false,
  "tokenizer_variant": "char",
  "vocab_file": "datasets/AnimeName/vocab.char.json",
  "vocab_size": 6199,
  "max_seq_length": 128,
  "hidden_size": 256,
  "num_hidden_layers": 4,
  "num_attention_heads": 8,
  "intermediate_size": 1024,
  "train_samples": 133627,
  "eval_samples": 7033,
  "load_seconds": 3.860345099994447,
  "encode_seconds": 11.22450440004468,
  "epochs": 1.0,
  "max_steps": -1,
  "batch_size": 1792,
  "learning_rate": 2e-06,
  "warmup_steps": 20,
  "seed": 208,
  "device": "cuda",
  "fp16": false,
  "gradient_accumulation_steps": 1,
  "dataloader_num_workers": 0,
  "dataloader_prefetch_factor": null,
  "dataloader_persistent_workers": false,
  "dataloader_pin_memory": true,
  "encoded_dataset_device": "cpu",
  "mixed_precision": "bf16",
  "tf32": true,
  "torch_compile": false,
  "auto_find_batch_size": false,
  "perf_log_steps": 50,
  "perf_sample_interval": 0.5,
  "periodic_eval": false
}