File size: 3,480 Bytes
f2ec095
 
359ff82
 
f2ec095
 
359ff82
 
f2ec095
 
359ff82
 
f2ec095
 
 
 
359ff82
 
 
 
be6a29a
359ff82
be6a29a
359ff82
 
be6a29a
f2ec095
359ff82
 
 
 
 
 
 
 
 
 
 
 
be6a29a
359ff82
 
 
 
 
 
 
 
 
 
 
f2ec095
 
359ff82
 
f2ec095
359ff82
f2ec095
 
 
 
359ff82
 
 
 
be6a29a
359ff82
 
 
f2ec095
359ff82
 
 
 
 
f2ec095
359ff82
 
 
 
 
f2ec095
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
{
  "published_checkpoint": "repository_root",
  "summary": "The published checkpoint was produced in two stages: a full 10-epoch CUDA fine-tune over Rust-generated virtual BIO shards, followed by a light thin-runtime hard-case focus fine-tune.",
  "summary_zh": "当前发布 checkpoint 是两阶段产物:先用 Rust 生成的虚拟 BIO shard 做完整 10 epoch CUDA 微调,再做轻量薄层运行时困难样本微调。",
  "stages": [
    {
      "name": "dmhy-char-virtual-sps32-10epoch-lr1e5",
      "type": "full_dataset_finetune_with_rust_virtual_shards",
      "machine": "adqew@192.168.63.157",
      "data_file": "datasets/AnimeName/dmhy_weak_char.jsonl",
      "virtual_source_file": "data/generated/virtual_source_train_seed105.jsonl",
      "virtual_dataset_dir": "data/generated/virtual_char_sps32_seed105",
      "tokenizer_variant": "char",
      "vocab_file": "datasets/AnimeName/vocab.char.json",
      "vocab_size": 6199,
      "max_seq_length": 128,
      "source_rows": 619361,
      "special_fixture_rows": 935,
      "virtual_train_samples": 20439848,
      "eval_samples": 12641,
      "epochs": 10.0,
      "optimizer_steps": 114070,
      "batch_size": 1792,
      "learning_rate": 0.00001,
      "warmup_steps": 2000,
      "seed": 105,
      "device": "cuda",
      "mixed_precision": "bf16",
      "tf32": true,
      "dataloader_num_workers": 4,
      "virtual_generation": {
        "samples_per_source": 32,
        "separator_mode": "per-gap",
        "bracket_mode": "per-part",
        "include_original": true,
        "include_special_fixtures": true,
        "shard_size": 25000,
        "shards": 881,
        "elapsed_seconds": 31.55
      },
      "eval_f1": 0.9902097153862615,
      "eval_accuracy": 0.9978861640315251,
      "fixed_regression_model_only": "22/26",
      "fixed_regression_normalized_only": "23/26",
      "heldout_model_only": "1994/2048",
      "heldout_normalized_only": "2008/2048",
      "train_runtime_seconds": 21181.32,
      "train_tokens_per_second": 1236288.9470061918,
      "perf_gpu_util_avg": 96.14912280701755,
      "perf_gpu_util_max": 100.0,
      "role": "Base checkpoint for the final light hard-case focus stage. This is the full >100k-step virtual-shard training run."
    },
    {
      "name": "dmhy-char-virtual-sps32-10epoch-lightfocus",
      "type": "light_hard_case_focus_finetune",
      "machine": "adqew@192.168.63.157",
      "data_file": "data/generated/focus_after_virtual_sps32_char.jsonl",
      "tokenizer_variant": "char",
      "vocab_file": "datasets/AnimeName/vocab.char.json",
      "vocab_size": 6199,
      "max_seq_length": 128,
      "focus_source_rows": 140660,
      "train_samples": 133627,
      "eval_samples": 7033,
      "epochs": 1.0,
      "batch_size": 1792,
      "learning_rate": 0.000002,
      "warmup_steps": 20,
      "seed": 208,
      "device": "cuda",
      "mixed_precision": "bf16",
      "tf32": true,
      "eval_f1": 0.9843520993189067,
      "eval_accuracy": 0.9961191832100342,
      "fixed_regression_model_only": "24/26",
      "fixed_regression_normalized_only": "26/26",
      "heldout_model_only": "1962/2048",
      "heldout_normalized_only": "1988/2048",
      "perf_tokens_per_second_avg": 997645.0850819343,
      "perf_gpu_util_avg": 100.0,
      "role": "Published repository-root checkpoint. The default thin runtime also includes narrow postprocessing for bracketed search notes and release-promo title prefixes."
    }
  ]
}