{
  "published_checkpoint": "repository_root",
  "summary": "The published checkpoint was produced in two stages: a full 10-epoch CUDA fine-tune over Rust-generated virtual BIO shards, followed by a light focus fine-tune on thin-runtime hard cases.",
  "summary_zh": "当前发布 checkpoint 是两阶段产物：先用 Rust 生成的虚拟 BIO shard 做完整 10 epoch CUDA 微调，再做轻量薄层运行时困难样本微调。",
  "stages": [
    {
      "name": "dmhy-char-virtual-sps32-10epoch-lr1e5",
      "type": "full_dataset_finetune_with_rust_virtual_shards",
      "machine": "adqew@192.168.63.157",
      "data_file": "datasets/AnimeName/dmhy_weak_char.jsonl",
      "virtual_source_file": "data/generated/virtual_source_train_seed105.jsonl",
      "virtual_dataset_dir": "data/generated/virtual_char_sps32_seed105",
      "tokenizer_variant": "char",
      "vocab_file": "datasets/AnimeName/vocab.char.json",
      "vocab_size": 6199,
      "max_seq_length": 128,
      "source_rows": 619361,
      "special_fixture_rows": 935,
      "virtual_train_samples": 20439848,
      "eval_samples": 12641,
      "epochs": 10.0,
      "optimizer_steps": 114070,
      "batch_size": 1792,
      "learning_rate": 0.00001,
      "warmup_steps": 2000,
      "seed": 105,
      "device": "cuda",
      "mixed_precision": "bf16",
      "tf32": true,
      "dataloader_num_workers": 4,
      "virtual_generation": {
        "samples_per_source": 32,
        "separator_mode": "per-gap",
        "bracket_mode": "per-part",
        "include_original": true,
        "include_special_fixtures": true,
        "shard_size": 25000,
        "shards": 881,
        "elapsed_seconds": 31.55
      },
      "eval_f1": 0.9902097153862615,
      "eval_accuracy": 0.9978861640315251,
      "fixed_regression_model_only": "22/26",
      "fixed_regression_normalized_only": "23/26",
      "heldout_model_only": "1994/2048",
      "heldout_normalized_only": "2008/2048",
      "train_runtime_seconds": 21181.32,
      "train_tokens_per_second": 1236288.9470061918,
      "perf_gpu_util_avg": 96.14912280701755,
      "perf_gpu_util_max": 100.0,
      "role": "Base checkpoint for the final light hard-case focus stage. This is the full >100k-step virtual-shard training run."
    },
    {
      "name": "dmhy-char-virtual-sps32-10epoch-lightfocus",
      "type": "light_hard_case_focus_finetune",
      "machine": "adqew@192.168.63.157",
      "data_file": "data/generated/focus_after_virtual_sps32_char.jsonl",
      "tokenizer_variant": "char",
      "vocab_file": "datasets/AnimeName/vocab.char.json",
      "vocab_size": 6199,
      "max_seq_length": 128,
      "focus_source_rows": 140660,
      "train_samples": 133627,
      "eval_samples": 7033,
      "epochs": 1.0,
      "batch_size": 1792,
      "learning_rate": 0.000002,
      "warmup_steps": 20,
      "seed": 208,
      "device": "cuda",
      "mixed_precision": "bf16",
      "tf32": true,
      "eval_f1": 0.9843520993189067,
      "eval_accuracy": 0.9961191832100342,
      "fixed_regression_model_only": "24/26",
      "fixed_regression_normalized_only": "26/26",
      "heldout_model_only": "1962/2048",
      "heldout_normalized_only": "1988/2048",
      "perf_tokens_per_second_avg": 997645.0850819343,
      "perf_gpu_util_avg": 100.0,
      "role": "Published repository-root checkpoint. The default thin runtime also includes narrow postprocessing for bracketed search notes and release-promo title prefixes."
    }
  ]
}