{ "published_checkpoint": "repository_root", "summary": "The published checkpoint was produced in two stages: a full 10-epoch CUDA fine-tune over Rust-generated virtual BIO shards, followed by a light thin-runtime hard-case focus fine-tune.", "summary_zh": "当前发布 checkpoint 是两阶段产物:先用 Rust 生成的虚拟 BIO shard 做完整 10 epoch CUDA 微调,再做轻量薄层运行时困难样本微调。", "stages": [ { "name": "dmhy-char-virtual-sps32-10epoch-lr1e5", "type": "full_dataset_finetune_with_rust_virtual_shards", "machine": "adqew@192.168.63.157", "data_file": "datasets/AnimeName/dmhy_weak_char.jsonl", "virtual_source_file": "data/generated/virtual_source_train_seed105.jsonl", "virtual_dataset_dir": "data/generated/virtual_char_sps32_seed105", "tokenizer_variant": "char", "vocab_file": "datasets/AnimeName/vocab.char.json", "vocab_size": 6199, "max_seq_length": 128, "source_rows": 619361, "special_fixture_rows": 935, "virtual_train_samples": 20439848, "eval_samples": 12641, "epochs": 10.0, "optimizer_steps": 114070, "batch_size": 1792, "learning_rate": 0.00001, "warmup_steps": 2000, "seed": 105, "device": "cuda", "mixed_precision": "bf16", "tf32": true, "dataloader_num_workers": 4, "virtual_generation": { "samples_per_source": 32, "separator_mode": "per-gap", "bracket_mode": "per-part", "include_original": true, "include_special_fixtures": true, "shard_size": 25000, "shards": 881, "elapsed_seconds": 31.55 }, "eval_f1": 0.9902097153862615, "eval_accuracy": 0.9978861640315251, "fixed_regression_model_only": "22/26", "fixed_regression_normalized_only": "23/26", "heldout_model_only": "1994/2048", "heldout_normalized_only": "2008/2048", "train_runtime_seconds": 21181.32, "train_tokens_per_second": 1236288.9470061918, "perf_gpu_util_avg": 96.14912280701755, "perf_gpu_util_max": 100.0, "role": "Base checkpoint for the final light hard-case focus stage. This is the full >100k-step virtual-shard training run." }, { "name": "dmhy-char-virtual-sps32-10epoch-lightfocus", "type": "light_hard_case_focus_finetune", "machine": "adqew@192.168.63.157", "data_file": "data/generated/focus_after_virtual_sps32_char.jsonl", "tokenizer_variant": "char", "vocab_file": "datasets/AnimeName/vocab.char.json", "vocab_size": 6199, "max_seq_length": 128, "focus_source_rows": 140660, "train_samples": 133627, "eval_samples": 7033, "epochs": 1.0, "batch_size": 1792, "learning_rate": 0.000002, "warmup_steps": 20, "seed": 208, "device": "cuda", "mixed_precision": "bf16", "tf32": true, "eval_f1": 0.9843520993189067, "eval_accuracy": 0.9961191832100342, "fixed_regression_model_only": "24/26", "fixed_regression_normalized_only": "26/26", "heldout_model_only": "1962/2048", "heldout_normalized_only": "1988/2048", "perf_tokens_per_second_avg": 997645.0850819343, "perf_gpu_util_avg": 100.0, "role": "Published repository-root checkpoint. The default thin runtime also includes narrow postprocessing for bracketed search notes and release-promo title prefixes." } ] }