ModerRAS
/

AniFileBERT

Token Classification

filename-parsing

Eval Results (legacy)

Model card Files Files and versions

AniFileBERT / reports /training_lineage.json

ModerRAS's picture

Train virtual-shard anime parser

359ff82 1 day ago

history blame contribute delete

3.48 kB

	{
	"published_checkpoint": "repository_root",
	"summary": "The published checkpoint was produced in two stages: a full 10-epoch CUDA fine-tune over Rust-generated virtual BIO shards, followed by a light thin-runtime hard-case focus fine-tune.",
	"summary_zh": "当前发布 checkpoint 是两阶段产物：先用 Rust 生成的虚拟 BIO shard 做完整 10 epoch CUDA 微调，再做轻量薄层运行时困难样本微调。",
	"stages": [
	{
	"name": "dmhy-char-virtual-sps32-10epoch-lr1e5",
	"type": "full_dataset_finetune_with_rust_virtual_shards",
	"machine": "adqew@192.168.63.157",
	"data_file": "datasets/AnimeName/dmhy_weak_char.jsonl",
	"virtual_source_file": "data/generated/virtual_source_train_seed105.jsonl",
	"virtual_dataset_dir": "data/generated/virtual_char_sps32_seed105",
	"tokenizer_variant": "char",
	"vocab_file": "datasets/AnimeName/vocab.char.json",
	"vocab_size": 6199,
	"max_seq_length": 128,
	"source_rows": 619361,
	"special_fixture_rows": 935,
	"virtual_train_samples": 20439848,
	"eval_samples": 12641,
	"epochs": 10.0,
	"optimizer_steps": 114070,
	"batch_size": 1792,
	"learning_rate": 0.00001,
	"warmup_steps": 2000,
	"seed": 105,
	"device": "cuda",
	"mixed_precision": "bf16",
	"tf32": true,
	"dataloader_num_workers": 4,
	"virtual_generation": {
	"samples_per_source": 32,
	"separator_mode": "per-gap",
	"bracket_mode": "per-part",
	"include_original": true,
	"include_special_fixtures": true,
	"shard_size": 25000,
	"shards": 881,
	"elapsed_seconds": 31.55
	},
	"eval_f1": 0.9902097153862615,
	"eval_accuracy": 0.9978861640315251,
	"fixed_regression_model_only": "22/26",
	"fixed_regression_normalized_only": "23/26",
	"heldout_model_only": "1994/2048",
	"heldout_normalized_only": "2008/2048",
	"train_runtime_seconds": 21181.32,
	"train_tokens_per_second": 1236288.9470061918,
	"perf_gpu_util_avg": 96.14912280701755,
	"perf_gpu_util_max": 100.0,
	"role": "Base checkpoint for the final light hard-case focus stage. This is the full >100k-step virtual-shard training run."
	},
	{
	"name": "dmhy-char-virtual-sps32-10epoch-lightfocus",
	"type": "light_hard_case_focus_finetune",
	"machine": "adqew@192.168.63.157",
	"data_file": "data/generated/focus_after_virtual_sps32_char.jsonl",
	"tokenizer_variant": "char",
	"vocab_file": "datasets/AnimeName/vocab.char.json",
	"vocab_size": 6199,
	"max_seq_length": 128,
	"focus_source_rows": 140660,
	"train_samples": 133627,
	"eval_samples": 7033,
	"epochs": 1.0,
	"batch_size": 1792,
	"learning_rate": 0.000002,
	"warmup_steps": 20,
	"seed": 208,
	"device": "cuda",
	"mixed_precision": "bf16",
	"tf32": true,
	"eval_f1": 0.9843520993189067,
	"eval_accuracy": 0.9961191832100342,
	"fixed_regression_model_only": "24/26",
	"fixed_regression_normalized_only": "26/26",
	"heldout_model_only": "1962/2048",
	"heldout_normalized_only": "1988/2048",
	"perf_tokens_per_second_avg": 997645.0850819343,
	"perf_gpu_util_avg": 100.0,
	"role": "Published repository-root checkpoint. The default thin runtime also includes narrow postprocessing for bracketed search notes and release-promo title prefixes."
	}
	]
	}