Token Classification
Transformers
ONNX
Safetensors
English
Japanese
Chinese
bert
anime
filename-parsing
Eval Results (legacy)
Instructions to use ModerRAS/AniFileBERT with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ModerRAS/AniFileBERT with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("token-classification", model="ModerRAS/AniFileBERT")

# Load model directly
from transformers import AutoTokenizer, AutoModelForTokenClassification

tokenizer = AutoTokenizer.from_pretrained("ModerRAS/AniFileBERT")
model = AutoModelForTokenClassification.from_pretrained("ModerRAS/AniFileBERT")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "published_checkpoint": "repository_root", | |
| "summary": "The published checkpoint was produced in two stages: a full 10-epoch CUDA fine-tune over Rust-generated virtual BIO shards, followed by a light thin-runtime hard-case focus fine-tune.", | |
| "summary_zh": "当前发布 checkpoint 是两阶段产物:先用 Rust 生成的虚拟 BIO shard 做完整 10 epoch CUDA 微调,再做轻量薄层运行时困难样本微调。", | |
| "stages": [ | |
| { | |
| "name": "dmhy-char-virtual-sps32-10epoch-lr1e5", | |
| "type": "full_dataset_finetune_with_rust_virtual_shards", | |
| "machine": "adqew@192.168.63.157", | |
| "data_file": "datasets/AnimeName/dmhy_weak_char.jsonl", | |
| "virtual_source_file": "data/generated/virtual_source_train_seed105.jsonl", | |
| "virtual_dataset_dir": "data/generated/virtual_char_sps32_seed105", | |
| "tokenizer_variant": "char", | |
| "vocab_file": "datasets/AnimeName/vocab.char.json", | |
| "vocab_size": 6199, | |
| "max_seq_length": 128, | |
| "source_rows": 619361, | |
| "special_fixture_rows": 935, | |
| "virtual_train_samples": 20439848, | |
| "eval_samples": 12641, | |
| "epochs": 10.0, | |
| "optimizer_steps": 114070, | |
| "batch_size": 1792, | |
| "learning_rate": 0.00001, | |
| "warmup_steps": 2000, | |
| "seed": 105, | |
| "device": "cuda", | |
| "mixed_precision": "bf16", | |
| "tf32": true, | |
| "dataloader_num_workers": 4, | |
| "virtual_generation": { | |
| "samples_per_source": 32, | |
| "separator_mode": "per-gap", | |
| "bracket_mode": "per-part", | |
| "include_original": true, | |
| "include_special_fixtures": true, | |
| "shard_size": 25000, | |
| "shards": 881, | |
| "elapsed_seconds": 31.55 | |
| }, | |
| "eval_f1": 0.9902097153862615, | |
| "eval_accuracy": 0.9978861640315251, | |
| "fixed_regression_model_only": "22/26", | |
| "fixed_regression_normalized_only": "23/26", | |
| "heldout_model_only": "1994/2048", | |
| "heldout_normalized_only": "2008/2048", | |
| "train_runtime_seconds": 21181.32, | |
| "train_tokens_per_second": 1236288.9470061918, | |
| "perf_gpu_util_avg": 96.14912280701755, | |
| "perf_gpu_util_max": 100.0, | |
| "role": "Base checkpoint for the final light hard-case focus stage. This is the full >100k-step virtual-shard training run." | |
| }, | |
| { | |
| "name": "dmhy-char-virtual-sps32-10epoch-lightfocus", | |
| "type": "light_hard_case_focus_finetune", | |
| "machine": "adqew@192.168.63.157", | |
| "data_file": "data/generated/focus_after_virtual_sps32_char.jsonl", | |
| "tokenizer_variant": "char", | |
| "vocab_file": "datasets/AnimeName/vocab.char.json", | |
| "vocab_size": 6199, | |
| "max_seq_length": 128, | |
| "focus_source_rows": 140660, | |
| "train_samples": 133627, | |
| "eval_samples": 7033, | |
| "epochs": 1.0, | |
| "batch_size": 1792, | |
| "learning_rate": 0.000002, | |
| "warmup_steps": 20, | |
| "seed": 208, | |
| "device": "cuda", | |
| "mixed_precision": "bf16", | |
| "tf32": true, | |
| "eval_f1": 0.9843520993189067, | |
| "eval_accuracy": 0.9961191832100342, | |
| "fixed_regression_model_only": "24/26", | |
| "fixed_regression_normalized_only": "26/26", | |
| "heldout_model_only": "1962/2048", | |
| "heldout_normalized_only": "1988/2048", | |
| "perf_tokens_per_second_avg": 997645.0850819343, | |
| "perf_gpu_util_avg": 100.0, | |
| "role": "Published repository-root checkpoint. The default thin runtime also includes narrow postprocessing for bracketed search notes and release-promo title prefixes." | |
| } | |
| ] | |
| } | |