Token Classification
Transformers
ONNX
Safetensors
English
Japanese
Chinese
bert
anime
filename-parsing
Eval Results (legacy)
Instructions to use ModerRAS/AniFileBERT with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ModerRAS/AniFileBERT with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("token-classification", model="ModerRAS/AniFileBERT")
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForTokenClassification

tokenizer = AutoTokenizer.from_pretrained("ModerRAS/AniFileBERT")
model = AutoModelForTokenClassification.from_pretrained("ModerRAS/AniFileBERT")
```
- Notebooks
- Google Colab
- Kaggle
"""Benchmark AniFileBERT PyTorch and ONNX Runtime inference.

The benchmark measures end-to-end parser latency after model/session loading.
It includes tokenization, model forward pass, constrained BIO decoding, and
field postprocessing.
"""
| import argparse | |
| import json | |
| import statistics | |
| import time | |
| from pathlib import Path | |
| from typing import Callable, Dict, List | |
| import torch | |
| import onnxruntime as ort | |
| from transformers import BertForTokenClassification | |
| from anifilebert.config import Config | |
| from tools.evaluate_parser_cases import DEFAULT_CASE_FILE, load_cases | |
| from anifilebert.inference import parse_filename | |
| from tools.onnx_inference import OnnxFilenameParser | |
| from anifilebert.tokenizer import load_tokenizer | |
# Default destination of the JSON report (used as the ``--output`` default);
# a relative path, so it resolves against the current working directory.
DEFAULT_OUTPUT_FILE = Path("reports", "benchmark_results.json")
def percentile(values: List[float], pct: float) -> float:
    """Return the ``pct`` quantile of ``values`` using linear interpolation.

    The samples are sorted and the quantile is located at the fractional
    position ``(len(values) - 1) * pct``; when that position falls between two
    samples the result is their weighted average.

    Args:
        values: Samples to summarise. They do not need to be sorted.
        pct: Quantile expressed as a fraction in ``[0.0, 1.0]``
            (``0.95`` for the 95th percentile).

    Returns:
        The interpolated quantile, or ``0.0`` when ``values`` is empty.

    Raises:
        ValueError: If ``pct`` lies outside ``[0.0, 1.0]`` (NaN included).
    """
    # Without this check a fraction above 1 indexes past the end of the sorted
    # list and a negative one extrapolates with a negative weight.
    if not 0.0 <= pct <= 1.0:
        raise ValueError(f"pct must be within [0.0, 1.0], got {pct!r}")
    if not values:
        return 0.0
    ordered = sorted(values)
    position = (len(ordered) - 1) * pct
    lower = int(position)
    # Clamp so that pct == 1.0 (or a single sample) does not step off the end.
    upper = min(lower + 1, len(ordered) - 1)
    if lower == upper:
        return ordered[lower]
    weight = position - lower
    return ordered[lower] * (1 - weight) + ordered[upper] * weight
def summarize(name: str, load_ms: float, latencies_ms: List[float]) -> Dict:
    """Condense per-call latencies into one summary record for a backend.

    Args:
        name: Backend label copied into the record.
        load_ms: Load time in milliseconds, copied into the record unchanged.
        latencies_ms: Per-call latencies in milliseconds.

    Returns:
        A dict with ``name``, ``load_ms``, ``runs``, ``avg_ms``, ``p50_ms``,
        ``p95_ms``, ``p99_ms``, ``min_ms``, ``max_ms`` and ``throughput_fps``.
        Every statistic is ``0.0`` when ``latencies_ms`` is empty.
    """
    runs = len(latencies_ms)
    has_samples = bool(latencies_ms)
    elapsed_ms = sum(latencies_ms)

    # Throughput is calls per second of summed latency; guard a zero total.
    if elapsed_ms > 0:
        throughput = runs / (elapsed_ms / 1000.0)
    else:
        throughput = 0.0

    stats: Dict = {"name": name, "load_ms": load_ms, "runs": runs}
    stats["avg_ms"] = statistics.fmean(latencies_ms) if has_samples else 0.0
    for key, fraction in (("p50_ms", 0.50), ("p95_ms", 0.95), ("p99_ms", 0.99)):
        stats[key] = percentile(latencies_ms, fraction)
    stats["min_ms"] = min(latencies_ms) if has_samples else 0.0
    stats["max_ms"] = max(latencies_ms) if has_samples else 0.0
    stats["throughput_fps"] = throughput
    return stats
def run_benchmark(
    name: str,
    parser_fn: Callable[[str], Dict],
    filenames: List[str],
    warmup: int,
    repeat: int,
) -> Dict:
    """Time ``parser_fn`` over ``filenames`` and collect per-call latencies.

    Args:
        name: Label stored in the returned record.
        parser_fn: Callable invoked once per filename; its result is discarded.
        filenames: Inputs to parse. May be empty, in which case nothing runs.
        warmup: Number of untimed calls made first, cycling through
            ``filenames``.
        repeat: Number of timed passes over the full ``filenames`` list.

    Returns:
        ``{"name": name, "latencies_ms": [...]}`` with one entry per timed
        call, in milliseconds, measured with ``time.perf_counter``.
    """
    latencies: List[float] = []
    if not filenames:
        # Nothing to parse. Returning early also avoids the modulo-by-zero the
        # warmup loop would otherwise hit on an empty list.
        return {"name": name, "latencies_ms": latencies}

    total = len(filenames)
    for idx in range(warmup):
        parser_fn(filenames[idx % total])

    for _ in range(repeat):
        for filename in filenames:
            start = time.perf_counter()
            parser_fn(filename)
            latencies.append((time.perf_counter() - start) * 1000.0)
    return {"name": name, "latencies_ms": latencies}
def load_case_filenames(case_file: str, limit: int | None) -> List[str]:
    """Collect the non-empty ``filename`` values from a case file.

    Args:
        case_file: Path handed to ``load_cases``.
        limit: When a positive integer, keep only the first ``limit``
            filenames; ``None``, zero, or a negative value keeps them all.

    Returns:
        The selected filenames, in the order ``load_cases`` yielded them.

    Raises:
        ValueError: If no case carries a truthy ``filename``.
    """
    collected = [
        entry["filename"]
        for entry in load_cases(case_file)
        if entry.get("filename")
    ]
    wants_prefix = limit is not None and limit > 0
    selected = collected[:limit] if wants_prefix else collected
    if selected:
        return selected
    raise ValueError(f"No filenames found in {case_file}")
def main() -> None:
    """Run the benchmark from the command line and emit the report.

    Parses the CLI options, loads the case filenames, benchmarks the selected
    backend(s) with ``run_benchmark``, prints the JSON report followed by a
    Markdown summary table, and writes the same JSON to ``--output``.
    """
    cli = argparse.ArgumentParser(description="Benchmark AniFileBERT inference speed")
    cli.add_argument("--model-dir", default=".", help="Directory containing the PyTorch checkpoint")
    cli.add_argument("--onnx", default="exports/anime_filename_parser.onnx", help="ONNX model path")
    cli.add_argument("--case-file", default=DEFAULT_CASE_FILE, help="JSON regression case file")
    cli.add_argument("--max-length", type=int, default=None, help="Override sequence length")
    cli.add_argument("--limit-cases", type=int, default=None, help="Use only the first N cases")
    cli.add_argument("--repeat", type=int, default=5, help="Repeat the case set this many times")
    cli.add_argument("--warmup", type=int, default=10, help="Warmup parses per backend")
    cli.add_argument("--backend", choices=["both", "torch", "onnx"], default="both")
    cli.add_argument("--torch-threads", type=int, default=1, help="torch intra-op thread count")
    cli.add_argument("--ort-threads", type=int, default=1, help="ONNX Runtime intra/inter-op thread count")
    cli.add_argument("--no-constrained-bio", action="store_true", help="Use greedy labels for PyTorch backend")
    cli.add_argument("--output", default=str(DEFAULT_OUTPUT_FILE), help="JSON output path")
    args = cli.parse_args()

    filenames = load_case_filenames(args.case_file, args.limit_cases)
    model_dir = Path(args.model_dir)
    max_length = args.max_length
    use_constrained_bio = not args.no_constrained_bio

    # Only override torch's thread counts when a positive value was requested.
    if (args.torch_threads or 0) > 0:
        torch.set_num_threads(args.torch_threads)
        torch.set_num_interop_threads(args.torch_threads)

    results: List[Dict] = []

    if args.backend in ("both", "torch"):
        cfg = Config()
        torch_load_started = time.perf_counter()
        tokenizer = load_tokenizer(str(model_dir))
        model = BertForTokenClassification.from_pretrained(model_dir)
        model.eval()
        # An explicit --max-length wins; otherwise use the model config value,
        # falling back to 128 when the config does not define one.
        resolved_max_length = max_length or int(getattr(model.config, "max_seq_length", 128))
        raw_labels = getattr(model.config, "id2label", cfg.id2label)
        id2label = {int(key): label for key, label in raw_labels.items()}
        torch_load_ms = (time.perf_counter() - torch_load_started) * 1000.0

        def parse_torch(filename: str) -> Dict:
            """Parse one filename with the PyTorch model."""
            return parse_filename(
                filename,
                model,
                tokenizer,
                id2label,
                max_length=resolved_max_length,
                debug=False,
                constrain_bio=use_constrained_bio,
            )

        torch_run = run_benchmark("pytorch", parse_torch, filenames, args.warmup, args.repeat)
        results.append(summarize(torch_run["name"], torch_load_ms, torch_run["latencies_ms"]))

    if args.backend in ("both", "onnx"):
        session_options = ort.SessionOptions()
        if (args.ort_threads or 0) > 0:
            session_options.intra_op_num_threads = args.ort_threads
            session_options.inter_op_num_threads = args.ort_threads
        onnx_load_started = time.perf_counter()
        onnx_parser = OnnxFilenameParser(
            model_dir=model_dir,
            onnx_path=Path(args.onnx),
            max_length=max_length or 128,
            session_options=session_options,
        )
        onnx_load_ms = (time.perf_counter() - onnx_load_started) * 1000.0

        def parse_onnx(filename: str) -> Dict:
            """Parse one filename with the ONNX parser."""
            return onnx_parser.parse(filename)

        onnx_run = run_benchmark("onnxruntime", parse_onnx, filenames, args.warmup, args.repeat)
        results.append(summarize(onnx_run["name"], onnx_load_ms, onnx_run["latencies_ms"]))

    report = {
        "model_dir": str(model_dir),
        "onnx": args.onnx,
        "case_file": args.case_file,
        "case_count": len(filenames),
        "repeat": args.repeat,
        "warmup": args.warmup,
        "torch_threads": args.torch_threads,
        "ort_threads": args.ort_threads,
        "constrain_bio": use_constrained_bio,
        "results": results,
    }
    # The same serialised text is printed and written to disk.
    rendered = json.dumps(report, ensure_ascii=False, indent=2)
    print(rendered)

    print("\nSummary:")
    print("| Backend | Load ms | Avg ms | P50 ms | P95 ms | P99 ms | Throughput files/s |")
    print("| --- | ---: | ---: | ---: | ---: | ---: | ---: |")
    for row in results:
        print(
            f"| {row['name']} | {row['load_ms']:.2f} | {row['avg_ms']:.3f} | "
            f"{row['p50_ms']:.3f} | {row['p95_ms']:.3f} | {row['p99_ms']:.3f} | "
            f"{row['throughput_fps']:.1f} |"
        )

    destination = Path(args.output)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(rendered, encoding="utf-8")
# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()