Token Classification
Transformers
ONNX
Safetensors
English
Japanese
Chinese
bert
anime
filename-parsing
Eval Results (legacy)
Instructions to use ModerRAS/AniFileBERT with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ModerRAS/AniFileBERT with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("token-classification", model="ModerRAS/AniFileBERT")
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForTokenClassification

tokenizer = AutoTokenizer.from_pretrained("ModerRAS/AniFileBERT")
model = AutoModelForTokenClassification.from_pretrained("ModerRAS/AniFileBERT")
```
- Notebooks
- Google Colab
- Kaggle
| # -*- coding: utf-8 -*- | |
| r"""Inspect a local AniFileBERT background training run. | |
| Examples: | |
| .\.venv\Scripts\python.exe -m tools.training_status --name schema_v2_cached_wrapper_train_skipcache | |
| .\.venv\Scripts\python.exe -m tools.training_status --pid-file reports\schema_v2_cached_wrapper_train_skipcache.pid.txt --stdout logs\schema_v2_cached_wrapper_train_skipcache.out.log | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import json | |
| import os | |
| from pathlib import Path | |
| import subprocess | |
| import sys | |
| from typing import Any | |
# Force UTF-8 output with replacement of unencodable characters on both
# standard streams. Stream objects without ``reconfigure`` raise
# AttributeError, which is deliberately ignored (best effort).
try:
    for _stream in (sys.stdout, sys.stderr):
        _stream.reconfigure(encoding="utf-8", errors="replace")
except AttributeError:
    pass
def parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse ``sys.argv``.

    Returns:
        The parsed namespace with ``name``, ``pid_file``, ``stdout``,
        ``stderr``, ``tail`` (int, default 80) and ``metrics`` (list of
        paths, default empty).
    """
    cli = argparse.ArgumentParser(description="Inspect local AniFileBERT training status")

    # Plain optional string options, registered in help-output order.
    plain_options = (
        ("--name", "Run name used for reports/<name>.pid.txt and logs/<name>.out.log/.err.log"),
        ("--pid-file", "PID file path"),
        ("--stdout", "stdout log path"),
        ("--stderr", "stderr log path"),
    )
    for flag, description in plain_options:
        cli.add_argument(flag, help=description)

    cli.add_argument("--tail", type=int, default=80, help="Log lines to show")
    cli.add_argument(
        "--metrics",
        action="append",
        default=[],
        help="Metrics JSON path to summarize; can repeat",
    )
    return cli.parse_args()
def default_paths(name: str) -> tuple[Path, Path, Path]:
    """Derive the conventional file locations for a named run.

    Args:
        name: Run name used as the file stem.

    Returns:
        ``(pid_file, stdout_log, stderr_log)`` as relative paths:
        ``reports/<name>.pid.txt``, ``logs/<name>.out.log`` and
        ``logs/<name>.err.log``.
    """
    reports_dir = Path("reports")
    logs_dir = Path("logs")
    pid_file = reports_dir / f"{name}.pid.txt"
    stdout_log = logs_dir / f"{name}.out.log"
    stderr_log = logs_dir / f"{name}.err.log"
    return pid_file, stdout_log, stderr_log
def read_pid(path: Path) -> int | None:
    """Read a process id from a PID file.

    Args:
        path: Location of the PID file.

    Returns:
        The PID as an int, or ``None`` when the file is missing, cannot be
        read (for example it is a directory or access is denied), cannot be
        decoded, or does not contain an integer.
    """
    try:
        raw = path.read_bytes()
    except OSError:
        # Covers FileNotFoundError as well as IsADirectoryError and
        # PermissionError, so an unusable path never aborts the status report.
        return None

    # Tolerate byte-order marks: some Windows tools write text files as
    # UTF-16 or UTF-8 with a BOM, which a strict ASCII decode would reject.
    # NOTE(review): how the PID file is produced is not visible here.
    if raw.startswith((b"\xff\xfe", b"\xfe\xff")):
        encoding = "utf-16"
    else:
        encoding = "utf-8-sig"

    try:
        return int(raw.decode(encoding).strip())
    except ValueError:
        # UnicodeDecodeError is a ValueError subclass, so undecodable bytes
        # and non-numeric text are both reported as "no PID".
        return None
def process_status(pid: int | None) -> dict[str, Any]:
    """Report whether the process with the given PID is alive.

    Args:
        pid: Process id to probe, or ``None`` when no PID is known.

    Returns:
        A dict with ``"pid"`` and ``"running"``. On Windows a live process
        additionally carries ``"process"`` (the decoded ``Get-Process``
        fields) or ``"raw"`` (the undecodable PowerShell output).
    """
    if pid is None:
        return {"pid": None, "running": False}
    if pid <= 0:
        # Zero and negative ids address process groups for kill(2) rather
        # than a single process, so they can never identify a training run.
        return {"pid": pid, "running": False}

    if os.name == "nt":
        # os.kill() cannot be used as a liveness probe on Windows: any signal
        # other than the CTRL_* events terminates the target process.
        cmd = [
            "powershell.exe",
            "-NoProfile",
            "-Command",
            f"$p = Get-Process -Id {pid} -ErrorAction SilentlyContinue; "
            "$p | Select-Object Id,ProcessName,CPU,WorkingSet | ConvertTo-Json -Compress",
        ]
        proc = subprocess.run(cmd, capture_output=True, text=True, encoding="utf-8", errors="replace")
        text = proc.stdout.strip()
        if not text:
            return {"pid": pid, "running": False}
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            return {"pid": pid, "running": True, "raw": text}
        # NOTE(review): piping a null $p through Select-Object may still emit
        # an object whose properties are all null (or a bare JSON null);
        # neither describes a live process - confirm on the target shell.
        if data is None or (isinstance(data, dict) and data.get("Id") is None):
            return {"pid": pid, "running": False}
        return {"pid": pid, "running": True, "process": data}

    try:
        # Signal 0 performs existence and permission checks without
        # delivering a signal.
        os.kill(pid, 0)
    except PermissionError:
        # The process exists but belongs to another user.
        return {"pid": pid, "running": True}
    except (OSError, OverflowError):
        # ProcessLookupError (no such process), any other OS failure, or a
        # PID too large for the platform's pid type.
        return {"pid": pid, "running": False}
    return {"pid": pid, "running": True}
def tail_lines(path: Path, count: int) -> list[str]:
    """Return the last ``count`` lines of a text file.

    Args:
        path: Log file to read; decoded as UTF-8 with undecodable bytes
            replaced.
        count: Maximum number of trailing lines to return.

    Returns:
        The trailing lines without their ``"\\n"`` terminator, oldest first.
        Empty when ``path`` is not a regular file or ``count`` is not
        positive.
    """
    # Guard explicitly: a slice such as lines[-0:] would return every line,
    # and a negative count would drop lines from the front instead.
    if count <= 0 or not path.is_file():
        return []

    from collections import deque

    with path.open("r", encoding="utf-8", errors="replace") as handle:
        # A bounded deque keeps only the newest ``count`` lines while the
        # file is streamed, so large logs are never held in memory at once.
        newest = deque(handle, maxlen=count)
    return [line.rstrip("\n") for line in newest]
def summarize_metrics(path: Path) -> dict[str, Any] | None:
    """Condense a metrics JSON file into a short per-mode summary.

    Args:
        path: Metrics JSON file, read as UTF-8.

    Returns:
        ``None`` when ``path`` is not a regular file. Otherwise a dict with
        ``"path"`` plus one entry per item under the top-level ``"modes"``
        key that has either ``"full_correct"`` or ``"full_match_correct"``;
        each entry holds the matching counters and the identifiers of at
        most the first ten failures. Modes with neither key are omitted.
    """
    if not path.is_file():
        return None

    payload = json.loads(path.read_text(encoding="utf-8"))
    report: dict[str, Any] = {"path": str(path)}
    if "modes" not in payload:
        return report

    for label, stats in payload["modes"].items():
        if "full_correct" in stats:
            first_failures = stats.get("failures", [])[:10]
            report[label] = {
                "full_correct": stats.get("full_correct"),
                "case_count": stats.get("case_count"),
                "full_accuracy": stats.get("full_accuracy"),
                # Prefer the case id; fall back to the filename.
                "failures": [entry.get("id") or entry.get("filename") for entry in first_failures],
            }
            continue
        if "full_match_correct" in stats:
            first_failures = stats.get("failures", [])[:10]
            report[label] = {
                "full_match_correct": stats.get("full_match_correct"),
                "full_match_total": stats.get("full_match_total"),
                "full_match_accuracy": stats.get("full_match_accuracy"),
                "failures": [entry.get("filename") for entry in first_failures],
            }
    return report
def main() -> None:
    """Print process status, log tails and metric summaries for one run.

    Paths derived from ``--name`` are used as defaults; ``--pid-file``,
    ``--stdout`` and ``--stderr`` override them individually. A section
    whose path was neither derived nor given is skipped.
    """
    args = parse_args()

    # ``None`` marks "not provided". ``Path()`` must not be the placeholder:
    # it equals ``Path(".")`` and is always truthy, so the PID reader would
    # be pointed at the current directory and empty log sections would still
    # be printed.
    pid_file: Path | None = None
    stdout_path: Path | None = None
    stderr_path: Path | None = None
    if args.name:
        pid_file, stdout_path, stderr_path = default_paths(args.name)

    # Explicit paths win over the ones derived from --name.
    if args.pid_file:
        pid_file = Path(args.pid_file)
    if args.stdout:
        stdout_path = Path(args.stdout)
    if args.stderr:
        stderr_path = Path(args.stderr)

    pid = read_pid(pid_file) if pid_file is not None else None
    print(json.dumps(process_status(pid), ensure_ascii=False, indent=2))

    for label, log_path in (("stdout", stdout_path), ("stderr", stderr_path)):
        if log_path is None:
            continue
        print(f"\n--- {label} tail: {log_path} ---")
        for line in tail_lines(log_path, args.tail):
            print(line)

    for metric in args.metrics:
        summary = summarize_metrics(Path(metric))
        # Metrics paths that are not regular files are skipped silently.
        if summary is not None:
            print(f"\n--- metrics: {metric} ---")
            print(json.dumps(summary, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()