Token Classification
Transformers
ONNX
Safetensors
English
Japanese
Chinese
bert
anime
filename-parsing
Eval Results (legacy)
Instructions to use ModerRAS/AniFileBERT with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ModerRAS/AniFileBERT with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("token-classification", model="ModerRAS/AniFileBERT")

# Load model directly
from transformers import AutoTokenizer, AutoModelForTokenClassification

tokenizer = AutoTokenizer.from_pretrained("ModerRAS/AniFileBERT")
model = AutoModelForTokenClassification.from_pretrained("ModerRAS/AniFileBERT")
- Notebooks
- Google Colab
- Kaggle
File size: 5,417 Bytes
# -*- coding: utf-8 -*-
r"""Inspect a local AniFileBERT background training run.

Examples:
    .\.venv\Scripts\python.exe -m tools.training_status --name schema_v2_cached_wrapper_train_skipcache
    .\.venv\Scripts\python.exe -m tools.training_status --pid-file reports\schema_v2_cached_wrapper_train_skipcache.pid.txt --stdout logs\schema_v2_cached_wrapper_train_skipcache.out.log
"""
from __future__ import annotations
import argparse
import json
import os
from pathlib import Path
import subprocess
import sys
from typing import Any
# Best effort: switch both standard streams to UTF-8 with replacement so that
# printing log lines never fails on an unencodable character. Streams that do
# not offer ``reconfigure`` raise AttributeError, which ends the attempt.
try:
    for _stream in (sys.stdout, sys.stderr):
        _stream.reconfigure(encoding="utf-8", errors="replace")
except AttributeError:
    pass
def parse_args() -> argparse.Namespace:
    """Build the command-line interface and parse ``sys.argv``.

    Returns:
        The parsed namespace with ``name``, ``pid_file``, ``stdout``,
        ``stderr``, ``tail`` (int, default 80) and ``metrics`` (list of
        paths, default empty).
    """
    cli = argparse.ArgumentParser(description="Inspect local AniFileBERT training status")

    # Plain string options share the same shape, so register them from a table.
    text_options = (
        ("--name", "Run name used for reports/<name>.pid.txt and logs/<name>.out.log/.err.log"),
        ("--pid-file", "PID file path"),
        ("--stdout", "stdout log path"),
        ("--stderr", "stderr log path"),
    )
    for flag, description in text_options:
        cli.add_argument(flag, help=description)

    cli.add_argument("--tail", type=int, default=80, help="Log lines to show")
    cli.add_argument(
        "--metrics",
        action="append",
        default=[],
        help="Metrics JSON path to summarize; can repeat",
    )
    return cli.parse_args()
def default_paths(name: str) -> tuple[Path, Path, Path]:
    """Derive the conventional file locations for a named run.

    Args:
        name: Run name used as the file stem.

    Returns:
        A ``(pid_file, stdout_log, stderr_log)`` tuple of relative paths:
        ``reports/<name>.pid.txt``, ``logs/<name>.out.log`` and
        ``logs/<name>.err.log``.
    """
    reports_dir = Path("reports")
    logs_dir = Path("logs")
    pid_file = reports_dir / f"{name}.pid.txt"
    out_log = logs_dir / f"{name}.out.log"
    err_log = logs_dir / f"{name}.err.log"
    return pid_file, out_log, err_log
def read_pid(path: Path) -> int | None:
    """Read a process ID from a PID file.

    Args:
        path: Location of the PID file.

    Returns:
        The integer parsed from the file's stripped content, or ``None`` when
        the file is missing, cannot be read (for example because ``path`` is
        a directory), cannot be decoded, or does not contain an integer.
    """
    try:
        # "utf-8-sig" accepts everything ASCII does and additionally drops a
        # leading UTF-8 byte-order mark, which some tools prepend on write.
        text = path.read_text(encoding="utf-8-sig")
    except (OSError, ValueError):
        # OSError covers missing/unreadable paths; UnicodeDecodeError is a
        # ValueError subclass and covers files in an unexpected encoding.
        return None
    try:
        return int(text.strip())
    except ValueError:
        return None
def process_status(pid: int | None) -> dict[str, Any]:
    """Report whether the process with the given PID is currently alive.

    Args:
        pid: Process ID to look up, or ``None`` when no PID is known.

    Returns:
        A dict with ``"pid"`` and a boolean ``"running"``. On Windows a live
        process additionally carries ``"process"`` (the decoded PowerShell
        JSON with Id, ProcessName, CPU and WorkingSet) or ``"raw"`` (the
        PowerShell output when it is not valid JSON).
    """
    if pid is None:
        return {"pid": None, "running": False}
    if pid <= 0:
        # Non-positive IDs never name a single training process; on POSIX
        # they address process groups, so the liveness probe below would
        # report "running" for an unrelated set of processes.
        return {"pid": pid, "running": False}

    if os.name == "nt":
        cmd = [
            "powershell.exe",
            "-NoProfile",
            "-Command",
            f"$p = Get-Process -Id {pid} -ErrorAction SilentlyContinue; "
            "$p | Select-Object Id,ProcessName,CPU,WorkingSet | ConvertTo-Json -Compress",
        ]
        proc = subprocess.run(cmd, capture_output=True, text=True, encoding="utf-8", errors="replace")
        text = proc.stdout.strip()
        if not text:
            # No output means Get-Process found nothing for this ID.
            return {"pid": pid, "running": False}
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            return {"pid": pid, "running": True, "raw": text}
        return {"pid": pid, "running": True, "process": data}

    try:
        # Signal 0 performs the existence/permission check without
        # delivering a signal.
        os.kill(pid, 0)
    except PermissionError:
        # The process exists but belongs to another user.
        return {"pid": pid, "running": True}
    except OSError:
        return {"pid": pid, "running": False}
    return {"pid": pid, "running": True}
def tail_lines(path: Path, count: int) -> list[str]:
    """Return the last ``count`` lines of a text file.

    Args:
        path: Log file to read. Decoded as UTF-8 with undecodable bytes
            replaced.
        count: Maximum number of trailing lines to return.

    Returns:
        The trailing lines without their newline terminator, oldest first.
        Empty when ``count`` is not positive or ``path`` is not a regular
        file.
    """
    from collections import deque

    # A slice like ``lines[-0:]`` would select the whole file, so a
    # non-positive count is answered explicitly.
    if count <= 0 or not path.is_file():
        return []
    with path.open("r", encoding="utf-8", errors="replace") as handle:
        # A bounded deque keeps only the newest ``count`` lines while the
        # file is streamed, instead of holding the entire log in memory.
        newest = deque(handle, maxlen=count)
    return [line.rstrip("\n") for line in newest]
def summarize_metrics(path: Path) -> dict[str, Any] | None:
    """Condense a metrics JSON file into a short per-mode summary.

    Args:
        path: Metrics JSON file to read (UTF-8).

    Returns:
        ``None`` when ``path`` is not a regular file. Otherwise a dict that
        always contains ``"path"`` and, for every entry under the top-level
        ``"modes"`` mapping that has a ``"full_correct"`` or
        ``"full_match_correct"`` key, the matching counters plus the
        identifiers of at most ten failures. When the file cannot be decoded
        or parsed, the dict carries an ``"error"`` message instead of mode
        entries.
    """
    if not path.is_file():
        return None

    summary: dict[str, Any] = {"path": str(path)}
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except ValueError as exc:
        # JSONDecodeError and UnicodeDecodeError both derive from ValueError.
        # A file that is still being written must not abort the whole report.
        summary["error"] = f"could not parse metrics JSON: {exc}"
        return summary

    modes = data.get("modes") if isinstance(data, dict) else None
    if not isinstance(modes, dict):
        return summary

    for mode_name, mode in modes.items():
        if not isinstance(mode, dict):
            continue
        # ``or []`` also covers an explicit null "failures" value.
        failures = (mode.get("failures") or [])[:10]
        if "full_correct" in mode:
            summary[mode_name] = {
                "full_correct": mode.get("full_correct"),
                "case_count": mode.get("case_count"),
                "full_accuracy": mode.get("full_accuracy"),
                "failures": [item.get("id") or item.get("filename") for item in failures],
            }
        elif "full_match_correct" in mode:
            summary[mode_name] = {
                "full_match_correct": mode.get("full_match_correct"),
                "full_match_total": mode.get("full_match_total"),
                "full_match_accuracy": mode.get("full_match_accuracy"),
                "failures": [item.get("filename") for item in failures],
            }
    return summary
def main() -> None:
    """Print process state, log tails and metric summaries for a run.

    Paths come from ``--name`` (conventional locations) and can be
    overridden individually with ``--pid-file``, ``--stdout`` and
    ``--stderr``. A path that was not supplied either way is skipped.
    """
    args = parse_args()

    # ``None`` marks "not provided". An empty ``Path()`` cannot serve as that
    # marker because Path objects are always truthy and ``Path()`` is ".".
    pid_file: Path | None = None
    stdout_path: Path | None = None
    stderr_path: Path | None = None
    if args.name:
        pid_file, stdout_path, stderr_path = default_paths(args.name)
    # Explicit options take precedence over the name-derived defaults.
    if args.pid_file:
        pid_file = Path(args.pid_file)
    if args.stdout:
        stdout_path = Path(args.stdout)
    if args.stderr:
        stderr_path = Path(args.stderr)

    pid = read_pid(pid_file) if pid_file is not None else None
    print(json.dumps(process_status(pid), ensure_ascii=False, indent=2))

    for label, log_path in (("stdout", stdout_path), ("stderr", stderr_path)):
        if log_path is None:
            continue
        print(f"\n--- {label} tail: {log_path} ---")
        for line in tail_lines(log_path, args.tail):
            print(line)

    for metric in args.metrics:
        summary = summarize_metrics(Path(metric))
        if summary is not None:
            print(f"\n--- metrics: {metric} ---")
            print(json.dumps(summary, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()
|