Token Classification
Transformers
ONNX
Safetensors
English
Japanese
Chinese
bert
anime
filename-parsing
Eval Results (legacy)
Instructions to use ModerRAS/AniFileBERT with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ModerRAS/AniFileBERT with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("token-classification", model="ModerRAS/AniFileBERT")

# Load model directly
from transformers import AutoTokenizer, AutoModelForTokenClassification

tokenizer = AutoTokenizer.from_pretrained("ModerRAS/AniFileBERT")
model = AutoModelForTokenClassification.from_pretrained("ModerRAS/AniFileBERT")
```
- Notebooks
- Google Colab
- Kaggle
| """Append path-shaped char BIO focus examples. | |
| This helper is intentionally small: it builds a handful of deterministic path | |
| examples where leading directories are noise and the parseable entities appear | |
| in later path segments. | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import json | |
| from pathlib import Path | |
def char_item(filename: str, spans: list[tuple[str, str]], source: str) -> dict[str, object]:
    """Build one character-level BIO record for ``filename``.

    Every character of ``filename`` becomes its own token. Characters covered
    by a span get ``B-<entity>`` on the first character and ``I-<entity>`` on
    the rest; all other characters stay ``"O"``.

    Args:
        filename: The full path or file name to tokenize character by character.
        spans: ``(text, entity)`` pairs. Each ``text`` is searched for starting
            at the end of the previous match; if it is not found there, the
            search falls back to the first occurrence anywhere in ``filename``.
        source: Value stored verbatim under the ``"source"`` key.

    Returns:
        A dict with the keys ``filename``, ``tokens``, ``labels``,
        ``tokenizer_variant`` (always ``"char"``) and ``source``.

    Raises:
        ValueError: If a span text is empty or does not occur in ``filename``.
    """
    tokens = list(filename)
    labels = ["O"] * len(tokens)
    cursor = 0
    for text, entity in spans:
        # ``str.find("")`` always "succeeds" at the cursor, which would tag an
        # unrelated character (or index past the end), so reject it up front.
        if not text:
            raise ValueError(f"empty span text for entity {entity!r} in {filename!r}")
        start = filename.find(text, cursor)
        if start < 0:
            # Spans may be listed out of order; retry from the beginning.
            start = filename.find(text)
        if start < 0:
            raise ValueError(f"span {text!r} not found in {filename!r}")
        end = start + len(text)
        labels[start] = f"B-{entity}"
        labels[start + 1:end] = [f"I-{entity}"] * (end - start - 1)
        # Later spans prefer matches after this one, so repeated substrings
        # (e.g. a title that appears in both directory and file name) resolve
        # left to right.
        cursor = end
    return {
        "filename": filename,
        "tokens": tokens,
        "labels": labels,
        "tokenizer_variant": "char",
        "source": source,
    }
def build_cases(source: str) -> list[dict[str, object]]:
    """Return the fixed set of path-shaped char BIO examples.

    Each entry pairs a full path with the ``(text, entity)`` spans to label in
    it; every record is produced by ``char_item`` and tagged with ``source``.
    The order of the returned records matches the order of the table below.
    """
    path_specs: list[tuple[str, list[tuple[str, str]]]] = [
        (
            r"Z:\Library\Anime\Shinsekai Yori\Extras\NCED02 [Ma10p_1080p][x265_flac].mkv",
            [
                ("Shinsekai Yori", "TITLE"),
                ("NCED02", "SPECIAL"),
                ("1080p", "RESOLUTION"),
                ("x265_flac", "SOURCE"),
            ],
        ),
        (
            r"O:\115open\Anime\Sousou no Frieren\Season 01\31 [1080P][Baha][WEB-DL].mkv",
            [
                ("Sousou no Frieren", "TITLE"),
                ("Season 01", "SEASON"),
                ("31", "EPISODE"),
                ("1080P", "RESOLUTION"),
                ("Baha", "SOURCE"),
                ("WEB-DL", "SOURCE"),
            ],
        ),
        (
            r"/mnt/media/anime/Bangumi/One Piece/Season 21/1110 [1080p][WEB-DL].mkv",
            [
                ("One Piece", "TITLE"),
                ("Season 21", "SEASON"),
                ("1110", "EPISODE"),
                ("1080p", "RESOLUTION"),
                ("WEB-DL", "SOURCE"),
            ],
        ),
        (
            r"D:\Media\Anime\completed\Witch Watch\S01\15 [1080p][CHS].mkv",
            [
                ("Witch Watch", "TITLE"),
                ("S01", "SEASON"),
                ("15", "EPISODE"),
                ("1080p", "RESOLUTION"),
                ("CHS", "SOURCE"),
            ],
        ),
        (
            r"O:\115open\Anime\Kakuriyo no Yadomeshi\Season 02\12 [WebRip 1080p].mkv",
            [
                ("Kakuriyo no Yadomeshi", "TITLE"),
                ("Season 02", "SEASON"),
                ("12", "EPISODE"),
                ("WebRip", "SOURCE"),
                ("1080p", "RESOLUTION"),
            ],
        ),
        (
            r"C:\Archive\old\misc\One Piece\Season 21\One.Piece.1110.1080p.WEB-DL.AAC2.0.H.264.mkv",
            [
                ("One Piece", "TITLE"),
                ("Season 21", "SEASON"),
                ("1110", "EPISODE"),
                ("1080p", "RESOLUTION"),
                ("WEB-DL", "SOURCE"),
            ],
        ),
    ]
    return [char_item(path, entity_spans, source) for path, entity_spans in path_specs]
def main() -> None:
    """Write the path-focus examples to a JSONL file and print a summary.

    Command-line options:
        --output: Destination file (required); parent directories are created.
        --repeat: How many times the whole case list is written (default 96).
        --source: Value stored in each record's ``source`` field.
        --append: Append to the output file instead of overwriting it.

    After writing, a JSON summary of the run is printed to stdout.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--output", required=True)
    parser.add_argument("--repeat", type=int, default=96)
    parser.add_argument("--source", default="manual_path_focus")
    parser.add_argument("--append", action="store_true")
    args = parser.parse_args()

    output = Path(args.output)
    output.parent.mkdir(parents=True, exist_ok=True)
    mode = "a" if args.append else "w"

    cases = build_cases(args.source)
    # The records are identical on every repeat, so serialize them only once.
    lines = [
        json.dumps(item, ensure_ascii=False, separators=(",", ":")) + "\n"
        for item in cases
    ]
    # range() treats a negative repeat as zero iterations; mirror that here so
    # the reported row count matches what is actually written.
    repeat_count = max(args.repeat, 0)
    with output.open(mode, encoding="utf-8") as handle:
        for _ in range(repeat_count):
            handle.writelines(lines)

    print(
        json.dumps(
            {
                "output": str(output),
                "repeat": args.repeat,
                "case_count": len(cases),
                "written_rows": repeat_count * len(cases),
                "append": args.append,
            },
            ensure_ascii=False,
            indent=2,
        )
    )


if __name__ == "__main__":
    main()