Token Classification
Transformers
ONNX
Safetensors
English
Japanese
Chinese
bert
anime
filename-parsing
Eval Results (legacy)
Instructions to use ModerRAS/AniFileBERT with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ModerRAS/AniFileBERT with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("token-classification", model="ModerRAS/AniFileBERT")

# Load model directly
from transformers import AutoTokenizer, AutoModelForTokenClassification

tokenizer = AutoTokenizer.from_pretrained("ModerRAS/AniFileBERT")
model = AutoModelForTokenClassification.from_pretrained("ModerRAS/AniFileBERT")

- Notebooks
- Google Colab
- Kaggle
Balance synthetic movie and special path bases
Browse files
tools/schema_v2_synthetic_augment/src/main.rs
CHANGED
|
@@ -963,7 +963,7 @@ fn path_seed_compatible_with_base(kind: PathSeedKind, base_kind: BaseMediaKind)
|
|
| 963 |
}
|
| 964 |
|
| 965 |
fn simple_path_leaf_records() -> Vec<Record> {
|
| 966 |
-
vec![
|
| 967 |
char_record_from_spans(
|
| 968 |
"03.mkv",
|
| 969 |
&[(0, 2, Entity::Episode)],
|
|
@@ -988,23 +988,67 @@ fn simple_path_leaf_records() -> Vec<Record> {
|
|
| 988 |
"path_leaf_sxe",
|
| 989 |
"path_aug",
|
| 990 |
),
|
| 991 |
-
|
| 992 |
-
|
| 993 |
-
|
| 994 |
-
|
| 995 |
-
|
| 996 |
-
|
| 997 |
-
|
| 998 |
-
|
| 999 |
-
|
| 1000 |
-
|
| 1001 |
-
|
| 1002 |
-
|
| 1003 |
-
|
| 1004 |
-
|
| 1005 |
-
|
| 1006 |
-
|
| 1007 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1008 |
}
|
| 1009 |
|
| 1010 |
fn char_record_from_spans(
|
|
|
|
| 963 |
}
|
| 964 |
|
| 965 |
fn simple_path_leaf_records() -> Vec<Record> {
|
| 966 |
+
let mut records = vec![
|
| 967 |
char_record_from_spans(
|
| 968 |
"03.mkv",
|
| 969 |
&[(0, 2, Entity::Episode)],
|
|
|
|
| 988 |
"path_leaf_sxe",
|
| 989 |
"path_aug",
|
| 990 |
),
|
| 991 |
+
];
|
| 992 |
+
|
| 993 |
+
const MOVIE_TAGS: [&str; 6] = [
|
| 994 |
+
"Movie",
|
| 995 |
+
"Gekijouban",
|
| 996 |
+
"The Movie",
|
| 997 |
+
"Film",
|
| 998 |
+
"BDMovie",
|
| 999 |
+
"劇場版",
|
| 1000 |
+
];
|
| 1001 |
+
const MOVIE_SUFFIXES: [&str; 12] = [
|
| 1002 |
+
"",
|
| 1003 |
+
" 01",
|
| 1004 |
+
" 02",
|
| 1005 |
+
" 2004",
|
| 1006 |
+
" 1080p",
|
| 1007 |
+
" BDRip",
|
| 1008 |
+
" WEB-DL",
|
| 1009 |
+
" Remux",
|
| 1010 |
+
" Complete",
|
| 1011 |
+
" Director Cut",
|
| 1012 |
+
" Part A",
|
| 1013 |
+
" Part B",
|
| 1014 |
+
];
|
| 1015 |
+
for (tag_idx, tag) in MOVIE_TAGS.iter().enumerate() {
|
| 1016 |
+
for (suffix_idx, suffix) in MOVIE_SUFFIXES.iter().enumerate() {
|
| 1017 |
+
let filename = format!("{tag}{suffix}.mkv");
|
| 1018 |
+
let template_id = format!("path_leaf_movie_{tag_idx}_{suffix_idx}");
|
| 1019 |
+
records.push(char_record_from_spans(
|
| 1020 |
+
&filename,
|
| 1021 |
+
&[(0, tag.len(), Entity::Tag)],
|
| 1022 |
+
"schema_v2_synthetic_path_leaf",
|
| 1023 |
+
&template_id,
|
| 1024 |
+
"path_leaf_movie",
|
| 1025 |
+
"path_aug",
|
| 1026 |
+
));
|
| 1027 |
+
}
|
| 1028 |
+
}
|
| 1029 |
+
|
| 1030 |
+
const SPECIAL_TAGS: [&str; 12] = [
|
| 1031 |
+
"NCOP", "NCOP2", "NCED", "NCED2", "OP", "OP2", "ED", "ED1", "PV", "CM", "Menu", "Trailer",
|
| 1032 |
+
];
|
| 1033 |
+
const SPECIAL_SUFFIXES: [&str; 8] =
|
| 1034 |
+
["", " 01", " 02", " A", " B", " Clean", " Creditless", " v2"];
|
| 1035 |
+
for (tag_idx, tag) in SPECIAL_TAGS.iter().enumerate() {
|
| 1036 |
+
for (suffix_idx, suffix) in SPECIAL_SUFFIXES.iter().enumerate() {
|
| 1037 |
+
let special_text = format!("{tag}{suffix}");
|
| 1038 |
+
let filename = format!("{special_text}.mkv");
|
| 1039 |
+
let template_id = format!("path_leaf_special_{tag_idx}_{suffix_idx}");
|
| 1040 |
+
records.push(char_record_from_spans(
|
| 1041 |
+
&filename,
|
| 1042 |
+
&[(0, special_text.len(), Entity::Special)],
|
| 1043 |
+
"schema_v2_synthetic_path_leaf",
|
| 1044 |
+
&template_id,
|
| 1045 |
+
"path_leaf_special",
|
| 1046 |
+
"path_aug",
|
| 1047 |
+
));
|
| 1048 |
+
}
|
| 1049 |
+
}
|
| 1050 |
+
|
| 1051 |
+
records
|
| 1052 |
}
|
| 1053 |
|
| 1054 |
fn char_record_from_spans(
|