Token Classification
Transformers
ONNX
Safetensors
English
Japanese
Chinese
bert
anime
filename-parsing
Eval Results (legacy)
Instructions to use ModerRAS/AniFileBERT with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ModerRAS/AniFileBERT with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("token-classification", model="ModerRAS/AniFileBERT")

# Load model directly
from transformers import AutoTokenizer, AutoModelForTokenClassification

tokenizer = AutoTokenizer.from_pretrained("ModerRAS/AniFileBERT")
model = AutoModelForTokenClassification.from_pretrained("ModerRAS/AniFileBERT")
- Notebooks
- Google Colab
- Kaggle
Refine low-frequency DMHY labeling rules
Browse files
tools/rust_dmhy_template_apply/src/main.rs
CHANGED
|
@@ -178,6 +178,8 @@ static EPISODE_RE: Lazy<Regex> =
|
|
| 178 |
Lazy::new(|| Regex::new(r"(?i)^(?:EP?|#)?\d{1,4}(?:\.\d{1,2})?(?:END)?$").unwrap());
|
| 179 |
static DECIMAL_EPISODE_RE: Lazy<Regex> =
|
| 180 |
Lazy::new(|| Regex::new(r"^\d{1,3}\.\d{1,2}$").unwrap());
|
|
|
|
|
|
|
| 181 |
static EPISODE_CJK_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^第?\d{1,4}[话話回集]$").unwrap());
|
| 182 |
static EPISODE_CJK_PREFIX_RE: Lazy<Regex> =
|
| 183 |
Lazy::new(|| Regex::new(r"^第?\d{1,4}[话話回集]").unwrap());
|
|
@@ -190,12 +192,15 @@ static SXE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)^S\d{1,2}E\d{1,4}(?:v
|
|
| 190 |
static SXE_VALUE_RE: Lazy<Regex> =
|
| 191 |
Lazy::new(|| Regex::new(r"(?i)^S(\d{1,2})E(\d{1,4})(?:v(\d+))?$").unwrap());
|
| 192 |
static EPISODE_VALUE_RE: Lazy<Regex> =
|
| 193 |
-
Lazy::new(|| Regex::new(r"(?i)^(EP|E|#)(\d{1,4})(?:v(\d+))?$").unwrap());
|
| 194 |
static SEASON_RE: Lazy<Regex> = Lazy::new(|| {
|
| 195 |
Regex::new(r"(?i)^(?:S\d{1,2}|Season\s*\d{1,2}|第[一二三四五六七八九十\d]+[季期部])$").unwrap()
|
| 196 |
});
|
| 197 |
static CJK_SEASON_TOKEN_RE: Lazy<Regex> =
|
| 198 |
Lazy::new(|| Regex::new(r"^第[一二三四五六七八九十\d]+[季期部]$").unwrap());
|
|
|
|
|
|
|
|
|
|
| 199 |
static SEASON_VALUE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)^S(\d{1,2})$").unwrap());
|
| 200 |
static SPECIAL_RE: Lazy<Regex> = Lazy::new(|| {
|
| 201 |
Regex::new(r"(?i)^(?:(?:NCOP|NCED|OP|ED|PV|CM)(?:[\s_.-]?(?:\d{1,4}|v\d{1,3}|[A-Z]))?|SP(?:[\s_.-]?\d{0,4})?|(?:OVA|OAD|IV)(?:[\s_.-]?\d{0,4})?|(?:Menu|Intro|Preview|Trailer|Teaser|Animatics?)(?:[\s_.-]?(?:\d{0,4}|Ep\d{1,4}|[A-Z]))?)$").unwrap()
|
|
@@ -204,6 +209,11 @@ static VOLUME_RE: Lazy<Regex> =
|
|
| 204 |
Lazy::new(|| Regex::new(r"(?i)^(?:Vol(?:ume)?\.?|Disc|CD|BD|DVD|D)\s*\d{1,3}$").unwrap());
|
| 205 |
static DATE_RE: Lazy<Regex> =
|
| 206 |
Lazy::new(|| Regex::new(r"^(?:19|20)\d{2}(?:[._-]\d{1,2}){0,2}$").unwrap());
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
static LANG_RE: Lazy<Regex> = Lazy::new(|| {
|
| 208 |
Regex::new(r"(?i)^(?:CHS|CHT|ZHS|ZHT|GB|BIG5|JPN?|JP|JA|JAP|ENG|EN|SC|TC|简[体體]?|繁[体體]?|简日|繁日|字幕|内封|外挂|Sub|Subs|MSubs?)$").unwrap()
|
| 209 |
});
|
|
@@ -1338,7 +1348,10 @@ fn classify_atom(text: &str) -> String {
|
|
| 1338 |
if RESOLUTION_RE.is_match(&cleaned) {
|
| 1339 |
return "RESOLUTION".to_string();
|
| 1340 |
}
|
| 1341 |
-
if DATE_RE.is_match(&cleaned)
|
|
|
|
|
|
|
|
|
|
| 1342 |
return "DATE".to_string();
|
| 1343 |
}
|
| 1344 |
if EPISODE_VERSION_RE.is_match(&compact) {
|
|
@@ -1740,7 +1753,7 @@ fn has_encoding_noise(value: &str) -> bool {
|
|
| 1740 |
let markers = [
|
| 1741 |
"譁", "蜈", "螟", "蟄", "謇", "邱", "荳", "縺", "繧", "莨", "鬆", "髯", "瀛",
|
| 1742 |
"楀", "箷", "绲", "刔", "鏃", "湪", "鏍", "犲", "儚", "鐗", "吀", "铦", "躲",
|
| 1743 |
-
"伄", "椋", "伓", "姘",
|
| 1744 |
];
|
| 1745 |
let marker_hits = markers
|
| 1746 |
.iter()
|
|
@@ -1750,7 +1763,10 @@ fn has_encoding_noise(value: &str) -> bool {
|
|
| 1750 |
.chars()
|
| 1751 |
.filter(|ch| ('\u{ff61}'..='\u{ff9f}').contains(ch))
|
| 1752 |
.count();
|
| 1753 |
-
|
|
|
|
|
|
|
|
|
|
| 1754 |
}
|
| 1755 |
|
| 1756 |
fn has_non_anime_noise(value: &str) -> bool {
|
|
@@ -2096,8 +2112,12 @@ fn split_episode_token(token: &str) -> Option<(Vec<String>, Vec<String>)> {
|
|
| 2096 |
return Some((pieces, labels));
|
| 2097 |
}
|
| 2098 |
let caps = EPISODE_VALUE_RE.captures(token)?;
|
| 2099 |
-
let mut pieces = vec![caps[1].to_string()
|
| 2100 |
-
let mut labels = vec!["O".to_string()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2101 |
if let Some(version) = caps.get(3) {
|
| 2102 |
pieces.push("v".to_string());
|
| 2103 |
pieces.push(version.as_str().to_string());
|
|
@@ -2168,9 +2188,14 @@ fn is_special_title_phrase(text: &str) -> bool {
|
|
| 2168 |
| "TOKUTEN"
|
| 2169 |
| "TRAILER"
|
| 2170 |
| "WORLD PREMIERE"
|
|
|
|
|
|
|
| 2171 |
| "映像特典"
|
| 2172 |
| "特典"
|
| 2173 |
) || normalized.contains("映像特典")
|
|
|
|
|
|
|
|
|
|
| 2174 |
|| SPECIAL_TITLE_PHRASE_RE.is_match(text)
|
| 2175 |
}
|
| 2176 |
|
|
@@ -2181,16 +2206,29 @@ const KNOWN_TITLE_PHRASES: &[&[&str]] = &[
|
|
| 2181 |
&["Zom", "100"],
|
| 2182 |
&["Kamisama", "Hajimemashita", "2"],
|
| 2183 |
&["Phantasy", "Star", "Online", "2", "Episode", "Oracle"],
|
|
|
|
|
|
|
| 2184 |
];
|
| 2185 |
|
| 2186 |
fn apply_known_title_phrases(tokens: &[String], groups: &[Group], roles: &mut [String]) {
|
| 2187 |
if let Some(whitelists) = RUNTIME_WHITELISTS.get() {
|
| 2188 |
for (index, group) in groups.iter().enumerate() {
|
| 2189 |
if group.class_name == "BRACKET_TEXT"
|
| 2190 |
-
&& roles.get(index).is_some_and(|role| role == "GROUP")
|
| 2191 |
&& whitelists
|
| 2192 |
.group_names
|
| 2193 |
.contains(&normalize_whitelist_name(&group_text(tokens, group)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2194 |
{
|
| 2195 |
roles[index] = "GROUP".to_string();
|
| 2196 |
}
|
|
@@ -2231,7 +2269,14 @@ fn apply_title_phrase(
|
|
| 2231 |
{
|
| 2232 |
for (group_index, _) in window {
|
| 2233 |
if roles.get(*group_index).is_some_and(|role| role == "GROUP") {
|
| 2234 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2235 |
}
|
| 2236 |
if !allow_structural_override
|
| 2237 |
&& roles.get(*group_index).is_some_and(|role| {
|
|
@@ -2345,6 +2390,24 @@ fn adjust_contextual_roles(tokens: &[String], groups: &[Group], roles: &[String]
|
|
| 2345 |
output[index] = "O".to_string();
|
| 2346 |
continue;
|
| 2347 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2348 |
if roles[index].starts_with("EPISODE") && (2..roles.len()).contains(&index) {
|
| 2349 |
let previous_text = group_text(tokens, &groups[index - 2]);
|
| 2350 |
let next_special = output[index + 1..roles.len().min(index + 4)]
|
|
@@ -2376,6 +2439,49 @@ fn adjust_contextual_roles(tokens: &[String], groups: &[Group], roles: &[String]
|
|
| 2376 |
output[index] = "SPECIAL".to_string();
|
| 2377 |
continue;
|
| 2378 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2379 |
if output[index - 2] == "TITLE"
|
| 2380 |
&& groups[index - 1].class_name == "SEP"
|
| 2381 |
&& previous_text.len() <= 48
|
|
@@ -2398,6 +2504,27 @@ fn adjust_contextual_roles(tokens: &[String], groups: &[Group], roles: &[String]
|
|
| 2398 |
continue;
|
| 2399 |
}
|
| 2400 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2401 |
if roles[index].starts_with("EPISODE")
|
| 2402 |
&& BARE_RESOLUTION_RE.is_match(&text)
|
| 2403 |
&& index >= 2
|
|
@@ -2474,6 +2601,17 @@ fn adjust_contextual_roles(tokens: &[String], groups: &[Group], roles: &[String]
|
|
| 2474 |
&& text.chars().any(|ch| ch.is_alphabetic())
|
| 2475 |
&& !ep_markers.contains(&text.as_str())
|
| 2476 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2477 |
if let Some(last_title) = output[..index].iter().rposition(|role| role == "TITLE") {
|
| 2478 |
let episode_since_title = output[last_title + 1..index]
|
| 2479 |
.iter()
|
|
@@ -2561,14 +2699,36 @@ fn adjust_contextual_roles(tokens: &[String], groups: &[Group], roles: &[String]
|
|
| 2561 |
} else {
|
| 2562 |
String::new()
|
| 2563 |
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2564 |
if previous_text.contains('点')
|
| 2565 |
|| previous_text.contains('點')
|
| 2566 |
|| previous_text.contains("晚上")
|
| 2567 |
|| previous_text.contains("上午")
|
| 2568 |
|| previous_text.contains("下午")
|
|
|
|
|
|
|
| 2569 |
|| next_text.contains('点')
|
| 2570 |
|| next_text.contains('點')
|
| 2571 |
|| next_text.contains('半')
|
|
|
|
|
|
|
| 2572 |
{
|
| 2573 |
output[index] = "O".to_string();
|
| 2574 |
}
|
|
@@ -2687,9 +2847,27 @@ fn title_candidate_score(tokens: &[String], groups: &[Group], start: usize, end:
|
|
| 2687 |
) {
|
| 2688 |
score -= 500;
|
| 2689 |
}
|
|
|
|
|
|
|
|
|
|
| 2690 |
score
|
| 2691 |
}
|
| 2692 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2693 |
fn roles_candidate_text_group(group: &Group) -> bool {
|
| 2694 |
matches!(group.class_name.as_str(), "TEXT" | "BRACKET_TEXT")
|
| 2695 |
}
|
|
@@ -2712,19 +2890,39 @@ fn normalize_generated_tokens(tokens: &[String], labels: &[String]) -> (Vec<Stri
|
|
| 2712 |
|
| 2713 |
fn normalize_title_token(token: &str) -> (Vec<String>, Vec<String>) {
|
| 2714 |
let pieces = split_generated_token(token);
|
| 2715 |
-
let
|
| 2716 |
-
|
| 2717 |
-
|
| 2718 |
-
|
| 2719 |
-
|
| 2720 |
-
|
| 2721 |
-
|
| 2722 |
-
|
| 2723 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2724 |
}
|
| 2725 |
-
|
| 2726 |
-
|
| 2727 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2728 |
}
|
| 2729 |
|
| 2730 |
fn split_generated_token(token: &str) -> Vec<String> {
|
|
@@ -2881,11 +3079,14 @@ fn smooth_title_spans(tokens: &[String], labels: &[String]) -> Vec<String> {
|
|
| 2881 |
right += 1;
|
| 2882 |
}
|
| 2883 |
if left >= 0 && right < tokens.len() {
|
| 2884 |
-
let left_label =
|
| 2885 |
-
let right_label =
|
| 2886 |
if left_label == right_label && matches!(left_label.as_str(), "B-TITLE" | "B-GROUP") {
|
| 2887 |
output[index] = left_label.clone();
|
| 2888 |
}
|
|
|
|
|
|
|
|
|
|
| 2889 |
}
|
| 2890 |
if title_terminal_punctuation.contains(&token.as_str()) && index > 0 {
|
| 2891 |
let left_label = &output[index - 1];
|
|
@@ -3183,6 +3384,47 @@ mod tests {
|
|
| 3183 |
assert!(decimal_episode.contains(&(".".to_string(), "B-EPISODE".to_string())));
|
| 3184 |
assert!(decimal_episode.contains(&("5".to_string(), "B-EPISODE".to_string())));
|
| 3185 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3186 |
let spy = labels_for("[Studio GreenTea] Spy x Family [38][WebRip][HEVC-10bit 1080p AAC ASSx2]");
|
| 3187 |
assert!(spy.contains(&("Studio".to_string(), "B-GROUP".to_string())));
|
| 3188 |
assert!(spy.contains(&("Spy".to_string(), "B-TITLE".to_string())));
|
|
@@ -3388,6 +3630,21 @@ mod tests {
|
|
| 3388 |
assert!(volume.contains(&("MENU02".to_string(), "B-SPECIAL".to_string())));
|
| 3389 |
assert!(!volume.contains(&("01".to_string(), "B-EPISODE".to_string())));
|
| 3390 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3391 |
let numeric_title =
|
| 3392 |
labels_for("3000.Leagues.in.Search.of.Mother.S01E01.1080p.WEB-DL.H.264-D00oo00M");
|
| 3393 |
assert!(numeric_title.contains(&("3000".to_string(), "B-TITLE".to_string())));
|
|
@@ -3404,5 +3661,49 @@ mod tests {
|
|
| 3404 |
assert!(media_block.contains(&("1080".to_string(), "B-RESOLUTION".to_string())));
|
| 3405 |
assert!(media_block.contains(&("x264".to_string(), "B-SOURCE".to_string())));
|
| 3406 |
assert!(media_block.contains(&("Chs".to_string(), "B-SOURCE".to_string())));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3407 |
}
|
| 3408 |
}
|
|
|
|
| 178 |
Lazy::new(|| Regex::new(r"(?i)^(?:EP?|#)?\d{1,4}(?:\.\d{1,2})?(?:END)?$").unwrap());
|
| 179 |
static DECIMAL_EPISODE_RE: Lazy<Regex> =
|
| 180 |
Lazy::new(|| Regex::new(r"^\d{1,3}\.\d{1,2}$").unwrap());
|
| 181 |
+
static NUMERIC_TITLE_PREFIX_RE: Lazy<Regex> =
|
| 182 |
+
Lazy::new(|| Regex::new(r"^\d{1,3}(?:[./-]\d{1,3})?$").unwrap());
|
| 183 |
static EPISODE_CJK_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^第?\d{1,4}[话話回集]$").unwrap());
|
| 184 |
static EPISODE_CJK_PREFIX_RE: Lazy<Regex> =
|
| 185 |
Lazy::new(|| Regex::new(r"^第?\d{1,4}[话話回集]").unwrap());
|
|
|
|
| 192 |
static SXE_VALUE_RE: Lazy<Regex> =
|
| 193 |
Lazy::new(|| Regex::new(r"(?i)^S(\d{1,2})E(\d{1,4})(?:v(\d+))?$").unwrap());
|
| 194 |
static EPISODE_VALUE_RE: Lazy<Regex> =
|
| 195 |
+
Lazy::new(|| Regex::new(r"(?i)^(EP|E|#)(\d{1,4}(?:\.\d{1,2})?)(?:v(\d+))?$").unwrap());
|
| 196 |
static SEASON_RE: Lazy<Regex> = Lazy::new(|| {
|
| 197 |
Regex::new(r"(?i)^(?:S\d{1,2}|Season\s*\d{1,2}|第[一二三四五六七八九十\d]+[季期部])$").unwrap()
|
| 198 |
});
|
| 199 |
static CJK_SEASON_TOKEN_RE: Lazy<Regex> =
|
| 200 |
Lazy::new(|| Regex::new(r"^第[一二三四五六七八九十\d]+[季期部]$").unwrap());
|
| 201 |
+
static CJK_SEASON_EMBEDDED_RE: Lazy<Regex> = Lazy::new(|| {
|
| 202 |
+
Regex::new(r"^(.+?)(第[一二三四五六七八九十\d]+[季期部])(.{0,12})$").unwrap()
|
| 203 |
+
});
|
| 204 |
static SEASON_VALUE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)^S(\d{1,2})$").unwrap());
|
| 205 |
static SPECIAL_RE: Lazy<Regex> = Lazy::new(|| {
|
| 206 |
Regex::new(r"(?i)^(?:(?:NCOP|NCED|OP|ED|PV|CM)(?:[\s_.-]?(?:\d{1,4}|v\d{1,3}|[A-Z]))?|SP(?:[\s_.-]?\d{0,4})?|(?:OVA|OAD|IV)(?:[\s_.-]?\d{0,4})?|(?:Menu|Intro|Preview|Trailer|Teaser|Animatics?)(?:[\s_.-]?(?:\d{0,4}|Ep\d{1,4}|[A-Z]))?)$").unwrap()
|
|
|
|
| 209 |
Lazy::new(|| Regex::new(r"(?i)^(?:Vol(?:ume)?\.?|Disc|CD|BD|DVD|D)\s*\d{1,3}$").unwrap());
|
| 210 |
static DATE_RE: Lazy<Regex> =
|
| 211 |
Lazy::new(|| Regex::new(r"^(?:19|20)\d{2}(?:[._-]\d{1,2}){0,2}$").unwrap());
|
| 212 |
+
static DATE_RANGE_MIXED_RE: Lazy<Regex> = Lazy::new(|| {
|
| 213 |
+
Regex::new(r"^(?:19|20)\d{2}(?:[._-]\d{1,2}){0,2}\s*[-~]\s*(?:19|20)\d{2}(?:[._-]\d{1,2}){0,2}$").unwrap()
|
| 214 |
+
});
|
| 215 |
+
static CJK_DATE_RE: Lazy<Regex> =
|
| 216 |
+
Lazy::new(|| Regex::new(r"^(?:19|20)\d{2}年\d{1,2}月\d{1,2}日$").unwrap());
|
| 217 |
static LANG_RE: Lazy<Regex> = Lazy::new(|| {
|
| 218 |
Regex::new(r"(?i)^(?:CHS|CHT|ZHS|ZHT|GB|BIG5|JPN?|JP|JA|JAP|ENG|EN|SC|TC|简[体體]?|繁[体體]?|简日|繁日|字幕|内封|外挂|Sub|Subs|MSubs?)$").unwrap()
|
| 219 |
});
|
|
|
|
| 1348 |
if RESOLUTION_RE.is_match(&cleaned) {
|
| 1349 |
return "RESOLUTION".to_string();
|
| 1350 |
}
|
| 1351 |
+
if DATE_RE.is_match(&cleaned)
|
| 1352 |
+
|| DATE_RANGE_MIXED_RE.is_match(&cleaned)
|
| 1353 |
+
|| CJK_DATE_RE.is_match(&cleaned)
|
| 1354 |
+
{
|
| 1355 |
return "DATE".to_string();
|
| 1356 |
}
|
| 1357 |
if EPISODE_VERSION_RE.is_match(&compact) {
|
|
|
|
| 1753 |
let markers = [
|
| 1754 |
"譁", "蜈", "螟", "蟄", "謇", "邱", "荳", "縺", "繧", "莨", "鬆", "髯", "瀛",
|
| 1755 |
"楀", "箷", "绲", "刔", "鏃", "湪", "鏍", "犲", "儚", "鐗", "吀", "铦", "躲",
|
| 1756 |
+
"伄", "椋", "伓", "姘", "帽",
|
| 1757 |
];
|
| 1758 |
let marker_hits = markers
|
| 1759 |
.iter()
|
|
|
|
| 1763 |
.chars()
|
| 1764 |
.filter(|ch| ('\u{ff61}'..='\u{ff9f}').contains(ch))
|
| 1765 |
.count();
|
| 1766 |
+
let latin_mojibake = value.split_whitespace().any(|part| {
|
| 1767 |
+
part.contains('帽') && part.chars().any(|ch| ch.is_ascii_alphabetic())
|
| 1768 |
+
});
|
| 1769 |
+
marker_hits >= 2 || (marker_hits >= 1 && halfwidth_hits >= 1) || latin_mojibake
|
| 1770 |
}
|
| 1771 |
|
| 1772 |
fn has_non_anime_noise(value: &str) -> bool {
|
|
|
|
| 2112 |
return Some((pieces, labels));
|
| 2113 |
}
|
| 2114 |
let caps = EPISODE_VALUE_RE.captures(token)?;
|
| 2115 |
+
let mut pieces = vec![caps[1].to_string()];
|
| 2116 |
+
let mut labels = vec!["O".to_string()];
|
| 2117 |
+
for piece in split_generated_token(&caps[2]) {
|
| 2118 |
+
pieces.push(piece);
|
| 2119 |
+
labels.push("B-EPISODE".to_string());
|
| 2120 |
+
}
|
| 2121 |
if let Some(version) = caps.get(3) {
|
| 2122 |
pieces.push("v".to_string());
|
| 2123 |
pieces.push(version.as_str().to_string());
|
|
|
|
| 2188 |
| "TOKUTEN"
|
| 2189 |
| "TRAILER"
|
| 2190 |
| "WORLD PREMIERE"
|
| 2191 |
+
| "番宣"
|
| 2192 |
+
| "宣番"
|
| 2193 |
| "映像特典"
|
| 2194 |
| "特典"
|
| 2195 |
) || normalized.contains("映像特典")
|
| 2196 |
+
|| normalized.contains("特典映像")
|
| 2197 |
+
|| normalized.contains("番宣")
|
| 2198 |
+
|| normalized.contains("宣番")
|
| 2199 |
|| SPECIAL_TITLE_PHRASE_RE.is_match(text)
|
| 2200 |
}
|
| 2201 |
|
|
|
|
| 2206 |
&["Zom", "100"],
|
| 2207 |
&["Kamisama", "Hajimemashita", "2"],
|
| 2208 |
&["Phantasy", "Star", "Online", "2", "Episode", "Oracle"],
|
| 2209 |
+
&["Lupin The Thrid Jigen Daisuke no Bohyou"],
|
| 2210 |
+
&["Lupin The Third Jigen Daisuke no Bohyou"],
|
| 2211 |
];
|
| 2212 |
|
| 2213 |
fn apply_known_title_phrases(tokens: &[String], groups: &[Group], roles: &mut [String]) {
|
| 2214 |
if let Some(whitelists) = RUNTIME_WHITELISTS.get() {
|
| 2215 |
for (index, group) in groups.iter().enumerate() {
|
| 2216 |
if group.class_name == "BRACKET_TEXT"
|
|
|
|
| 2217 |
&& whitelists
|
| 2218 |
.group_names
|
| 2219 |
.contains(&normalize_whitelist_name(&group_text(tokens, group)))
|
| 2220 |
+
&& !roles.get(index).is_some_and(|role| {
|
| 2221 |
+
matches!(
|
| 2222 |
+
role.as_str(),
|
| 2223 |
+
"EPISODE"
|
| 2224 |
+
| "EPISODE_VERSION"
|
| 2225 |
+
| "EPISODE_RANGE"
|
| 2226 |
+
| "SEASON"
|
| 2227 |
+
| "SOURCE"
|
| 2228 |
+
| "RESOLUTION"
|
| 2229 |
+
| "SPECIAL"
|
| 2230 |
+
)
|
| 2231 |
+
})
|
| 2232 |
{
|
| 2233 |
roles[index] = "GROUP".to_string();
|
| 2234 |
}
|
|
|
|
| 2269 |
{
|
| 2270 |
for (group_index, _) in window {
|
| 2271 |
if roles.get(*group_index).is_some_and(|role| role == "GROUP") {
|
| 2272 |
+
let is_known_group = RUNTIME_WHITELISTS.get().is_some_and(|whitelists| {
|
| 2273 |
+
whitelists
|
| 2274 |
+
.group_names
|
| 2275 |
+
.contains(&normalize_whitelist_name(&window[0].1))
|
| 2276 |
+
});
|
| 2277 |
+
if is_known_group {
|
| 2278 |
+
continue;
|
| 2279 |
+
}
|
| 2280 |
}
|
| 2281 |
if !allow_structural_override
|
| 2282 |
&& roles.get(*group_index).is_some_and(|role| {
|
|
|
|
| 2390 |
output[index] = "O".to_string();
|
| 2391 |
continue;
|
| 2392 |
}
|
| 2393 |
+
if roles[index].starts_with("EPISODE")
|
| 2394 |
+
&& index >= 1
|
| 2395 |
+
&& output[index - 1] == "TITLE"
|
| 2396 |
+
&& groups[index - 1].class_name != "SEP"
|
| 2397 |
+
&& text.chars().all(|ch| ch.is_ascii_digit())
|
| 2398 |
+
&& (text.len() <= 2
|
| 2399 |
+
|| (text.len() <= 3
|
| 2400 |
+
&& group_text(tokens, &groups[index - 1])
|
| 2401 |
+
.chars()
|
| 2402 |
+
.any(|ch| !ch.is_ascii())
|
| 2403 |
+
&& !group_text(tokens, &groups[index - 1]).ends_with('第')))
|
| 2404 |
+
&& roles[index + 1..]
|
| 2405 |
+
.iter()
|
| 2406 |
+
.any(|role| role.starts_with("EPISODE"))
|
| 2407 |
+
{
|
| 2408 |
+
output[index] = "TITLE".to_string();
|
| 2409 |
+
continue;
|
| 2410 |
+
}
|
| 2411 |
if roles[index].starts_with("EPISODE") && (2..roles.len()).contains(&index) {
|
| 2412 |
let previous_text = group_text(tokens, &groups[index - 2]);
|
| 2413 |
let next_special = output[index + 1..roles.len().min(index + 4)]
|
|
|
|
| 2439 |
output[index] = "SPECIAL".to_string();
|
| 2440 |
continue;
|
| 2441 |
}
|
| 2442 |
+
if index >= 1
|
| 2443 |
+
&& output[index - 1] == "TITLE"
|
| 2444 |
+
&& groups[index - 1].class_name != "SEP"
|
| 2445 |
+
&& text.chars().all(|ch| ch.is_ascii_digit())
|
| 2446 |
+
&& (text.len() <= 2
|
| 2447 |
+
|| (text.len() <= 3
|
| 2448 |
+
&& group_text(tokens, &groups[index - 1])
|
| 2449 |
+
.chars()
|
| 2450 |
+
.any(|ch| !ch.is_ascii())
|
| 2451 |
+
&& !group_text(tokens, &groups[index - 1]).ends_with('第')))
|
| 2452 |
+
&& roles[index + 1..]
|
| 2453 |
+
.iter()
|
| 2454 |
+
.any(|role| role.starts_with("EPISODE"))
|
| 2455 |
+
{
|
| 2456 |
+
output[index] = "TITLE".to_string();
|
| 2457 |
+
continue;
|
| 2458 |
+
}
|
| 2459 |
+
if !output[..index].iter().any(|role| role == "TITLE")
|
| 2460 |
+
&& NUMERIC_TITLE_PREFIX_RE.is_match(&text)
|
| 2461 |
+
&& output[..index].iter().any(|role| role == "GROUP")
|
| 2462 |
+
&& roles[index + 1..]
|
| 2463 |
+
.iter()
|
| 2464 |
+
.any(|role| role.starts_with("EPISODE"))
|
| 2465 |
+
{
|
| 2466 |
+
output[index] = "TITLE".to_string();
|
| 2467 |
+
continue;
|
| 2468 |
+
}
|
| 2469 |
+
if !output[..index].iter().any(|role| role == "TITLE")
|
| 2470 |
+
&& NUMERIC_TITLE_PREFIX_RE.is_match(&text)
|
| 2471 |
+
&& index + 2 < roles.len()
|
| 2472 |
+
&& groups[index + 1].class_name == "SEP"
|
| 2473 |
+
&& groups[index + 2].class_name == "TEXT"
|
| 2474 |
+
&& group_text(tokens, &groups[index + 2])
|
| 2475 |
+
.chars()
|
| 2476 |
+
.any(|ch| ch.is_alphabetic())
|
| 2477 |
+
&& roles[index + 3..]
|
| 2478 |
+
.iter()
|
| 2479 |
+
.any(|role| role.starts_with("EPISODE"))
|
| 2480 |
+
{
|
| 2481 |
+
output[index] = "TITLE".to_string();
|
| 2482 |
+
output[index + 2] = "TITLE".to_string();
|
| 2483 |
+
continue;
|
| 2484 |
+
}
|
| 2485 |
if output[index - 2] == "TITLE"
|
| 2486 |
&& groups[index - 1].class_name == "SEP"
|
| 2487 |
&& previous_text.len() <= 48
|
|
|
|
| 2504 |
continue;
|
| 2505 |
}
|
| 2506 |
}
|
| 2507 |
+
if roles[index].starts_with("EPISODE")
|
| 2508 |
+
&& text.chars().all(|ch| ch.is_ascii_digit())
|
| 2509 |
+
&& output[..index].iter().any(|role| role == "SPECIAL")
|
| 2510 |
+
&& !output[..index].iter().any(|role| role.starts_with("EPISODE"))
|
| 2511 |
+
{
|
| 2512 |
+
let previous_structural = (0..index)
|
| 2513 |
+
.rev()
|
| 2514 |
+
.find(|&cursor| groups[cursor].class_name != "SEP")
|
| 2515 |
+
.and_then(|cursor| output.get(cursor))
|
| 2516 |
+
.map(String::as_str);
|
| 2517 |
+
let next_real = (index + 1..roles.len())
|
| 2518 |
+
.find(|&cursor| groups[cursor].class_name != "SEP")
|
| 2519 |
+
.and_then(|cursor| roles.get(cursor))
|
| 2520 |
+
.map(String::as_str);
|
| 2521 |
+
if matches!(previous_structural, Some("SPECIAL"))
|
| 2522 |
+
&& !matches!(next_real, Some("TITLE" | "SEASON"))
|
| 2523 |
+
{
|
| 2524 |
+
output[index] = "SPECIAL".to_string();
|
| 2525 |
+
continue;
|
| 2526 |
+
}
|
| 2527 |
+
}
|
| 2528 |
if roles[index].starts_with("EPISODE")
|
| 2529 |
&& BARE_RESOLUTION_RE.is_match(&text)
|
| 2530 |
&& index >= 2
|
|
|
|
| 2601 |
&& text.chars().any(|ch| ch.is_alphabetic())
|
| 2602 |
&& !ep_markers.contains(&text.as_str())
|
| 2603 |
{
|
| 2604 |
+
if !output[..index].iter().any(|role| role == "TITLE") {
|
| 2605 |
+
let previous_structural = (0..index)
|
| 2606 |
+
.rev()
|
| 2607 |
+
.find(|&cursor| groups[cursor].class_name != "SEP")
|
| 2608 |
+
.and_then(|cursor| output.get(cursor))
|
| 2609 |
+
.map(String::as_str);
|
| 2610 |
+
if matches!(previous_structural, Some("SPECIAL")) {
|
| 2611 |
+
output[index] = "TITLE".to_string();
|
| 2612 |
+
continue;
|
| 2613 |
+
}
|
| 2614 |
+
}
|
| 2615 |
if let Some(last_title) = output[..index].iter().rposition(|role| role == "TITLE") {
|
| 2616 |
let episode_since_title = output[last_title + 1..index]
|
| 2617 |
.iter()
|
|
|
|
| 2699 |
} else {
|
| 2700 |
String::new()
|
| 2701 |
};
|
| 2702 |
+
if previous_text.ends_with('第') && next_text.starts_with('期') {
|
| 2703 |
+
output[index] = "SEASON".to_string();
|
| 2704 |
+
continue;
|
| 2705 |
+
}
|
| 2706 |
+
if output[..index].iter().any(|role| role == "TITLE")
|
| 2707 |
+
&& (output[..index]
|
| 2708 |
+
.iter()
|
| 2709 |
+
.enumerate()
|
| 2710 |
+
.any(|(cursor, role)| {
|
| 2711 |
+
role == "TITLE" && is_special_title_phrase(&group_text(tokens, &groups[cursor]))
|
| 2712 |
+
}))
|
| 2713 |
+
&& !output[..index].iter().any(|role| role.starts_with("EPISODE"))
|
| 2714 |
+
&& text.chars().all(|ch| ch.is_ascii_digit())
|
| 2715 |
+
&& text.len() <= 3
|
| 2716 |
+
{
|
| 2717 |
+
output[index] = "SPECIAL".to_string();
|
| 2718 |
+
continue;
|
| 2719 |
+
}
|
| 2720 |
if previous_text.contains('点')
|
| 2721 |
|| previous_text.contains('點')
|
| 2722 |
|| previous_text.contains("晚上")
|
| 2723 |
|| previous_text.contains("上午")
|
| 2724 |
|| previous_text.contains("下午")
|
| 2725 |
+
|| previous_text.contains('年')
|
| 2726 |
+
|| previous_text.contains('月')
|
| 2727 |
|| next_text.contains('点')
|
| 2728 |
|| next_text.contains('點')
|
| 2729 |
|| next_text.contains('半')
|
| 2730 |
+
|| next_text.contains('月')
|
| 2731 |
+
|| next_text.contains('日')
|
| 2732 |
{
|
| 2733 |
output[index] = "O".to_string();
|
| 2734 |
}
|
|
|
|
| 2847 |
) {
|
| 2848 |
score -= 500;
|
| 2849 |
}
|
| 2850 |
+
if title_noise_score_penalty(cleaned) {
|
| 2851 |
+
score -= 700;
|
| 2852 |
+
}
|
| 2853 |
score
|
| 2854 |
}
|
| 2855 |
|
| 2856 |
+
/// Reports whether `text` contains a release/source marker (rip type, media
/// type, "Full HD", ...) that should count against it as a title candidate.
///
/// Before matching, `_`, `-`, `.` and all whitespace are removed and ASCII
/// letters are lowercased. Matching on this separator-free form means every
/// spelling of a marker is recognised no matter how it is split up:
/// `BDRip` / `BD-Rip`, `WEB-DL` / `WEBDL`, `BluRay` / `Blu-ray`,
/// `Full_HD` / `FullHD`, `HDTV` / `HD.TV`.
///
/// Matching is a plain substring test and does not respect word boundaries.
///
/// Returns `true` when any marker is present; the caller decides the size of
/// the penalty.
fn title_noise_score_penalty(text: &str) -> bool {
    // Markers are stored without separators because the haystack has them removed.
    const NOISE_MARKERS: [&str; 6] = ["bdrip", "webrip", "webdl", "bluray", "fullhd", "hdtv"];

    let compact: String = text
        .chars()
        .filter(|ch| !matches!(ch, '_' | '-' | '.') && !ch.is_whitespace())
        .map(|ch| ch.to_ascii_lowercase())
        .collect();

    NOISE_MARKERS.iter().any(|marker| compact.contains(*marker))
}
|
| 2870 |
+
|
| 2871 |
fn roles_candidate_text_group(group: &Group) -> bool {
|
| 2872 |
matches!(group.class_name.as_str(), "TEXT" | "BRACKET_TEXT")
|
| 2873 |
}
|
|
|
|
| 2890 |
|
| 2891 |
fn normalize_title_token(token: &str) -> (Vec<String>, Vec<String>) {
|
| 2892 |
let pieces = split_generated_token(token);
|
| 2893 |
+
let mut output_pieces = Vec::new();
|
| 2894 |
+
let mut labels = Vec::new();
|
| 2895 |
+
for piece in pieces {
|
| 2896 |
+
if is_standalone_separator(&piece) {
|
| 2897 |
+
output_pieces.push(piece);
|
| 2898 |
+
labels.push("O".to_string());
|
| 2899 |
+
continue;
|
| 2900 |
+
}
|
| 2901 |
+
if CJK_SEASON_TOKEN_RE.is_match(&piece) {
|
| 2902 |
+
output_pieces.push(piece);
|
| 2903 |
+
labels.push("B-SEASON".to_string());
|
| 2904 |
+
continue;
|
| 2905 |
+
}
|
| 2906 |
+
if let Some(caps) = CJK_SEASON_EMBEDDED_RE.captures(&piece) {
|
| 2907 |
+
let before = caps.get(1).map(|m| m.as_str()).unwrap_or_default();
|
| 2908 |
+
let season = caps.get(2).map(|m| m.as_str()).unwrap_or_default();
|
| 2909 |
+
let after = caps.get(3).map(|m| m.as_str()).unwrap_or_default();
|
| 2910 |
+
if !before.is_empty() {
|
| 2911 |
+
output_pieces.push(before.to_string());
|
| 2912 |
+
labels.push("B-TITLE".to_string());
|
| 2913 |
}
|
| 2914 |
+
output_pieces.push(season.to_string());
|
| 2915 |
+
labels.push("B-SEASON".to_string());
|
| 2916 |
+
if !after.is_empty() {
|
| 2917 |
+
output_pieces.push(after.to_string());
|
| 2918 |
+
labels.push("O".to_string());
|
| 2919 |
+
}
|
| 2920 |
+
continue;
|
| 2921 |
+
}
|
| 2922 |
+
output_pieces.push(piece);
|
| 2923 |
+
labels.push("B-TITLE".to_string());
|
| 2924 |
+
}
|
| 2925 |
+
(output_pieces, labels)
|
| 2926 |
}
|
| 2927 |
|
| 2928 |
fn split_generated_token(token: &str) -> Vec<String> {
|
|
|
|
| 3079 |
right += 1;
|
| 3080 |
}
|
| 3081 |
if left >= 0 && right < tokens.len() {
|
| 3082 |
+
let left_label = output[left as usize].clone();
|
| 3083 |
+
let right_label = labels[right].clone();
|
| 3084 |
if left_label == right_label && matches!(left_label.as_str(), "B-TITLE" | "B-GROUP") {
|
| 3085 |
output[index] = left_label.clone();
|
| 3086 |
}
|
| 3087 |
+
if token == "." && left_label == "B-EPISODE" && right_label == "B-EPISODE" {
|
| 3088 |
+
output[index] = "B-EPISODE".to_string();
|
| 3089 |
+
}
|
| 3090 |
}
|
| 3091 |
if title_terminal_punctuation.contains(&token.as_str()) && index > 0 {
|
| 3092 |
let left_label = &output[index - 1];
|
|
|
|
| 3384 |
assert!(decimal_episode.contains(&(".".to_string(), "B-EPISODE".to_string())));
|
| 3385 |
assert!(decimal_episode.contains(&("5".to_string(), "B-EPISODE".to_string())));
|
| 3386 |
|
| 3387 |
+
let _ = RUNTIME_WHITELISTS.set(Whitelists {
|
| 3388 |
+
title_phrases: Vec::new(),
|
| 3389 |
+
group_names: [
|
| 3390 |
+
"LowPower-Raws".to_string(),
|
| 3391 |
+
"ANi".to_string(),
|
| 3392 |
+
"LoliHouse".to_string(),
|
| 3393 |
+
"QTS".to_string(),
|
| 3394 |
+
]
|
| 3395 |
+
.into_iter()
|
| 3396 |
+
.collect(),
|
| 3397 |
+
});
|
| 3398 |
+
let lowpower = labels_for("[LowPower-Raws] 91 Days - 01 (BD 720P x264 10bit AAC)");
|
| 3399 |
+
assert!(lowpower.contains(&("LowPower".to_string(), "B-GROUP".to_string())));
|
| 3400 |
+
assert!(lowpower.contains(&("91".to_string(), "B-TITLE".to_string())));
|
| 3401 |
+
assert!(lowpower.contains(&("Days".to_string(), "B-TITLE".to_string())));
|
| 3402 |
+
assert!(lowpower.contains(&("01".to_string(), "B-EPISODE".to_string())));
|
| 3403 |
+
|
| 3404 |
+
let ririsa = labels_for("[ANi] 2.5 次元的誘惑 - 01 [1080P][Baha][WEB-DL][AAC AVC][CHT]");
|
| 3405 |
+
assert!(ririsa.contains(&("2".to_string(), "B-TITLE".to_string())));
|
| 3406 |
+
assert!(ririsa.contains(&(".".to_string(), "B-TITLE".to_string())));
|
| 3407 |
+
assert!(ririsa.contains(&("5".to_string(), "B-TITLE".to_string())));
|
| 3408 |
+
assert!(ririsa.contains(&("次元的誘惑".to_string(), "B-TITLE".to_string())));
|
| 3409 |
+
assert!(ririsa.contains(&("01".to_string(), "B-EPISODE".to_string())));
|
| 3410 |
+
|
| 3411 |
+
let nanabun = labels_for("[LoliHouse] 22-7 - 01 [WebRip 1080p HEVC-10bit AAC ASS]");
|
| 3412 |
+
assert!(nanabun.contains(&("22".to_string(), "B-TITLE".to_string())));
|
| 3413 |
+
assert!(nanabun.contains(&("-".to_string(), "B-TITLE".to_string())));
|
| 3414 |
+
assert!(nanabun.contains(&("7".to_string(), "B-TITLE".to_string())));
|
| 3415 |
+
assert!(nanabun.contains(&("01".to_string(), "B-EPISODE".to_string())));
|
| 3416 |
+
|
| 3417 |
+
let saint = labels_for("[QTS] OVA Saint Seiya The Lost Canvas Meiou Shinwa ep 01 (BD H264 1920x1080 24fps FLAC)");
|
| 3418 |
+
assert!(saint.contains(&("OVA".to_string(), "B-SPECIAL".to_string())));
|
| 3419 |
+
assert!(saint.contains(&("Saint".to_string(), "B-TITLE".to_string())));
|
| 3420 |
+
assert!(saint.contains(&("Seiya".to_string(), "B-TITLE".to_string())));
|
| 3421 |
+
assert!(saint.contains(&("01".to_string(), "B-EPISODE".to_string())));
|
| 3422 |
+
|
| 3423 |
+
let gundam = labels_for("機動戦士ガンダム00 セカンドシーズン/Ep.01 「# 天使再臨」");
|
| 3424 |
+
assert!(gundam.contains(&("機動戦士ガンダム".to_string(), "B-TITLE".to_string())));
|
| 3425 |
+
assert!(gundam.contains(&("00".to_string(), "B-TITLE".to_string())));
|
| 3426 |
+
assert!(gundam.contains(&("01".to_string(), "B-EPISODE".to_string())));
|
| 3427 |
+
|
| 3428 |
let spy = labels_for("[Studio GreenTea] Spy x Family [38][WebRip][HEVC-10bit 1080p AAC ASSx2]");
|
| 3429 |
assert!(spy.contains(&("Studio".to_string(), "B-GROUP".to_string())));
|
| 3430 |
assert!(spy.contains(&("Spy".to_string(), "B-TITLE".to_string())));
|
|
|
|
| 3630 |
assert!(volume.contains(&("MENU02".to_string(), "B-SPECIAL".to_string())));
|
| 3631 |
assert!(!volume.contains(&("01".to_string(), "B-EPISODE".to_string())));
|
| 3632 |
|
| 3633 |
+
let aria_notice =
|
| 3634 |
+
labels_for("[KNA-Subs&ANK-Raws] 緋弾のアリアAA 番宣1 (BDrip 1920x1080 HEVC-YUV420P10 FLAC)");
|
| 3635 |
+
assert!(aria_notice.contains(&("緋弾のアリア".to_string(), "B-TITLE".to_string())));
|
| 3636 |
+
assert!(aria_notice.contains(&("番宣".to_string(), "B-SPECIAL".to_string())));
|
| 3637 |
+
assert!(aria_notice.contains(&("1".to_string(), "B-SPECIAL".to_string())));
|
| 3638 |
+
assert!(!aria_notice.contains(&("1".to_string(), "B-EPISODE".to_string())));
|
| 3639 |
+
|
| 3640 |
+
let lost_song =
|
| 3641 |
+
labels_for("[Snow-Raws] LOST SONG CM&PV 01(BD 1920x1080 HEVC-YUV420P10 FLAC)");
|
| 3642 |
+
assert!(lost_song.contains(&("LOST".to_string(), "B-TITLE".to_string())));
|
| 3643 |
+
assert!(lost_song.contains(&("CM".to_string(), "B-SPECIAL".to_string())));
|
| 3644 |
+
assert!(lost_song.contains(&("PV".to_string(), "B-SPECIAL".to_string())));
|
| 3645 |
+
assert!(lost_song.contains(&("01".to_string(), "B-SPECIAL".to_string())));
|
| 3646 |
+
assert!(!lost_song.contains(&("01".to_string(), "B-EPISODE".to_string())));
|
| 3647 |
+
|
| 3648 |
let numeric_title =
|
| 3649 |
labels_for("3000.Leagues.in.Search.of.Mother.S01E01.1080p.WEB-DL.H.264-D00oo00M");
|
| 3650 |
assert!(numeric_title.contains(&("3000".to_string(), "B-TITLE".to_string())));
|
|
|
|
| 3661 |
assert!(media_block.contains(&("1080".to_string(), "B-RESOLUTION".to_string())));
|
| 3662 |
assert!(media_block.contains(&("x264".to_string(), "B-SOURCE".to_string())));
|
| 3663 |
assert!(media_block.contains(&("Chs".to_string(), "B-SOURCE".to_string())));
|
| 3664 |
+
|
| 3665 |
+
let ge999 = labels_for("GE999 第024話 「次元航海惑星」1979年02月22日 (720x540 x264 AAC2)");
|
| 3666 |
+
assert!(ge999.contains(&("GE999".to_string(), "B-TITLE".to_string())));
|
| 3667 |
+
assert!(ge999.contains(&("024".to_string(), "B-EPISODE".to_string())));
|
| 3668 |
+
assert!(!ge999.contains(&("22".to_string(), "B-EPISODE".to_string())));
|
| 3669 |
+
|
| 3670 |
+
let galaxy = labels_for("銀河鉄道999 第024話 「次元航海惑星」 (DVD 640x480 WMV9)");
|
| 3671 |
+
assert!(galaxy.contains(&("銀河鉄道".to_string(), "B-TITLE".to_string())));
|
| 3672 |
+
assert!(galaxy.contains(&("999".to_string(), "B-TITLE".to_string())));
|
| 3673 |
+
assert!(galaxy.contains(&("024".to_string(), "B-EPISODE".to_string())));
|
| 3674 |
+
|
| 3675 |
+
let mahoro = labels_for("[POPGO][FREEWIND][Mahoro_Matic][Full_HD-BDRIP][01]");
|
| 3676 |
+
assert!(mahoro.contains(&("Mahoro".to_string(), "B-TITLE".to_string())));
|
| 3677 |
+
assert!(!mahoro.contains(&("Full".to_string(), "B-TITLE".to_string())));
|
| 3678 |
+
assert!(mahoro.contains(&("01".to_string(), "B-EPISODE".to_string())));
|
| 3679 |
+
|
| 3680 |
+
let kitaro = labels_for("[1985.10-1988.02] Kitaro_鬼太郎 第3期(ゲゲゲの鬼太郎)_TV 036 異次元妖怪かまなり");
|
| 3681 |
+
assert!(kitaro.contains(&("Kitaro".to_string(), "B-TITLE".to_string())));
|
| 3682 |
+
assert!(kitaro.contains(&("3".to_string(), "B-SEASON".to_string())));
|
| 3683 |
+
assert!(kitaro.contains(&("036".to_string(), "B-EPISODE".to_string())));
|
| 3684 |
+
assert!(!kitaro.contains(&("1985".to_string(), "B-EPISODE".to_string())));
|
| 3685 |
+
|
| 3686 |
+
let urusei = labels_for("Urusei_Yatsura_DVD_Ep042.5_Simu");
|
| 3687 |
+
assert!(urusei.contains(&("Urusei".to_string(), "B-TITLE".to_string())));
|
| 3688 |
+
assert!(urusei.contains(&("042".to_string(), "B-EPISODE".to_string())));
|
| 3689 |
+
assert!(urusei.contains(&(".".to_string(), "B-EPISODE".to_string())));
|
| 3690 |
+
assert!(urusei.contains(&("5".to_string(), "B-EPISODE".to_string())));
|
| 3691 |
+
|
| 3692 |
+
let lupin =
|
| 3693 |
+
labels_for("[Lupin The Thrid Jigen Daisuke no Bohyou][Logo][BDRIP][1080P][H264_FLAC]");
|
| 3694 |
+
assert!(lupin.contains(&("Lupin".to_string(), "B-TITLE".to_string())));
|
| 3695 |
+
assert!(!lupin.contains(&("Lupin".to_string(), "B-GROUP".to_string())));
|
| 3696 |
+
|
| 3697 |
+
let mirumo = labels_for("【咪路fans】魔法咪路咪路第二季日语版 01[GB][MP4]");
|
| 3698 |
+
assert!(mirumo.contains(&("魔法咪路咪路".to_string(), "B-TITLE".to_string())));
|
| 3699 |
+
assert!(mirumo.contains(&("第二季".to_string(), "B-SEASON".to_string())));
|
| 3700 |
+
assert!(mirumo.contains(&("01".to_string(), "B-EPISODE".to_string())));
|
| 3701 |
+
|
| 3702 |
+
let doremi_bonus = labels_for(
|
| 3703 |
+
"おジャ魔女どれみナ・イ・ショ 特典映像07「おジャ魔女どれみナ・イ・ショ エンドテロップ集」(DVD 640x480 )",
|
| 3704 |
+
);
|
| 3705 |
+
assert!(doremi_bonus.contains(&("おジャ魔女どれみナ".to_string(), "B-TITLE".to_string())));
|
| 3706 |
+
assert!(doremi_bonus.contains(&("07".to_string(), "B-SPECIAL".to_string())));
|
| 3707 |
+
assert!(!doremi_bonus.contains(&("07".to_string(), "B-EPISODE".to_string())));
|
| 3708 |
}
|
| 3709 |
}
|