ModerRAS
/

AniFileBERT

@@ -178,6 +178,8 @@ static EPISODE_RE: Lazy<Regex> =
     Lazy::new(|| Regex::new(r"(?i)^(?:EP?|#)?\d{1,4}(?:\.\d{1,2})?(?:END)?$").unwrap());
 /// Whole-input match: 1-3 digits, a literal dot, then 1-2 digits (e.g. `12.5`).
 static DECIMAL_EPISODE_RE: Lazy<Regex> =
     Lazy::new(|| Regex::new(r"^\d{1,3}\.\d{1,2}$").unwrap());
 /// Whole-input match: optional `第`, 1-4 digits, then one of `话`, `話`, `回`, `集`.
 static EPISODE_CJK_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^第?\d{1,4}[话話回集]$").unwrap());
 /// Same shape as `EPISODE_CJK_RE` but anchored only at the start, so trailing
 /// text after the counter character is allowed.
 static EPISODE_CJK_PREFIX_RE: Lazy<Regex> =
     Lazy::new(|| Regex::new(r"^第?\d{1,4}[话話回集]").unwrap());
@@ -190,12 +192,15 @@ static SXE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)^S\d{1,2}E\d{1,4}(?:v
 /// Case-insensitive `S<1-2 digits>E<1-4 digits>` with an optional `v<digits>` suffix.
 /// Capture groups: 1 = season digits, 2 = episode digits, 3 = version digits (optional).
 static SXE_VALUE_RE: Lazy<Regex> =
     Lazy::new(|| Regex::new(r"(?i)^S(\d{1,2})E(\d{1,4})(?:v(\d+))?$").unwrap());
 /// Case-insensitive marker (`EP`, `E` or `#`) followed by 1-4 digits and an
 /// optional `v<digits>` suffix. Capture groups: 1 = marker, 2 = number,
 /// 3 = version digits (optional). This pattern accepts no decimal part.
 static EPISODE_VALUE_RE: Lazy<Regex> =
-    Lazy::new(|| Regex::new(r"(?i)^(EP|E|#)(\d{1,4})(?:v(\d+))?$").unwrap());
 /// Case-insensitive whole-input match for one of: `S` + 1-2 digits,
 /// `Season` + optional whitespace + 1-2 digits, or `第` + one or more
 /// CJK numerals/digits + one of `季`, `期`, `部`.
 static SEASON_RE: Lazy<Regex> = Lazy::new(|| {
     Regex::new(r"(?i)^(?:S\d{1,2}|Season\s*\d{1,2}|第[一二三四五六七八九十\d]+[季期部])$").unwrap()
 });
 /// Whole-input match for the CJK form only: `第` + CJK numerals/digits + `季`/`期`/`部`.
 static CJK_SEASON_TOKEN_RE: Lazy<Regex> =
     Lazy::new(|| Regex::new(r"^第[一二三四五六七八九十\d]+[季期部]$").unwrap());
 /// Case-insensitive `S` + 1-2 digits; capture group 1 holds the digits.
 static SEASON_VALUE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)^S(\d{1,2})$").unwrap());
 static SPECIAL_RE: Lazy<Regex> = Lazy::new(|| {
     Regex::new(r"(?i)^(?:(?:NCOP|NCED|OP|ED|PV|CM)(?:[\s_.-]?(?:\d{1,4}|v\d{1,3}|[A-Z]))?|SP(?:[\s_.-]?\d{0,4})?|(?:OVA|OAD|IV)(?:[\s_.-]?\d{0,4})?|(?:Menu|Intro|Preview|Trailer|Teaser|Animatics?)(?:[\s_.-]?(?:\d{0,4}|Ep\d{1,4}|[A-Z]))?)$").unwrap()
@@ -204,6 +209,11 @@ static VOLUME_RE: Lazy<Regex> =
     Lazy::new(|| Regex::new(r"(?i)^(?:Vol(?:ume)?\.?|Disc|CD|BD|DVD|D)\s*\d{1,3}$").unwrap());
 /// Whole-input match: a four-digit number starting with `19` or `20`, optionally
 /// followed by up to two 1-2-digit parts, each introduced by `.`, `_` or `-`
 /// (e.g. `1985`, `1985.10`, `1985-10-02`).
 static DATE_RE: Lazy<Regex> =
     Lazy::new(|| Regex::new(r"^(?:19|20)\d{2}(?:[._-]\d{1,2}){0,2}$").unwrap());
 /// Case-insensitive whole-input match against a fixed list of tags
 /// (`CHS`, `CHT`, `GB`, `BIG5`, `JP`, `ENG`, `简体`, `繁體`, `字幕`, `Sub`, `MSubs`, ...).
 static LANG_RE: Lazy<Regex> = Lazy::new(|| {
     Regex::new(r"(?i)^(?:CHS|CHT|ZHS|ZHT|GB|BIG5|JPN?|JP|JA|JAP|ENG|EN|SC|TC|简[体體]?|繁[体體]?|简日|繁日|字幕|内封|外挂|Sub|Subs|MSubs?)$").unwrap()
 });
@@ -1338,7 +1348,10 @@ fn classify_atom(text: &str) -> String {
     if RESOLUTION_RE.is_match(&cleaned) {
         return "RESOLUTION".to_string();
     }
-    if DATE_RE.is_match(&cleaned) {
         return "DATE".to_string();
     }
     if EPISODE_VERSION_RE.is_match(&compact) {
@@ -1740,7 +1753,7 @@ fn has_encoding_noise(value: &str) -> bool {
     let markers = [
         "譁", "蜈", "螟", "蟄", "謇", "邱", "荳", "縺", "繧", "莨", "鬆", "髯", "瀛",
         "楀", "箷", "绲", "刔", "鏃", "湪", "鏍", "犲", "儚", "鐗", "吀", "铦", "躲",
-        "伄", "椋", "伓", "姘",
     ];
     let marker_hits = markers
         .iter()
@@ -1750,7 +1763,10 @@ fn has_encoding_noise(value: &str) -> bool {
         .chars()
         .filter(|ch| ('\u{ff61}'..='\u{ff9f}').contains(ch))
         .count();
-    marker_hits >= 2 || (marker_hits >= 1 && halfwidth_hits >= 1)
 }
 fn has_non_anime_noise(value: &str) -> bool {
@@ -2096,8 +2112,12 @@ fn split_episode_token(token: &str) -> Option<(Vec<String>, Vec<String>)> {
         return Some((pieces, labels));
     }
     let caps = EPISODE_VALUE_RE.captures(token)?;
-    let mut pieces = vec![caps[1].to_string(), caps[2].to_string()];
-    let mut labels = vec!["O".to_string(), "B-EPISODE".to_string()];
     if let Some(version) = caps.get(3) {
         pieces.push("v".to_string());
         pieces.push(version.as_str().to_string());
@@ -2168,9 +2188,14 @@ fn is_special_title_phrase(text: &str) -> bool {
             | "TOKUTEN"
             | "TRAILER"
             | "WORLD PREMIERE"
             | "映像特典"
             | "特典"
     ) || normalized.contains("映像特典")
         || SPECIAL_TITLE_PHRASE_RE.is_match(text)
 }
@@ -2181,16 +2206,29 @@ const KNOWN_TITLE_PHRASES: &[&[&str]] = &[
     &["Zom", "100"],
     &["Kamisama", "Hajimemashita", "2"],
     &["Phantasy", "Star", "Online", "2", "Episode", "Oracle"],
 ];
 fn apply_known_title_phrases(tokens: &[String], groups: &[Group], roles: &mut [String]) {
     if let Some(whitelists) = RUNTIME_WHITELISTS.get() {
         for (index, group) in groups.iter().enumerate() {
             if group.class_name == "BRACKET_TEXT"
-                && roles.get(index).is_some_and(|role| role == "GROUP")
                 && whitelists
                     .group_names
                     .contains(&normalize_whitelist_name(&group_text(tokens, group)))
             {
                 roles[index] = "GROUP".to_string();
             }
@@ -2231,7 +2269,14 @@ fn apply_title_phrase(
         {
             for (group_index, _) in window {
                 if roles.get(*group_index).is_some_and(|role| role == "GROUP") {
-                    continue;
                 }
                 if !allow_structural_override
                     && roles.get(*group_index).is_some_and(|role| {
@@ -2345,6 +2390,24 @@ fn adjust_contextual_roles(tokens: &[String], groups: &[Group], roles: &[String]
             output[index] = "O".to_string();
             continue;
         }
         if roles[index].starts_with("EPISODE") && (2..roles.len()).contains(&index) {
             let previous_text = group_text(tokens, &groups[index - 2]);
             let next_special = output[index + 1..roles.len().min(index + 4)]
@@ -2376,6 +2439,49 @@ fn adjust_contextual_roles(tokens: &[String], groups: &[Group], roles: &[String]
                 output[index] = "SPECIAL".to_string();
                 continue;
             }
             if output[index - 2] == "TITLE"
                 && groups[index - 1].class_name == "SEP"
                 && previous_text.len() <= 48
@@ -2398,6 +2504,27 @@ fn adjust_contextual_roles(tokens: &[String], groups: &[Group], roles: &[String]
                 continue;
             }
         }
         if roles[index].starts_with("EPISODE")
             && BARE_RESOLUTION_RE.is_match(&text)
             && index >= 2
@@ -2474,6 +2601,17 @@ fn adjust_contextual_roles(tokens: &[String], groups: &[Group], roles: &[String]
             && text.chars().any(|ch| ch.is_alphabetic())
             && !ep_markers.contains(&text.as_str())
         {
             if let Some(last_title) = output[..index].iter().rposition(|role| role == "TITLE") {
                 let episode_since_title = output[last_title + 1..index]
                     .iter()
@@ -2561,14 +2699,36 @@ fn adjust_contextual_roles(tokens: &[String], groups: &[Group], roles: &[String]
             } else {
                 String::new()
             };
             if previous_text.contains('点')
                 || previous_text.contains('點')
                 || previous_text.contains("晚上")
                 || previous_text.contains("上午")
                 || previous_text.contains("下午")
                 || next_text.contains('点')
                 || next_text.contains('點')
                 || next_text.contains('半')
             {
                 output[index] = "O".to_string();
             }
@@ -2687,9 +2847,27 @@ fn title_candidate_score(tokens: &[String], groups: &[Group], start: usize, end:
     ) {
         score -= 500;
     }
     score
 }
 /// Returns `true` when the group's `class_name` is `"TEXT"` or `"BRACKET_TEXT"`.
 fn roles_candidate_text_group(group: &Group) -> bool {
     matches!(group.class_name.as_str(), "TEXT" | "BRACKET_TEXT")
 }
@@ -2712,19 +2890,39 @@ fn normalize_generated_tokens(tokens: &[String], labels: &[String]) -> (Vec<Stri
 fn normalize_title_token(token: &str) -> (Vec<String>, Vec<String>) {
     let pieces = split_generated_token(token);
-    let labels = pieces
-        .iter()
-        .map(|piece| {
-            if is_standalone_separator(piece) {
-                "O".to_string()
-            } else if CJK_SEASON_TOKEN_RE.is_match(piece) {
-                "B-SEASON".to_string()
-            } else {
-                "B-TITLE".to_string()
             }
-        })
-        .collect();
-    (pieces, labels)
 }
 fn split_generated_token(token: &str) -> Vec<String> {
@@ -2881,11 +3079,14 @@ fn smooth_title_spans(tokens: &[String], labels: &[String]) -> Vec<String> {
             right += 1;
         }
         if left >= 0 && right < tokens.len() {
-            let left_label = &output[left as usize];
-            let right_label = &labels[right];
             if left_label == right_label && matches!(left_label.as_str(), "B-TITLE" | "B-GROUP") {
                 output[index] = left_label.clone();
             }
         }
         if title_terminal_punctuation.contains(&token.as_str()) && index > 0 {
             let left_label = &output[index - 1];
@@ -3183,6 +3384,47 @@ mod tests {
         assert!(decimal_episode.contains(&(".".to_string(), "B-EPISODE".to_string())));
         assert!(decimal_episode.contains(&("5".to_string(), "B-EPISODE".to_string())));
         let spy = labels_for("[Studio GreenTea] Spy x Family [38][WebRip][HEVC-10bit 1080p AAC ASSx2]");
         assert!(spy.contains(&("Studio".to_string(), "B-GROUP".to_string())));
         assert!(spy.contains(&("Spy".to_string(), "B-TITLE".to_string())));
@@ -3388,6 +3630,21 @@ mod tests {
         assert!(volume.contains(&("MENU02".to_string(), "B-SPECIAL".to_string())));
         assert!(!volume.contains(&("01".to_string(), "B-EPISODE".to_string())));
         let numeric_title =
             labels_for("3000.Leagues.in.Search.of.Mother.S01E01.1080p.WEB-DL.H.264-D00oo00M");
         assert!(numeric_title.contains(&("3000".to_string(), "B-TITLE".to_string())));
@@ -3404,5 +3661,49 @@ mod tests {
         assert!(media_block.contains(&("1080".to_string(), "B-RESOLUTION".to_string())));
         assert!(media_block.contains(&("x264".to_string(), "B-SOURCE".to_string())));
         assert!(media_block.contains(&("Chs".to_string(), "B-SOURCE".to_string())));
     }
 }

     Lazy::new(|| Regex::new(r"(?i)^(?:EP?|#)?\d{1,4}(?:\.\d{1,2})?(?:END)?$").unwrap());
 /// Whole-input match: 1-3 digits, a literal dot, then 1-2 digits (e.g. `12.5`).
 static DECIMAL_EPISODE_RE: Lazy<Regex> =
     Lazy::new(|| Regex::new(r"^\d{1,3}\.\d{1,2}$").unwrap());
 /// Whole-input match: 1-3 digits, optionally followed by one of `.`, `/`, `-`
 /// and another 1-3 digits (e.g. `91`, `2.5`, `22-7`).
+static NUMERIC_TITLE_PREFIX_RE: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r"^\d{1,3}(?:[./-]\d{1,3})?$").unwrap());
 /// Whole-input match: optional `第`, 1-4 digits, then one of `话`, `話`, `回`, `集`.
 static EPISODE_CJK_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^第?\d{1,4}[话話回集]$").unwrap());
 /// Same shape as `EPISODE_CJK_RE` but anchored only at the start, so trailing
 /// text after the counter character is allowed.
 static EPISODE_CJK_PREFIX_RE: Lazy<Regex> =
     Lazy::new(|| Regex::new(r"^第?\d{1,4}[话話回集]").unwrap());
 /// Case-insensitive `S<1-2 digits>E<1-4 digits>` with an optional `v<digits>` suffix.
 /// Capture groups: 1 = season digits, 2 = episode digits, 3 = version digits (optional).
 static SXE_VALUE_RE: Lazy<Regex> =
     Lazy::new(|| Regex::new(r"(?i)^S(\d{1,2})E(\d{1,4})(?:v(\d+))?$").unwrap());
 /// Case-insensitive marker (`EP`, `E` or `#`) followed by 1-4 digits with an
 /// optional `.<1-2 digits>` decimal part, then an optional `v<digits>` suffix.
 /// Capture groups: 1 = marker, 2 = number including any decimal part,
 /// 3 = version digits (optional).
 static EPISODE_VALUE_RE: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r"(?i)^(EP|E|#)(\d{1,4}(?:\.\d{1,2})?)(?:v(\d+))?$").unwrap());
 /// Case-insensitive whole-input match for one of: `S` + 1-2 digits,
 /// `Season` + optional whitespace + 1-2 digits, or `第` + one or more
 /// CJK numerals/digits + one of `季`, `期`, `部`.
 static SEASON_RE: Lazy<Regex> = Lazy::new(|| {
     Regex::new(r"(?i)^(?:S\d{1,2}|Season\s*\d{1,2}|第[一二三四五六七八九十\d]+[季期部])$").unwrap()
 });
 /// Whole-input match for the CJK form only: `第` + CJK numerals/digits + `季`/`期`/`部`.
 static CJK_SEASON_TOKEN_RE: Lazy<Regex> =
     Lazy::new(|| Regex::new(r"^第[一二三四五六七八九十\d]+[季期部]$").unwrap());
 /// A CJK season marker embedded in longer text. Capture groups:
 /// 1 = non-empty leading text (matched lazily), 2 = the season marker
 /// (`第` + CJK numerals/digits + `季`/`期`/`部`), 3 = at most 12 trailing characters.
+static CJK_SEASON_EMBEDDED_RE: Lazy<Regex> = Lazy::new(|| {
+    Regex::new(r"^(.+?)(第[一二三四五六七八九十\d]+[季期部])(.{0,12})$").unwrap()
+});
 /// Case-insensitive `S` + 1-2 digits; capture group 1 holds the digits.
 static SEASON_VALUE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)^S(\d{1,2})$").unwrap());
 static SPECIAL_RE: Lazy<Regex> = Lazy::new(|| {
     Regex::new(r"(?i)^(?:(?:NCOP|NCED|OP|ED|PV|CM)(?:[\s_.-]?(?:\d{1,4}|v\d{1,3}|[A-Z]))?|SP(?:[\s_.-]?\d{0,4})?|(?:OVA|OAD|IV)(?:[\s_.-]?\d{0,4})?|(?:Menu|Intro|Preview|Trailer|Teaser|Animatics?)(?:[\s_.-]?(?:\d{0,4}|Ep\d{1,4}|[A-Z]))?)$").unwrap()
     Lazy::new(|| Regex::new(r"(?i)^(?:Vol(?:ume)?\.?|Disc|CD|BD|DVD|D)\s*\d{1,3}$").unwrap());
 /// Whole-input match: a four-digit number starting with `19` or `20`, optionally
 /// followed by up to two 1-2-digit parts, each introduced by `.`, `_` or `-`
 /// (e.g. `1985`, `1985.10`, `1985-10-02`).
 static DATE_RE: Lazy<Regex> =
     Lazy::new(|| Regex::new(r"^(?:19|20)\d{2}(?:[._-]\d{1,2}){0,2}$").unwrap());
 /// Two `DATE_RE`-shaped dates joined by `-` or `~`, with optional whitespace
 /// around the joiner (e.g. `1985.10-1988.02`).
+static DATE_RANGE_MIXED_RE: Lazy<Regex> = Lazy::new(|| {
+    Regex::new(r"^(?:19|20)\d{2}(?:[._-]\d{1,2}){0,2}\s*[-~]\s*(?:19|20)\d{2}(?:[._-]\d{1,2}){0,2}$").unwrap()
+});
 /// Whole-input match for `<year>年<1-2 digits>月<1-2 digits>日`, where the year
 /// is four digits starting with `19` or `20` (e.g. `1979年02月22日`).
+static CJK_DATE_RE: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r"^(?:19|20)\d{2}年\d{1,2}月\d{1,2}日$").unwrap());
 /// Case-insensitive whole-input match against a fixed list of tags
 /// (`CHS`, `CHT`, `GB`, `BIG5`, `JP`, `ENG`, `简体`, `繁體`, `字幕`, `Sub`, `MSubs`, ...).
 static LANG_RE: Lazy<Regex> = Lazy::new(|| {
     Regex::new(r"(?i)^(?:CHS|CHT|ZHS|ZHT|GB|BIG5|JPN?|JP|JA|JAP|ENG|EN|SC|TC|简[体體]?|繁[体體]?|简日|繁日|字幕|内封|外挂|Sub|Subs|MSubs?)$").unwrap()
 });
     if RESOLUTION_RE.is_match(&cleaned) {
         return "RESOLUTION".to_string();
     }
+    if DATE_RE.is_match(&cleaned)
+        || DATE_RANGE_MIXED_RE.is_match(&cleaned)
+        || CJK_DATE_RE.is_match(&cleaned)
+    {
         return "DATE".to_string();
     }
     if EPISODE_VERSION_RE.is_match(&compact) {
     let markers = [
         "譁", "蜈", "螟", "蟄", "謇", "邱", "荳", "縺", "繧", "莨", "鬆", "髯", "瀛",
         "楀", "箷", "绲", "刔", "鏃", "湪", "鏍", "犲", "儚", "鐗", "吀", "铦", "躲",
+        "伄", "椋", "伓", "姘", "帽",
     ];
     let marker_hits = markers
         .iter()
         .chars()
         .filter(|ch| ('\u{ff61}'..='\u{ff9f}').contains(ch))
         .count();
+    let latin_mojibake = value.split_whitespace().any(|part| {
+        part.contains('帽') && part.chars().any(|ch| ch.is_ascii_alphabetic())
+    });
+    marker_hits >= 2 || (marker_hits >= 1 && halfwidth_hits >= 1) || latin_mojibake
 }
 fn has_non_anime_noise(value: &str) -> bool {
         return Some((pieces, labels));
     }
     let caps = EPISODE_VALUE_RE.captures(token)?;
+    let mut pieces = vec![caps[1].to_string()];
+    let mut labels = vec!["O".to_string()];
+    for piece in split_generated_token(&caps[2]) {
+        pieces.push(piece);
+        labels.push("B-EPISODE".to_string());
+    }
     if let Some(version) = caps.get(3) {
         pieces.push("v".to_string());
         pieces.push(version.as_str().to_string());
             | "TOKUTEN"
             | "TRAILER"
             | "WORLD PREMIERE"
+            | "番宣"
+            | "宣番"
             | "映像特典"
             | "特典"
     ) || normalized.contains("映像特典")
+        || normalized.contains("特典映像")
+        || normalized.contains("番宣")
+        || normalized.contains("宣番")
         || SPECIAL_TITLE_PHRASE_RE.is_match(text)
 }
     &["Zom", "100"],
     &["Kamisama", "Hajimemashita", "2"],
     &["Phantasy", "Star", "Online", "2", "Episode", "Oracle"],
+    &["Lupin The Thrid Jigen Daisuke no Bohyou"],
+    &["Lupin The Third Jigen Daisuke no Bohyou"],
 ];
 fn apply_known_title_phrases(tokens: &[String], groups: &[Group], roles: &mut [String]) {
     if let Some(whitelists) = RUNTIME_WHITELISTS.get() {
         for (index, group) in groups.iter().enumerate() {
             if group.class_name == "BRACKET_TEXT"
                 && whitelists
                     .group_names
                     .contains(&normalize_whitelist_name(&group_text(tokens, group)))
+                && !roles.get(index).is_some_and(|role| {
+                    matches!(
+                        role.as_str(),
+                        "EPISODE"
+                            | "EPISODE_VERSION"
+                            | "EPISODE_RANGE"
+                            | "SEASON"
+                            | "SOURCE"
+                            | "RESOLUTION"
+                            | "SPECIAL"
+                    )
+                })
             {
                 roles[index] = "GROUP".to_string();
             }
         {
             for (group_index, _) in window {
                 if roles.get(*group_index).is_some_and(|role| role == "GROUP") {
+                    let is_known_group = RUNTIME_WHITELISTS.get().is_some_and(|whitelists| {
+                        whitelists
+                            .group_names
+                            .contains(&normalize_whitelist_name(&window[0].1))
+                    });
+                    if is_known_group {
+                        continue;
+                    }
                 }
                 if !allow_structural_override
                     && roles.get(*group_index).is_some_and(|role| {
             output[index] = "O".to_string();
             continue;
         }
+        if roles[index].starts_with("EPISODE")
+            && index >= 1
+            && output[index - 1] == "TITLE"
+            && groups[index - 1].class_name != "SEP"
+            && text.chars().all(|ch| ch.is_ascii_digit())
+            && (text.len() <= 2
+                || (text.len() <= 3
+                    && group_text(tokens, &groups[index - 1])
+                        .chars()
+                        .any(|ch| !ch.is_ascii())
+                    && !group_text(tokens, &groups[index - 1]).ends_with('第')))
+            && roles[index + 1..]
+                .iter()
+                .any(|role| role.starts_with("EPISODE"))
+        {
+            output[index] = "TITLE".to_string();
+            continue;
+        }
         if roles[index].starts_with("EPISODE") && (2..roles.len()).contains(&index) {
             let previous_text = group_text(tokens, &groups[index - 2]);
             let next_special = output[index + 1..roles.len().min(index + 4)]
                 output[index] = "SPECIAL".to_string();
                 continue;
             }
+            if index >= 1
+                && output[index - 1] == "TITLE"
+                && groups[index - 1].class_name != "SEP"
+                && text.chars().all(|ch| ch.is_ascii_digit())
+                && (text.len() <= 2
+                    || (text.len() <= 3
+                        && group_text(tokens, &groups[index - 1])
+                            .chars()
+                            .any(|ch| !ch.is_ascii())
+                        && !group_text(tokens, &groups[index - 1]).ends_with('第')))
+                && roles[index + 1..]
+                    .iter()
+                    .any(|role| role.starts_with("EPISODE"))
+            {
+                output[index] = "TITLE".to_string();
+                continue;
+            }
+            if !output[..index].iter().any(|role| role == "TITLE")
+                && NUMERIC_TITLE_PREFIX_RE.is_match(&text)
+                && output[..index].iter().any(|role| role == "GROUP")
+                && roles[index + 1..]
+                    .iter()
+                    .any(|role| role.starts_with("EPISODE"))
+            {
+                output[index] = "TITLE".to_string();
+                continue;
+            }
+            if !output[..index].iter().any(|role| role == "TITLE")
+                && NUMERIC_TITLE_PREFIX_RE.is_match(&text)
+                && index + 2 < roles.len()
+                && groups[index + 1].class_name == "SEP"
+                && groups[index + 2].class_name == "TEXT"
+                && group_text(tokens, &groups[index + 2])
+                    .chars()
+                    .any(|ch| ch.is_alphabetic())
+                && roles[index + 3..]
+                    .iter()
+                    .any(|role| role.starts_with("EPISODE"))
+            {
+                output[index] = "TITLE".to_string();
+                output[index + 2] = "TITLE".to_string();
+                continue;
+            }
             if output[index - 2] == "TITLE"
                 && groups[index - 1].class_name == "SEP"
                 && previous_text.len() <= 48
                 continue;
             }
         }
+        if roles[index].starts_with("EPISODE")
+            && text.chars().all(|ch| ch.is_ascii_digit())
+            && output[..index].iter().any(|role| role == "SPECIAL")
+            && !output[..index].iter().any(|role| role.starts_with("EPISODE"))
+        {
+            let previous_structural = (0..index)
+                .rev()
+                .find(|&cursor| groups[cursor].class_name != "SEP")
+                .and_then(|cursor| output.get(cursor))
+                .map(String::as_str);
+            let next_real = (index + 1..roles.len())
+                .find(|&cursor| groups[cursor].class_name != "SEP")
+                .and_then(|cursor| roles.get(cursor))
+                .map(String::as_str);
+            if matches!(previous_structural, Some("SPECIAL"))
+                && !matches!(next_real, Some("TITLE" | "SEASON"))
+            {
+                output[index] = "SPECIAL".to_string();
+                continue;
+            }
+        }
         if roles[index].starts_with("EPISODE")
             && BARE_RESOLUTION_RE.is_match(&text)
             && index >= 2
             && text.chars().any(|ch| ch.is_alphabetic())
             && !ep_markers.contains(&text.as_str())
         {
+            if !output[..index].iter().any(|role| role == "TITLE") {
+                let previous_structural = (0..index)
+                    .rev()
+                    .find(|&cursor| groups[cursor].class_name != "SEP")
+                    .and_then(|cursor| output.get(cursor))
+                    .map(String::as_str);
+                if matches!(previous_structural, Some("SPECIAL")) {
+                    output[index] = "TITLE".to_string();
+                    continue;
+                }
+            }
             if let Some(last_title) = output[..index].iter().rposition(|role| role == "TITLE") {
                 let episode_since_title = output[last_title + 1..index]
                     .iter()
             } else {
                 String::new()
             };
+            if previous_text.ends_with('第') && next_text.starts_with('期') {
+                output[index] = "SEASON".to_string();
+                continue;
+            }
+            if output[..index].iter().any(|role| role == "TITLE")
+                && (output[..index]
+                    .iter()
+                    .enumerate()
+                    .any(|(cursor, role)| {
+                        role == "TITLE" && is_special_title_phrase(&group_text(tokens, &groups[cursor]))
+                    }))
+                && !output[..index].iter().any(|role| role.starts_with("EPISODE"))
+                && text.chars().all(|ch| ch.is_ascii_digit())
+                && text.len() <= 3
+            {
+                output[index] = "SPECIAL".to_string();
+                continue;
+            }
             if previous_text.contains('点')
                 || previous_text.contains('點')
                 || previous_text.contains("晚上")
                 || previous_text.contains("上午")
                 || previous_text.contains("下午")
+                || previous_text.contains('年')
+                || previous_text.contains('月')
                 || next_text.contains('点')
                 || next_text.contains('點')
                 || next_text.contains('半')
+                || next_text.contains('月')
+                || next_text.contains('日')
             {
                 output[index] = "O".to_string();
             }
     ) {
         score -= 500;
     }
+    if title_noise_score_penalty(cleaned) {
+        score -= 700;
+    }
     score
 }
+fn title_noise_score_penalty(text: &str) -> bool {
     // Reports whether `text` contains one of a fixed set of release/rip markers.
     // Despite the name, the result is a flag rather than a numeric penalty; the
     // caller in `title_candidate_score` subtracts 700 when this returns true.
     //
     // Normalization: `_`, `-` and `.` become spaces, runs of whitespace collapse
     // to a single space, and ASCII letters are lowercased. This lets "WEB-DL",
     // "web_dl" and "Web.DL" all compare as "web dl".
+    let normalized = text
+        .replace(['_', '-', '.'], " ")
+        .split_whitespace()
+        .collect::<Vec<_>>()
+        .join(" ")
+        .to_ascii_lowercase();
     // Plain substring checks: a marker also counts when it is embedded in a
     // longer word.
+    normalized.contains("bdrip")
+        || normalized.contains("webrip")
+        || normalized.contains("web dl")
+        || normalized.contains("bluray")
+        || normalized.contains("full hd")
+        || normalized.contains("hdtv")
+}
 /// Returns `true` when the group's `class_name` is `"TEXT"` or `"BRACKET_TEXT"`.
 fn roles_candidate_text_group(group: &Group) -> bool {
     matches!(group.class_name.as_str(), "TEXT" | "BRACKET_TEXT")
 }
 /// Splits a title token into pieces and assigns one label to each piece.
 ///
 /// The pieces come from `split_generated_token`. Each piece is labelled by the
 /// first rule that applies:
 /// - `is_standalone_separator` accepts it -> `"O"`
 /// - the whole piece matches `CJK_SEASON_TOKEN_RE` -> `"B-SEASON"`
 /// - the piece matches `CJK_SEASON_EMBEDDED_RE` -> it is split further into
 ///   leading text (`"B-TITLE"`), the season marker (`"B-SEASON"`) and any
 ///   trailing text (`"O"`)
 /// - otherwise -> `"B-TITLE"`
 ///
 /// Returns `(pieces, labels)`. Every pushed piece is paired with exactly one
 /// pushed label, so the two vectors always have the same length.
 fn normalize_title_token(token: &str) -> (Vec<String>, Vec<String>) {
     let pieces = split_generated_token(token);
+    let mut output_pieces = Vec::new();
+    let mut labels = Vec::new();
+    for piece in pieces {
         // Separators are kept as pieces but labelled as outside any entity.
+        if is_standalone_separator(&piece) {
+            output_pieces.push(piece);
+            labels.push("O".to_string());
+            continue;
+        }
         // A piece that is nothing but a CJK season marker.
+        if CJK_SEASON_TOKEN_RE.is_match(&piece) {
+            output_pieces.push(piece);
+            labels.push("B-SEASON".to_string());
+            continue;
+        }
         // A season marker glued to surrounding text: split it out so the
         // leading text stays part of the title and the marker becomes a season.
+        if let Some(caps) = CJK_SEASON_EMBEDDED_RE.captures(&piece) {
+            let before = caps.get(1).map(|m| m.as_str()).unwrap_or_default();
+            let season = caps.get(2).map(|m| m.as_str()).unwrap_or_default();
+            let after = caps.get(3).map(|m| m.as_str()).unwrap_or_default();
+            if !before.is_empty() {
+                output_pieces.push(before.to_string());
+                labels.push("B-TITLE".to_string());
             }
+            output_pieces.push(season.to_string());
+            labels.push("B-SEASON".to_string());
             // Text after the marker is kept as a piece but not labelled as title.
+            if !after.is_empty() {
+                output_pieces.push(after.to_string());
+                labels.push("O".to_string());
+            }
+            continue;
+        }
         // Default: an ordinary title piece.
+        output_pieces.push(piece);
+        labels.push("B-TITLE".to_string());
+    }
+    (output_pieces, labels)
 }
 fn split_generated_token(token: &str) -> Vec<String> {
             right += 1;
         }
         if left >= 0 && right < tokens.len() {
+            let left_label = output[left as usize].clone();
+            let right_label = labels[right].clone();
             if left_label == right_label && matches!(left_label.as_str(), "B-TITLE" | "B-GROUP") {
                 output[index] = left_label.clone();
             }
+            if token == "." && left_label == "B-EPISODE" && right_label == "B-EPISODE" {
+                output[index] = "B-EPISODE".to_string();
+            }
         }
         if title_terminal_punctuation.contains(&token.as_str()) && index > 0 {
             let left_label = &output[index - 1];
         assert!(decimal_episode.contains(&(".".to_string(), "B-EPISODE".to_string())));
         assert!(decimal_episode.contains(&("5".to_string(), "B-EPISODE".to_string())));
+        let _ = RUNTIME_WHITELISTS.set(Whitelists {
+            title_phrases: Vec::new(),
+            group_names: [
+                "LowPower-Raws".to_string(),
+                "ANi".to_string(),
+                "LoliHouse".to_string(),
+                "QTS".to_string(),
+            ]
+            .into_iter()
+            .collect(),
+        });
+        let lowpower = labels_for("[LowPower-Raws] 91 Days - 01 (BD 720P x264 10bit AAC)");
+        assert!(lowpower.contains(&("LowPower".to_string(), "B-GROUP".to_string())));
+        assert!(lowpower.contains(&("91".to_string(), "B-TITLE".to_string())));
+        assert!(lowpower.contains(&("Days".to_string(), "B-TITLE".to_string())));
+        assert!(lowpower.contains(&("01".to_string(), "B-EPISODE".to_string())));
+        let ririsa = labels_for("[ANi] 2.5 次元的誘惑 - 01 [1080P][Baha][WEB-DL][AAC AVC][CHT]");
+        assert!(ririsa.contains(&("2".to_string(), "B-TITLE".to_string())));
+        assert!(ririsa.contains(&(".".to_string(), "B-TITLE".to_string())));
+        assert!(ririsa.contains(&("5".to_string(), "B-TITLE".to_string())));
+        assert!(ririsa.contains(&("次元的誘惑".to_string(), "B-TITLE".to_string())));
+        assert!(ririsa.contains(&("01".to_string(), "B-EPISODE".to_string())));
+        let nanabun = labels_for("[LoliHouse] 22-7 - 01 [WebRip 1080p HEVC-10bit AAC ASS]");
+        assert!(nanabun.contains(&("22".to_string(), "B-TITLE".to_string())));
+        assert!(nanabun.contains(&("-".to_string(), "B-TITLE".to_string())));
+        assert!(nanabun.contains(&("7".to_string(), "B-TITLE".to_string())));
+        assert!(nanabun.contains(&("01".to_string(), "B-EPISODE".to_string())));
+        let saint = labels_for("[QTS] OVA Saint Seiya The Lost Canvas Meiou Shinwa ep 01 (BD H264 1920x1080 24fps FLAC)");
+        assert!(saint.contains(&("OVA".to_string(), "B-SPECIAL".to_string())));
+        assert!(saint.contains(&("Saint".to_string(), "B-TITLE".to_string())));
+        assert!(saint.contains(&("Seiya".to_string(), "B-TITLE".to_string())));
+        assert!(saint.contains(&("01".to_string(), "B-EPISODE".to_string())));
+        let gundam = labels_for("機動戦士ガンダム00 セカンドシーズン／Ep.01 「# 天使再臨」");
+        assert!(gundam.contains(&("機動戦士ガンダム".to_string(), "B-TITLE".to_string())));
+        assert!(gundam.contains(&("00".to_string(), "B-TITLE".to_string())));
+        assert!(gundam.contains(&("01".to_string(), "B-EPISODE".to_string())));
         let spy = labels_for("[Studio GreenTea] Spy x Family [38][WebRip][HEVC-10bit 1080p AAC ASSx2]");
         assert!(spy.contains(&("Studio".to_string(), "B-GROUP".to_string())));
         assert!(spy.contains(&("Spy".to_string(), "B-TITLE".to_string())));
         assert!(volume.contains(&("MENU02".to_string(), "B-SPECIAL".to_string())));
         assert!(!volume.contains(&("01".to_string(), "B-EPISODE".to_string())));
+        let aria_notice =
+            labels_for("[KNA-Subs&ANK-Raws] 緋弾のアリアAA 番宣1 (BDrip 1920x1080 HEVC-YUV420P10 FLAC)");
+        assert!(aria_notice.contains(&("緋弾のアリア".to_string(), "B-TITLE".to_string())));
+        assert!(aria_notice.contains(&("番宣".to_string(), "B-SPECIAL".to_string())));
+        assert!(aria_notice.contains(&("1".to_string(), "B-SPECIAL".to_string())));
+        assert!(!aria_notice.contains(&("1".to_string(), "B-EPISODE".to_string())));
+        let lost_song =
+            labels_for("[Snow-Raws] LOST SONG CM&PV 01(BD 1920x1080 HEVC-YUV420P10 FLAC)");
+        assert!(lost_song.contains(&("LOST".to_string(), "B-TITLE".to_string())));
+        assert!(lost_song.contains(&("CM".to_string(), "B-SPECIAL".to_string())));
+        assert!(lost_song.contains(&("PV".to_string(), "B-SPECIAL".to_string())));
+        assert!(lost_song.contains(&("01".to_string(), "B-SPECIAL".to_string())));
+        assert!(!lost_song.contains(&("01".to_string(), "B-EPISODE".to_string())));
         let numeric_title =
             labels_for("3000.Leagues.in.Search.of.Mother.S01E01.1080p.WEB-DL.H.264-D00oo00M");
         assert!(numeric_title.contains(&("3000".to_string(), "B-TITLE".to_string())));
         assert!(media_block.contains(&("1080".to_string(), "B-RESOLUTION".to_string())));
         assert!(media_block.contains(&("x264".to_string(), "B-SOURCE".to_string())));
         assert!(media_block.contains(&("Chs".to_string(), "B-SOURCE".to_string())));
+        let ge999 = labels_for("GE999 第024話 「次元航海惑星」1979年02月22日 (720x540 x264 AAC2)");
+        assert!(ge999.contains(&("GE999".to_string(), "B-TITLE".to_string())));
+        assert!(ge999.contains(&("024".to_string(), "B-EPISODE".to_string())));
+        assert!(!ge999.contains(&("22".to_string(), "B-EPISODE".to_string())));
+        let galaxy = labels_for("銀河鉄道999 第024話 「次元航海惑星」 (DVD 640x480 WMV9)");
+        assert!(galaxy.contains(&("銀河鉄道".to_string(), "B-TITLE".to_string())));
+        assert!(galaxy.contains(&("999".to_string(), "B-TITLE".to_string())));
+        assert!(galaxy.contains(&("024".to_string(), "B-EPISODE".to_string())));
+        let mahoro = labels_for("[POPGO][FREEWIND][Mahoro_Matic][Full_HD-BDRIP][01]");
+        assert!(mahoro.contains(&("Mahoro".to_string(), "B-TITLE".to_string())));
+        assert!(!mahoro.contains(&("Full".to_string(), "B-TITLE".to_string())));
+        assert!(mahoro.contains(&("01".to_string(), "B-EPISODE".to_string())));
+        let kitaro = labels_for("[1985.10-1988.02] Kitaro_鬼太郎 第3期(ゲゲゲの鬼太郎)_TV 036 異次元妖怪かまなり");
+        assert!(kitaro.contains(&("Kitaro".to_string(), "B-TITLE".to_string())));
+        assert!(kitaro.contains(&("3".to_string(), "B-SEASON".to_string())));
+        assert!(kitaro.contains(&("036".to_string(), "B-EPISODE".to_string())));
+        assert!(!kitaro.contains(&("1985".to_string(), "B-EPISODE".to_string())));
+        let urusei = labels_for("Urusei_Yatsura_DVD_Ep042.5_Simu");
+        assert!(urusei.contains(&("Urusei".to_string(), "B-TITLE".to_string())));
+        assert!(urusei.contains(&("042".to_string(), "B-EPISODE".to_string())));
+        assert!(urusei.contains(&(".".to_string(), "B-EPISODE".to_string())));
+        assert!(urusei.contains(&("5".to_string(), "B-EPISODE".to_string())));
+        let lupin =
+            labels_for("[Lupin The Thrid Jigen Daisuke no Bohyou][Logo][BDRIP][1080P][H264_FLAC]");
+        assert!(lupin.contains(&("Lupin".to_string(), "B-TITLE".to_string())));
+        assert!(!lupin.contains(&("Lupin".to_string(), "B-GROUP".to_string())));
+        let mirumo = labels_for("【咪路fans】魔法咪路咪路第二季日语版 01[GB][MP4]");
+        assert!(mirumo.contains(&("魔法咪路咪路".to_string(), "B-TITLE".to_string())));
+        assert!(mirumo.contains(&("第二季".to_string(), "B-SEASON".to_string())));
+        assert!(mirumo.contains(&("01".to_string(), "B-EPISODE".to_string())));
+        let doremi_bonus = labels_for(
+            "おジャ魔女どれみナ・イ・ショ 特典映像07「おジャ魔女どれみナ・イ・ショ エンドテロップ集｣(DVD 640x480 )",
+        );
+        assert!(doremi_bonus.contains(&("おジャ魔女どれみナ".to_string(), "B-TITLE".to_string())));
+        assert!(doremi_bonus.contains(&("07".to_string(), "B-SPECIAL".to_string())));
+        assert!(!doremi_bonus.contains(&("07".to_string(), "B-EPISODE".to_string())));
     }
 }