Implement schema v2 anime filename labels

ed49faa 6 days ago

221 kB

	use anyhow::{bail, Context, Result};
	use chrono::Utc;
	use clap::Parser;
	use once_cell::sync::{Lazy, OnceCell};
	use rayon::prelude::*;
	use regex::Regex;
	use serde::{Deserialize, Serialize};
	use serde_json::{json, Value};
	use std::collections::{HashMap, HashSet};
	use std::fs::{self, File};
	use std::io::{BufRead, BufReader, BufWriter, Write};
	use std::path::PathBuf;
	use std::sync::atomic::{AtomicUsize, Ordering};

	// Command-line options for the DMHY template pipeline.
	//
	// Plain `//` comments are used throughout this struct on purpose: clap's derive macro
	// turns `///` doc comments into help text, which would change the `--help` output.
	#[derive(Parser, Debug)]
	#[command(about = "Apply DMHY template recipes with a multi-core Rust pipeline")]
	struct Args {
	// Mode switch: `main` dispatches to `run_cluster` when set.
	#[arg(long)]
	cluster: bool,
	// Mode switch: `main` dispatches to `run_low_frequency_audit` when set.
	#[arg(long)]
	audit_low_frequency: bool,
	// Mode switch: `main` dispatches to `run_verify_generated_output` when set.
	#[arg(long)]
	verify_generated_output: bool,
	// Mode switch: `main` dispatches to `run_rich_annotations` when set.
	#[arg(long)]
	rich_annotations: bool,
	// Input JSONL. `load_input` reads the string field `value` of each row; in
	// verify mode the same path is parsed as generated `Record` rows instead.
	#[arg(long, default_value = "datasets/AnimeName/dmhy_list.jsonl")]
	input: PathBuf,
	// Recipe JSONL consumed by `load_recipes`.
	#[arg(long, default_value = "reports/dmhy_template_recipes.seed.jsonl")]
	recipes: PathBuf,
	// Destination of the generated records written by `main` (one JSON object per line).
	#[arg(
	long,
	default_value = "reports/dmhy_weak.template_generated.rust.jsonl"
	)]
	output: PathBuf,
	// Destination of the pretty-printed manifest written by `main`.
	#[arg(
	long,
	default_value = "reports/dmhy_weak.template_generated.rust.manifest.json"
	)]
	manifest_output: PathBuf,
	// Cluster mode: destination of the summary JSON.
	#[arg(
	long,
	default_value = "reports/dmhy_template_clusters.rust.summary.json"
	)]
	summary_output: PathBuf,
	// Cluster mode: destination of the first `top` cluster rows.
	#[arg(
	long,
	default_value = "reports/dmhy_template_clusters.rust.samples.jsonl"
	)]
	samples_output: PathBuf,
	// Cluster mode: destination of every cluster row.
	#[arg(long, default_value = "reports/dmhy_template_clusters.rust.jsonl")]
	clusters_output: PathBuf,
	// Cluster mode: destination of the high-confidence recipe rows.
	#[arg(long, default_value = "reports/dmhy_template_recipes.rust.seed.jsonl")]
	recipes_output: PathBuf,
	// Cluster mode: destination of candidate rows that were not accepted as recipes.
	#[arg(long, default_value = "reports/dmhy_template_review.rust.jsonl")]
	review_output: PathBuf,
	// Audit mode: destination of the audit rows.
	#[arg(long, default_value = "reports/dmhy_low_frequency_audit.rust.jsonl")]
	audit_output: PathBuf,
	// Rich-annotation mode: destination of the annotation rows.
	#[arg(long, default_value = "reports/dmhy_rich_annotations.rust.jsonl")]
	rich_output: PathBuf,
	// A template counts as low-frequency when its recipe `count` is <= this value.
	#[arg(long, default_value_t = 50)]
	audit_max_count: u64,
	// Maximum number of input values accepted by `load_input`; unlimited when absent.
	#[arg(long)]
	limit: Option<usize>,
	// `load_recipes` stops reading once this many recipes are loaded; unlimited when absent.
	#[arg(long)]
	limit_templates: Option<usize>,
	// Recipes with a smaller `count` are skipped by `load_recipes`; cluster mode also
	// uses it as the threshold for its "repeated template" statistics.
	#[arg(long, default_value_t = 1)]
	min_count: u64,
	// Cluster mode: number of leading cluster rows copied to `samples_output`.
	#[arg(long, default_value_t = 200)]
	top: usize,
	// Cluster mode: number of leading cluster rows considered as recipe candidates.
	#[arg(long, default_value_t = 200)]
	recipe_top: usize,
	// Cluster mode: maximum number of rejected candidates written to `review_output`.
	#[arg(long, default_value_t = 1000)]
	review_top: usize,
	// Cluster mode: maximum number of example filenames kept per cluster.
	#[arg(long, default_value_t = 8)]
	examples: usize,
	// Cluster mode: minimum cluster count for a high-confidence recipe.
	#[arg(long, default_value_t = 25)]
	recipe_min_count: usize,
	// Only recipes whose `confidence` equals this string are loaded; an empty
	// string disables the filter.
	#[arg(long, default_value = "high")]
	confidence: String,
	// Must be "all" or "sample"; `main` rejects anything else.
	#[arg(long, default_value = "all")]
	expand: String,
	// Reported in the manifest only when `expand` is "sample".
	// NOTE(review): presumably the per-template cap enforced in `process_filename`,
	// which is not visible here - confirm.
	#[arg(long, default_value_t = 100)]
	sample_per_template: usize,
	// Disables the noise filters (encoding, non-anime, music collection, abstract path).
	#[arg(long)]
	keep_encoding_noise: bool,
	// Cluster mode: cluster the raw input value instead of the result of
	// `training_filename_for`.
	#[arg(long)]
	preserve_parent_paths: bool,
	// Title whitelist file; a missing file yields an empty whitelist.
	#[arg(long, default_value = "datasets/AnimeName/dmhy_title_whitelist.txt")]
	title_whitelist: PathBuf,
	// Group whitelist file; a missing file yields an empty whitelist.
	#[arg(long, default_value = "datasets/AnimeName/dmhy_group_whitelist.txt")]
	group_whitelist: PathBuf,
	// Size of the global rayon thread pool; rayon's default is used when absent.
	#[arg(long)]
	threads: Option<usize>,
	}

	/// Whitelist data assembled by `load_whitelists` from the two whitelist files.
	#[derive(Debug, Default)]
	struct Whitelists {
	/// One entry per whitelist line: the non-empty result of `phrase_parts_for_whitelist`.
	title_phrases: Vec<Vec<String>>,
	/// Whitelist lines passed through `normalize_whitelist_name`, empty results dropped.
	group_names: HashSet<String>,
	}

	/// Process-wide whitelists; `main` sets this once, right after argument parsing and
	/// thread-pool setup and before any mode runs.
	static RUNTIME_WHITELISTS: OnceCell<Whitelists> = OnceCell::new();

	/// One row of the recipe JSONL file, as deserialized by `load_recipes`.
	#[derive(Debug, Clone, Deserialize)]
	struct Recipe {
	/// Identifier copied onto generated records and used to de-duplicate audit rows.
	template_id: String,
	/// Template key; `load_recipes` indexes recipes by this string and lookups use the
	/// key returned by `template_key_for_filename`.
	template: String,
	/// Role names; the audit skips a filename when their number differs from the
	/// number of groups found in it.
	roles: Vec<String>,
	/// Compared against `--confidence` when loading.
	confidence: Option<String>,
	/// Template frequency; a missing value is treated as 0.
	count: Option<u64>,
	}

	/// One generated training row; written as a JSON line by `main` and parsed back by
	/// `run_verify_generated_output`. The three optional fields are omitted from the JSON
	/// when `None`.
	#[derive(Debug, Clone, Serialize, Deserialize)]
	struct Record {
	/// Filename the record was built from.
	filename: String,
	/// Token strings; passed together with `labels` to `entity_spans`.
	tokens: Vec<String>,
	/// Label strings. NOTE(review): presumably one label per token - confirm in `dmhy_record`.
	labels: Vec<String>,
	/// Identifier of the recipe that produced the record.
	template_id: String,
	/// NOTE(review): presumably the template key of the filename; set outside this chunk.
	template: String,
	/// Original input value; the audit sets it when the parent path was trimmed.
	#[serde(skip_serializing_if = "Option::is_none")]
	source_filename: Option<String>,
	/// `Some(true)` when the parent path was trimmed (as set by the audit).
	#[serde(skip_serializing_if = "Option::is_none")]
	path_trimmed: Option<bool>,
	/// NOTE(review): populated outside this chunk; meaning not visible here.
	#[serde(skip_serializing_if = "Option::is_none")]
	dropped_title_candidate_positions: Option<Vec<String>>,
	}

	/// One group returned by `template_key_for_filename`.
	#[derive(Debug, Clone)]
	struct Group {
	/// NOTE(review): presumably indices into the token list - confirm against `group_text`.
	indices: Vec<usize>,
	/// Class name of the group; `add_cluster` checks it against "TEXT" and "BRACKET_TEXT".
	class_name: String,
	}

	/// Counters accumulated by `main` and serialized into the manifest under "stats".
	#[derive(Debug, Default, Clone, Serialize)]
	struct Stats {
	/// Number of input values loaded.
	seen: usize,
	/// Items skipped with reason "encoding_noise".
	skipped_encoding_noise: usize,
	/// Items skipped with reason "music_audio_collection".
	skipped_music_audio_collection: usize,
	/// Items (written or skipped) whose parent path was trimmed.
	trimmed_parent_path: usize,
	/// Items skipped with reason "no_recipe".
	skipped_no_recipe: usize,
	/// Items skipped with reason "sample_cap".
	skipped_sample_cap: usize,
	/// Items skipped with reason "role_mismatch".
	skipped_role_mismatch: usize,
	/// Items skipped with reason "low_frequency_audit_warning".
	skipped_low_frequency_audit_warning: usize,
	/// Records written to the output file.
	written: usize,
	}

	/// Aggregate for all filenames sharing one template key; filled by `add_cluster`.
	#[derive(Debug, Default)]
	struct Cluster {
	/// Number of filenames added to the cluster.
	count: usize,
	/// First filenames seen, capped by the caller-supplied example limit.
	examples: Vec<String>,
	/// Occurrences of each non-empty `strip_wrapper` result for TEXT / BRACKET_TEXT tokens.
	literal_counts: HashMap<String, usize>,
	/// Occurrences of each token class name.
	class_counts: HashMap<String, usize>,
	/// Per group position: occurrences of each non-empty `group_text` for
	/// TEXT / BRACKET_TEXT groups.
	position_literals: Vec<HashMap<String, usize>>,
	}

	/// Per-filename outcome collected by `main` from the parallel `process_filename` calls.
	#[derive(Debug)]
	enum Processed {
	/// A record was produced and will be written to the output file.
	Written {
	record: Record,
	/// Counted into `Stats::trimmed_parent_path` when true.
	trimmed_parent: bool,
	},
	/// The filename was dropped.
	Skipped {
	/// Reason tag; `main` recognises "encoding_noise", "music_audio_collection",
	/// "no_recipe", "sample_cap", "role_mismatch" and "low_frequency_audit_warning"
	/// and ignores anything else.
	reason: &'static str,
	/// Counted into `Stats::trimmed_parent_path` when true.
	trimmed_parent: bool,
	/// Optional example string kept in the manifest for some reasons.
	example: Option<String>,
	/// Warning names; tallied only for "low_frequency_audit_warning".
	warnings: Vec<String>,
	},
	}

	static HASH_RE: Lazy<Regex> = Lazy::new(\|\| Regex::new(r"^[A-Fa-f0-9]{8,}$").unwrap());
	static RESOLUTION_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"(?i)^(?:\d{3,4}p\|\dK\|\d{3,4}[xX×]\d{3,4})$").unwrap());
	static BARE_RESOLUTION_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"^(?:360\|480\|540\|576\|720\|1080\|2160)$").unwrap());
	static EPISODE_VERSION_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"(?i)^(?:EP?)?\d{1,4}(?:v\|ver\|version\|rev)\d{1,3}$").unwrap());
	static EPISODE_WITH_SUFFIX_RE: Lazy<Regex> = Lazy::new(\|\| {
	Regex::new(r"(?i)^\d{1,4}[_ .-]?(?:Notice\|Full\|R18\|R\|Uncut\|Director'?s?Cut)$").unwrap()
	});
	static EPISODE_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"(?i)^(?:EP?\|#)?\d{1,4}(?:\.\d{1,2})?(?:END)?$").unwrap());
	static DECIMAL_EPISODE_RE: Lazy<Regex> = Lazy::new(\|\| Regex::new(r"^\d{1,3}\.\d{1,2}$").unwrap());
	static NUMERIC_TITLE_PREFIX_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"^\d{1,3}(?:[./-]\d{1,3})?$").unwrap());
	static EPISODE_CJK_RE: Lazy<Regex> = Lazy::new(\|\| Regex::new(r"^第?\d{1,4}[话話回集]$").unwrap());
	static EPISODE_CJK_PREFIX_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"^第?\d{1,4}[话話回集]").unwrap());
	static EPISODE_RANGE_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"(?i)^\d{1,4}\s[-~]\s\d{1,4}(?:\s*END)?$").unwrap());
	static EPISODE_BATCH_RE: Lazy<Regex> = Lazy::new(\|\| {
	Regex::new(r"(?i)^\d{1,4}\s[-~]\s\d{1,4}(?:\s(?:TV\|全集\|全\|END\|Fin\|Complete\|SP\|OVA\|OAD\|NCOP\|NCED)\|[+_./-]).{0,16}$").unwrap()
	});
	static SXE_RE: Lazy<Regex> = Lazy::new(\|\| Regex::new(r"(?i)^S\d{1,2}E\d{1,4}(?:v\d+)?$").unwrap());
	static SXE_VALUE_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"(?i)^S(\d{1,2})E(\d{1,4})(?:v(\d+))?$").unwrap());
	static EPISODE_VALUE_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"(?i)^(EP\|E\|#)(\d{1,4}(?:\.\d{1,2})?)(?:v(\d+))?$").unwrap());
	static SEASON_RE: Lazy<Regex> = Lazy::new(\|\| {
	Regex::new(r"(?i)^(?:S\d{1,2}\|Season\s*\d{1,2}\|第[一二三四五六七八九十\d]+[季期部])$").unwrap()
	});
	static CJK_SEASON_TOKEN_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"^第[一二三四五六七八九十\d]+[季期部]$").unwrap());
	static CJK_SEASON_EMBEDDED_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"^(.+?)(第[一二三四五六七八九十\d]+[季期部])(.{0,12})$").unwrap());
	static CJK_EPISODE_EMBEDDED_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"^(.+?)(第?\d{1,4}[话話回集])(.{0,32})$").unwrap());
	static CJK_TITLE_TRAILING_EPISODE_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"^(.+[\p{Han}\p{Hiragana}\p{Katakana}])(\d{2,3})$").unwrap());
	static ASCII_SEASON_SUFFIX_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"(?i)^(.+?)[\s_.-]+(S\d{1,2})$").unwrap());
	static ORDINAL_SEASON_TOKEN_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"(?i)^\d{1,2}(?:st\|nd\|rd\|th)$").unwrap());
	static WORD_ORDINAL_SEASON_TOKEN_RE: Lazy<Regex> = Lazy::new(\|\| {
	Regex::new(r"(?i)^(?:First\|Second\|Third\|Fourth\|Fifth\|Sixth\|Seventh\|Eighth\|Ninth\|Tenth)$")
	.unwrap()
	});
	static SEASON_WORD_RE: Lazy<Regex> = Lazy::new(\|\| Regex::new(r"(?i)^(?:Season\|Saison)$").unwrap());
	static CJK_TITLE_LANG_PREFIX_RE: Lazy<Regex> = Lazy::new(\|\| {
	Regex::new(r"^(.+?)(国日双语\|國日雙語\|日语版\|日語版\|国语版\|國語版\|双语\|雙語)(第?)$").unwrap()
	});
	static SEASON_VALUE_RE: Lazy<Regex> = Lazy::new(\|\| Regex::new(r"(?i)^S(\d{1,2})$").unwrap());
	static SPECIAL_RE: Lazy<Regex> = Lazy::new(\|\| {
	Regex::new(r"(?i)^(?:(?:NCOP\|NCED\|OP\|ED\|PV\|CM)(?:[\s_.-]?(?:\d{1,4}\|v\d{1,3}\|[A-Z]))?\|SP(?:[\s_.-]?\d{0,4})?\|(?:OVA\|OAD\|IV)(?:[\s_.-]?\d{0,4})?\|(?:BD)?Menu(?:[\s_.-]?(?:\d{0,4}\|Ep\d{1,4}\|[A-Z]))?\|(?:BD[-_. ]?)?Spot(?:[\s_.-]?(?:\d{0,4}\|Ep\d{1,4}\|[A-Z]))?\|(?:Intro\|Preview\|Trailer\|Teaser\|Animatics?)(?:[\s_.-]?(?:\d{0,4}\|Ep\d{1,4}\|[A-Z]))?)$").unwrap()
	});
	static VOLUME_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"(?i)^(?:Vol(?:ume)?\.?\|Disc\|CD\|BD\|DVD\|D)\s*\d{1,3}$").unwrap());
	static DATE_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"^(?:19\|20)\d{2}(?:[._-]\d{1,2}){0,2}$").unwrap());
	static DATE_RANGE_MIXED_RE: Lazy<Regex> = Lazy::new(\|\| {
	Regex::new(
	r"^(?:19\|20)\d{2}(?:[._-]\d{1,2}){0,2}\s[-~]\s(?:19\|20)\d{2}(?:[._-]\d{1,2}){0,2}$",
	)
	.unwrap()
	});
	static CJK_DATE_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"^(?:19\|20)\d{2}年\d{1,2}月\d{1,2}日$").unwrap());
	static LANG_RE: Lazy<Regex> = Lazy::new(\|\| {
	Regex::new(r"(?i)^(?:CHS\|CHT\|ZHS\|ZHT\|GB\|BIG5\|JPN?\|JP\|JA\|JAP\|JPTC\|JPSC\|ENG\|EN\|SC\|TC\|简[体體]?\|繁[体體]?\|简日\|繁日\|字幕\|内封\|外挂\|Sub\|Subs\|MSubs?)$").unwrap()
	});
	static MEDIA_RE: Lazy<Regex> = Lazy::new(\|\| {
	Regex::new(r"(?i)^(?:WEB\|WEB[-_. ]?DL\|WEB[-_. ]?Rip\|BDRip\|BluRay\|BDMV\|BD\|DVDRip\|DVD\|HD\|UHD\|HDTV\|TVRip\|REMUX\|xvid\|x26[45]\|h\.?26[45]\|HEVC\|AVC\|AV1\|YUV\d+P?\d\|AAC\s\d(?:\.\d+)?\|DDP\s\d(?:\.\d+)?\|FLAC\|MP3\|DTS\|HDMA\|DTS-HDMA\|E?AC3x?\d(?:\.\d+)?\|Opus\|WMV\d\|\d(?:\.\d)?ch\|10[-_. ]?bit\|8[-_. ]?bit\|Hi10p?\|Ma10p\|ASSx?\d\|SRTx?\d\|SUP\|R\d[A-Z]\|NoSub\|MKV\|MP4\|AVI\|RAW\|Raws?)$").unwrap()
	});
	static SPECIAL_TITLE_PHRASE_RE: Lazy<Regex> = Lazy::new(\|\| {
	Regex::new(r"(?i)\b(?:theater\s+greeting\s+event\|world\s+prem(?:eie\|iere)\|picture\s+drama)\b")
	.unwrap()
	});
	static YEAR_RANGE_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"^$?\s(?:19\|20)\d{2}\s[-~]\s(?:19\|20)\d{2}\s$?$").unwrap());
	static VERSIONISH_TITLE_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"(?i)^(?:19\|20)\d{2}(?:版\|ver\.?\|version)?$").unwrap());
	static PATH_SEGMENT_SEASON_RE: Lazy<Regex> = Lazy::new(\|\| {
	Regex::new(r"(?i)(?:^\|[\s_.\-/])(?:season\s*\d{1,2}\|s\d{1,2})(?:$\|[\s_.\-/])").unwrap()
	});
	static SEASON_WORD_NUMBER_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"(?i)(?:season\|saison)\s*0?(\d{1,2})").unwrap());
	static PLAIN_SEASON_SEGMENT_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"(?i)^(?:season\|saison)\s*0?\d{1,2}$\|^s0?\d{1,2}$").unwrap());
	static S_NUMBER_SEGMENT_RE: Lazy<Regex> =
	Lazy::new(\|\| Regex::new(r"(?i)(?:^\|[^\p{L}\p{N}])s0?(\d{1,2})(?:$\|[^\p{L}\p{N}])").unwrap());
	static SXE_SEASON_RE: Lazy<Regex> = Lazy::new(\|\| {
	Regex::new(r"(?i)(?:^\|[^\p{L}\p{N}])s0?(\d{1,2})e\d{1,4}(?:$\|[^\p{L}\p{N}])").unwrap()
	});
	/// Start-anchored prefix patterns, compiled once in the listed order.
	///
	/// NOTE(review): presumably tried in order by the tokenizer (not visible here). The
	/// escaped pipes (`\|`) of the recovered copy are restored to `|`; the pattern
	/// `^[A-Za-z]+(?:\d+[A-Za-z])` may have lost trailing `*` quantifiers - verify upstream.
	static TOKEN_REGEXES: Lazy<Vec<Regex>> = Lazy::new(|| {
	    [
	        r"^\d{3,4}[xX×]\d{3,4}",
	        r"(?i)^(?:AAC|AC3|EAC3|DTS|FLAC|DDP)\s*\d+(?:\.\d+)?",
	        r"(?i)^h\.?26[45]",
	        r"(?i)^x\.?26[45]",
	        r"^[\\/]+",
	        r"^[-_.:：+&|]+",
	        r"^\s+",
	        r"(?i)^Season\s*\d{1,2}",
	        r"^[A-Za-z]+(?:\d+[A-Za-z])",
	        r"^\d+[A-Za-z]+\d*",
	        r"^\d{1,4}(?:[._-]\d{1,4})*",
	        r"^[\p{Hiragana}\p{Katakana}\p{Han}]+",
	    ]
	    .into_iter()
	    .map(|pattern| Regex::new(pattern).unwrap())
	    .collect()
	});
	/// Plain episode number with optional `E`/`EP`/`#` prefix (no decimals, no suffix).
	static SIMPLE_EPISODE_RE: Lazy<Regex> =
	    Lazy::new(|| Regex::new(r"(?i)^(?:EP?|#)?\d{1,4}$").unwrap());
	/// Run of whitespace, `_`, `.` or `-`.
	static SPECIAL_SPACE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"[\s_.-]+").unwrap());
	/// Music/audio collection keywords (MUSIC CLIP, SOUNDTRACK, OST, DRAMA CD, ...) that are
	/// not directly adjacent to an ASCII letter or digit. The escaped pipes (`\|`) of the
	/// recovered copy are restored to real alternations.
	static MUSIC_COLLECTION_RE: Lazy<Regex> = Lazy::new(|| {
	    Regex::new(
	        r"(?i)(?:^|[^A-Z0-9])(?:MUSIC\s*CLIP|MUSIC\s+COLLECTION|SOUNDTRACK|OST|CHARACTER\s+SONG|DRAMA\s+CD|CD\s+ALBUM|BONUS\s+CD)(?:$|[^A-Z0-9])",
	    )
	    .unwrap()
	});

	/// Program entry point.
	///
	/// Parses the arguments, optionally sizes the global rayon pool, installs the runtime
	/// whitelists and then either dispatches to one of the alternate modes (`--cluster`,
	/// `--audit-low-frequency`, `--verify-generated-output`, `--rich-annotations`, checked in
	/// that order) or runs the default mode: every input value is processed in parallel with
	/// `process_filename`, written records go to `--output`, and a manifest with statistics is
	/// written to `--manifest-output` and echoed to stdout.
	///
	/// # Errors
	/// Fails on an invalid `--expand` value, when no recipe is selected, and on any I/O or
	/// serialization error.
	fn main() -> Result<()> {
	    let args = Args::parse();
	    if let Some(threads) = args.threads {
	        rayon::ThreadPoolBuilder::new()
	            .num_threads(threads)
	            .build_global()
	            .context("failed to configure rayon thread pool")?;
	    }
	    // `set` only reports that the cell was already filled; that is deliberately ignored.
	    let _ = RUNTIME_WHITELISTS.set(load_whitelists(&args)?);
	    if args.cluster {
	        return run_cluster(&args);
	    }
	    if args.audit_low_frequency {
	        return run_low_frequency_audit(&args);
	    }
	    if args.verify_generated_output {
	        return run_verify_generated_output(&args);
	    }
	    if args.rich_annotations {
	        return run_rich_annotations(&args);
	    }
	    if args.expand != "all" && args.expand != "sample" {
	        bail!("--expand must be all or sample");
	    }

	    let recipes = load_recipes(&args)?;
	    if recipes.is_empty() {
	        bail!("no recipes selected; adjust --recipes/--confidence/--min-count/--limit-templates");
	    }
	    let inputs = load_input(&args.input, args.limit)?;
	    // One shared counter per template id, handed to the parallel workers.
	    let sample_counters: HashMap<String, AtomicUsize> = recipes
	        .values()
	        .map(|recipe| (recipe.template_id.clone(), AtomicUsize::new(0)))
	        .collect();

	    let processed: Vec<Processed> = inputs
	        .par_iter()
	        .map(|filename| process_filename(filename, &args, &recipes, &sample_counters))
	        .collect();

	    if let Some(parent) = args.output.parent() {
	        fs::create_dir_all(parent)?;
	    }
	    if let Some(parent) = args.manifest_output.parent() {
	        fs::create_dir_all(parent)?;
	    }

	    let mut stats = Stats {
	        seen: inputs.len(),
	        ..Stats::default()
	    };
	    let mut label_counts: HashMap<String, usize> = HashMap::new();
	    let mut template_counts: HashMap<String, usize> = HashMap::new();
	    let mut examples = Vec::new();
	    let mut skipped_music_audio_collection_examples = Vec::new();
	    let mut skipped_low_frequency_audit_warning_counts: HashMap<String, usize> = HashMap::new();
	    let mut skipped_low_frequency_audit_warning_examples: HashMap<String, Vec<String>> =
	        HashMap::new();
	    let mut writer = BufWriter::new(File::create(&args.output)?);
	    for item in processed {
	        match item {
	            Processed::Written {
	                record,
	                trimmed_parent,
	            } => {
	                if trimmed_parent {
	                    stats.trimmed_parent_path += 1;
	                }
	                for label in &record.labels {
	                    *label_counts.entry(label.clone()).or_default() += 1;
	                }
	                *template_counts
	                    .entry(record.template_id.clone())
	                    .or_default() += 1;
	                // Keep the first 20 written records as manifest examples.
	                if examples.len() < 20 {
	                    examples.push(serde_json::to_value(&record)?);
	                }
	                serde_json::to_writer(&mut writer, &record)?;
	                writer.write_all(b"\n")?;
	                stats.written += 1;
	            }
	            Processed::Skipped {
	                reason,
	                trimmed_parent,
	                example,
	                warnings,
	            } => {
	                if trimmed_parent {
	                    stats.trimmed_parent_path += 1;
	                }
	                match reason {
	                    "encoding_noise" => stats.skipped_encoding_noise += 1,
	                    "music_audio_collection" => {
	                        stats.skipped_music_audio_collection += 1;
	                        if let Some(example) = example {
	                            if skipped_music_audio_collection_examples.len() < 20 {
	                                skipped_music_audio_collection_examples.push(example);
	                            }
	                        }
	                    }
	                    "no_recipe" => stats.skipped_no_recipe += 1,
	                    "sample_cap" => stats.skipped_sample_cap += 1,
	                    "role_mismatch" => stats.skipped_role_mismatch += 1,
	                    "low_frequency_audit_warning" => {
	                        stats.skipped_low_frequency_audit_warning += 1;
	                        for warning in warnings {
	                            *skipped_low_frequency_audit_warning_counts
	                                .entry(warning.clone())
	                                .or_default() += 1;
	                            if let Some(example) = example.as_ref() {
	                                let bucket = skipped_low_frequency_audit_warning_examples
	                                    .entry(warning)
	                                    .or_default();
	                                // At most 10 examples per warning.
	                                if bucket.len() < 10 {
	                                    bucket.push(example.clone());
	                                }
	                            }
	                        }
	                    }
	                    // Unknown reasons are not counted.
	                    _ => {}
	                }
	            }
	        }
	    }
	    writer.flush()?;

	    let mut top_template_counts: Vec<_> = template_counts.into_iter().collect();
	    top_template_counts.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
	    top_template_counts.truncate(20);

	    let manifest = json!({
	        "generated_at": Utc::now().to_rfc3339(),
	        "input": args.input.to_string_lossy(),
	        "recipes": args.recipes.to_string_lossy(),
	        "output": args.output.to_string_lossy(),
	        "selected_templates": recipes.len(),
	        "confidence": args.confidence,
	        "min_count": args.min_count,
	        "low_frequency_audit_max_count": args.audit_max_count,
	        "low_frequency_blocking_warnings": [
	            "ambiguous_no_episode_title",
	            "encoding_noise_survived",
	            "episode_version_missing_label",
	            "episode_in_title",
	            "generic_title_only",
	            "hash_labeled",
	            "multiple_title_spans",
	            "no_title",
	            "path_retained",
	            "sxe_compact_unexpanded",
	            "tech_in_title",
	            "template_episode_missing_label",
	            "template_sxe_missing_label"
	        ],
	        "expand": args.expand,
	        "sample_per_template": if args.expand == "sample" { Some(args.sample_per_template) } else { None },
	        "stats": stats,
	        "label_counts": label_counts,
	        "top_template_counts": top_template_counts,
	        "examples": examples,
	        "skipped_music_audio_collection_examples": skipped_music_audio_collection_examples,
	        "skipped_low_frequency_audit_warning_counts": skipped_low_frequency_audit_warning_counts,
	        "skipped_low_frequency_audit_warning_examples": skipped_low_frequency_audit_warning_examples,
	        "implementation": "rust_dmhy_template_apply"
	    });
	    // Render once and reuse the text for both the file and stdout.
	    let rendered = serde_json::to_string_pretty(&manifest)?;
	    fs::write(&args.manifest_output, &rendered)?;
	    println!("{rendered}");
	    Ok(())
	}

	/// Loads the title whitelist and then the group whitelist named in `args`.
	///
	/// # Errors
	/// Propagates the first failure of either loader.
	fn load_whitelists(args: &Args) -> Result<Whitelists> {
	    let title_phrases = load_title_whitelist(&args.title_whitelist)?;
	    let group_names = load_name_whitelist(&args.group_whitelist)?;
	    Ok(Whitelists {
	        title_phrases,
	        group_names,
	    })
	}

	/// Reads the title whitelist at `path` and converts every line with
	/// `phrase_parts_for_whitelist`, keeping only non-empty results in file order.
	///
	/// # Errors
	/// Propagates failures of `load_whitelist_lines`.
	fn load_title_whitelist(path: &PathBuf) -> Result<Vec<Vec<String>>> {
	    let entries = load_whitelist_lines(path)?;
	    let phrases = entries
	        .iter()
	        .map(|entry| phrase_parts_for_whitelist(entry))
	        .filter(|parts| !parts.is_empty())
	        .collect();
	    Ok(phrases)
	}

	/// Reads the name whitelist at `path`, normalizes each line with
	/// `normalize_whitelist_name` and returns the set of non-empty results.
	///
	/// # Errors
	/// Propagates failures of `load_whitelist_lines`.
	fn load_name_whitelist(path: &PathBuf) -> Result<HashSet<String>> {
	    let names = load_whitelist_lines(path)?
	        .into_iter()
	        .map(|line| normalize_whitelist_name(&line))
	        .filter(|name| !name.is_empty())
	        .collect();
	    Ok(names)
	}

	/// Reads the usable entries of a whitelist file.
	///
	/// A missing file is not an error and yields an empty list. Each line is trimmed; blank
	/// lines and lines starting with `#` are skipped. When a line contains a tab, only the
	/// text after the first tab is kept (trimmed again); entries that end up empty are dropped.
	///
	/// # Errors
	/// Fails when the file exists but cannot be opened or read.
	fn load_whitelist_lines(path: &PathBuf) -> Result<Vec<String>> {
	    if !path.exists() {
	        return Ok(Vec::new());
	    }
	    let file =
	        File::open(path).with_context(|| format!("failed to open whitelist {}", path.display()))?;
	    let mut lines = Vec::new();
	    for line in BufReader::new(file).lines() {
	        let line =
	            line.with_context(|| format!("failed to read whitelist {}", path.display()))?;
	        let line = line.trim();
	        if line.is_empty() || line.starts_with('#') {
	            continue;
	        }
	        // "<anything>\t<value>" rows keep only the value column.
	        let value = line
	            .split_once('\t')
	            .map_or(line, |(_, value)| value)
	            .trim();
	        if !value.is_empty() {
	            lines.push(value.to_string());
	        }
	    }
	    Ok(lines)
	}

	/// Loads the recipes selected by the command-line filters, keyed by template string.
	///
	/// Blank lines are skipped. A recipe is dropped when `--confidence` is non-empty and
	/// differs from the recipe's confidence, or when its count (missing = 0) is below
	/// `--min-count`. A later recipe with the same template replaces an earlier one.
	/// Reading stops as soon as `--limit-templates` recipes are loaded; the limit is checked
	/// before each line, so a limit of 0 yields no recipes (matching `load_input`).
	///
	/// # Errors
	/// Fails when the file cannot be opened or read, or when a line is not a valid recipe.
	fn load_recipes(args: &Args) -> Result<HashMap<String, Recipe>> {
	    let file = File::open(&args.recipes)
	        .with_context(|| format!("recipe JSONL not found: {}", args.recipes.display()))?;
	    let mut recipes = HashMap::new();
	    for (line_number, line) in BufReader::new(file).lines().enumerate() {
	        if args
	            .limit_templates
	            .is_some_and(|limit| recipes.len() >= limit)
	        {
	            break;
	        }
	        let line = line?;
	        if line.trim().is_empty() {
	            continue;
	        }
	        let row: Recipe = serde_json::from_str(&line).with_context(|| {
	            format!(
	                "invalid recipe JSON at {}:{}",
	                args.recipes.display(),
	                line_number + 1
	            )
	        })?;
	        if !args.confidence.is_empty()
	            && row.confidence.as_deref() != Some(args.confidence.as_str())
	        {
	            continue;
	        }
	        if row.count.unwrap_or(0) < args.min_count {
	            continue;
	        }
	        recipes.insert(row.template.clone(), row);
	    }
	    Ok(recipes)
	}

	/// Reads the `value` string of every JSON line in `path`.
	///
	/// Blank lines, rows without a string `value` field and values that are empty after
	/// trimming are skipped. At most `limit` values are returned when a limit is given.
	///
	/// # Errors
	/// Fails when the file cannot be opened or read, or when a non-blank line is not valid JSON.
	fn load_input(path: &PathBuf, limit: Option<usize>) -> Result<Vec<String>> {
	    let file =
	        File::open(path).with_context(|| format!("input JSONL not found: {}", path.display()))?;
	    let mut values = Vec::new();
	    for (line_number, line) in BufReader::new(file).lines().enumerate() {
	        if limit.is_some_and(|limit| values.len() >= limit) {
	            break;
	        }
	        let line = line
	            .with_context(|| format!("failed to read {}:{}", path.display(), line_number + 1))?;
	        if line.trim().is_empty() {
	            continue;
	        }
	        let row: Value = serde_json::from_str(&line)
	            .with_context(|| format!("invalid JSON at {}:{}", path.display(), line_number + 1))?;
	        if let Some(value) = row.get("value").and_then(Value::as_str) {
	            let value = value.trim();
	            if !value.is_empty() {
	                values.push(value.to_string());
	            }
	        }
	    }
	    Ok(values)
	}

	fn run_cluster(args: &Args) -> Result<()> {
	let inputs = load_input(&args.input, args.limit)?;
	let source_rows = inputs.len();
	let mut clusters: HashMap<String, Cluster> = HashMap::new();
	let mut skipped_encoding_noise = 0usize;
	let mut trimmed_parent_path = 0usize;
	let mut total_rows = 0usize;

	for original in inputs {
	if !args.keep_encoding_noise
	&& (has_encoding_noise(&original)
	\|\| has_non_anime_noise(&original)
	\|\| has_music_collection_noise(&original)
	\|\| has_abstract_path_noise(&original))
	{
	skipped_encoding_noise += 1;
	continue;
	}
	let filename = if args.preserve_parent_paths {
	original
	} else {
	let (training_filename, was_trimmed) = training_filename_for(&original);
	if was_trimmed {
	trimmed_parent_path += 1;
	}
	training_filename
	};
	add_cluster(&mut clusters, &filename, args.examples);
	total_rows += 1;
	}

	let mut sorted_clusters: Vec<_> = clusters.into_iter().collect();
	sorted_clusters.sort_by(\|a, b\| b.1.count.cmp(&a.1.count).then_with(\|\| a.0.cmp(&b.0)));

	let cluster_rows: Vec<Value> = sorted_clusters
	.iter()
	.enumerate()
	.map(\|(index, (key, cluster))\| cluster_row(index + 1, key, cluster, total_rows))
	.collect();
	let samples: Vec<Value> = cluster_rows.iter().take(args.top).cloned().collect();
	let recipe_candidates: Vec<Value> =
	cluster_rows.iter().take(args.recipe_top).cloned().collect();
	let recipes: Vec<Value> = recipe_candidates
	.iter()
	.filter(\|row\| is_high_confidence_recipe(row, args.recipe_min_count))
	.map(\|row\| recipe_row(row, "high"))
	.collect();
	let review: Vec<Value> = recipe_candidates
	.iter()
	.filter(\|row\| !is_high_confidence_recipe(row, args.recipe_min_count))
	.take(args.review_top)
	.cloned()
	.collect();

	write_jsonl_values(&args.clusters_output, &cluster_rows)?;
	write_jsonl_values(&args.samples_output, &samples)?;
	write_jsonl_values(&args.recipes_output, &recipes)?;
	write_jsonl_values(&args.review_output, &review)?;

	let mut histogram: HashMap<usize, usize> = HashMap::new();
	for (_, cluster) in &sorted_clusters {
	*histogram.entry(cluster.count).or_default() += 1;
	}
	let mut count_histogram_top: Vec<_> = histogram.into_iter().collect();
	count_histogram_top.sort_by(\|a, b\| b.1.cmp(&a.1).then_with(\|\| a.0.cmp(&b.0)));
	count_histogram_top.truncate(20);

	let rows_covered_by_repeated_templates: usize = sorted_clusters
	.iter()
	.map(\|(_, cluster)\| cluster)
	.filter(\|cluster\| cluster.count as u64 >= args.min_count)
	.map(\|cluster\| cluster.count)
	.sum();
	let templates_at_least_min_count = sorted_clusters
	.iter()
	.filter(\|(_, cluster)\| cluster.count as u64 >= args.min_count)
	.count();
	let top_templates: Vec<Value> = cluster_rows.iter().take(20).cloned().collect();
	let summary = json!({
	"input": args.input.to_string_lossy(),
	"source_rows": source_rows,
	"skipped_encoding_noise": skipped_encoding_noise,
	"trimmed_parent_path": trimmed_parent_path,
	"total_rows": total_rows,
	"unique_templates": sorted_clusters.len(),
	"min_count": args.min_count,
	"templates_at_least_min_count": templates_at_least_min_count,
	"rows_covered_by_repeated_templates": rows_covered_by_repeated_templates,
	"rows_covered_by_repeated_templates_ratio": if total_rows == 0 { 0.0 } else { rows_covered_by_repeated_templates as f64 / total_rows as f64 },
	"top_output_rows": samples.len(),
	"clusters_output": args.clusters_output.to_string_lossy(),
	"cluster_rows": cluster_rows.len(),
	"recipes_output": args.recipes_output.to_string_lossy(),
	"recipe_rows": recipes.len(),
	"review_output": args.review_output.to_string_lossy(),
	"review_rows": review.len(),
	"recipe_top": args.recipe_top,
	"recipe_min_count": args.recipe_min_count,
	"top_templates": top_templates,
	"count_histogram_top": count_histogram_top,
	"implementation": "rust_dmhy_template_cluster",
	"generated_at": Utc::now().to_rfc3339(),
	});
	if let Some(parent) = args.summary_output.parent() {
	fs::create_dir_all(parent)?;
	}
	fs::write(
	&args.summary_output,
	serde_json::to_string_pretty(&summary)?,
	)?;
	println!("{}", serde_json::to_string_pretty(&summary)?);
	Ok(())
	}

	/// Adds `filename` to the cluster identified by its template key.
	///
	/// Increments the cluster count, records the filename as an example while fewer than
	/// `example_limit` are stored, tallies every token class, and tallies literals for
	/// TEXT / BRACKET_TEXT tokens (via `strip_wrapper`) and groups (via `group_text`, per
	/// group position). Empty literals are not counted.
	fn add_cluster(clusters: &mut HashMap<String, Cluster>, filename: &str, example_limit: usize) {
	    let (key, tokens, classes, groups) = template_key_for_filename(filename);
	    let cluster = clusters.entry(key).or_default();
	    cluster.count += 1;
	    if cluster.examples.len() < example_limit {
	        cluster.examples.push(filename.to_string());
	    }
	    for (token, class_name) in tokens.iter().zip(classes.iter()) {
	        *cluster.class_counts.entry(class_name.clone()).or_default() += 1;
	        if matches!(class_name.as_str(), "TEXT" | "BRACKET_TEXT") {
	            let cleaned = strip_wrapper(token);
	            if !cleaned.is_empty() {
	                *cluster.literal_counts.entry(cleaned).or_default() += 1;
	            }
	        }
	    }
	    // Grow (never shrink) the per-position tables so every group index is addressable.
	    if cluster.position_literals.len() < groups.len() {
	        cluster
	            .position_literals
	            .resize_with(groups.len(), HashMap::new);
	    }
	    for (index, group) in groups.iter().enumerate() {
	        if matches!(group.class_name.as_str(), "TEXT" | "BRACKET_TEXT") {
	            let text = group_text(&tokens, group);
	            if !text.is_empty() {
	                *cluster.position_literals[index].entry(text).or_default() += 1;
	            }
	        }
	    }
	}

	fn cluster_row(rank: usize, key: &str, cluster: &Cluster, total: usize) -> Value {
	json!({
	"template_id": format!("tpl_{rank:06}"),
	"template": key,
	"count": cluster.count,
	"coverage": if total == 0 { 0.0 } else { cluster.count as f64 / total as f64 },
	"top_literals": top_counts(&cluster.literal_counts, 12),
	"suggested_roles": suggested_roles(key),
	"position_top_literals": cluster.position_literals.iter().map(\|counts\| top_counts(counts, 5)).collect::<Vec<_>>(),
	"class_counts": top_counts(&cluster.class_counts, 20),
	"examples": cluster.examples,
	})
	}

	/// Returns at most `limit` `(key, count)` pairs ordered by descending count, with ties
	/// broken by ascending key.
	fn top_counts(counts: &HashMap<String, usize>, limit: usize) -> Vec<(String, usize)> {
	    let mut items: Vec<(String, usize)> = counts
	        .iter()
	        .map(|(key, count)| (key.clone(), *count))
	        .collect();
	    // Map keys are unique, so the comparator is a total order and an unstable sort
	    // produces the same result as a stable one.
	    items.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
	    items.truncate(limit);
	    items
	}

	/// Decides whether a cluster row qualifies as a high-confidence recipe.
	///
	/// The row must have `count >= min_count`, an array of `suggested_roles` with no
	/// ambiguous role (any role containing `_OR_`), a `TITLE` role, and at least one anchor
	/// role (`EPISODE*`, `SPECIAL`, `SOURCE` or `RESOLUTION`). A template containing two
	/// adjacent `BRACKET_TEXT` classes additionally needs a `GROUP` role.
	fn is_high_confidence_recipe(row: &Value, min_count: usize) -> bool {
	    if row.get("count").and_then(Value::as_u64).unwrap_or(0) < min_count as u64 {
	        return false;
	    }
	    let Some(roles) = row.get("suggested_roles").and_then(Value::as_array) else {
	        return false;
	    };
	    // Non-string role entries are ignored.
	    let role_strings: Vec<&str> = roles.iter().filter_map(Value::as_str).collect();
	    // This also rejects "TITLE_OR_TEXT", so no separate check for it is needed.
	    if role_strings.iter().any(|role| role.contains("_OR_")) {
	        return false;
	    }
	    let has_title = role_strings.contains(&"TITLE");
	    let has_anchor = role_strings.iter().any(|role| {
	        role.starts_with("EPISODE") || matches!(*role, "SPECIAL" | "SOURCE" | "RESOLUTION")
	    });
	    if !has_title || !has_anchor {
	        return false;
	    }
	    let template = row.get("template").and_then(Value::as_str).unwrap_or("");
	    let needs_group = template.contains("BRACKET_TEXT BRACKET_TEXT");
	    !needs_group || role_strings.contains(&"GROUP")
	}

	fn recipe_row(row: &Value, confidence: &str) -> Value {
	json!({
	"template_id": row["template_id"],
	"template": row["template"],
	"roles": row["suggested_roles"],
	"confidence": confidence,
	"count": row["count"],
	"examples": row["examples"],
	})
	}

	/// Writes `rows` to `path` as JSON Lines, creating the parent directory first and
	/// truncating any existing file.
	///
	/// # Errors
	/// Fails on any directory-creation, file, serialization or flush error.
	fn write_jsonl_values(path: &PathBuf, rows: &[Value]) -> Result<()> {
	    if let Some(dir) = path.parent() {
	        fs::create_dir_all(dir)?;
	    }
	    let mut out = BufWriter::new(File::create(path)?);
	    rows.iter().try_for_each(|value| -> Result<()> {
	        serde_json::to_writer(&mut out, value)?;
	        out.write_all(b"\n")?;
	        Ok(())
	    })?;
	    // Flush explicitly so write errors are reported instead of being lost on drop.
	    out.flush()?;
	    Ok(())
	}

	fn run_low_frequency_audit(args: &Args) -> Result<()> {
	let recipes = load_recipes(args)?;
	let inputs = load_input(&args.input, args.limit)?;
	let low_template_total = recipes
	.values()
	.filter(\|recipe\| recipe.count.unwrap_or(0) <= args.audit_max_count)
	.count();
	let mut seen_templates = HashSet::new();
	let mut rows = Vec::new();

	for original in inputs {
	if !args.keep_encoding_noise
	&& (has_encoding_noise(&original)
	\|\| has_non_anime_noise(&original)
	\|\| has_music_collection_noise(&original)
	\|\| has_abstract_path_noise(&original))
	{
	continue;
	}
	let (training_filename, trimmed_parent) = training_filename_for(&original);
	let (key, _tokens, _classes, groups) = template_key_for_filename(&training_filename);
	let Some(recipe) = recipes.get(&key) else {
	continue;
	};
	let count = recipe.count.unwrap_or(0);
	if count > args.audit_max_count \|\| !seen_templates.insert(recipe.template_id.clone()) {
	continue;
	}
	if recipe.roles.len() != groups.len() {
	continue;
	}
	let Some(mut record) = dmhy_record(&training_filename, &recipe.template_id, &recipe.roles)
	else {
	continue;
	};
	if trimmed_parent {
	record.source_filename = Some(original.clone());
	record.path_trimmed = Some(true);
	}
	rows.push(json!({
	"template_id": recipe.template_id,
	"count": count,
	"template": recipe.template,
	"filename": record.filename,
	"source_filename": record.source_filename,
	"path_trimmed": record.path_trimmed.unwrap_or(false),
	"spans": entity_spans(&record.tokens, &record.labels),
	"warnings": audit_warnings(&record),
	"tokens": record.tokens,
	"labels": record.labels,
	}));
	if seen_templates.len() >= low_template_total {
	break;
	}
	}

	rows.sort_by(\|a, b\| {
	let count_a = a.get("count").and_then(Value::as_u64).unwrap_or(0);
	let count_b = b.get("count").and_then(Value::as_u64).unwrap_or(0);
	let id_a = a.get("template_id").and_then(Value::as_str).unwrap_or("");
	let id_b = b.get("template_id").and_then(Value::as_str).unwrap_or("");
	count_a.cmp(&count_b).then_with(\|\| id_a.cmp(id_b))
	});
	write_jsonl_values(&args.audit_output, &rows)?;
	let warning_counts = warning_counts(&rows);
	let manifest = json!({
	"generated_at": Utc::now().to_rfc3339(),
	"input": args.input.to_string_lossy(),
	"recipes": args.recipes.to_string_lossy(),
	"audit_output": args.audit_output.to_string_lossy(),
	"audit_max_count": args.audit_max_count,
	"low_template_total": low_template_total,
	"audited_templates": rows.len(),
	"warning_counts": warning_counts,
	"implementation": "rust_dmhy_low_frequency_audit"
	});
	println!("{}", serde_json::to_string_pretty(&manifest)?);
	Ok(())
	}

	/// Verify mode: re-checks generated records for blocking warnings.
	///
	/// `--input` is read as a JSONL file of `Record`s. Records whose template is unknown to
	/// the loaded recipes, or whose recipe count exceeds `--audit-max-count`, are only
	/// counted. For the remaining low-frequency records every blocking warning reported by
	/// `audit_warnings` is tallied and up to five examples per warning are kept. The
	/// manifest is printed to stdout.
	///
	/// # Errors
	/// Fails on I/O or parse errors, and when at least one blocking warning was found.
	fn run_verify_generated_output(args: &Args) -> Result<()> {
	    let file = File::open(&args.input)
	        .with_context(|| format!("generated JSONL not found: {}", args.input.display()))?;
	    let recipes_by_id: HashMap<String, u64> = load_recipes(args)?
	        .into_values()
	        .map(|recipe| (recipe.template_id, recipe.count.unwrap_or(0)))
	        .collect();
	    let mut rows = 0usize;
	    let mut low_frequency_rows = 0usize;
	    let mut warning_counts: HashMap<String, usize> = HashMap::new();
	    let mut examples: HashMap<String, Vec<Value>> = HashMap::new();

	    for (line_number, line) in BufReader::new(file).lines().enumerate() {
	        let line = line?;
	        if line.trim().is_empty() {
	            continue;
	        }
	        let record: Record = serde_json::from_str(&line).with_context(|| {
	            format!(
	                "invalid generated record at {}:{}",
	                args.input.display(),
	                line_number + 1
	            )
	        })?;
	        rows += 1;
	        // Unknown templates get the maximum count so they never qualify as low-frequency.
	        let count = recipes_by_id
	            .get(&record.template_id)
	            .copied()
	            .unwrap_or(u64::MAX);
	        if count > args.audit_max_count {
	            continue;
	        }
	        low_frequency_rows += 1;
	        for warning in audit_warnings(&record) {
	            // Only blocking warnings are reported; the list mirrors the one in `main`'s
	            // manifest ("low_frequency_blocking_warnings").
	            if !matches!(
	                warning.as_str(),
	                "ambiguous_no_episode_title"
	                    | "encoding_noise_survived"
	                    | "episode_version_missing_label"
	                    | "episode_in_title"
	                    | "generic_title_only"
	                    | "hash_labeled"
	                    | "multiple_title_spans"
	                    | "no_title"
	                    | "path_retained"
	                    | "sxe_compact_unexpanded"
	                    | "tech_in_title"
	                    | "template_episode_missing_label"
	                    | "template_sxe_missing_label"
	            ) {
	                continue;
	            }
	            *warning_counts.entry(warning.clone()).or_default() += 1;
	            let bucket = examples.entry(warning).or_default();
	            if bucket.len() < 5 {
	                bucket.push(json!({
	                    "template_id": record.template_id,
	                    "template_count": count,
	                    "filename": record.filename,
	                    "spans": entity_spans(&record.tokens, &record.labels),
	                }));
	            }
	        }
	    }

	    let manifest = json!({
	        "generated_at": Utc::now().to_rfc3339(),
	        "input": args.input.to_string_lossy(),
	        "recipes": args.recipes.to_string_lossy(),
	        "audit_max_count": args.audit_max_count,
	        "rows": rows,
	        "low_frequency_rows": low_frequency_rows,
	        "blocking_warning_counts": warning_counts,
	        "examples": examples,
	        "implementation": "rust_dmhy_generated_output_verify"
	    });
	    println!("{}", serde_json::to_string_pretty(&manifest)?);
	    if !warning_counts.is_empty() {
	        bail!("generated output still has low-frequency blocking warnings");
	    }
	    Ok(())
	}

	fn run_rich_annotations(args: &Args) -> Result<()> {
	let inputs = load_input(&args.input, args.limit)?;
	if let Some(parent) = args.rich_output.parent() {
	fs::create_dir_all(parent)?;
	}
	let rows: Vec<Value> = inputs
	.par_iter()
	.filter_map(\|original\| {
	if !args.keep_encoding_noise
	&& (has_encoding_noise(original)
	\|\| has_non_anime_noise(original)
	\|\| has_music_collection_noise(original)
	\|\| has_abstract_path_noise(original))
	{
	return None;
	}
	Some(rich_annotation_for(original))
	})
	.collect();
	let mut writer = BufWriter::new(File::create(&args.rich_output)?);
	for row in &rows {
	serde_json::to_writer(&mut writer, row)?;
	writer.write_all(b"\n")?;
	}
	writer.flush()?;
	let manifest = json!({
	"generated_at": Utc::now().to_rfc3339(),
	"input": args.input.to_string_lossy(),
	"rich_output": args.rich_output.to_string_lossy(),
	"rows": rows.len(),
	"implementation": "rust_dmhy_rich_annotations",
	"notes": [
	"rich roles are metadata for review/projection, not final training BIO labels",
	"TITLE_* candidates may be collapsed or filtered before dmhy_weak generation"
	]
	});
	println!("{}", serde_json::to_string_pretty(&manifest)?);
	Ok(())
	}

	/// Builds the rich-annotation JSON object for one source filename.
	///
	/// The object carries the source name, the derived training filename and its
	/// trim flag, one annotated entry per non-empty path segment, and a preview
	/// of the record projected from the training filename (`null` when
	/// `dmhy_record` yields nothing).
	fn rich_annotation_for(original: &str) -> Value {
	    let (training_filename, path_trimmed) = training_filename_for(original);

	    // Split on either path separator and ignore blank segments.
	    let parts: Vec<&str> = original
	        .split(['/', '\\'])
	        .map(str::trim)
	        .filter(|part| !part.is_empty())
	        .collect();
	    let part_count = parts.len();
	    let mut segments = Vec::with_capacity(part_count);
	    for (index, segment) in parts.iter().enumerate() {
	        // The last non-empty segment is the leaf.
	        segments.push(rich_segment(segment, index, index + 1 == part_count));
	    }

	    let (training_key, _, _, _) = template_key_for_filename(&training_filename);
	    let training_roles = suggested_roles(&training_key);
	    let projection = dmhy_record(&training_filename, "rich_projection", &training_roles).map(
	        |record| {
	            json!({
	                "filename": record.filename,
	                "spans": entity_spans(&record.tokens, &record.labels),
	                "warnings": audit_warnings(&record),
	            })
	        },
	    );

	    json!({
	        "source_filename": original,
	        "training_filename": training_filename,
	        "path_trimmed": path_trimmed,
	        "segments": segments,
	        "projection_preview": projection,
	    })
	}

	fn rich_segment(segment: &str, index: usize, is_leaf: bool) -> Value {
	let (key, tokens, _classes, groups) = template_key_for_filename(segment);
	let suggested = suggested_roles(&key);
	let roles = adjust_contextual_roles(&tokens, &groups, &suggested);
	let roles = refine_semantic_roles(&tokens, &groups, &roles);
	let candidates = rich_candidates_for_segment(segment, &tokens, &groups, &roles, is_leaf);
	json!({
	"index": index,
	"text": segment,
	"kind": rich_segment_kind(segment, is_leaf),
	"template": key,
	"candidates": candidates,
	})
	}

	/// Names the kind of a path segment.
	///
	/// Media-noise detection takes precedence, then plain season directories;
	/// anything else is `"leaf"` or `"parent"` depending on `is_leaf`.
	fn rich_segment_kind(segment: &str, is_leaf: bool) -> &'static str {
	    if path_segment_is_media_noise(segment) {
	        return "media_noise";
	    }
	    if path_segment_is_plain_season(segment) {
	        return "season_dir";
	    }
	    match is_leaf {
	        true => "leaf",
	        false => "parent",
	    }
	}

	fn rich_candidates_for_segment(
	segment: &str,
	tokens: &[String],
	groups: &[Group],
	roles: &[String],
	is_leaf: bool,
	) -> Vec<Value> {
	let mut output = Vec::new();
	let title_ranges = title_candidates(groups, roles);
	for (candidate_index, (start, end)) in title_ranges.iter().copied().enumerate() {
	let text = candidate_text(tokens, groups, start, end);
	if text.trim().is_empty() {
	continue;
	}
	output.push(json!({
	"role": fine_title_role_for_candidate(&roles, start, end)
	.unwrap_or_else(\|\| fine_title_role(segment, &text, is_leaf, candidate_index, title_ranges.len()).to_string()),
	"coarse_role": "TITLE",
	"text": text,
	"group_start": start,
	"group_end": end,
	}));
	}
	for (group_index, role) in roles.iter().enumerate() {
	if is_title_role(role) \|\| role == "O" \|\| role == "HASH" {
	continue;
	}
	let text = group_text(tokens, &groups[group_index]);
	if text.trim().is_empty() {
	continue;
	}
	let coarse_role = role_label(role)
	.strip_prefix("B-")
	.map(str::to_string)
	.unwrap_or_else(\|\| "O".to_string());
	output.push(json!({
	"role": fine_non_title_role(role),
	"coarse_role": coarse_role,
	"text": text,
	"group_start": group_index,
	"group_end": group_index + 1,
	}));
	}
	output
	}

	fn fine_title_role_for_candidate(roles: &[String], start: usize, end: usize) -> Option<String> {
	let mut entities: Vec<&str> = roles[start..end]
	.iter()
	.filter_map(\|role\| title_entity_from_role(role))
	.filter(\|entity\| *entity != "TITLE")
	.collect();
	entities.sort();
	entities.dedup();
	match entities.len() {
	0 => None,
	1 => Some(entities[0].to_string()),
	_ => Some("TITLE_MIXED".to_string()),
	}
	}

	/// Joins the tokens covered by groups `start..end` into one string and
	/// strips a surrounding bracket pair from the result.
	///
	/// The span runs from the first token index of group `start` to the last
	/// token index of group `end - 1`, inclusive. Returns an empty string when
	/// either group is missing, has no token indices, or when the resulting
	/// token range is inverted or out of bounds.
	fn candidate_text(tokens: &[String], groups: &[Group], start: usize, end: usize) -> String {
	    let first = groups
	        .get(start)
	        .and_then(|group| group.indices.first())
	        .copied();
	    let last = groups
	        .get(end.saturating_sub(1))
	        .and_then(|group| group.indices.last())
	        .copied();
	    let (Some(first), Some(last)) = (first, last) else {
	        return String::new();
	    };
	    // Checked slicing keeps this helper panic-free for degenerate ranges
	    // (for example `start == end`, where `last` can precede `first`).
	    match tokens.get(first..=last) {
	        Some(window) => strip_wrapper(&window.join("")),
	        None => String::new(),
	    }
	}

	/// Positional fallback for naming a title candidate's fine role.
	///
	/// Checks, in order: version-like text, a small set of category words, then
	/// the candidate's position (non-leaf segment, leaf that starts with an
	/// episode marker, secondary candidate, primary candidate).
	fn fine_title_role(
	    segment: &str,
	    text: &str,
	    is_leaf: bool,
	    candidate_index: usize,
	    candidate_count: usize,
	) -> &'static str {
	    let cleaned = text.trim();
	    if VERSIONISH_TITLE_RE.is_match(cleaned) {
	        return "RELEASE_VERSION";
	    }

	    let lowered = cleaned.to_ascii_lowercase();
	    let is_category = ["国漫", "國漫", "anime", "movie", "movies"].contains(&lowered.as_str());
	    if is_category {
	        return "TITLE_CATEGORY";
	    }

	    match is_leaf {
	        false => "PATH_TITLE",
	        true if path_segment_starts_with_episode(segment) => "EPISODE_TITLE",
	        true if candidate_count > 1 && candidate_index > 0 => "TITLE_ALIAS",
	        true => "TITLE_MAIN",
	    }
	}

	/// Maps a non-title role name to the fine role used in rich annotations.
	/// Roles without an entry in the table map to `"OTHER"`.
	fn fine_non_title_role(role: &str) -> &'static str {
	    const FINE_ROLES: &[(&str, &str)] = &[
	        ("GROUP", "RELEASE_GROUP"),
	        ("EPISODE", "EPISODE"),
	        ("EPISODE_VERSION", "EPISODE"),
	        ("EPISODE_RANGE", "EPISODE"),
	        ("SEASON", "SEASON"),
	        ("PATH_SEASON", "PATH_SEASON"),
	        ("TAG", "TAG"),
	        ("SPECIAL", "SPECIAL"),
	        ("VOLUME", "SPECIAL"),
	        ("RESOLUTION", "RESOLUTION"),
	        ("SOURCE", "SOURCE"),
	    ];
	    FINE_ROLES
	        .iter()
	        .find(|entry| entry.0 == role)
	        .map_or("OTHER", |entry| entry.1)
	}

	/// Collapses token/label pairs into `{ "label", "text" }` span objects.
	///
	/// A label's entity is whatever follows its `B-` or `I-` prefix; labels
	/// without either prefix count as `"O"`. Consecutive tokens with the same
	/// entity are concatenated into one span, and `"O"` spans are left out.
	fn entity_spans(tokens: &[String], labels: &[String]) -> Vec<Value> {
	    // Runs of (entity, concatenated text), including the "O" runs for now.
	    let mut runs: Vec<(&str, String)> = Vec::new();
	    for (token, label) in tokens.iter().zip(labels) {
	        let entity = label
	            .strip_prefix("B-")
	            .or_else(|| label.strip_prefix("I-"))
	            .unwrap_or("O");
	        match runs.last_mut() {
	            Some((open, text)) if *open == entity => text.push_str(token),
	            _ => runs.push((entity, token.clone())),
	        }
	    }
	    runs.into_iter()
	        .filter(|(entity, _)| *entity != "O")
	        .map(|(label, text)| json!({ "label": label, "text": text }))
	        .collect()
	}

	/// Collects audit warning codes for a generated record.
	///
	/// Looks at the record's title spans, its episode/season/special labels, its
	/// template string, its filename(s) and its individual tokens. The returned
	/// codes are sorted and de-duplicated.
	fn audit_warnings(record: &Record) -> Vec<String> {
	let mut warnings = Vec::new();
	let title_texts = title_entity_texts(&record.tokens, &record.labels);
	let title_spans = title_texts.len();
	// Title presence: no title span at all, or a title entity that shows up in
	// more than one separate span.
	if title_spans == 0 {
	warnings.push("no_title".to_string());
	} else if repeated_title_entity_spans(&record.labels) {
	warnings.push("multiple_title_spans".to_string());
	}
	// Title quality checks on the extracted title texts.
	if !title_texts.is_empty() && title_texts.iter().all(\|title\| generic_title_text(title)) {
	warnings.push("generic_title_only".to_string());
	}
	if title_texts.iter().any(\|title\| technical_title_text(title)) {
	warnings.push("tech_in_title".to_string());
	}
	if title_texts.iter().any(\|title\| episodeish_title_text(title)) {
	warnings.push("episode_in_title".to_string());
	}
	// Entity presence is decided by label suffix, so both B- and I- labels count.
	let has_episode = record.labels.iter().any(\|label\| label.ends_with("EPISODE"));
	let has_season = record.labels.iter().any(\|label\| label.ends_with("SEASON"));
	let has_special = record.labels.iter().any(\|label\| label.ends_with("SPECIAL"));
	if !has_episode {
	warnings.push("no_episode".to_string());
	// The template mentions an episode slot but no label carries it; a
	// SPECIAL label suppresses this warning.
	if record.template.contains("EPISODE") && !has_special {
	warnings.push("template_episode_missing_label".to_string());
	}
	if record
	.dropped_title_candidate_positions
	.as_ref()
	.is_some_and(\|dropped\| !dropped.is_empty())
	{
	warnings.push("ambiguous_no_episode_title".to_string());
	}
	}
	if record.template.contains("SXE") && (!has_season \|\| !has_episode) {
	warnings.push("template_sxe_missing_label".to_string());
	}
	// A path separator still present in the training filename.
	if record.filename.contains('/') \|\| record.filename.contains('\\') {
	warnings.push("path_retained".to_string());
	}
	if has_encoding_noise(&record.filename)
	\|\| record
	.source_filename
	.as_ref()
	.is_some_and(\|source\| has_encoding_noise(source))
	{
	warnings.push("encoding_noise_survived".to_string());
	}
	// Per-token checks. Note the `break`: once a labeled hash token is found,
	// the remaining tokens are not inspected.
	for (index, token) in record.tokens.iter().enumerate() {
	let entity = record
	.labels
	.get(index)
	.and_then(\|label\| label_entity(label));
	let cleaned = strip_wrapper(token);
	if HASH_RE.is_match(token) && record.labels.get(index).is_some_and(\|label\| label != "O") {
	warnings.push("hash_labeled".to_string());
	break;
	}
	if EPISODE_VERSION_RE.is_match(&compact_for_classify(&cleaned)) && entity != Some("EPISODE")
	{
	warnings.push("episode_version_missing_label".to_string());
	}
	if SXE_VALUE_RE.is_match(&cleaned) && entity != Some("EPISODE") && entity != Some("SEASON")
	{
	warnings.push("sxe_compact_unexpanded".to_string());
	}
	}
	// Per-token warnings can repeat; normalise the output.
	warnings.sort();
	warnings.dedup();
	warnings
	}

	/// Returns the entity part of a `B-`/`I-` prefixed label, or `None` when the
	/// label carries neither prefix. `B-` is tried before `I-`.
	fn label_entity(label: &str) -> Option<&str> {
	    ["B-", "I-"]
	        .iter()
	        .find_map(|prefix| label.strip_prefix(*prefix))
	}

	/// Extracts the text of every title span from token/label pairs.
	///
	/// Consecutive tokens labelled with the same title entity are concatenated.
	/// Each span is trimmed, and spans that end up blank are dropped.
	fn title_entity_texts(tokens: &[String], labels: &[String]) -> Vec<String> {
	    // Emits the buffered span (if it has visible text) and resets the buffer.
	    fn flush(buffer: &mut String, spans: &mut Vec<String>) {
	        let text = buffer.trim();
	        if !text.is_empty() {
	            spans.push(text.to_string());
	        }
	        buffer.clear();
	    }

	    let mut spans = Vec::new();
	    let mut buffer = String::new();
	    let mut open: Option<&str> = None;
	    for (token, label) in tokens.iter().zip(labels) {
	        let entity = label_entity(label).filter(|entity| is_title_entity(entity));
	        let continues = entity.is_some() && open == entity;
	        if !continues {
	            flush(&mut buffer, &mut spans);
	            open = entity;
	        }
	        if entity.is_some() {
	            buffer.push_str(token);
	        }
	    }
	    flush(&mut buffer, &mut spans);
	    spans
	}

	/// Reports whether any title entity starts a second, separate span.
	///
	/// A span starts whenever a label's title entity differs from that of the
	/// previous label; a non-title label in between ends the current span.
	fn repeated_title_entity_spans(labels: &[String]) -> bool {
	    let mut started: HashSet<&str> = HashSet::new();
	    let mut previous: Option<&str> = None;
	    for label in labels {
	        let entity = label_entity(label).filter(|entity| is_title_entity(entity));
	        if let Some(name) = entity {
	            // `insert` returns false when this entity already opened a span.
	            if previous != entity && !started.insert(name) {
	                return true;
	            }
	        }
	        previous = entity;
	    }
	    false
	}

	/// Returns true when the trimmed, ASCII-lowercased text is exactly one of a
	/// fixed list of generic words (e.g. "tv", "movie", "pv", "menu").
	fn generic_title_text(text: &str) -> bool {
	    const GENERIC_WORDS: &[&str] = &[
	        "tv",
	        "movie",
	        "mov",
	        "sample",
	        "commercial",
	        "commercials",
	        "cm",
	        "pv",
	        "op",
	        "ed",
	        "ncop",
	        "nced",
	        "menu",
	        "trailer",
	        "spot",
	        "bdmv",
	        "stream",
	    ];
	    let lowered = text.trim().to_ascii_lowercase();
	    GENERIC_WORDS.contains(&lowered.as_str())
	}

	/// Returns true when the ASCII-lowercased text contains any of a fixed list
	/// of technical markers (rip/codec/audio names) as a substring.
	fn technical_title_text(text: &str) -> bool {
	    const TECH_MARKERS: &[&str] = &[
	        "bdrip",
	        "webrip",
	        "web-dl",
	        "hevc",
	        "x264",
	        "x265",
	        "aac",
	        "flac",
	        "sourceunknown",
	    ];
	    let normalized = text.to_ascii_lowercase();
	    TECH_MARKERS
	        .iter()
	        .any(|marker| normalized.contains(*marker))
	}

	fn episodeish_title_text(text: &str) -> bool {
	let trimmed = text.trim();
	EPISODE_VALUE_RE.is_match(trimmed)
	\|\| EPISODE_CJK_RE.is_match(trimmed)
	\|\| EPISODE_RANGE_RE.is_match(trimmed)
	\|\| trimmed.chars().all(\|ch\| ch.is_ascii_digit())
	}

	/// Tallies how often each warning string occurs across `rows`.
	///
	/// Only rows with an array-valued `"warnings"` field contribute, and only
	/// the string elements of that array are counted.
	fn warning_counts(rows: &[Value]) -> HashMap<String, usize> {
	    let mut counts: HashMap<String, usize> = HashMap::new();
	    rows.iter()
	        .filter_map(|row| row.get("warnings")?.as_array())
	        .flatten()
	        .filter_map(Value::as_str)
	        .for_each(|warning| *counts.entry(warning.to_string()).or_default() += 1);
	    counts
	}

	fn process_filename(
	original: &str,
	args: &Args,
	recipes: &HashMap<String, Recipe>,
	sample_counters: &HashMap<String, AtomicUsize>,
	) -> Processed {
	if !args.keep_encoding_noise && has_music_collection_noise(original) {
	return Processed::Skipped {
	reason: "music_audio_collection",
	trimmed_parent: false,
	example: Some(original.to_string()),
	warnings: Vec::new(),
	};
	}
	if !args.keep_encoding_noise
	&& (has_encoding_noise(original)
	\|\| has_non_anime_noise(original)
	\|\| has_abstract_path_noise(original))
	{
	return Processed::Skipped {
	reason: "encoding_noise",
	trimmed_parent: false,
	example: None,
	warnings: Vec::new(),
	};
	}
	let (training_filename, trimmed_parent) = training_filename_for(original);
	let (key, _tokens, _classes, groups) = template_key_for_filename(&training_filename);
	let recipe = match recipes.get(&key) {
	Some(recipe) => recipe,
	None => {
	return Processed::Skipped {
	reason: "no_recipe",
	trimmed_parent,
	example: None,
	warnings: Vec::new(),
	}
	}
	};
	if args.expand == "sample" {
	let counter = sample_counters.get(&recipe.template_id).unwrap();
	if counter.fetch_add(1, Ordering::Relaxed) >= args.sample_per_template {
	return Processed::Skipped {
	reason: "sample_cap",
	trimmed_parent,
	example: None,
	warnings: Vec::new(),
	};
	}
	}
	if recipe.roles.len() != groups.len() {
	return Processed::Skipped {
	reason: "role_mismatch",
	trimmed_parent,
	example: None,
	warnings: Vec::new(),
	};
	}
	let mut record = match dmhy_record(&training_filename, &recipe.template_id, &recipe.roles) {
	Some(record) => record,
	None => {
	return Processed::Skipped {
	reason: "role_mismatch",
	trimmed_parent,
	example: None,
	warnings: Vec::new(),
	}
	}
	};
	let warnings = audit_warnings(&record);
	if warnings.iter().any(\|warning\| warning == "no_title") \|\| has_blocking_warnings(&warnings) {
	return Processed::Skipped {
	reason: "low_frequency_audit_warning",
	trimmed_parent,
	example: Some(record.filename.clone()),
	warnings,
	};
	}
	if trimmed_parent {
	record.source_filename = Some(original.to_string());
	record.path_trimmed = Some(true);
	return Processed::Written {
	record,
	trimmed_parent: true,
	};
	}
	Processed::Written {
	record,
	trimmed_parent: false,
	}
	}

	/// Returns true when at least one warning belongs to the fixed set of
	/// blocking warning codes.
	fn has_blocking_warnings(warnings: &[String]) -> bool {
	    const BLOCKING: &[&str] = &[
	        "ambiguous_no_episode_title",
	        "encoding_noise_survived",
	        "episode_version_missing_label",
	        "episode_in_title",
	        "generic_title_only",
	        "hash_labeled",
	        "multiple_title_spans",
	        "no_title",
	        "path_retained",
	        "sxe_compact_unexpanded",
	        "tech_in_title",
	        "template_episode_missing_label",
	        "template_sxe_missing_label",
	    ];
	    warnings
	        .iter()
	        .any(|warning| BLOCKING.contains(&warning.as_str()))
	}

	/// Splits `value` into tokens, left to right.
	///
	/// At each position `next_token` is asked for a token; when it has none, the
	/// single next character becomes its own token.
	fn tokenize(value: &str) -> Vec<String> {
	    let mut output = Vec::new();
	    let mut rest = value;
	    while let Some(first) = rest.chars().next() {
	        let (token, consumed) =
	            next_token(rest).unwrap_or_else(|| (first.to_string(), first.len_utf8()));
	        output.push(token);
	        rest = &rest[consumed..];
	    }
	    output
	}

	/// Returns the token at the start of `rest` together with its byte length,
	/// or `None` when nothing matches.
	///
	/// A leading `[`, `(` or `【` consumes everything up to and including the
	/// first matching closer, provided the closer is near enough. Otherwise the
	/// first entry of `TOKEN_REGEXES` with a non-empty match at offset 0 wins.
	fn next_token(rest: &str) -> Option<(String, usize)> {
	    let first = rest.chars().next()?;

	    // Byte length of a bracketed token, if one starts here.
	    // NOTE(review): the ASCII bracket limits compare the closer's byte offset
	    // (<= 121) while the `【` limit counts characters before the closer
	    // (<= 120); confirm the difference is intended.
	    let bracket_len = match first {
	        '[' => rest.find(']').filter(|&end| end <= 121).map(|end| end + 1),
	        '(' => rest.find(')').filter(|&end| end <= 121).map(|end| end + 1),
	        '【' => rest
	            .find('】')
	            .filter(|&end| rest[..end].chars().count() <= 120)
	            .map(|end| end + '】'.len_utf8()),
	        _ => None,
	    };
	    if let Some(len) = bracket_len {
	        return Some((rest[..len].to_string(), len));
	    }

	    TOKEN_REGEXES
	        .iter()
	        .filter_map(|re| re.find(rest))
	        .find(|mat| mat.start() == 0 && mat.end() > 0)
	        .map(|mat| (mat.as_str().to_string(), mat.end()))
	}

	/// Removes one surrounding bracket pair (`[]`, `()` or `【】`) from `token`
	/// and trims the result.
	///
	/// The opener must be the very first character and the matching closer the
	/// very last; otherwise the token is only trimmed.
	fn strip_wrapper(token: &str) -> String {
	    const PAIRS: [(char, char); 3] = [('[', ']'), ('(', ')'), ('【', '】')];
	    let inner = PAIRS
	        .iter()
	        .find_map(|&(open, close)| token.strip_prefix(open)?.strip_suffix(close));
	    inner.unwrap_or(token).trim().to_string()
	}

	/// Splits bracket contents into parts on whitespace and the separator
	/// characters `_ . , + / & | - ( ) （ ）`, discarding empty parts.
	fn split_inner(inner: &str) -> Vec<String> {
	    let is_separator = |ch: char| ch.is_whitespace() || "_.,+/&|-()（）".contains(ch);
	    inner
	        .split(is_separator)
	        .filter(|part| !part.is_empty())
	        .map(str::to_string)
	        .collect()
	}

	/// Returns `text` with all whitespace and the characters `_ . , -` removed.
	fn compact_for_classify(text: &str) -> String {
	    let mut compact = String::with_capacity(text.len());
	    for ch in text.chars() {
	        let dropped = ch.is_whitespace() || "_.,-".contains(ch);
	        if !dropped {
	            compact.push(ch);
	        }
	    }
	    compact
	}

	fn classify_atom(text: &str) -> String {
	let cleaned = strip_wrapper(text);
	let compact = compact_for_classify(&cleaned);
	if cleaned.is_empty() {
	return "EMPTY".to_string();
	}
	if HASH_RE.is_match(&cleaned) {
	return "HASH".to_string();
	}
	if RESOLUTION_RE.is_match(&cleaned) {
	return "RESOLUTION".to_string();
	}
	if DATE_RE.is_match(&cleaned)
	\|\| DATE_RANGE_MIXED_RE.is_match(&cleaned)
	\|\| CJK_DATE_RE.is_match(&cleaned)
	{
	return "DATE".to_string();
	}
	if EPISODE_VERSION_RE.is_match(&compact) {
	return "EPISODE_VERSION".to_string();
	}
	if EPISODE_WITH_SUFFIX_RE.is_match(&cleaned) {
	return "EPISODE_VERSION".to_string();
	}
	if SXE_RE.is_match(&compact) {
	return "SXE".to_string();
	}
	if EPISODE_RE.is_match(&compact) {
	return "EPISODE".to_string();
	}
	if EPISODE_CJK_RE.is_match(&cleaned) {
	return "EPISODE".to_string();
	}
	if EPISODE_BATCH_RE.is_match(&cleaned) {
	return "EPISODE_RANGE".to_string();
	}
	if EPISODE_RANGE_RE.is_match(&cleaned) {
	return "EPISODE_RANGE".to_string();
	}
	if EPISODE_RE.is_match(&cleaned) {
	return "EPISODE".to_string();
	}
	if SEASON_RE.is_match(&cleaned) {
	return "SEASON".to_string();
	}
	if SPECIAL_RE.is_match(&cleaned) {
	return "SPECIAL".to_string();
	}
	if VOLUME_RE.is_match(&cleaned) {
	return "VOLUME".to_string();
	}
	if LANG_RE.is_match(&cleaned) \|\| lang_block_matches(&cleaned) {
	return "LANG".to_string();
	}
	if MEDIA_RE.is_match(&cleaned) {
	return "MEDIA".to_string();
	}
	"TEXT".to_string()
	}

	/// Heuristic test for subtitle/language marker text.
	///
	/// True when the ASCII-uppercased text contains one of the Latin markers
	/// (`CHS`, `CHT`, `ZHS`, `ZHT`, `BIG5`, `GB`), when the text contains one of
	/// the CJK markers, or when it contains `字幕` not immediately followed by
	/// `组` or `組`.
	fn lang_block_matches(text: &str) -> bool {
	    const LATIN_MARKERS: &[&str] = &["CHS", "CHT", "ZHS", "ZHT", "BIG5", "GB"];
	    const CJK_MARKERS: &[&str] = &[
	        "简繁", "简日", "繁日", "简体", "繁体", "雙語", "双语", "内封", "外挂",
	    ];

	    let upper = text.to_ascii_uppercase();
	    if LATIN_MARKERS.iter().any(|marker| upper.contains(*marker)) {
	        return true;
	    }
	    if CJK_MARKERS.iter().any(|marker| text.contains(*marker)) {
	        return true;
	    }
	    // "字幕" counts unless it is the start of "字幕组" / "字幕組".
	    text.match_indices("字幕").any(|(position, hit)| {
	        let following = text[position + hit.len()..].chars().next();
	        !matches!(following, Some('组' | '組'))
	    })
	}

	/// Classifies one token produced by the tokenizer.
	///
	/// Empty, all-whitespace, all-path-separator and all-punctuation tokens get
	/// the fixed classes `EMPTY`, `SPACE`, `PATH` and `SEP`. Tokens that start
	/// with an opening bracket are classified from their contents and returned as
	/// `BRACKET_<inner class>`. Everything else goes through `classify_atom`.
	fn classify_token(token: &str) -> String {
	if token.is_empty() {
	return "EMPTY".to_string();
	}
	if token.chars().all(char::is_whitespace) {
	return "SPACE".to_string();
	}
	if token.chars().all(\|ch\| ch == '/' \|\| ch == '\\') {
	return "PATH".to_string();
	}
	if token.chars().all(\|ch\| "-_.:：+&\|".contains(ch)) {
	return "SEP".to_string();
	}
	if token.starts_with('[') \|\| token.starts_with('(') \|\| token.starts_with('【') {
	// Classify the bracket contents both as a whole and part by part.
	let inner = strip_wrapper(token);
	let parts = split_inner(&inner);
	let whole_class = classify_atom(&inner);
	let inner_class = if whole_class != "TEXT" {
	// The whole content already has a class. Only a multi-part LANG result
	// is re-examined per part, so a uniform or media-only bracket is not
	// reported as LANG.
	if whole_class == "LANG" && parts.len() > 1 {
	let part_classes: Vec<String> =
	parts.iter().map(\|part\| classify_atom(part)).collect();
	if part_classes.iter().all(\|item\| item == &part_classes[0]) {
	part_classes[0].clone()
	} else if part_classes.iter().all(\|item\| is_media_block_class(item)) {
	"MEDIA_BLOCK".to_string()
	} else {
	whole_class
	}
	} else {
	whole_class
	}
	} else if parts.is_empty() {
	"EMPTY".to_string()
	} else {
	// The whole content is plain TEXT: derive the class from the parts.
	let part_classes: Vec<String> = parts.iter().map(\|part\| classify_atom(part)).collect();
	if part_classes.iter().all(\|item\| item == &part_classes[0]) {
	part_classes[0].clone()
	} else if part_classes.iter().all(\|item\| is_media_block_class(item)) {
	"MEDIA_BLOCK".to_string()
	// Media parts mixed only with the words "anime" / "アニメ" still count
	// as a media block.
	} else if part_classes.iter().any(\|item\| is_media_block_class(item))
	&& parts.iter().zip(part_classes.iter()).all(\|(part, item)\| {
	is_media_block_class(item)
	\|\| matches!(part.to_ascii_lowercase().as_str(), "anime" \| "アニメ")
	})
	{
	"MEDIA_BLOCK".to_string()
	} else if part_classes.iter().any(\|item\| item == "TEXT") {
	"TEXT".to_string()
	} else {
	// No TEXT part and not uniform: join the distinct classes, sorted, with
	// underscores.
	let mut set: Vec<String> = part_classes
	.into_iter()
	.collect::<HashSet<_>>()
	.into_iter()
	.collect();
	set.sort();
	set.join("_")
	}
	};
	return format!("BRACKET_{inner_class}");
	}
	classify_atom(token)
	}

	/// Returns true for the atom classes that may appear in a media block:
	/// `MEDIA`, `RESOLUTION`, `LANG`, `HASH` and `DATE`.
	fn is_media_block_class(value: &str) -> bool {
	    const MEDIA_BLOCK_CLASSES: &[&str] = &["MEDIA", "RESOLUTION", "LANG", "HASH", "DATE"];
	    MEDIA_BLOCK_CLASSES.contains(&value)
	}

	/// Groups token indices by class.
	///
	/// `SPACE` is treated as `SEP`. Adjacent tokens merge into one group only
	/// when they share the class `SEP` or `TEXT`; every other token forms its
	/// own group. `_tokens` is not used.
	fn compact_token_groups(_tokens: &[String], classes: &[String]) -> Vec<Group> {
	    let mut groups: Vec<Group> = Vec::new();
	    for (index, token_class) in classes.iter().enumerate() {
	        let class_name = match token_class.as_str() {
	            "SPACE" => "SEP",
	            other => other,
	        };
	        let mergeable = matches!(class_name, "SEP" | "TEXT");
	        match groups.last_mut() {
	            // The last group always carries the previous token's class.
	            Some(open) if mergeable && open.class_name == class_name => open.indices.push(index),
	            _ => groups.push(Group {
	                indices: vec![index],
	                class_name: class_name.to_string(),
	            }),
	        }
	    }
	    groups
	}

	/// Tokenizes and classifies `filename`.
	///
	/// Returns `(key, tokens, classes, groups)`, where `key` is the group class
	/// names joined with single spaces.
	fn template_key_for_filename(filename: &str) -> (String, Vec<String>, Vec<String>, Vec<Group>) {
	    let tokens = tokenize(filename);
	    let mut classes: Vec<String> = Vec::with_capacity(tokens.len());
	    for token in &tokens {
	        classes.push(classify_token(token));
	    }
	    let groups = compact_token_groups(&tokens, &classes);
	    let names: Vec<&str> = groups
	        .iter()
	        .map(|group| group.class_name.as_str())
	        .collect();
	    let key = names.join(" ");
	    (key, tokens, classes, groups)
	}

	/// Proposes one role per item of a whitespace-separated template key.
	///
	/// Works in two passes: structural and technical items get their role straight
	/// from the item name, then each `PATH`-delimited segment has `GROUP` / `TITLE`
	/// assigned to its leading `BRACKET_TEXT` / `TEXT` items. Items that match
	/// nothing keep the role `"O"`.
	fn suggested_roles(template: &str) -> Vec<String> {
	let items: Vec<&str> = template.split_whitespace().collect();
	let mut roles = vec!["O".to_string(); items.len()];
	// Each segment starts at index 0 or right after a PATH item.
	let mut segment_starts = vec![0usize];
	for (index, item) in items.iter().enumerate() {
	if *item == "PATH" {
	segment_starts.push(index + 1);
	}
	}
	// Pass 1: name-based roles. Substring tests are used so BRACKET_* variants
	// match too; the more specific EPISODE_* names are tested before EPISODE.
	for (index, item) in items.iter().enumerate() {
	roles[index] = if item.contains("EPISODE_VERSION") {
	"EPISODE_VERSION"
	} else if item.contains("EPISODE_RANGE") {
	"EPISODE_RANGE"
	} else if item.contains("EPISODE") \|\| item.contains("SXE") {
	"EPISODE"
	} else if item.contains("RESOLUTION") {
	"RESOLUTION"
	} else if item.contains("HASH") {
	"HASH"
	} else if item.contains("LANG") \|\| item.contains("MEDIA") {
	"SOURCE"
	} else if item.contains("SPECIAL") {
	"SPECIAL"
	} else if item.contains("SEASON") {
	"SEASON"
	} else if item.contains("VOLUME") {
	"VOLUME"
	} else {
	"O"
	}
	.to_string();
	}
	// Pass 2: per segment, decide GROUP / TITLE among the text items.
	for (offset, start) in segment_starts.iter().enumerate() {
	// The segment ends just before the PATH item that opens the next segment,
	// or at the end of the template for the last segment.
	let end = if offset + 1 < segment_starts.len() {
	segment_starts[offset + 1] - 1
	} else {
	items.len()
	};
	if *start >= end {
	continue;
	}
	// Only text that precedes the first structural item is considered here.
	let first_structural = (*start..end)
	.find(\|&index\| {
	items[index].contains("EPISODE")
	\|\| matches!(items[index], "SXE" \| "SPECIAL" \| "SEASON")
	})
	.unwrap_or(end);
	let bracket_text: Vec<usize> = (*start..first_structural)
	.filter(\|&index\| items[index] == "BRACKET_TEXT" && roles[index] == "O")
	.collect();
	let text: Vec<usize> = (*start..first_structural)
	.filter(\|&index\| items[index] == "TEXT" && roles[index] == "O")
	.collect();
	// Two or more bracketed texts: the first is the group, the rest are titles.
	if bracket_text.len() >= 2 {
	roles[bracket_text[0]] = "GROUP".to_string();
	for index in bracket_text.iter().skip(1) {
	roles[*index] = "TITLE".to_string();
	}
	} else if bracket_text.len() == 1 {
	// A single bracketed text is the group only when it opens the segment and
	// plain text is available to serve as the title.
	roles[bracket_text[0]] = if text.is_empty() {
	"TITLE"
	} else if bracket_text[0] == *start {
	"GROUP"
	} else {
	"TITLE"
	}
	.to_string();
	}
	for index in text {
	roles[index] = "TITLE".to_string();
	}
	// Fallback for segments that start with an episode item and have no title
	// yet: take the first run of TEXT items after it (separators may sit in
	// between), but only when the run has at least two items.
	if !roles[*start..end].iter().any(\|role\| role == "TITLE")
	&& !items[*start..end].is_empty()
	&& items[*start].contains("EPISODE")
	{
	let mut run = Vec::new();
	for index in (*start + 1)..end {
	if items[index] == "TEXT" && roles[index] == "O" {
	run.push(index);
	continue;
	}
	if items[index] == "SEP" {
	continue;
	}
	if !run.is_empty() {
	break;
	}
	}
	if run.len() >= 2 {
	for index in run {
	roles[index] = "TITLE".to_string();
	}
	}
	}
	}
	roles
	}

	/// Returns a copy of `roles` with semantic refinements applied per group.
	///
	/// Category-like text becomes `TAG`, a `SEASON` role inside a non-final path
	/// segment becomes `PATH_SEASON`, and a plain `TITLE` role is replaced by the
	/// language-specific title role for the group's text. `PATH` groups are left
	/// untouched.
	fn refine_semantic_roles(tokens: &[String], groups: &[Group], roles: &[String]) -> Vec<String> {
	let mut output = roles.to_vec();
	// `segment_end` is the index of the PATH group that closes the current
	// segment, or `groups.len()` when the current segment is the last one.
	let mut segment_end = groups
	.iter()
	.position(\|group\| group.class_name == "PATH")
	.unwrap_or(groups.len());
	// True while the current segment is followed by another PATH group, i.e. it
	// is not the final segment.
	let mut is_path_segment = segment_end < groups.len();

	for index in 0..groups.len() {
	if groups[index].class_name == "PATH" {
	// Entering the next segment: look for the PATH group that closes it.
	segment_end = groups[index + 1..]
	.iter()
	.position(\|group\| group.class_name == "PATH")
	.map(\|offset\| index + 1 + offset)
	.unwrap_or(groups.len());
	is_path_segment = segment_end < groups.len();
	continue;
	}

	let text = group_text(tokens, &groups[index]);
	let bracketed = is_bracket_group(&groups[index]);
	// Only these four roles may be overridden by the TAG rule.
	if is_category_tag_text(&text, bracketed, is_path_segment)
	&& matches!(output[index].as_str(), "O" \| "TITLE" \| "GROUP" \| "SPECIAL")
	{
	output[index] = "TAG".to_string();
	continue;
	}

	if output[index] == "SEASON" && is_path_segment {
	output[index] = "PATH_SEASON".to_string();
	continue;
	}

	if output[index] == "TITLE" {
	output[index] = title_role_for_text(&text, is_path_segment);
	}
	}
	output
	}

	/// Returns true when the suggested roles for `filename`'s template include
	/// at least one title role.
	fn filename_has_title(filename: &str) -> bool {
	    let key = template_key_for_filename(filename).0;
	    let roles = suggested_roles(&key);
	    roles.iter().any(|role| is_title_role(role))
	}

	/// Derives the filename used for training from a possibly path-qualified name.
	///
	/// Returns the derived name and a flag. The flag is `false` only when the
	/// input is returned unchanged; every rewritten result carries `true`.
	fn training_filename_for(original: &str) -> (String, bool) {
	// Path segments with blanks removed; either separator style is accepted.
	let parts: Vec<&str> = original
	.split(\|ch\| ch == '/' \|\| ch == '\\')
	.map(str::trim)
	.filter(\|part\| !part.is_empty())
	.collect();
	// Case 1: the leaf consists only of episode-like items, or it starts with an
	// episode marker (under a parent that is not a plain season directory) and
	// has no full title after it. Borrow a title from an ancestor directory.
	if parts.len() >= 2
	&& (path_segment_is_episodeish(parts[parts.len() - 1])
	\|\| (!path_segment_is_plain_season(parts[parts.len() - 2])
	&& path_segment_starts_with_episode(parts[parts.len() - 1])
	&& !leaf_has_full_title_after_episode(parts[parts.len() - 1])))
	{
	// Search ancestors from the closest outwards for one that carries a title
	// and is not media noise once trimmed.
	if let Some(parent) = parts[..parts.len() - 1].iter().rev().find(\|part\| {
	let trimmed = trim_parent_title_segment(part);
	filename_has_title(&trimmed) && !path_segment_is_media_noise(&trimmed)
	}) {
	let parent = trim_parent_title_segment(parent.trim());
	return (
	format!("{} {}", parent, parts[parts.len() - 1].trim()),
	true,
	);
	}
	// No usable ancestor: fall through to the checks below.
	}
	// Case 2: the leaf carries its own title.
	if parts.len() >= 2 && filename_has_title(parts[parts.len() - 1]) {
	if path_segment_has_season(parts[parts.len() - 2]) {
	// The parent mentions a season but is more than a plain season directory:
	// keep the leaf only.
	if !path_segment_is_plain_season(parts[parts.len() - 2]) {
	return (parts[parts.len() - 1].to_string(), true);
	}
	// Plain season directory: prepend it only when the leaf does not already
	// name one of the same seasons.
	let parent_seasons = path_segment_seasons(parts[parts.len() - 2]);
	let leaf_seasons = path_segment_seasons(parts[parts.len() - 1]);
	if parent_seasons
	.iter()
	.any(\|season\| leaf_seasons.contains(season))
	{
	(parts[parts.len() - 1].to_string(), true)
	} else {
	(
	format!(
	"{} {}",
	parts[parts.len() - 2].trim(),
	parts[parts.len() - 1].trim()
	),
	true,
	)
	}
	} else {
	(parts[parts.len() - 1].to_string(), true)
	}
	} else {
	// Single segment, or a leaf without a title: leave the input as is.
	(original.to_string(), false)
	}
	}

	/// Returns true when the segment, with a surrounding bracket pair removed
	/// and trimmed, matches `PLAIN_SEASON_SEGMENT_RE`.
	fn path_segment_is_plain_season(segment: &str) -> bool {
	    let unwrapped = strip_wrapper(segment);
	    PLAIN_SEASON_SEGMENT_RE.is_match(unwrapped.trim())
	}

	/// Trims `segment` and removes a trailing `TV`/`tv` marker that is preceded
	/// by `_`, `.` or a space, together with any `_`, `.` or space characters
	/// left at the end. Segments without such a marker are only trimmed.
	fn trim_terminal_series_kind(segment: &str) -> String {
	    const SUFFIXES: [&str; 6] = ["_TV", ".TV", " TV", "_tv", ".tv", " tv"];
	    let trimmed = segment.trim();
	    let stem = SUFFIXES
	        .iter()
	        .find_map(|suffix| trimmed.strip_suffix(*suffix));
	    match stem {
	        Some(stem) => stem.trim_end_matches(['_', '.', ' ']).to_string(),
	        None => trimmed.to_string(),
	    }
	}

	/// Cleans a parent directory name for use as a title prefix.
	///
	/// First removes a trailing series-kind marker, then repeatedly strips a
	/// trailing bracketed suffix for as long as that suffix is media noise,
	/// dropping the spaces, `_`, `.` and `-` left in front of it.
	fn trim_parent_title_segment(segment: &str) -> String {
	    let mut output = trim_terminal_series_kind(segment);
	    loop {
	        let visible = output.trim_end();
	        let opener = match visible.chars().next_back() {
	            Some(')') => '(',
	            Some(']') => '[',
	            Some('】') => '【',
	            _ => break,
	        };
	        // The suffix runs from the last opener to the end of the visible text.
	        let cut = match visible.rfind(opener) {
	            Some(position) if path_segment_is_media_noise(&visible[position..]) => position,
	            _ => break,
	        };
	        output.truncate(cut);
	        output = output.trim_end_matches([' ', '_', '.', '-']).to_string();
	    }
	    output
	}

	/// Returns true when `PATH_SEGMENT_SEASON_RE` matches anywhere the pattern
	/// allows in `value`.
	fn path_segment_has_season(value: &str) -> bool {
	PATH_SEGMENT_SEASON_RE.is_match(value)
	}

	/// Collects every season number found in `value`.
	///
	/// Three patterns are scanned; for each match, capture group 1 is parsed as
	/// `u8`. Matches without group 1 or with an unparsable number are ignored.
	fn path_segment_seasons(value: &str) -> HashSet<u8> {
	    let worded = SEASON_WORD_NUMBER_RE.captures_iter(value);
	    let short = S_NUMBER_SEGMENT_RE.captures_iter(value);
	    let sxe = SXE_SEASON_RE.captures_iter(value);
	    worded
	        .chain(short)
	        .chain(sxe)
	        .filter_map(|captures| captures.get(1)?.as_str().parse::<u8>().ok())
	        .collect()
	}

	/// Heuristic mojibake detector.
	///
	/// True when the text contains U+FFFD, when at least two marker occurrences
	/// are found, when one marker occurrence is accompanied by a character in
	/// U+FF61..=U+FF9F, or when a whitespace-separated word mixes ASCII letters
	/// with one of `帽`, `茅`, `脳`, `锛`.
	fn has_encoding_noise(value: &str) -> bool {
	    const MARKERS: &[&str] = &[
	        "譁", "蜈", "螟", "蟄", "謇", "邱", "荳", "縺", "繧", "莨", "鬆", "髯", "瀛", "楀", "箷",
	        "绲", "刔", "鏃", "湪", "鏍", "犲", "儚", "鐗", "吀", "铦", "躲", "伄", "椋", "伓", "姘",
	        "帽", "娆", "洖", "浜", "堝", "澶", "湴", "鐒", "銇", "銈", "銉", "偅", "偗", "儱", "儫",
	        "兗", "仧", "鏉变", "鍠靛", "銉熴", "銈︺", "瀵掕", "潐楦", "常涔", "涓歖", "缁堟", "湯鍒",
	        "瀵诲", "線浣", "曟柟", "瓒呴", "绁炪", "偘銉", "兇銈", "銉砡", "銉砕", "杩风", "硦澶",
	        "銇淬", "仧銉", "銉嗐", "偅銈", "銈躲",
	    ];

	    if value.contains('\u{fffd}') {
	        return true;
	    }

	    // A word that mixes ASCII letters with a typical mis-decoded character.
	    let word_is_mojibake = |word: &str| {
	        word.chars().any(|ch| matches!(ch, '帽' | '茅' | '脳' | '锛'))
	            && word.chars().any(|ch| ch.is_ascii_alphabetic())
	    };
	    if value.split_whitespace().any(word_is_mojibake) {
	        return true;
	    }

	    // Overlapping markers each contribute their own occurrences.
	    let marker_hits: usize = MARKERS
	        .iter()
	        .map(|marker| value.matches(*marker).count())
	        .sum();
	    if marker_hits >= 2 {
	        return true;
	    }
	    let has_halfwidth = value
	        .chars()
	        .any(|ch| ('\u{ff61}'..='\u{ff9f}').contains(&ch));
	    marker_hits >= 1 && has_halfwidth
	}

	/// Returns true for names that match one of the hard-coded non-anime
	/// patterns: an `mtv` path component (whole name, leading, or in the
	/// middle), the phrase "tokyo deep", or one of a few literal CJK markers.
	///
	/// Path and phrase checks run on a copy with backslashes turned into `/`,
	/// trimmed and ASCII-lowercased; the literal markers are searched in the
	/// raw input.
	fn has_non_anime_noise(value: &str) -> bool {
	    const RAW_MARKERS: &[&str] = &[
	        "[旅游",
	        "[旅游番",
	        "日本不思议铁路之旅",
	        "ニッポンぶらり鉄道旅",
	    ];

	    let normalized = value.replace('\\', "/").trim().to_ascii_lowercase();
	    let normalized_hit = normalized == "mtv"
	        || normalized.starts_with("mtv/")
	        || normalized.contains("/mtv/")
	        || normalized.contains("tokyo deep");
	    normalized_hit || RAW_MARKERS.iter().any(|marker| value.contains(*marker))
	}

	/// Returns `value` with all whitespace removed and ASCII letters lowercased.
	fn normalized_path_segment(value: &str) -> String {
	    value
	        .chars()
	        .filter(|ch| !ch.is_whitespace())
	        .map(|ch| ch.to_ascii_lowercase())
	        .collect()
	}

	/// Normalizes tag text for comparison: `_`, `.`, `-`, `・` and whitespace
	/// all act as word separators, words are joined with single spaces, and
	/// ASCII letters are lowercased.
	fn normalized_tag_text(value: &str) -> String {
	    let is_separator = |ch: char| ch.is_whitespace() || matches!(ch, '_' | '.' | '-' | '・');
	    let words: Vec<String> = value
	        .split(is_separator)
	        .filter(|word| !word.is_empty())
	        .map(str::to_ascii_lowercase)
	        .collect();
	    words.join(" ")
	}

	/// Keeps only the alphanumeric characters of `value` and lowercases the
	/// ASCII letters among them.
	fn compact_tag_text(value: &str) -> String {
	    let mut compact = String::with_capacity(value.len());
	    for ch in value.chars().filter(|ch| ch.is_alphanumeric()) {
	        compact.push(ch.to_ascii_lowercase());
	    }
	    compact
	}

	/// Returns true when the group's class name carries the `BRACKET_` prefix.
	fn is_bracket_group(group: &Group) -> bool {
	    group.class_name.strip_prefix("BRACKET_").is_some()
	}

	fn is_category_tag_text(text: &str, bracketed: bool, path_segment: bool) -> bool {
	let cleaned = strip_wrapper(text);
	let trimmed = cleaned.trim();
	if trimmed.is_empty() {
	return false;
	}
	if (bracketed \|\| path_segment) && (DATE_RE.is_match(trimmed) \|\| YEAR_RANGE_RE.is_match(trimmed))
	{
	return true;
	}
	if (bracketed \|\| path_segment)
	&& matches!(
	trimmed,
	"国漫" \| "國漫" \| "日漫" \| "剧场版" \| "劇場版" \| "新番"
	)
	{
	return true;
	}
	if (bracketed \|\| path_segment)
	&& (trimmed.ends_with("月新番") \|\| trimmed.ends_with("月新番合集"))
	{
	return true;
	}
	let normalized = normalized_tag_text(trimmed);
	(bracketed \|\| path_segment)
	&& matches!(
	normalized.as_str(),
	"anime" \| "gekijouban" \| "movie" \| "movies" \| "the movie" \| "tv" \| "tv series"
	)
	}

	fn has_music_collection_noise(value: &str) -> bool {
	let normalized = value
	.replace(['_', '.', '-', '・', '/', '\\'], " ")
	.split_whitespace()
	.collect::<Vec<_>>()
	.join(" ");
	let compact = compact_tag_text(value);
	MUSIC_COLLECTION_RE.is_match(&normalized) \|\| compact.contains("musicclip")
	}

	/// Returns true for the bare `TITLE` role and for any role prefixed with
	/// `TITLE_` or `PATH_TITLE_`.
	fn is_title_role(role: &str) -> bool {
	    match role {
	        "TITLE" => true,
	        other => ["TITLE_", "PATH_TITLE_"]
	            .iter()
	            .any(|prefix| other.starts_with(*prefix)),
	    }
	}

	/// Returns true when the role carries the `PATH_TITLE_` prefix.
	fn is_path_title_role(role: &str) -> bool {
	    role.strip_prefix("PATH_TITLE_").is_some()
	}

	/// Maps a role to its title entity name: title roles map to themselves and
	/// everything else maps to `None`.
	fn title_entity_from_role(role: &str) -> Option<&str> {
	    let prefixed = role.starts_with("TITLE_") || role.starts_with("PATH_TITLE_");
	    match role {
	        "TITLE" => Some("TITLE"),
	        _ if prefixed => Some(role),
	        _ => None,
	    }
	}

	/// Returns true when `entity` is exactly one of the known title entity
	/// names: `TITLE`, or `TITLE_*` / `PATH_TITLE_*` with a language suffix of
	/// `CHS`, `CHT`, `JPN`, `LATIN` or `MIXED`.
	fn is_title_entity(entity: &str) -> bool {
	    const TITLE_ENTITIES: &[&str] = &[
	        "TITLE",
	        "TITLE_CHS",
	        "TITLE_CHT",
	        "TITLE_JPN",
	        "TITLE_LATIN",
	        "TITLE_MIXED",
	        "PATH_TITLE_CHS",
	        "PATH_TITLE_CHT",
	        "PATH_TITLE_JPN",
	        "PATH_TITLE_LATIN",
	        "PATH_TITLE_MIXED",
	    ];
	    TITLE_ENTITIES.contains(&entity)
	}

	fn is_title_label(label: &str) -> bool {
	label_entity(label).is_some_and(is_title_entity)
	}

	fn title_language_suffix(text: &str) -> &'static str {
	let mut has_latin = false;
	let mut has_han = false;
	let mut has_kana = false;
	for ch in text.chars() {
	if ch.is_ascii_alphabetic() {
	has_latin = true;
	} else if ('\u{3040}'..='\u{30ff}').contains(&ch) \|\| ('\u{31f0}'..='\u{31ff}').contains(&ch)
	{
	has_kana = true;
	} else if ('\u{4e00}'..='\u{9fff}').contains(&ch) {
	has_han = true;
	}
	}
	if has_kana {
	return "JPN";
	}
	if has_latin && has_han {
	return "MIXED";
	}
	if has_han {
	return cjk_title_language_suffix(text);
	}
	if has_latin {
	return "LATIN";
	}
	"MIXED"
	}

	/// Guesses `JPN`, `CHS` or `CHT` for Han-only text from marker characters.
	///
	/// The marker lists are checked in that order and the first list with a
	/// hit decides; text with no marker at all falls back to `CHS`.
	fn cjk_title_language_suffix(text: &str) -> &'static str {
	    const JAPANESE_MARKERS: &[char] = &[
	        '々', 'ヶ', '君', '戦', '気', '辺', '沢', '桜', '竜', '広', '処', '歩', '黒', '円',
	    ];
	    const SIMPLIFIED_MARKERS: &[char] = &[
	        '国', '剧', '场', '农', '闲', '汉', '龙', '门', '击', '战', '体', '后', '爱', '边', '声',
	        '岛', '学', '万',
	    ];
	    const TRADITIONAL_MARKERS: &[char] = &[
	        '國', '劇', '場', '農', '閒', '漢', '龍', '門', '擊', '戰', '體', '後', '愛', '邊', '聲',
	        '島', '學', '萬', '縛', '異', '臺', '灣', '搖', '滾',
	    ];

	    if text.contains(JAPANESE_MARKERS) {
	        "JPN"
	    } else if text.contains(SIMPLIFIED_MARKERS) {
	        "CHS"
	    } else if text.contains(TRADITIONAL_MARKERS) {
	        "CHT"
	    } else {
	        "CHS"
	    }
	}

	/// Builds a title role name: `PATH_TITLE_<suffix>` when `path_title` is
	/// set, otherwise `TITLE_<suffix>`, with the suffix taken from
	/// `title_language_suffix(text)`.
	fn title_role_for_text(text: &str, path_title: bool) -> String {
	    let suffix = title_language_suffix(text);
	    if path_title {
	        format!("PATH_TITLE_{suffix}")
	    } else {
	        format!("TITLE_{suffix}")
	    }
	}

	fn path_segment_is_episodeish(value: &str) -> bool {
	let (_, _, _, groups) = template_key_for_filename(value);
	let structural: Vec<&String> = groups
	.iter()
	.map(\|group\| &group.class_name)
	.filter(\|item\| item.as_str() != "SEP")
	.collect();
	!structural.is_empty()
	&& structural.iter().all(\|item\| {
	item.starts_with("EPISODE")
	\|\| item.as_str() == "SPECIAL"
	\|\| item.as_str() == "VOLUME"
	\|\| item.as_str() == "BRACKET_VOLUME"
	})
	}

	/// Returns `true` when the segment begins with an episode marker.
	///
	/// Either the trimmed segment matches `EPISODE_CJK_PREFIX_RE`, or the first
	/// non-separator group of its template has a suggested role starting with
	/// `EPISODE`.
	fn path_segment_starts_with_episode(value: &str) -> bool {
	    if EPISODE_CJK_PREFIX_RE.is_match(value.trim()) {
	        return true;
	    }
	    let (key, _, _, groups) = template_key_for_filename(value);
	    let roles = suggested_roles(&key);
	    for (group, role) in groups.iter().zip(roles.iter()) {
	        if group.class_name != "SEP" {
	            // Only the first non-separator group decides.
	            return role.starts_with("EPISODE");
	        }
	    }
	    false
	}

	/// Returns `true` when a non-separator group with the suggested role
	/// `TITLE` appears after the first role that starts with `EPISODE`.
	///
	/// Returns `false` when no suggested role starts with `EPISODE`.
	fn leaf_has_full_title_after_episode(value: &str) -> bool {
	    let (key, _, _, groups) = template_key_for_filename(value);
	    let roles = suggested_roles(&key);
	    match roles.iter().position(|role| role.starts_with("EPISODE")) {
	        None => false,
	        Some(episode_at) => groups
	            .iter()
	            .zip(roles.iter())
	            .enumerate()
	            .any(|(position, (group, role))| {
	                position > episode_at && group.class_name != "SEP" && role == "TITLE"
	            }),
	    }
	}

	fn path_segment_is_media_noise(value: &str) -> bool {
	let normalized = value.to_ascii_lowercase();
	if normalized.contains("sourceunknown") \|\| normalized.contains("sourceunknow") {
	return true;
	}
	if (normalized.contains("dvdrip")
	\|\| normalized.contains("bdrip")
	\|\| normalized.contains("webrip")
	\|\| normalized.contains("web-dl")
	\|\| normalized.contains("bluray"))
	&& tokenize(value)
	.iter()
	.map(\|token\| classify_atom(token))
	.any(\|class_name\| class_name == "RESOLUTION")
	{
	return true;
	}
	let (_, _, _, groups) = template_key_for_filename(value);
	let structural: Vec<&String> = groups
	.iter()
	.map(\|group\| &group.class_name)
	.filter(\|item\| item.as_str() != "SEP")
	.collect();
	!structural.is_empty()
	&& structural.iter().all(\|item\| {
	matches!(
	item.as_str(),
	"MEDIA"
	\| "RESOLUTION"
	\| "LANG"
	\| "HASH"
	\| "DATE"
	\| "BRACKET_MEDIA"
	\| "BRACKET_RESOLUTION"
	\| "BRACKET_LANG"
	\| "BRACKET_HASH"
	\| "BRACKET_DATE"
	\| "MEDIA_BLOCK"
	\| "BRACKET_MEDIA_BLOCK"
	)
	})
	}

	/// Inspects a `/`- or `\`-separated path with at least three non-empty
	/// (trimmed) segments and returns `true` when the first and last segment
	/// normalize to the same value, or when both are episode-ish.
	///
	/// Paths with fewer than three segments always yield `false`.
	fn has_abstract_path_noise(value: &str) -> bool {
	    let segments: Vec<&str> = value
	        .split(['/', '\\'])
	        .map(str::trim)
	        .filter(|segment| !segment.is_empty())
	        .collect();
	    match segments.as_slice() {
	        // The pattern only matches when there are three or more segments.
	        [first, _, .., last] => {
	            normalized_path_segment(first) == normalized_path_segment(last)
	                || (path_segment_is_episodeish(first) && path_segment_is_episodeish(last))
	        }
	        _ => false,
	    }
	}

	fn role_label(role: &str) -> String {
	let entity = match role {
	"GROUP" => Some("GROUP"),
	role if is_title_role(role) => Some("TITLE"),
	"EPISODE" \| "EPISODE_VERSION" \| "EPISODE_RANGE" => Some("EPISODE"),
	"SEASON" => Some("SEASON"),
	"PATH_SEASON" => Some("PATH_SEASON"),
	"SPECIAL" \| "VOLUME" => Some("SPECIAL"),
	"RESOLUTION" => Some("RESOLUTION"),
	"SOURCE" => Some("SOURCE"),
	"TAG" => Some("TAG"),
	_ => None,
	};
	entity.map_or_else(\|\| "O".to_string(), \|entity\| format!("B-{entity}"))
	}

	/// Returns `true` when `piece` contains no alphanumeric character at all;
	/// the empty string counts as a separator.
	fn is_separator(piece: &str) -> bool {
	    // Whitespace is never alphanumeric, so one test covers both cases.
	    !piece.chars().any(char::is_alphanumeric)
	}

	/// Classifies one character as `sep` (whitespace or non-alphanumeric),
	/// `digit` (ASCII digit), `alpha` (ASCII letter) or `text` (any other
	/// alphanumeric character).
	fn char_kind(ch: char) -> &'static str {
	    match ch {
	        c if c.is_whitespace() || !c.is_alphanumeric() => "sep",
	        c if c.is_ascii_digit() => "digit",
	        c if c.is_ascii_alphabetic() => "alpha",
	        _ => "text",
	    }
	}

	/// Splits one token into finer pieces and re-merges pieces that only make
	/// sense together.
	///
	/// 1. An unwrapped token is returned whole when it classifies as
	///    `RESOLUTION`, `MEDIA`, `LANG`, `HASH`, `SXE` or `EPISODE_VERSION` and
	///    is purely alphanumeric, or when it classifies as `EPISODE` and
	///    matches `SIMPLE_EPISODE_RE`. "Wrapped" means enclosed in `[]`, `()`
	///    or `【】`.
	/// 2. Otherwise the token is cut at every change of `char_kind`, with each
	///    separator character becoming its own piece.
	/// 3. Pieces are merged again, left to right, when the merged text
	///    classifies as a known atom: `atom joiner atom` triples, then whole
	///    runs of adjacent atoms, then adjacent atom pairs.
	fn split_refined_token(token: &str) -> Vec<String> {
	    let whole_class = classify_atom(token);

	    let mut ends = token.chars();
	    let is_wrapped = match (ends.next(), ends.next_back()) {
	        (Some(open), Some(close)) => {
	            matches!((open, close), ('[', ']') | ('(', ')') | ('【', '】'))
	        }
	        // Fewer than two characters cannot be a wrapper.
	        _ => false,
	    };

	    if !is_wrapped {
	        let is_atomic_class = matches!(
	            whole_class.as_str(),
	            "RESOLUTION" | "MEDIA" | "LANG" | "HASH" | "SXE" | "EPISODE_VERSION"
	        );
	        if is_atomic_class && token.chars().all(char::is_alphanumeric) {
	            return vec![token.to_string()];
	        }
	        if whole_class == "EPISODE" && SIMPLE_EPISODE_RE.is_match(token) {
	            return vec![token.to_string()];
	        }
	    }

	    // Pass 1: cut at every character-kind boundary.
	    let mut pieces: Vec<String> = Vec::new();
	    let mut run = String::new();
	    let mut run_kind: Option<&str> = None;
	    for ch in token.chars() {
	        let kind = char_kind(ch);
	        let closes_run = kind == "sep" || run_kind != Some(kind);
	        if closes_run && !run.is_empty() {
	            pieces.push(std::mem::take(&mut run));
	        }
	        if kind == "sep" {
	            run_kind = None;
	            pieces.push(ch.to_string());
	        } else {
	            run.push(ch);
	            run_kind = Some(kind);
	        }
	    }
	    if !run.is_empty() {
	        pieces.push(run);
	    }

	    // Pass 2: greedy left-to-right merging.
	    let mut merged = Vec::new();
	    let mut cursor = 0;
	    while cursor < pieces.len() {
	        // (a) atom + joiner + atom, e.g. a dotted or `×`-joined value.
	        // NOTE(review): "x"/"X" are alphanumeric, so `is_separator` should
	        // never accept them as the joiner piece; kept as in the original.
	        if let [left, joiner, right, ..] = &pieces[cursor..] {
	            if !is_separator(left) && is_separator(joiner) && !is_separator(right) {
	                let combined = format!("{left}{joiner}{right}");
	                let combined_class = classify_atom(&combined);
	                if !joiner.chars().any(char::is_whitespace)
	                    && matches!(joiner.as_str(), "." | "x" | "X" | "×")
	                    && matches!(
	                        combined_class.as_str(),
	                        "RESOLUTION" | "MEDIA" | "LANG" | "HASH" | "SXE" | "EPISODE_VERSION"
	                    )
	                {
	                    merged.push(combined);
	                    cursor += 3;
	                    continue;
	                }
	            }
	        }
	        // (b) a whole run of two or more adjacent atoms.
	        if !is_separator(&pieces[cursor]) {
	            let run_len = pieces[cursor..]
	                .iter()
	                .take_while(|piece| !is_separator(piece))
	                .count();
	            let combined = pieces[cursor..cursor + run_len].concat();
	            if run_len > 1 && is_mergeable_refined_class(&classify_atom(&combined)) {
	                merged.push(combined);
	                cursor += run_len;
	                continue;
	            }
	        }
	        // (c) just the next two adjacent atoms.
	        if cursor + 1 < pieces.len()
	            && !is_separator(&pieces[cursor])
	            && !is_separator(&pieces[cursor + 1])
	        {
	            let combined = format!("{}{}", pieces[cursor], pieces[cursor + 1]);
	            if is_mergeable_refined_class(&classify_atom(&combined)) {
	                merged.push(combined);
	                cursor += 2;
	                continue;
	            }
	        }
	        merged.push(pieces[cursor].clone());
	        cursor += 1;
	    }
	    merged
	}

	/// Returns `true` for the atom classes whose split pieces may be merged
	/// back into one refined piece.
	fn is_mergeable_refined_class(value: &str) -> bool {
	    const MERGEABLE: [&str; 7] = [
	        "RESOLUTION",
	        "MEDIA",
	        "LANG",
	        "HASH",
	        "SXE",
	        "EPISODE_VERSION",
	        "SEASON",
	    ];
	    MERGEABLE.contains(&value)
	}

	fn label_for_refined_piece(piece: &str, role: &str, token_class: &str) -> String {
	if is_separator(piece) {
	return "O".to_string();
	}
	let atom_class = classify_atom(piece);
	let upper = piece.to_ascii_uppercase();
	if matches!(role, "EPISODE" \| "EPISODE_VERSION" \| "EPISODE_RANGE") {
	if atom_class == "SEASON" {
	return "B-SEASON".to_string();
	}
	if matches!(atom_class.as_str(), "EPISODE" \| "EPISODE_VERSION" \| "SXE")
	\|\| piece.chars().all(\|ch\| ch.is_ascii_digit())
	{
	return "B-EPISODE".to_string();
	}
	if matches!(atom_class.as_str(), "SPECIAL" \| "VOLUME")
	\|\| matches!(
	upper.as_str(),
	"OVA" \| "OAD" \| "SP" \| "PV" \| "CM" \| "OP" \| "ED" \| "NCOP" \| "NCED"
	)
	{
	return "B-SPECIAL".to_string();
	}
	return "O".to_string();
	}
	if role == "SOURCE" \|\| matches!(token_class, "BRACKET_MEDIA_BLOCK" \| "MEDIA_BLOCK") {
	if atom_class == "EPISODE_VERSION" {
	return "B-EPISODE".to_string();
	}
	if atom_class == "RESOLUTION" {
	return "B-RESOLUTION".to_string();
	}
	if atom_class == "HASH" {
	return "O".to_string();
	}
	if matches!(atom_class.as_str(), "MEDIA" \| "LANG") {
	return "B-SOURCE".to_string();
	}
	if matches!(atom_class.as_str(), "SPECIAL" \| "VOLUME") {
	return "B-SPECIAL".to_string();
	}
	return if matches!(
	upper.as_str(),
	"END" \| "FIN" \| "COMPLETE" \| "TV" \| "全集" \| "全"
	) {
	"O".to_string()
	} else {
	"B-SOURCE".to_string()
	};
	}
	if role == "RESOLUTION" {
	return if atom_class == "RESOLUTION" \|\| piece.chars().all(\|ch\| ch.is_ascii_digit()) {
	"B-RESOLUTION".to_string()
	} else {
	"O".to_string()
	};
	}
	role_label(role)
	}

	/// Splits a compact season/episode token into labelled pieces.
	///
	/// Returns `None` when `SXE_VALUE_RE` does not match. Otherwise yields the
	/// pieces `S`, capture 1, `E`, capture 2 labelled `O`, `B-SEASON`, `O`,
	/// `B-EPISODE`; when capture 3 is present, `v` and that capture are
	/// appended, both labelled `O`. The marker letters are emitted as the
	/// fixed strings `S`, `E` and `v`.
	fn split_sxe_token(token: &str) -> Option<(Vec<String>, Vec<String>)> {
	    let caps = SXE_VALUE_RE.captures(token)?;
	    let mut labelled: Vec<(String, &str)> = vec![
	        ("S".to_string(), "O"),
	        (caps[1].to_string(), "B-SEASON"),
	        ("E".to_string(), "O"),
	        (caps[2].to_string(), "B-EPISODE"),
	    ];
	    if let Some(version) = caps.get(3) {
	        labelled.push(("v".to_string(), "O"));
	        labelled.push((version.as_str().to_string(), "O"));
	    }
	    Some(
	        labelled
	            .into_iter()
	            .map(|(piece, label)| (piece, label.to_string()))
	            .unzip(),
	    )
	}

	/// Expands every token that `split_sxe_token` recognizes into its labelled
	/// pieces and passes all other token/label pairs through unchanged.
	///
	/// Tokens and labels are paired positionally; surplus entries in the
	/// longer vector are dropped.
	fn repair_compact_sxe_tokens(
	    tokens: Vec<String>,
	    labels: Vec<String>,
	) -> (Vec<String>, Vec<String>) {
	    let mut repaired_tokens = Vec::new();
	    let mut repaired_labels = Vec::new();
	    for (token, label) in tokens.into_iter().zip(labels) {
	        match split_sxe_token(&token) {
	            Some((pieces, piece_labels)) => {
	                repaired_tokens.extend(pieces);
	                repaired_labels.extend(piece_labels);
	            }
	            None => {
	                repaired_tokens.push(token);
	                repaired_labels.push(label);
	            }
	        }
	    }
	    (repaired_tokens, repaired_labels)
	}

	/// Splits an episode token into labelled pieces.
	///
	/// A token matching `DECIMAL_EPISODE_RE` is split with
	/// `split_generated_token` and every piece is labelled `B-EPISODE`.
	/// Otherwise `EPISODE_VALUE_RE` must match (else `None`): capture 1 is
	/// emitted as `O`, the pieces of capture 2 as `B-EPISODE`, and an optional
	/// capture 3 adds `v` plus that capture, both `O`.
	fn split_episode_token(token: &str) -> Option<(Vec<String>, Vec<String>)> {
	    if DECIMAL_EPISODE_RE.is_match(token) {
	        let pieces = split_generated_token(token);
	        let labels = vec!["B-EPISODE".to_string(); pieces.len()];
	        return Some((pieces, labels));
	    }

	    let caps = EPISODE_VALUE_RE.captures(token)?;
	    let number_pieces = split_generated_token(&caps[2]);

	    let mut labels = vec!["O".to_string()];
	    labels.resize(1 + number_pieces.len(), "B-EPISODE".to_string());
	    let mut pieces = vec![caps[1].to_string()];
	    pieces.extend(number_pieces);

	    if let Some(version) = caps.get(3) {
	        pieces.extend(["v".to_string(), version.as_str().to_string()]);
	        labels.extend(["O".to_string(), "O".to_string()]);
	    }
	    Some((pieces, labels))
	}

	/// Splits a season token into the fixed piece `S` (label `O`) and capture
	/// 1 of `SEASON_VALUE_RE` (label `B-SEASON`); `None` when the regex does
	/// not match.
	fn split_season_token(token: &str) -> Option<(Vec<String>, Vec<String>)> {
	    SEASON_VALUE_RE.captures(token).map(|caps| {
	        let pieces = vec!["S".to_string(), caps[1].to_string()];
	        let labels = vec!["O".to_string(), "B-SEASON".to_string()];
	        (pieces, labels)
	    })
	}

	/// Concatenates the tokens referenced by `group.indices`, in that order,
	/// and returns the result of `strip_wrapper` on the joined text.
	///
	/// Panics if an index in `group.indices` is out of range for `tokens`.
	fn group_text(tokens: &[String], group: &Group) -> String {
	    let mut joined = String::new();
	    for &position in group.indices.iter() {
	        joined.push_str(&tokens[position]);
	    }
	    strip_wrapper(&joined)
	}

	/// Collapses every run of whitespace in `value` to a single space and
	/// drops leading and trailing whitespace.
	fn normalize_whitelist_name(value: &str) -> String {
	    let mut normalized = String::with_capacity(value.len());
	    for word in value.split_whitespace() {
	        // Words are never empty, so an empty buffer means "first word".
	        if !normalized.is_empty() {
	            normalized.push(' ');
	        }
	        normalized.push_str(word);
	    }
	    normalized
	}

	/// Tokenizes `value`, groups the tokens, and returns the text of every
	/// group that `whitelist_phrase_group` accepts, skipping texts that are
	/// empty after trimming.
	fn phrase_parts_for_whitelist(value: &str) -> Vec<String> {
	    let tokens = tokenize(value);
	    let classes: Vec<String> = tokens.iter().map(|token| classify_token(token)).collect();
	    let groups = compact_token_groups(&tokens, &classes);

	    let mut parts = Vec::new();
	    for group in groups.iter() {
	        if !whitelist_phrase_group(group) {
	            continue;
	        }
	        let part = group_text(&tokens, group);
	        if !part.trim().is_empty() {
	            parts.push(part);
	        }
	    }
	    parts
	}

	fn whitelist_phrase_group(group: &Group) -> bool {
	matches!(
	group.class_name.as_str(),
	"TEXT" \| "EPISODE" \| "SPECIAL" \| "SEASON" \| "BRACKET_TEXT"
	)
	}

	fn is_special_title_phrase(text: &str) -> bool {
	let normalized = SPECIAL_SPACE_RE
	.replace_all(text, " ")
	.trim()
	.to_ascii_uppercase();
	matches!(
	normalized.as_str(),
	"CM" \| "EVENT"
	\| "EIZOU"
	\| "EXTRA"
	\| "EXTRAS"
	\| "LOGO"
	\| "MENU"
	\| "OMAKE"
	\| "PREVIEW"
	\| "PV"
	\| "THEATER GREETING EVENT"
	\| "TOKUTEN"
	\| "TRAILER"
	\| "TV SPOT"
	\| "SPOT"
	\| "WORLD PREMIERE"
	\| "予告"
	\| "番宣"
	\| "宣番"
	\| "映像特典"
	\| "特典"
	) \|\| normalized.contains("映像特典")
	\|\| normalized.contains("特典映像")
	\|\| normalized.contains("番宣")
	\|\| normalized.contains("宣番")
	\|\| normalized.contains("TV SPOT")
	\|\| normalized.contains("BD SPOT")
	\|\| text.contains("予告")
	\|\| SPECIAL_TITLE_PHRASE_RE.is_match(text)
	}

	/// Heuristic check for a release-group name: the ASCII-lower-cased text
	/// ends with `sub`, or contains `fansub`, `sub&`, `&sub`, `字幕组` or
	/// `字幕組`.
	fn looks_like_release_group(text: &str) -> bool {
	    const GROUP_MARKERS: [&str; 5] = ["fansub", "sub&", "&sub", "字幕组", "字幕組"];
	    let lowered = text.to_ascii_lowercase();
	    lowered.ends_with("sub") || GROUP_MARKERS.iter().any(|&marker| lowered.contains(marker))
	}

	/// Joins, with single spaces, the text of every group before `index`
	/// whose role is exactly `TITLE`.
	///
	/// Panics if `index` exceeds `roles.len()` or a titled position is out of
	/// range for `groups`.
	fn title_context_before(
	    tokens: &[String],
	    groups: &[Group],
	    roles: &[String],
	    index: usize,
	) -> String {
	    let mut titled = Vec::new();
	    for (cursor, role) in roles[..index].iter().enumerate() {
	        if role.as_str() == "TITLE" {
	            titled.push(group_text(tokens, &groups[cursor]));
	        }
	    }
	    titled.join(" ")
	}

	/// Returns `true` when `number`, following the title text `context`, is
	/// part of a known title rather than a season or episode number.
	///
	/// The exceptions are hard-coded title/number pairs, plus any context
	/// whose compact form contains `highschooldd` regardless of the number.
	fn short_number_title_exception(context: &str, number: &str) -> bool {
	    let normalized = normalized_tag_text(context);
	    let compact = compact_tag_text(context);

	    // Pairs that require the whole normalized context to match.
	    let exact_pair = match number {
	        "2" => matches!(
	            normalized.as_str(),
	            "kamisama hajimemashita" | "ghiblies episode"
	        ),
	        "15" => normalized.as_str() == "r",
	        _ => false,
	    };
	    if exact_pair {
	        return true;
	    }
	    if number == "91" && normalized.contains("91 days") {
	        return true;
	    }
	    if number == "999" && context.contains("銀河鉄道") {
	        return true;
	    }
	    if compact.contains("highschooldd") {
	        return true;
	    }
	    number == "00" && context.contains("機動戦士ガンダム")
	}

	fn group_followed_by_quote(tokens: &[String], groups: &[Group], index: usize) -> bool {
	let Some(last_token) = groups.get(index).and_then(\|group\| group.indices.last()) else {
	return false;
	};
	for token in &tokens[*last_token + 1..] {
	if token.chars().all(char::is_whitespace) {
	continue;
	}
	return matches!(token.as_str(), "「" \| "｢" \| "\"" \| "'");
	}
	false
	}

	// Built-in title phrases. Each entry is a sequence of group texts that
	// `apply_known_title_phrases` passes to `apply_title_phrase` with the
	// structural override enabled; matching there is ASCII-case-insensitive and
	// compares one phrase element per searchable group.
	// The last two entries are single-element phrases, so each must equal the
	// text of one whole group.
	// NOTE(review): "Thrid" looks like a misspelling kept next to the corrected
	// form, presumably to match filenames that carry the typo — confirm.
	const KNOWN_TITLE_PHRASES: &[&[&str]] = &[
	&["SPY", "x", "FAMILY"],
	&["Spy", "x", "Family"],
	&["Slime", "300"],
	&["Zom", "100"],
	&["Kamisama", "Hajimemashita", "2"],
	&["Phantasy", "Star", "Online", "2", "Episode", "Oracle"],
	&["Durarara", "2", "Ketsu"],
	&["Ghiblies", "Episode", "2"],
	&["Eien", "no", "831"],
	&["Lupin The Thrid Jigen Daisuke no Bohyou"],
	&["Lupin The Third Jigen Daisuke no Bohyou"],
	];

	/// Applies whitelist knowledge to `roles` in place.
	///
	/// 1. When runtime whitelists are loaded, a `BRACKET_TEXT` group whose
	///    normalized text is a whitelisted group name becomes `GROUP`, unless
	///    an earlier role is structural (episode, season, special, source or
	///    resolution) or the group's own role is one of those.
	/// 2. Every built-in phrase in `KNOWN_TITLE_PHRASES` is applied with the
	///    structural override enabled.
	/// 3. Runtime title phrases with at least two elements are applied
	///    without the structural override.
	fn apply_known_title_phrases(tokens: &[String], groups: &[Group], roles: &mut [String]) {
	    // An earlier role of this kind blocks a later GROUP assignment.
	    fn blocks_later_group(role: &str) -> bool {
	        role.starts_with("EPISODE")
	            || matches!(role, "SEASON" | "SPECIAL" | "SOURCE" | "RESOLUTION")
	    }
	    // A group already holding one of these roles keeps it.
	    fn keeps_own_role(role: &str) -> bool {
	        matches!(
	            role,
	            "EPISODE"
	                | "EPISODE_VERSION"
	                | "EPISODE_RANGE"
	                | "SEASON"
	                | "SOURCE"
	                | "RESOLUTION"
	                | "SPECIAL"
	        )
	    }

	    if let Some(whitelists) = RUNTIME_WHITELISTS.get() {
	        for (index, group) in groups.iter().enumerate() {
	            let previous_structural = roles[..index]
	                .iter()
	                .any(|role| blocks_later_group(role));
	            if group.class_name != "BRACKET_TEXT" || previous_structural {
	                continue;
	            }
	            let name = normalize_whitelist_name(&group_text(tokens, group));
	            if !whitelists.group_names.contains(&name) {
	                continue;
	            }
	            if roles.get(index).is_some_and(|role| keeps_own_role(role)) {
	                continue;
	            }
	            roles[index] = "GROUP".to_string();
	        }
	    }

	    let mut searchable: Vec<(usize, String)> = Vec::new();
	    for (index, group) in groups.iter().enumerate() {
	        if whitelist_phrase_group(group) {
	            searchable.push((index, group_text(tokens, group)));
	        }
	    }

	    for phrase in KNOWN_TITLE_PHRASES {
	        apply_title_phrase(&searchable, phrase, roles, true);
	    }
	    if let Some(whitelists) = RUNTIME_WHITELISTS.get() {
	        for phrase in &whitelists.title_phrases {
	            if phrase.len() < 2 {
	                continue;
	            }
	            apply_title_phrase(&searchable, phrase, roles, false);
	        }
	    }
	}

	fn apply_title_phrase(
	searchable: &[(usize, String)],
	phrase: &[impl AsRef<str>],
	roles: &mut [String],
	allow_structural_override: bool,
	) {
	if phrase.is_empty() \|\| phrase.len() > searchable.len() {
	return;
	}
	for window in searchable.windows(phrase.len()) {
	if window
	.iter()
	.zip(phrase.iter())
	.all(\|((_, text), expected)\| text.eq_ignore_ascii_case(expected.as_ref()))
	{
	for (group_index, _) in window {
	if roles.get(*group_index).is_some_and(\|role\| role == "GROUP") {
	let is_known_group = RUNTIME_WHITELISTS.get().is_some_and(\|whitelists\| {
	whitelists
	.group_names
	.contains(&normalize_whitelist_name(&window[0].1))
	});
	if is_known_group {
	continue;
	}
	}
	if !allow_structural_override
	&& roles.get(*group_index).is_some_and(\|role\| {
	matches!(
	role.as_str(),
	"EPISODE"
	\| "EPISODE_VERSION"
	\| "EPISODE_RANGE"
	\| "SEASON"
	\| "SOURCE"
	\| "RESOLUTION"
	)
	})
	{
	continue;
	}
	{
	roles[*group_index] = "TITLE".to_string();
	}
	}
	}
	}
	}

	/// Refines the suggested `roles` of one tokenized filename using the
	/// surrounding groups as context and returns the adjusted role list.
	///
	/// The function works on `output`, a copy of `roles`; `roles` itself is
	/// never modified. Many conditions read the original `roles` on purpose
	/// while others read the already-adjusted `output`, so the order of the
	/// rules below matters.
	///
	/// `groups` is indexed with positions taken from `roles`, so the two are
	/// assumed to have the same length — NOTE(review): confirm against callers.
	fn adjust_contextual_roles(tokens: &[String], groups: &[Group], roles: &[String]) -> Vec<String> {
	let mut output = roles.to_vec();
	let ep_markers = ["EP", "E", "Episode", "ep", "episode"];
	let roman = ["I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX"];
	// Whitelisted group names and known title phrases are applied first.
	apply_known_title_phrases(tokens, groups, &mut output);
	// Leading GROUP that is not a whitelisted group name: if a later TITLE
	// group looks like a release group, swap the two roles.
	if output.first().is_some_and(\|role\| role == "GROUP") {
	let first_text = group_text(tokens, &groups[0]);
	let first_is_known_group = RUNTIME_WHITELISTS.get().is_some_and(\|whitelists\| {
	whitelists
	.group_names
	.contains(&normalize_whitelist_name(&first_text))
	});
	if !first_is_known_group {
	if let Some(groupish_index) = (1..groups.len()).find(\|&index\| {
	output[index] == "TITLE"
	&& looks_like_release_group(&group_text(tokens, &groups[index]))
	}) {
	output[0] = "TITLE".to_string();
	output[groupish_index] = "GROUP".to_string();
	}
	}
	}
	// Leading year range that was suggested as an episode: TEXT groups before
	// the first later episode/season/special role become TITLE, except the
	// markers TV/OVA/OAD/SP.
	if roles
	.first()
	.is_some_and(\|role\| role.starts_with("EPISODE"))
	&& YEAR_RANGE_RE.is_match(&group_text(tokens, &groups[0]))
	{
	let first_real_structural = (1..roles.len())
	.find(\|&index\| {
	roles[index].starts_with("EPISODE")
	\|\| matches!(roles[index].as_str(), "SEASON" \| "SPECIAL")
	})
	.unwrap_or(roles.len());
	for index in 1..first_real_structural {
	if groups[index].class_name == "TEXT"
	&& !matches!(
	group_text(tokens, &groups[index])
	.to_ascii_uppercase()
	.as_str(),
	"TV" \| "OVA" \| "OAD" \| "SP"
	)
	{
	output[index] = "TITLE".to_string();
	}
	}
	}
	// No TITLE yet and the name starts with an episode-like group: the first
	// run of at least two unlabelled TEXT groups (separators skipped) becomes
	// the title. An all-digit leading group joins the title when a structural
	// role follows the run.
	if !output.iter().any(\|role\| role == "TITLE")
	&& roles
	.first()
	.is_some_and(\|role\| role.starts_with("EPISODE"))
	{
	let mut title_run = Vec::new();
	for index in 1..roles.len() {
	if groups[index].class_name == "TEXT" && output[index] == "O" {
	title_run.push(index);
	continue;
	}
	if groups[index].class_name == "SEP" {
	continue;
	}
	if !title_run.is_empty() {
	break;
	}
	}
	if title_run.len() >= 2 {
	let last_title_index = *title_run.last().unwrap();
	let later_structural = roles[last_title_index + 1..].iter().any(\|role\| {
	role.starts_with("EPISODE") \|\| matches!(role.as_str(), "SEASON" \| "SPECIAL")
	});
	if group_text(tokens, &groups[0])
	.chars()
	.all(\|ch\| ch.is_ascii_digit())
	&& later_structural
	{
	output[0] = "TITLE".to_string();
	}
	for index in title_run {
	output[index] = "TITLE".to_string();
	}
	}
	}
	// All-digit leading "episode" followed by a TITLE and a later structural
	// role: the leading number is treated as part of the title.
	if roles
	.first()
	.is_some_and(\|role\| role.starts_with("EPISODE"))
	&& group_text(tokens, &groups[0])
	.chars()
	.all(\|ch\| ch.is_ascii_digit())
	{
	if let Some(first_title) = output.iter().position(\|role\| role == "TITLE") {
	let later_structural = roles[first_title + 1..].iter().any(\|role\| {
	role.starts_with("EPISODE") \|\| matches!(role.as_str(), "SEASON" \| "SPECIAL")
	});
	if later_structural {
	output[0] = "TITLE".to_string();
	}
	}
	}
	// Per-group rules. Most branches `continue` once they have decided the
	// role for `index`, so earlier rules take precedence over later ones.
	for index in 0..roles.len() {
	let text = group_text(tokens, &groups[index]);
	// Unlabelled SxE-class groups count as episodes.
	if output[index] == "O" && groups[index].class_name.contains("SXE") {
	output[index] = "EPISODE".to_string();
	}
	// "TV" directly followed (ignoring separators) by "Spot" is a special.
	if text.eq_ignore_ascii_case("TV") {
	let next_text = (index + 1..roles.len())
	.find(\|&cursor\| groups[cursor].class_name != "SEP")
	.map(\|cursor\| (cursor, group_text(tokens, &groups[cursor])));
	if let Some((spot_index, spot_text)) = next_text {
	if spot_text.eq_ignore_ascii_case("Spot") {
	output[index] = "SPECIAL".to_string();
	output[spot_index] = "SPECIAL".to_string();
	continue;
	}
	}
	}
	// A year range is not an episode.
	if roles[index].starts_with("EPISODE") && YEAR_RANGE_RE.is_match(&text) {
	output[index] = "O".to_string();
	continue;
	}
	// "<title> x <number>" with no later episode: the number (and the next
	// TEXT group, if any) belongs to the title.
	// NOTE(review): in the `find` below, `!= "SEP"` is implied by `== "TEXT"`.
	if roles[index].starts_with("EPISODE")
	&& index >= 2
	&& matches!(
	group_text(tokens, &groups[index - 1]).as_str(),
	"×" \| "x" \| "X"
	)
	&& output[index - 2] == "TITLE"
	&& !roles[index + 1..]
	.iter()
	.any(\|role\| role.starts_with("EPISODE"))
	{
	output[index] = "TITLE".to_string();
	if let Some(next_text_index) = (index + 1..roles.len()).find(\|&cursor\| {
	groups[cursor].class_name != "SEP" && groups[cursor].class_name == "TEXT"
	}) {
	output[next_text_index] = "TITLE".to_string();
	}
	continue;
	}
	// First episode-like group right after the word "Movie": part of the title.
	if roles[index].starts_with("EPISODE")
	&& !output[..index]
	.iter()
	.any(\|role\| role.starts_with("EPISODE"))
	&& group_text(
	tokens,
	&groups[(0..index)
	.rev()
	.find(\|&cursor\| groups[cursor].class_name != "SEP")
	.unwrap_or(index)],
	)
	.eq_ignore_ascii_case("Movie")
	{
	output[index] = "TITLE".to_string();
	continue;
	}
	// Language-pair text (e.g. "中日") followed by a SOURCE group containing
	// '语' is source information, not title.
	if output[index] == "TITLE" && matches!(text.as_str(), "中日" \| "日中" \| "英日" \| "日英")
	{
	let next_source_lang = (index + 1..roles.len())
	.find(\|&cursor\| groups[cursor].class_name != "SEP")
	.is_some_and(\|cursor\| {
	output[cursor] == "SOURCE" && group_text(tokens, &groups[cursor]).contains('语')
	});
	if next_source_lang {
	output[index] = "SOURCE".to_string();
	continue;
	}
	}
	// Numbers that belong to a known title (see `short_number_title_exception`).
	if roles[index].starts_with("EPISODE")
	&& index >= 1
	&& output[..index].iter().any(\|role\| role == "TITLE")
	&& text.chars().all(\|ch\| ch.is_ascii_digit())
	&& short_number_title_exception(
	&title_context_before(tokens, groups, &output, index),
	&text,
	)
	{
	output[index] = "TITLE".to_string();
	continue;
	}
	// One- or two-digit number directly after a TITLE group, with another
	// episode later and no quote following: a season number, unless it is a
	// known title exception.
	if roles[index].starts_with("EPISODE")
	&& index >= 1
	&& output[index - 1] == "TITLE"
	&& groups[index - 1].class_name != "SEP"
	&& text.chars().all(\|ch\| ch.is_ascii_digit())
	&& text.len() <= 2
	&& roles[index + 1..]
	.iter()
	.any(\|role\| role.starts_with("EPISODE"))
	&& !group_followed_by_quote(tokens, groups, index)
	{
	let context = title_context_before(tokens, groups, &output, index);
	output[index] = if short_number_title_exception(&context, &text) {
	"TITLE"
	} else {
	"SEASON"
	}
	.to_string();
	continue;
	}
	// Rules for episode-like groups that have at least two predecessors.
	if roles[index].starts_with("EPISODE") && (2..roles.len()).contains(&index) {
	let previous_text = group_text(tokens, &groups[index - 2]);
	let next_special = output[index + 1..roles.len().min(index + 4)]
	.iter()
	.any(\|role\| role == "SPECIAL");
	let next_episode = roles[index + 1..]
	.iter()
	.any(\|role\| role.starts_with("EPISODE"));
	// "Vol <n>" / "Volume <n>": title when a TEXT group and a later episode
	// follow, otherwise a special.
	if groups[index - 1].class_name == "SEP"
	&& matches!(
	previous_text.to_ascii_lowercase().as_str(),
	"vol" \| "volume"
	)
	{
	let next_text_before_episode = (index + 1..roles.len())
	.find(\|&cursor\| groups[cursor].class_name != "SEP")
	.is_some_and(\|cursor\| {
	groups[cursor].class_name == "TEXT"
	&& roles[cursor + 1..]
	.iter()
	.any(\|role\| role.starts_with("EPISODE"))
	});
	if next_text_before_episode {
	output[index - 2] = "TITLE".to_string();
	output[index] = "TITLE".to_string();
	continue;
	}
	output[index - 2] = "SPECIAL".to_string();
	output[index] = "SPECIAL".to_string();
	continue;
	}
	// NOTE(review): this condition repeats the "number directly after a
	// TITLE group" rule applied just before this block, which already
	// `continue`s when it holds, so this branch appears to be unreachable.
	if index >= 1
	&& output[index - 1] == "TITLE"
	&& groups[index - 1].class_name != "SEP"
	&& text.chars().all(\|ch\| ch.is_ascii_digit())
	&& text.len() <= 2
	&& roles[index + 1..]
	.iter()
	.any(\|role\| role.starts_with("EPISODE"))
	&& !group_followed_by_quote(tokens, groups, index)
	{
	let context = title_context_before(tokens, groups, &output, index);
	output[index] = if short_number_title_exception(&context, &text) {
	"TITLE"
	} else {
	"SEASON"
	}
	.to_string();
	continue;
	}
	// Numeric title prefix after a GROUP, before any TITLE, with an episode
	// later on: the number starts the title.
	if !output[..index].iter().any(\|role\| role == "TITLE")
	&& NUMERIC_TITLE_PREFIX_RE.is_match(&text)
	&& output[..index].iter().any(\|role\| role == "GROUP")
	&& roles[index + 1..]
	.iter()
	.any(\|role\| role.starts_with("EPISODE"))
	{
	output[index] = "TITLE".to_string();
	continue;
	}
	// Numeric title prefix followed by a separator and alphabetic TEXT, with
	// an episode after that: both groups form the title.
	if !output[..index].iter().any(\|role\| role == "TITLE")
	&& NUMERIC_TITLE_PREFIX_RE.is_match(&text)
	&& index + 2 < roles.len()
	&& groups[index + 1].class_name == "SEP"
	&& groups[index + 2].class_name == "TEXT"
	&& group_text(tokens, &groups[index + 2])
	.chars()
	.any(\|ch\| ch.is_alphabetic())
	&& roles[index + 3..]
	.iter()
	.any(\|role\| role.starts_with("EPISODE"))
	{
	output[index] = "TITLE".to_string();
	output[index + 2] = "TITLE".to_string();
	continue;
	}
	// "<title> <sep> <1-2 digit number>" not followed by the word "episode"
	// or by a quote: SEASON when an episode follows and the number is not a
	// title exception, otherwise TITLE.
	if output[index - 2] == "TITLE"
	&& groups[index - 1].class_name == "SEP"
	&& previous_text.len() <= 48
	&& previous_text.chars().any(\|ch\| ch.is_alphabetic())
	&& text.chars().all(\|ch\| ch.is_ascii_digit())
	&& text.len() <= 2
	&& !(index + 2 < roles.len()
	&& groups[index + 1].class_name == "SEP"
	&& group_text(tokens, &groups[index + 2]).eq_ignore_ascii_case("episode"))
	&& !(index + 1 < roles.len()
	&& groups[index + 1].class_name == "SEP"
	&& group_text(tokens, &groups[index + 1])
	.chars()
	.any(\|ch\| matches!(ch, '「' \| '｢' \| '"' \| '\'')))
	&& !group_followed_by_quote(tokens, groups, index)
	&& (next_episode
	\|\| (next_special
	&& (text.parse::<u16>().is_ok_and(\|value\| value >= 100)
	\|\| (previous_text.len() <= 4
	&& previous_text.is_ascii()
	&& previous_text.chars().all(\|ch\| ch.is_ascii_alphabetic())))))
	{
	output[index] = if next_episode
	&& !short_number_title_exception(
	&title_context_before(tokens, groups, &output, index),
	&text,
	) {
	"SEASON"
	} else {
	"TITLE"
	}
	.to_string();
	continue;
	}
	}
	// First episode-like number directly after a SPECIAL (e.g. a numbered
	// extra) is part of the special, unless a TITLE/SEASON role follows.
	if roles[index].starts_with("EPISODE")
	&& (text.chars().all(\|ch\| ch.is_ascii_digit())
	\|\| matches!(classify_atom(&text).as_str(), "EPISODE" \| "EPISODE_VERSION"))
	&& output[..index].iter().any(\|role\| role == "SPECIAL")
	&& !output[..index]
	.iter()
	.any(\|role\| role.starts_with("EPISODE"))
	{
	let previous_structural = (0..index)
	.rev()
	.find(\|&cursor\| groups[cursor].class_name != "SEP")
	.and_then(\|cursor\| output.get(cursor))
	.map(String::as_str);
	let next_real = (index + 1..roles.len())
	.find(\|&cursor\| groups[cursor].class_name != "SEP")
	.and_then(\|cursor\| roles.get(cursor))
	.map(String::as_str);
	if matches!(previous_structural, Some("SPECIAL"))
	&& !matches!(next_real, Some("TITLE" \| "SEASON"))
	{
	output[index] = "SPECIAL".to_string();
	continue;
	}
	}
	// Bare resolution number mistaken for an episode: relabel when the group
	// two positions back contains a digit or one of `. - _ ．`, or when the
	// next non-separator role is SOURCE/RESOLUTION.
	if roles[index].starts_with("EPISODE")
	&& BARE_RESOLUTION_RE.is_match(&text)
	&& index >= 2
	&& groups[index - 1].class_name == "SEP"
	{
	let previous_text = group_text(tokens, &groups[index - 2]);
	let next_sourceish = (index + 1..roles.len())
	.find(\|&cursor\| groups[cursor].class_name != "SEP")
	.is_some_and(\|cursor\| matches!(roles[cursor].as_str(), "SOURCE" \| "RESOLUTION"));
	if previous_text
	.chars()
	.any(\|ch\| ch.is_ascii_digit() \|\| matches!(ch, '.' \| '-' \| '_' \| '．'))
	\|\| next_sourceish
	{
	output[index] = "RESOLUTION".to_string();
	continue;
	}
	}
	// "<title> <n> Episode <text> <text> ... <episode>": the number and the
	// following run of two or more TEXT groups belong to the title.
	if roles[index].starts_with("EPISODE")
	&& index >= 2
	&& output[..index].iter().any(\|role\| role == "TITLE")
	&& group_text(tokens, &groups[index])
	.chars()
	.all(\|ch\| ch.is_ascii_digit())
	{
	let next_episode_word = index + 2 < roles.len()
	&& groups[index + 1].class_name == "SEP"
	&& group_text(tokens, &groups[index + 2]).eq_ignore_ascii_case("episode");
	if next_episode_word {
	let mut run = Vec::new();
	let mut cursor = index + 2;
	while cursor < roles.len() {
	if groups[cursor].class_name == "SEP" {
	cursor += 1;
	continue;
	}
	if groups[cursor].class_name == "TEXT" && !roles[cursor].starts_with("EPISODE")
	{
	run.push(cursor);
	cursor += 1;
	continue;
	}
	break;
	}
	let later_episode = roles[cursor..]
	.iter()
	.any(\|role\| role.starts_with("EPISODE"));
	if run.len() >= 2 && later_episode {
	output[index] = "TITLE".to_string();
	for item in run {
	output[item] = "TITLE".to_string();
	}
	continue;
	}
	}
	}
	// Suggested titles that are really bonus-material phrases.
	if roles[index] == "TITLE" && is_special_title_phrase(&text) {
	output[index] = "SPECIAL".to_string();
	continue;
	}
	// A stand-alone "TV"/"TV版" marker is dropped when another TITLE exists.
	if roles[index] == "TITLE"
	&& matches!(text.to_ascii_uppercase().as_str(), "TV" \| "TV版")
	&& output
	.iter()
	.enumerate()
	.any(\|(other, role)\| other != index && role == "TITLE")
	{
	output[index] = "O".to_string();
	continue;
	}
	// Same for the "TV anime" markers.
	if roles[index] == "TITLE"
	&& matches!(text.as_str(), "TVアニメ" \| "テレビアニメ")
	&& output
	.iter()
	.enumerate()
	.any(\|(other, role)\| other != index && role == "TITLE")
	{
	output[index] = "O".to_string();
	continue;
	}
	// "Creditless" ahead of a SPECIAL belongs to that special.
	if output[index] == "TITLE" && text.eq_ignore_ascii_case("Creditless") {
	let later_special = output[index + 1..].iter().any(\|role\| role == "SPECIAL");
	if later_special {
	output[index] = "SPECIAL".to_string();
	continue;
	}
	}
	// Lone episode counter characters are not title text.
	if roles[index] == "TITLE" && matches!(text.as_str(), "第" \| "話" \| "话" \| "回" \| "集")
	{
	output[index] = "O".to_string();
	continue;
	}
	// Unlabelled alphabetic TEXT ahead of an episode: becomes TITLE either
	// right after a SPECIAL (when no TITLE exists yet) or when it continues
	// an earlier TITLE with no episode in between.
	if output[index] == "O"
	&& groups[index].class_name == "TEXT"
	&& roles[index + 1..]
	.iter()
	.any(\|role\| role.starts_with("EPISODE"))
	&& text.chars().any(\|ch\| ch.is_alphabetic())
	&& !ep_markers.contains(&text.as_str())
	{
	if !output[..index].iter().any(\|role\| role == "TITLE") {
	let previous_structural = (0..index)
	.rev()
	.find(\|&cursor\| groups[cursor].class_name != "SEP")
	.and_then(\|cursor\| output.get(cursor))
	.map(String::as_str);
	if matches!(previous_structural, Some("SPECIAL")) {
	output[index] = "TITLE".to_string();
	continue;
	}
	}
	if let Some(last_title) = output[..index].iter().rposition(\|role\| role == "TITLE") {
	let episode_since_title = output[last_title + 1..index]
	.iter()
	.any(\|role\| role.starts_with("EPISODE"));
	if !episode_since_title {
	output[index] = "TITLE".to_string();
	continue;
	}
	}
	}
	// "Season <n>" / "Saison <n>": when no TITLE precedes it, the word is
	// dropped and the number becomes SEASON; otherwise nothing changes here.
	if roles[index] == "TITLE"
	&& matches!(text.to_ascii_lowercase().as_str(), "season" \| "saison")
	&& index + 2 < roles.len()
	&& groups[index + 1].class_name == "SEP"
	&& roles[index + 2].starts_with("EPISODE")
	{
	if !output[..index].iter().any(\|role\| role == "TITLE") {
	output[index] = "O".to_string();
	output[index + 2] = "SEASON".to_string();
	}
	continue;
	}
	// Upper-case Roman numeral (I..IX) between a title and a later
	// episode/special is a season number.
	if roles[index] == "TITLE"
	&& text == text.to_ascii_uppercase()
	&& roman.contains(&text.as_str())
	{
	let previous_title = output[..index].iter().any(\|role\| role == "TITLE");
	let next_structural = roles[index + 1..]
	.iter()
	.any(\|role\| role.starts_with("EPISODE") \|\| role == "SPECIAL");
	if previous_title && next_structural {
	output[index] = "SEASON".to_string();
	continue;
	}
	}
	// "<n> EP <m>": the first number is title text and the marker is dropped.
	// This rule does not `continue`, so the rules below still run.
	if roles[index].starts_with("EPISODE") && index + 4 < roles.len() {
	if groups[index + 1].class_name == "SEP"
	&& ep_markers.contains(&group_text(tokens, &groups[index + 2]).as_str())
	&& groups[index + 3].class_name == "SEP"
	&& roles[index + 4].starts_with("EPISODE")
	{
	output[index] = "TITLE".to_string();
	output[index + 2] = "O".to_string();
	}
	}
	// Episode with no TITLE after it: the first run of two or more TEXT
	// groups that follows (skipping separators and other leading groups)
	// becomes TITLE.
	if roles[index].starts_with("EPISODE")
	&& !output[index + 1..].iter().any(\|role\| role == "TITLE")
	{
	let mut run = Vec::new();
	let mut cursor = index + 1;
	while cursor < roles.len() {
	if groups[cursor].class_name == "SEP" {
	cursor += 1;
	continue;
	}
	if groups[cursor].class_name == "TEXT"
	&& !matches!(
	roles[cursor].as_str(),
	"SOURCE" \| "RESOLUTION" \| "SEASON" \| "SPECIAL"
	)
	{
	run.push(cursor);
	cursor += 1;
	continue;
	}
	if !run.is_empty() {
	break;
	}
	cursor += 1;
	}
	if run.len() >= 2 {
	for item in run {
	output[item] = "TITLE".to_string();
	}
	}
	}
	// Final neighbour-based corrections for episode-like groups.
	if roles[index].starts_with("EPISODE") {
	let previous_text = if index >= 1 {
	group_text(tokens, &groups[index - 1])
	} else {
	String::new()
	};
	let next_text = if index + 1 < roles.len() {
	group_text(tokens, &groups[index + 1])
	} else {
	String::new()
	};
	let previous_real_text = (0..index)
	.rev()
	.find(\|&cursor\| groups[cursor].class_name != "SEP")
	.map(\|cursor\| group_text(tokens, &groups[cursor]))
	.unwrap_or_default();
	// "第<n>期" is a season.
	if previous_text.ends_with('第') && next_text.starts_with('期') {
	output[index] = "SEASON".to_string();
	continue;
	}
	// Numbers after "lesson" / "part" / "no" are not episodes.
	if matches!(
	previous_real_text.to_ascii_lowercase().as_str(),
	"lesson" \| "part" \| "no"
	) {
	output[index] = "O".to_string();
	continue;
	}
	// Numbers after a preview ("予告") or "Spot" belong to the special.
	if previous_real_text.contains("予告")
	\|\| previous_real_text.eq_ignore_ascii_case("Spot")
	{
	output[index] = "SPECIAL".to_string();
	continue;
	}
	// First number (up to three digits) after a TITLE group whose text is a
	// special phrase is that special's number.
	if output[..index].iter().any(\|role\| role == "TITLE")
	&& (output[..index].iter().enumerate().any(\|(cursor, role)\| {
	role == "TITLE" && is_special_title_phrase(&group_text(tokens, &groups[cursor]))
	}))
	&& !output[..index]
	.iter()
	.any(\|role\| role.starts_with("EPISODE"))
	&& text.chars().all(\|ch\| ch.is_ascii_digit())
	&& text.len() <= 3
	{
	output[index] = "SPECIAL".to_string();
	continue;
	}
	// Numbers adjacent to time/date words (点, 年, 月, 日, 秒, ...) are not
	// episodes.
	if previous_text.contains('点')
	\|\| previous_text.contains('點')
	\|\| previous_text.contains("晚上")
	\|\| previous_text.contains("上午")
	\|\| previous_text.contains("下午")
	\|\| previous_text.contains('年')
	\|\| previous_text.contains('月')
	\|\| previous_text.contains('秒')
	\|\| next_text.contains('点')
	\|\| next_text.contains('點')
	\|\| next_text.contains('半')
	\|\| next_text.contains('月')
	\|\| next_text.contains('日')
	\|\| next_text.contains('秒')
	{
	output[index] = "O".to_string();
	}
	}
	}
	output
	}

	/// Collects half-open `(start, end)` group ranges that form title candidates.
	///
	/// A candidate starts at a group whose role satisfies `is_title_role` and is
	/// extended while either
	/// * the next group is also a title role (unless both neighbours are
	///   `BRACKET_TEXT`, which always starts a new candidate), or
	/// * the next group is an `"O"`-role `SEP` immediately followed by a title role.
	fn title_candidates(groups: &[Group], roles: &[String]) -> Vec<(usize, usize)> {
	    let total = roles.len();
	    let both_bracketed = |left: usize, right: usize| {
	        groups[left].class_name == "BRACKET_TEXT" && groups[right].class_name == "BRACKET_TEXT"
	    };
	    let mut spans = Vec::new();
	    let mut cursor = 0;
	    while cursor < total {
	        if !is_title_role(&roles[cursor]) {
	            cursor += 1;
	            continue;
	        }
	        let span_start = cursor;
	        cursor += 1;
	        loop {
	            // Directly adjacent title group: absorb it unless it is a second bracket.
	            let extends_directly = cursor < total
	                && is_title_role(&roles[cursor])
	                && !both_bracketed(cursor - 1, cursor);
	            if extends_directly {
	                cursor += 1;
	                continue;
	            }
	            // A single separator between two title groups is bridged.
	            let bridges_separator = cursor + 1 < total
	                && roles[cursor] == "O"
	                && groups[cursor].class_name == "SEP"
	                && is_title_role(&roles[cursor + 1]);
	            if !bridges_separator {
	                break;
	            }
	            cursor += 2;
	        }
	        spans.push((span_start, cursor));
	    }
	    spans
	}

	fn enforce_single_title_candidate(
	tokens: &[String],
	groups: &[Group],
	roles: &[String],
	) -> (Vec<String>, Vec<String>) {
	let candidates = title_candidates(groups, roles);
	if candidates.len() <= 1 {
	return (roles.to_vec(), Vec::new());
	}
	let first_anchor = roles
	.iter()
	.position(\|role\| {
	role.starts_with("EPISODE")
	\|\| matches!(
	role.as_str(),
	"SEASON" \| "PATH_SEASON" \| "SPECIAL" \| "SOURCE" \| "RESOLUTION"
	)
	})
	.unwrap_or(roles.len());
	let before_anchor: Vec<(usize, usize)> = candidates
	.iter()
	.copied()
	.filter(\|(_, end)\| *end <= first_anchor)
	.collect();
	let before_anchor_only_path_titles = !before_anchor.is_empty()
	&& before_anchor.iter().all(\|(start, end)\| {
	(start..end)
	.all(\|index\| !is_title_role(&roles[index]) \|\| is_path_title_role(&roles[index]))
	});
	let selected_pool = if before_anchor.is_empty() \|\| before_anchor_only_path_titles {
	&candidates
	} else {
	&before_anchor
	};
	let mut selected_by_kind: HashMap<String, ((usize, usize), (isize, usize, usize))> =
	HashMap::new();
	for (start, end) in selected_pool.iter().copied() {
	let score = (
	title_candidate_score(tokens, groups, start, end),
	end,
	end - start,
	);
	let key = title_candidate_key(tokens, groups, roles, start, end);
	match selected_by_kind.get(&key) {
	Some((_, best_score)) if *best_score >= score => {}
	_ => {
	selected_by_kind.insert(key, ((start, end), score));
	}
	}
	}
	let selected: HashSet<(usize, usize)> =
	selected_by_kind.values().map(\|(range, _)\| *range).collect();
	let mut output = roles.to_vec();
	let mut dropped = Vec::new();
	for (start, end) in candidates {
	if selected.contains(&(start, end)) {
	continue;
	}
	for index in start..end {
	if is_title_role(&output[index]) {
	output[index] = "O".to_string();
	dropped.push(index.to_string());
	}
	}
	}
	(output, dropped)
	}

	fn title_candidate_key(
	tokens: &[String],
	groups: &[Group],
	roles: &[String],
	start: usize,
	end: usize,
	) -> String {
	let mut entities: Vec<String> = (start..end)
	.filter_map(\|index\| title_entity_from_role(&roles[index]).map(str::to_string))
	.filter(\|entity\| entity != "TITLE")
	.collect();
	entities.sort();
	entities.dedup();
	if entities.is_empty() {
	let text = candidate_text(tokens, groups, start, end);
	return title_role_for_text(&text, false);
	}
	entities.join("+")
	}

	/// Scores a title candidate covering groups `start..end`; higher is better.
	///
	/// The base score is the number of alphanumeric characters in the
	/// concatenated text of the candidate's `TEXT` / `BRACKET_TEXT` groups.
	/// Penalties: -500 when `VERSIONISH_TITLE_RE` matches, -500 for generic
	/// category words, -700 when the text looks like release noise. A candidate
	/// with no text at all scores -1000.
	fn title_candidate_score(tokens: &[String], groups: &[Group], start: usize, end: usize) -> isize {
	    let text: String = (start..end)
	        .filter(|&index| roles_candidate_text_group(&groups[index]))
	        .map(|index| group_text(tokens, &groups[index]))
	        .collect();
	    let cleaned = text.trim();
	    if cleaned.is_empty() {
	        return -1000;
	    }
	    // A character count of an in-memory string always fits in `isize`.
	    let mut score = cleaned.chars().filter(|ch| ch.is_alphanumeric()).count() as isize;
	    if VERSIONISH_TITLE_RE.is_match(cleaned) {
	        score -= 500;
	    }
	    if matches!(
	        cleaned.to_ascii_lowercase().as_str(),
	        "国漫" | "國漫" | "anime" | "movie" | "movies"
	    ) {
	        score -= 500;
	    }
	    if title_noise_score_penalty(cleaned) {
	        score -= 700;
	    }
	    score
	}

	/// Reports whether `text` contains a release-noise marker (rip/source words).
	///
	/// Before matching, `_`, `-` and `.` are turned into spaces, runs of
	/// whitespace are collapsed to a single space and ASCII letters are
	/// lower-cased, so `WEB-DL`, `web.dl` and `Web  DL` all match `"web dl"`.
	fn title_noise_score_penalty(text: &str) -> bool {
	    const NOISE_MARKERS: [&str; 6] = ["bdrip", "webrip", "web dl", "bluray", "full hd", "hdtv"];
	    let normalized = text
	        .replace(['_', '-', '.'], " ")
	        .split_whitespace()
	        .collect::<Vec<_>>()
	        .join(" ")
	        .to_ascii_lowercase();
	    NOISE_MARKERS
	        .iter()
	        .any(|&marker| normalized.contains(marker))
	}

	fn roles_candidate_text_group(group: &Group) -> bool {
	matches!(group.class_name.as_str(), "TEXT" \| "BRACKET_TEXT")
	}

	fn normalize_generated_tokens(tokens: &[String], labels: &[String]) -> (Vec<String>, Vec<String>) {
	let mut output_tokens = Vec::new();
	let mut output_labels = Vec::new();
	for (token, label) in tokens.iter().zip(labels.iter()) {
	for piece in split_generated_token(token) {
	output_labels.push(if label == "O" \|\| is_standalone_separator(&piece) {
	"O".to_string()
	} else {
	label.clone()
	});
	output_tokens.push(piece);
	}
	}
	(output_tokens, output_labels)
	}

	fn normalize_title_token(token: &str) -> (Vec<String>, Vec<String>) {
	let pieces = split_generated_token(token);
	let mut output_pieces = Vec::new();
	let mut labels = Vec::new();
	for piece in pieces {
	if is_standalone_separator(&piece) {
	output_pieces.push(piece);
	labels.push("O".to_string());
	continue;
	}
	if let Some((pieces, piece_labels)) = split_sxe_token(&piece) {
	output_pieces.extend(pieces);
	labels.extend(piece_labels);
	continue;
	}
	if EPISODE_VERSION_RE.is_match(&compact_for_classify(&piece)) {
	output_pieces.push(piece);
	labels.push("B-EPISODE".to_string());
	continue;
	}
	if CJK_SEASON_TOKEN_RE.is_match(&piece) \|\| SEASON_RE.is_match(&piece) {
	output_pieces.push(piece);
	labels.push("B-SEASON".to_string());
	continue;
	}
	if EPISODE_CJK_RE.is_match(&piece) {
	output_pieces.push(piece);
	labels.push("B-EPISODE".to_string());
	continue;
	}
	if let Some(caps) = ASCII_SEASON_SUFFIX_RE.captures(&piece) {
	let before = caps.get(1).map(\|m\| m.as_str()).unwrap_or_default();
	let season = caps.get(2).map(\|m\| m.as_str()).unwrap_or_default();
	if !before.is_empty() {
	output_pieces.push(before.to_string());
	labels.push("B-TITLE".to_string());
	}
	output_pieces.push(season.to_string());
	labels.push("B-SEASON".to_string());
	continue;
	}
	if let Some(caps) = CJK_SEASON_EMBEDDED_RE.captures(&piece) {
	let before = caps.get(1).map(\|m\| m.as_str()).unwrap_or_default();
	let season = caps.get(2).map(\|m\| m.as_str()).unwrap_or_default();
	let after = caps.get(3).map(\|m\| m.as_str()).unwrap_or_default();
	if !before.is_empty() {
	output_pieces.push(before.to_string());
	labels.push("B-TITLE".to_string());
	}
	output_pieces.push(season.to_string());
	labels.push("B-SEASON".to_string());
	if !after.is_empty() {
	output_pieces.push(after.to_string());
	labels.push("O".to_string());
	}
	continue;
	}
	if let Some(caps) = CJK_EPISODE_EMBEDDED_RE.captures(&piece) {
	let before = caps.get(1).map(\|m\| m.as_str()).unwrap_or_default();
	let episode = caps.get(2).map(\|m\| m.as_str()).unwrap_or_default();
	let after = caps.get(3).map(\|m\| m.as_str()).unwrap_or_default();
	if !before.is_empty() {
	output_pieces.push(before.to_string());
	labels.push("B-TITLE".to_string());
	}
	output_pieces.push(episode.to_string());
	labels.push("B-EPISODE".to_string());
	if !after.is_empty() {
	output_pieces.push(after.to_string());
	labels.push("O".to_string());
	}
	continue;
	}
	if let Some(caps) = CJK_TITLE_TRAILING_EPISODE_RE.captures(&piece) {
	let before = caps.get(1).map(\|m\| m.as_str()).unwrap_or_default();
	let episode = caps.get(2).map(\|m\| m.as_str()).unwrap_or_default();
	if before.contains("銀河鉄道") && episode == "999" {
	output_pieces.push(before.to_string());
	labels.push("B-TITLE".to_string());
	output_pieces.push(episode.to_string());
	labels.push("B-TITLE".to_string());
	continue;
	}
	if !before.is_empty() {
	output_pieces.push(before.to_string());
	labels.push("B-TITLE".to_string());
	}
	output_pieces.push(episode.to_string());
	labels.push("B-EPISODE".to_string());
	continue;
	}
	output_pieces.push(piece);
	labels.push("B-TITLE".to_string());
	}
	(output_pieces, labels)
	}

	/// Splits `token` into maximal alphanumeric runs and single-character pieces.
	///
	/// Every non-alphanumeric character (whitespace included) becomes its own
	/// one-character piece; consecutive alphanumeric characters stay together.
	/// Concatenating the pieces reproduces `token`. An empty token yields no pieces.
	fn split_generated_token(token: &str) -> Vec<String> {
	    let mut pieces = Vec::new();
	    let mut word = String::new();
	    for ch in token.chars() {
	        // Whitespace is never alphanumeric, so this single test covers it too.
	        if ch.is_alphanumeric() {
	            word.push(ch);
	            continue;
	        }
	        if !word.is_empty() {
	            pieces.push(std::mem::take(&mut word));
	        }
	        pieces.push(ch.to_string());
	    }
	    if !word.is_empty() {
	        pieces.push(word);
	    }
	    pieces
	}

	/// Returns `true` when `token` is exactly one character and that character is
	/// not alphanumeric (whitespace and punctuation both qualify).
	fn is_standalone_separator(token: &str) -> bool {
	    let mut chars = token.chars();
	    matches!((chars.next(), chars.next()), (Some(ch), None) if !ch.is_alphanumeric())
	}

	/// Returns `true` when `token` is exactly one of the single-character Unicode
	/// Roman numerals from one (`Ⅰ`) to twelve (`Ⅻ`). ASCII spellings such as
	/// `"II"` do not match.
	fn is_unicode_roman_season(token: &str) -> bool {
	    matches!(
	        token,
	        "Ⅰ" | "Ⅱ" | "Ⅲ" | "Ⅳ" | "Ⅴ" | "Ⅵ" | "Ⅶ" | "Ⅷ" | "Ⅸ" | "Ⅹ" | "Ⅺ" | "Ⅻ"
	    )
	}

	fn split_cjk_title_lang_prefix(token: &str) -> Option<(Vec<String>, Vec<String>)> {
	let caps = CJK_TITLE_LANG_PREFIX_RE.captures(token)?;
	let title = caps.get(1)?.as_str();
	let lang = caps.get(2)?.as_str();
	let marker = caps.get(3).map(\|m\| m.as_str()).unwrap_or_default();
	if title.chars().count() < 2 {
	return None;
	}
	let mut pieces = vec![title.to_string(), lang.to_string()];
	let mut labels = vec!["B-TITLE".to_string(), "B-SOURCE".to_string()];
	if !marker.is_empty() {
	pieces.push(marker.to_string());
	labels.push("O".to_string());
	}
	Some((pieces, labels))
	}

	/// Projects group-level roles onto individual tokens, splitting tokens into
	/// finer pieces where a role-specific splitter applies.
	///
	/// For each group the role is taken from `roles` (defaulting to `"O"` when
	/// missing) and forced to `"O"` for `SEP`, `PATH` and `EMPTY` groups. Every
	/// token of the group is then expanded:
	/// * structural roles (`EPISODE*`, `SOURCE`, `RESOLUTION`, `SEASON`,
	///   `PATH_SEASON`) first try the whole-token splitters for their kind and
	///   otherwise fall back to `split_refined_token` plus per-piece labelling;
	/// * title roles drop bare counter characters (`第`, `話`, ...) to `O`, peel a
	///   trailing `第` off the title, or go through `normalize_title_token`;
	/// * everything else is labelled with `role_label(role)`.
	///
	/// Returns the flattened `(tokens, labels)` pair.
	fn project_refined_tokens(
	    tokens: &[String],
	    groups: &[Group],
	    roles: &[String],
	) -> (Vec<String>, Vec<String>) {
	    let mut output_tokens = Vec::new();
	    let mut output_labels = Vec::new();
	    for (group_index, group) in groups.iter().enumerate() {
	        let role = if matches!(group.class_name.as_str(), "SEP" | "PATH" | "EMPTY") {
	            "O"
	        } else {
	            roles.get(group_index).map(String::as_str).unwrap_or("O")
	        };
	        let episode_role = matches!(role, "EPISODE" | "EPISODE_VERSION" | "EPISODE_RANGE");
	        let structural_role = episode_role
	            || matches!(role, "SOURCE" | "RESOLUTION" | "SEASON" | "PATH_SEASON");
	        for &index in &group.indices {
	            let token = &tokens[index];
	            if structural_role {
	                if matches!(role, "SEASON" | "PATH_SEASON") {
	                    if let Some((pieces, labels)) = split_season_token(token) {
	                        output_tokens.extend(pieces);
	                        output_labels.extend(labels);
	                        continue;
	                    }
	                }
	                if role == "SOURCE" {
	                    if let Some((pieces, labels)) = split_cjk_title_lang_prefix(token) {
	                        output_tokens.extend(pieces);
	                        output_labels.extend(labels);
	                        continue;
	                    }
	                }
	                if episode_role {
	                    if let Some((pieces, labels)) = split_sxe_token(&strip_wrapper(token)) {
	                        output_tokens.extend(pieces);
	                        output_labels.extend(labels);
	                        continue;
	                    }
	                    if let Some((pieces, labels)) = split_episode_token(&strip_wrapper(token)) {
	                        output_tokens.extend(pieces);
	                        output_labels.extend(labels);
	                        continue;
	                    }
	                }
	                // No whole-token splitter applied: refine piece by piece.
	                for piece in split_refined_token(token) {
	                    if episode_role {
	                        if let Some((pieces, labels)) = split_season_token(&piece) {
	                            output_tokens.extend(pieces);
	                            output_labels.extend(labels);
	                            continue;
	                        }
	                        if let Some((pieces, labels)) = split_episode_token(&piece) {
	                            output_tokens.extend(pieces);
	                            output_labels.extend(labels);
	                            continue;
	                        }
	                    }
	                    let label = label_for_refined_piece(&piece, role, &group.class_name);
	                    let (pieces, labels) = normalize_generated_tokens(&[piece], &[label]);
	                    output_tokens.extend(pieces);
	                    output_labels.extend(labels);
	                }
	            } else {
	                // A bare counter character inside a title group is not title text.
	                if is_title_role(role)
	                    && matches!(token.as_str(), "第" | "話" | "话" | "回" | "集")
	                {
	                    output_tokens.push(token.clone());
	                    output_labels.push("O".to_string());
	                    continue;
	                }
	                // Title text glued to a trailing "第": keep the text, drop the marker to O.
	                if is_title_role(role) && token.ends_with('第') && token.chars().count() > 1 {
	                    let trimmed = token.trim_end_matches('第').to_string();
	                    let (pieces, labels) = normalize_generated_tokens(
	                        &[trimmed, "第".to_string()],
	                        &["B-TITLE".to_string(), "O".to_string()],
	                    );
	                    output_tokens.extend(pieces);
	                    output_labels.extend(labels);
	                    continue;
	                }
	                if is_title_role(role) {
	                    let (pieces, labels) = normalize_title_token(token);
	                    output_tokens.extend(pieces);
	                    output_labels.extend(labels);
	                    continue;
	                }
	                let (pieces, labels) =
	                    normalize_generated_tokens(&[token.clone()], &[role_label(role)]);
	                output_tokens.extend(pieces);
	                output_labels.extend(labels);
	            }
	        }
	    }
	    (output_tokens, output_labels)
	}

	fn smooth_title_spans(tokens: &[String], labels: &[String]) -> Vec<String> {
	let joiners = [
	" ", ".", "-", "_", "·", "・", "×", "／", "/", "'", "’", ":", "：", "!", "！", "?", "？",
	";", "；", ",", "，", "、", "。", "～", "~", "－", "+", "＋", "(", ")", "（", "）", "[",
	"]", "【", "】", "<", ">", "＜", "＞", "｢", "｣", "「", "」", "《", "》", "☆", "♪", "`",
	"@", "‐", "‑", "–", "—", "−", "$", "＄", "∽", "꞉", "♥",
	];
	let title_terminal_punctuation = ["!", "！", "?", "？"];
	let entity_joiners = [
	" ", ".", "-", "_", "·", "・", "×", "／", "/", "'", "’", ":", "：", "!", "！", "?", "？",
	";", "；", ",", "，", "、", "。", "～", "~", "－", "+", "＋", "(", ")", "（", "）", "[",
	"]", "【", "】", "<", ">", "＜", "＞", "｢", "｣", "「", "」", "《", "》", "☆", "♪", "`",
	"@", "&", "＆", "‐", "‑", "–", "—", "−", "$", "＄", "∽", "꞉", "♥",
	];
	let mut output = labels.to_vec();
	for (index, (token, label)) in tokens.iter().zip(labels.iter()).enumerate() {
	if label == "B-TITLE"
	&& token == "TV"
	&& index + 1 < tokens.len()
	&& tokens[index + 1] == "アニメ"
	&& output[index + 2..].iter().any(\|label\| label == "B-TITLE")
	{
	output[index] = "O".to_string();
	output[index + 1] = "O".to_string();
	continue;
	}
	if label == "B-TITLE"
	&& token == "アニメ"
	&& output[index + 1..].iter().any(\|label\| label == "B-TITLE")
	{
	output[index] = "O".to_string();
	continue;
	}
	if label == "B-TITLE" && token.eq_ignore_ascii_case("part") {
	let next_number = (index + 1..tokens.len()).find(\|&cursor\| {
	!joiners.contains(&tokens[cursor].as_str())
	&& !tokens[cursor].chars().all(char::is_whitespace)
	});
	let nearby_lupin = tokens[..index]
	.iter()
	.rev()
	.take(8)
	.any(\|item\| item.eq_ignore_ascii_case("lupin"))
	\|\| tokens[index + 1..]
	.iter()
	.take(12)
	.any(\|item\| item.eq_ignore_ascii_case("lupin"));
	if nearby_lupin
	&& next_number.is_some_and(\|cursor\| {
	tokens[cursor].chars().all(\|ch\| ch.is_ascii_digit())
	&& tokens[cursor].len() <= 2
	})
	{
	output[index] = "B-SEASON".to_string();
	continue;
	}
	}
	if label == "B-TITLE"
	&& token.chars().all(\|ch\| ch.is_ascii_digit())
	&& token.len() == 3
	&& index + 1 < tokens.len()
	&& matches!(tokens[index + 1].as_str(), "「" \| "｢" \| "\"" \| "'")
	{
	output[index] = "B-EPISODE".to_string();
	let mut cursor = index + 1;
	while cursor < tokens.len() {
	output[cursor] = "O".to_string();
	if matches!(tokens[cursor].as_str(), "」" \| "｣" \| "\"" \| "'") && cursor > index + 1
	{
	break;
	}
	cursor += 1;
	}
	continue;
	}
	if label == "B-TITLE" && matches!(token.as_str(), "中日" \| "日中" \| "英日" \| "日英")
	{
	let next_word = (index + 1..tokens.len())
	.find(\|&cursor\| tokens[cursor].chars().any(\|ch\| ch.is_alphanumeric()));
	if next_word
	.is_some_and(\|cursor\| labels[cursor] == "B-SOURCE" && tokens[cursor].contains('语'))
	{
	output[index] = "B-SOURCE".to_string();
	continue;
	}
	}
	if label == "B-TITLE" && matches!(token.to_ascii_lowercase().as_str(), "ep" \| "episode") {
	let next_episode = (index + 1..tokens.len()).find(\|&cursor\| {
	!joiners.contains(&tokens[cursor].as_str()) \|\| labels[cursor] != "O"
	});
	if next_episode.is_some_and(\|cursor\| labels[cursor] == "B-EPISODE") {
	output[index] = "O".to_string();
	continue;
	}
	}
	if label == "B-TITLE" && is_unicode_roman_season(token) {
	let previous_title_word = (0..index).rev().find(\|&cursor\| {
	output[cursor] == "B-TITLE"
	&& tokens[cursor]
	.chars()
	.any(\|ch\| ch.is_alphanumeric() \|\| ('\u{4e00}'..='\u{9fff}').contains(&ch))
	});
	let later_episode =
	(index + 1..tokens.len()).any(\|cursor\| labels[cursor] == "B-EPISODE");
	if previous_title_word.is_none() && later_episode {
	output[index] = "B-SEASON".to_string();
	continue;
	}
	let previous_word =
	previous_title_word.map(\|cursor\| tokens[cursor].to_ascii_lowercase());
	if previous_title_word.is_some() && !matches!(previous_word.as_deref(), Some("lupin")) {
	output[index] = "B-SEASON".to_string();
	continue;
	}
	}
	if label == "B-TITLE"
	&& (ORDINAL_SEASON_TOKEN_RE.is_match(token)
	\|\| WORD_ORDINAL_SEASON_TOKEN_RE.is_match(token))
	{
	let next_word = (index + 1..tokens.len()).find(\|&cursor\| {
	!joiners.contains(&tokens[cursor].as_str())
	&& tokens[cursor].chars().any(\|ch\| ch.is_alphabetic())
	});
	if next_word.is_some_and(\|cursor\| {
	labels[cursor] == "B-TITLE" && SEASON_WORD_RE.is_match(&tokens[cursor])
	}) {
	output[index] = "B-SEASON".to_string();
	if let Some(cursor) = next_word {
	for joiner_index in index + 1..cursor {
	if joiners.contains(&tokens[joiner_index].as_str()) {
	output[joiner_index] = "B-SEASON".to_string();
	}
	}
	output[cursor] = "B-SEASON".to_string();
	}
	continue;
	}
	}
	if label == "O"
	&& (EPISODE_CJK_RE.is_match(token)
	\|\| EPISODE_VALUE_RE.is_match(token)
	\|\| EPISODE_RANGE_RE.is_match(token))
	{
	output[index] = "B-EPISODE".to_string();
	continue;
	}
	if label == "O" && token.chars().all(\|ch\| ch.is_ascii_digit()) {
	let previous_non_space = (0..index)
	.rev()
	.find(\|&cursor\| !tokens[cursor].chars().all(char::is_whitespace));
	let next_non_space = (index + 1..tokens.len())
	.find(\|&cursor\| !tokens[cursor].chars().all(char::is_whitespace));
	if previous_non_space.is_some_and(\|cursor\| tokens[cursor] == "#") {
	output[index] = "B-EPISODE".to_string();
	if next_non_space.is_some_and(\|cursor\| matches!(tokens[cursor].as_str(), "-" \| "~"))
	{
	if let Some(separator) = next_non_space {
	output[separator] = "B-EPISODE".to_string();
	if let Some(right) = (separator + 1..tokens.len())
	.find(\|&cursor\| !tokens[cursor].chars().all(char::is_whitespace))
	{
	if tokens[right].chars().all(\|ch\| ch.is_ascii_digit()) {
	output[right] = "B-EPISODE".to_string();
	}
	}
	}
	}
	continue;
	}
	if previous_non_space.is_some_and(\|cursor\| tokens[cursor] == "第")
	&& next_non_space.is_some_and(\|cursor\| {
	matches!(tokens[cursor].as_str(), "话" \| "話" \| "回" \| "集")
	\|\| tokens[cursor].starts_with('话')
	\|\| tokens[cursor].starts_with('話')
	\|\| tokens[cursor].starts_with('回')
	\|\| tokens[cursor].starts_with('集')
	})
	{
	if let Some(cursor) = previous_non_space {
	output[cursor] = "B-EPISODE".to_string();
	}
	output[index] = "B-EPISODE".to_string();
	if let Some(cursor) = next_non_space {
	if matches!(tokens[cursor].as_str(), "话" \| "話" \| "回" \| "集") {
	output[cursor] = "B-EPISODE".to_string();
	}
	}
	continue;
	}
	}
	if matches!(label.as_str(), "B-TITLE" \| "O")
	&& token.chars().all(\|ch\| ch.is_ascii_digit())
	&& token.len() <= 3
	{
	let previous_word = (0..index)
	.rev()
	.find(\|&cursor\| {
	!joiners.contains(&tokens[cursor].as_str())
	&& tokens[cursor].chars().any(\|ch\| ch.is_alphabetic())
	})
	.map(\|cursor\| tokens[cursor].to_ascii_lowercase());
	let next_structural = (index + 1..tokens.len())
	.find(\|&cursor\| !joiners.contains(&tokens[cursor].as_str()))
	.map(\|cursor\| tokens[cursor].as_str());
	let next_non_space = (index + 1..tokens.len())
	.find(\|&cursor\| !tokens[cursor].chars().all(char::is_whitespace))
	.map(\|cursor\| tokens[cursor].as_str());
	let later_technical_block = output[index + 1..]
	.iter()
	.any(\|label\| matches!(label.as_str(), "B-SOURCE" \| "B-RESOLUTION"));
	let nearby_lupin_part = previous_word.as_deref() == Some("part")
	&& (tokens[..index]
	.iter()
	.rev()
	.take(8)
	.any(\|item\| item.eq_ignore_ascii_case("lupin"))
	\|\| tokens[index + 1..]
	.iter()
	.take(12)
	.any(\|item\| item.eq_ignore_ascii_case("lupin")));
	if nearby_lupin_part {
	output[index] = "B-SEASON".to_string();
	continue;
	}
	let followed_by_title_word = (index + 1..tokens.len())
	.find(\|&cursor\| {
	!joiners.contains(&tokens[cursor].as_str())
	&& !matches!(
	tokens[cursor].as_str(),
	"-" \| "－" \| "," \| "，" \| ":" \| "："
	)
	})
	.is_some_and(\|cursor\| {
	!matches!(
	tokens[cursor].as_str(),
	"[" \| "【" \| "(" \| "（" \| "]" \| "】"
	) && output.get(cursor).is_some_and(\|label\| label == "B-TITLE")
	&& tokens[cursor].chars().any(\|ch\| ch.is_alphabetic())
	});
	if followed_by_title_word && matches!(previous_word.as_deref(), Some("movie" \| "part"))
	{
	output[index] = "B-TITLE".to_string();
	continue;
	}
	if (later_technical_block
	\|\| next_non_space.is_some_and(\|token\| matches!(token, "[" \| "【" \| "(" \| "（"))
	\|\| next_structural.is_some_and(\|token\| matches!(token, "[" \| "【" \| "(" \| "（")))
	&& matches!(previous_word.as_deref(), Some("movie" \| "part"))
	{
	output[index] = "B-SPECIAL".to_string();
	continue;
	}
	let eien_title_number = token == "831"
	&& previous_word.as_deref() == Some("no")
	&& (0..index).any(\|cursor\| {
	output[cursor] == "B-TITLE" && tokens[cursor].eq_ignore_ascii_case("Eien")
	});
	if eien_title_number {
	for joiner_index in (0..index).rev() {
	if tokens[joiner_index].eq_ignore_ascii_case("no") {
	break;
	}
	if joiners.contains(&tokens[joiner_index].as_str()) {
	output[joiner_index] = "B-TITLE".to_string();
	}
	}
	output[index] = "B-TITLE".to_string();
	continue;
	}
	}
	if label == "O" && token.chars().all(\|ch\| ch.is_ascii_digit()) && token.len() <= 3 {
	let previous_non_space = (0..index)
	.rev()
	.find(\|&cursor\| !tokens[cursor].chars().all(char::is_whitespace));
	let next_non_space = (index + 1..tokens.len())
	.find(\|&cursor\| !tokens[cursor].chars().all(char::is_whitespace));
	if previous_non_space
	.is_some_and(\|cursor\| matches!(tokens[cursor].as_str(), "[" \| "【"))
	&& next_non_space
	.is_some_and(\|cursor\| matches!(tokens[cursor].as_str(), "]" \| "】"))
	&& output[..index].iter().any(\|label\| label == "B-TITLE")
	&& output[index + 1..]
	.iter()
	.any(\|label\| matches!(label.as_str(), "B-SOURCE" \| "B-RESOLUTION"))
	{
	output[index] = "B-EPISODE".to_string();
	continue;
	}
	if previous_non_space
	.is_some_and(\|cursor\| matches!(tokens[cursor].as_str(), "-" \| "－"))
	&& output[..index].iter().any(\|label\| label == "B-TITLE")
	&& output[index + 1..]
	.iter()
	.any(\|label\| matches!(label.as_str(), "B-SOURCE" \| "B-RESOLUTION"))
	{
	output[index] = "B-EPISODE".to_string();
	continue;
	}
	if next_non_space.is_none()
	&& previous_non_space.is_some_and(\|cursor\| {
	output[cursor] == "B-TITLE"
	&& tokens[cursor].chars().any(\|ch\| {
	('\u{4e00}'..='\u{9fff}').contains(&ch)
	\|\| ('\u{3040}'..='\u{30ff}').contains(&ch)
	})
	})
	{
	output[index] = "B-EPISODE".to_string();
	continue;
	}
	}
	if label == "B-EPISODE" && token.chars().all(\|ch\| ch.is_ascii_digit()) {
	let previous_non_space = (0..index)
	.rev()
	.find(\|&cursor\| !tokens[cursor].chars().all(char::is_whitespace));
	let next_non_space = (index + 1..tokens.len())
	.find(\|&cursor\| !tokens[cursor].chars().all(char::is_whitespace));
	if previous_non_space.is_some_and(\|cursor\| tokens[cursor] == "第")
	&& next_non_space.is_some_and(\|cursor\| {
	matches!(tokens[cursor].as_str(), "话" \| "話" \| "回" \| "集")
	})
	{
	if let Some(cursor) = previous_non_space {
	output[cursor] = "B-EPISODE".to_string();
	}
	if let Some(cursor) = next_non_space {
	output[cursor] = "B-EPISODE".to_string();
	}
	}
	if previous_non_space
	.is_some_and(\|cursor\| matches!(tokens[cursor].as_str(), "×" \| "x" \| "X"))
	{
	let left_title = (0..previous_non_space.unwrap())
	.rev()
	.find(\|&cursor\| labels[cursor] != "O")
	.is_some_and(\|cursor\| labels[cursor] == "B-TITLE");
	if left_title {
	output[index] = "B-TITLE".to_string();
	if let Some(next_word) = (index + 1..tokens.len()).find(\|&cursor\| {
	labels[cursor] == "O" && tokens[cursor].chars().any(\|ch\| ch.is_alphabetic())
	}) {
	output[next_word] = "B-TITLE".to_string();
	}
	continue;
	}
	}
	let mut previous_word = None;
	for cursor in (0..index).rev() {
	if matches!(tokens[cursor].as_str(), "]" \| "】" \| ")" \| "）") {
	break;
	}
	if joiners.contains(&tokens[cursor].as_str()) {
	continue;
	}
	if tokens[cursor].chars().any(\|ch\| ch.is_alphabetic()) {
	previous_word = Some(tokens[cursor].to_ascii_lowercase());
	}
	break;
	}
	let previous_non_space = (0..index)
	.rev()
	.find(\|&cursor\| !tokens[cursor].chars().all(char::is_whitespace))
	.map(\|cursor\| tokens[cursor].as_str());
	if matches!(previous_word.as_deref(), Some("lesson" \| "part"))
	\|\| (previous_word.as_deref() == Some("no") && previous_non_space == Some("."))
	{
	output[index] = "O".to_string();
	continue;
	}
	}
	if label != "O" \|\| !entity_joiners.contains(&token.as_str()) {
	continue;
	}
	let mut left = index as isize - 1;
	while left >= 0
	&& joiners.contains(&tokens[left as usize].as_str())
	&& labels[left as usize] == "O"
	{
	left -= 1;
	}
	let mut right = index + 1;
	while right < tokens.len()
	&& joiners.contains(&tokens[right].as_str())
	&& labels[right] == "O"
	{
	right += 1;
	}
	if left >= 0 && right < tokens.len() {
	let left_label = output[left as usize].clone();
	let right_label = labels[right].clone();
	if left_label == right_label && matches!(left_label.as_str(), "B-TITLE" \| "B-GROUP") {
	output[index] = left_label.clone();
	}
	if token == "." && left_label == "B-EPISODE" && right_label == "B-EPISODE" {
	output[index] = "B-EPISODE".to_string();
	}
	}
	if title_terminal_punctuation.contains(&token.as_str()) && index > 0 {
	let left_label = &output[index - 1];
	if left_label == "B-TITLE" {
	output[index] = "B-TITLE".to_string();
	}
	}
	if matches!(
	token.as_str(),
	"]" \| "】" \| ")" \| "）" \| ">" \| "＞" \| "｣" \| "」"
	) && index > 0
	&& output[index - 1] == "B-TITLE"
	&& title_span_has_labeled_opener(&tokens[..index], &output[..index], token)
	{
	output[index] = "B-TITLE".to_string();
	}
	}
	output
	}

	/// Checks whether the run of `B-TITLE` tokens at the end of `tokens` contains
	/// the opening bracket that pairs with `closer`.
	///
	/// The scan walks backwards from the last token and stops at the first
	/// token whose label is not `B-TITLE`.
	fn title_span_has_labeled_opener(tokens: &[String], labels: &[String], closer: &str) -> bool {
	    tokens
	        .iter()
	        .zip(labels.iter())
	        .rev()
	        .take_while(|(_, label)| label.as_str() == "B-TITLE")
	        .any(|(token, _)| closer_matches_opener(closer, token))
	}

	/// Returns `true` when `opener` is the opening bracket that pairs with
	/// `closer`. Supported pairs: `[]`, `【】`, `()`, `（）`, `<>`, `＜＞`, `｢｣`, `「」`.
	fn closer_matches_opener(closer: &str, opener: &str) -> bool {
	    matches!(
	        (closer, opener),
	        ("]", "[")
	            | ("】", "【")
	            | (")", "(")
	            | ("）", "（")
	            | (">", "<")
	            | ("＞", "＜")
	            | ("｣", "｢")
	            | ("」", "「")
	    )
	}

	/// Rewrites generic labels into their schema-specific form.
	///
	/// * `TITLE` labels become `{prefix}-TITLE_{suffix}`, or
	///   `{prefix}-PATH_TITLE_{suffix}` when the token sits before the last path
	///   separator (`/` or `\`); the suffix comes from
	///   `title_suffix_for_label_index`.
	/// * `SEASON` labels before the last path separator become
	///   `{prefix}-PATH_SEASON`.
	///
	/// `prefix` is `I` for labels starting with `I-` and `B` otherwise. Labels
	/// for which `label_entity` returns `None` are left untouched.
	fn retag_semantic_labels(tokens: &[String], labels: &[String]) -> Vec<String> {
	    let last_path = tokens
	        .iter()
	        .rposition(|token| token == "/" || token == "\\");
	    let mut output = labels.to_vec();
	    for (index, label) in labels.iter().enumerate() {
	        let Some(entity) = label_entity(label) else {
	            continue;
	        };
	        let prefix = if label.starts_with("I-") { "I" } else { "B" };
	        let in_path = last_path.is_some_and(|path_index| index < path_index);
	        if entity == "TITLE" {
	            let suffix = title_suffix_for_label_index(tokens, labels, index);
	            let kind = if in_path { "PATH_TITLE" } else { "TITLE" };
	            output[index] = format!("{prefix}-{kind}_{suffix}");
	        } else if entity == "SEASON" && in_path {
	            output[index] = format!("{prefix}-PATH_SEASON");
	        }
	    }
	    output
	}

	/// Determines the title suffix for the token at `index`.
	///
	/// The token's own suffix (via `direct_title_suffix`) wins when available.
	/// Otherwise the nearest suffixes to the left and right are consulted: if
	/// only one side yields a suffix it is used, if both agree that value is
	/// used, and in every other case the result is `"MIXED"`.
	fn title_suffix_for_label_index(
	    tokens: &[String],
	    labels: &[String],
	    index: usize,
	) -> &'static str {
	    direct_title_suffix(&tokens[index]).unwrap_or_else(|| {
	        let before = nearest_title_suffix(tokens, labels, index, true);
	        let after = nearest_title_suffix(tokens, labels, index, false);
	        match (before, after) {
	            (Some(lhs), Some(rhs)) => {
	                if lhs == rhs {
	                    lhs
	                } else {
	                    "MIXED"
	                }
	            }
	            (Some(only), None) | (None, Some(only)) => only,
	            (None, None) => "MIXED",
	        }
	    })
	}

	fn nearest_title_suffix(
	tokens: &[String],
	labels: &[String],
	index: usize,
	search_left: bool,
	) -> Option<&'static str> {
	let mut cursor = index as isize;
	loop {
	cursor += if search_left { -1 } else { 1 };
	if cursor < 0 \|\| cursor as usize >= tokens.len() {
	return None;
	}
	let cursor = cursor as usize;
	if !is_title_label(&labels[cursor]) {
	if tokens[cursor]
	.chars()
	.all(\|ch\| ch.is_whitespace() \|\| !ch.is_alphanumeric())
	{
	continue;
	}
	return None;
	}
	if let Some(suffix) = direct_title_suffix(&tokens[cursor]) {
	return Some(suffix);
	}
	}
	}

	fn direct_title_suffix(token: &str) -> Option<&'static str> {
	if !token.chars().any(\|ch\| {
	ch.is_ascii_alphabetic()
	\|\| ('\u{3040}'..='\u{30ff}').contains(&ch)
	\|\| ('\u{31f0}'..='\u{31ff}').contains(&ch)
	\|\| ('\u{4e00}'..='\u{9fff}').contains(&ch)
	}) {
	return None;
	}
	Some(title_language_suffix(token))
	}

	/// Builds a labelled [`Record`] for `filename` from per-group `roles`.
	///
	/// Pipeline: derive the template key, tokens and groups from the filename;
	/// adjust and refine the roles; keep a single title candidate per kind;
	/// project roles onto tokens; repair compact SxE tokens; smooth title spans;
	/// retag labels into the semantic schema.
	///
	/// Returns `None` when the number of roles does not match the number of
	/// groups, or when projection yields token and label lists of different
	/// lengths.
	fn dmhy_record(filename: &str, template_id: &str, roles: &[String]) -> Option<Record> {
	    let (key, tokens, _classes, groups) = template_key_for_filename(filename);
	    if groups.len() != roles.len() {
	        return None;
	    }
	    let roles = adjust_contextual_roles(&tokens, &groups, roles);
	    let roles = refine_semantic_roles(&tokens, &groups, &roles);
	    let (roles, dropped) = enforce_single_title_candidate(&tokens, &groups, &roles);
	    let (tokens, labels) = project_refined_tokens(&tokens, &groups, &roles);
	    let (tokens, labels) = repair_compact_sxe_tokens(tokens, labels);
	    // Validate before smoothing/retagging: both passes index tokens and labels
	    // in lockstep and would panic on a mismatch instead of yielding `None`.
	    // Neither pass changes the number of labels, so no later check is needed.
	    if tokens.len() != labels.len() {
	        return None;
	    }
	    let labels = smooth_title_spans(&tokens, &labels);
	    let labels = retag_semantic_labels(&tokens, &labels);
	    Some(Record {
	        filename: filename.to_string(),
	        tokens,
	        labels,
	        template_id: template_id.to_string(),
	        template: key,
	        source_filename: None,
	        path_trimmed: None,
	        dropped_title_candidate_positions: (!dropped.is_empty()).then_some(dropped),
	    })
	}

	#[cfg(test)]
	mod tests {
	use super::*;

	fn schema_labels_for(filename: &str) -> Vec<(String, String)> {
	let (key, _, _, _) = template_key_for_filename(filename);
	let roles = suggested_roles(&key);
	let record = dmhy_record(filename, "tpl_test", &roles).unwrap();
	record.tokens.into_iter().zip(record.labels).collect()
	}

	fn labels_for(filename: &str) -> Vec<(String, String)> {
	schema_labels_for(filename)
	.into_iter()
	.map(\|(token, label)\| (token, legacy_label(&label)))
	.collect()
	}

	fn legacy_label(label: &str) -> String {
	let Some(entity) = label_entity(label) else {
	return label.to_string();
	};
	let prefix = if label.starts_with("I-") { "I" } else { "B" };
	if is_title_entity(entity) {
	return format!("{prefix}-TITLE");
	}
	if entity == "PATH_SEASON" {
	return format!("{prefix}-SEASON");
	}
	if entity == "TAG" {
	return format!("{prefix}-SPECIAL");
	}
	label.to_string()
	}

	#[test]
	fn rich_title_candidates_keep_readable_spacing() {
	let row = rich_annotation_for(
	"(1998) Initial D First Stage [1080p BDRip AVC AAC DTS-HD]/Initial D First Stage - 01 [1080p BDRip AVC AAC DTS-HD]",
	);
	assert_eq!(
	row.pointer("/segments/1/candidates/0/text")
	.and_then(Value::as_str),
	Some("Initial D First Stage")
	);
	}

	/// Schema-level (non-legacy) label expectations: language-specific title
	/// entities, tags, path components, and the music-collection noise filter.
	#[test]
	fn semantic_schema_roles_cover_multilingual_tags_paths_and_music_skips() {
	    /// True when `pairs` contains exactly the `(token, label)` pair.
	    fn has(pairs: &[(String, String)], token: &str, label: &str) -> bool {
	        pairs
	            .iter()
	            .any(|(t, l)| t.as_str() == token && l.as_str() == label)
	    }

	    // Bracketed release with a group, tags and both CHS and Latin titles.
	    let gm = schema_labels_for(
	        "[GM-Team][国漫][神印王座][Throne of Seal][2022][200][AVC][GB][1080P].mp4",
	    );
	    assert!(has(&gm, "GM", "B-GROUP"));
	    assert!(has(&gm, "国漫", "B-TAG"));
	    assert!(has(&gm, "神印王座", "B-TITLE_CHS"));
	    assert!(has(&gm, "Throne", "B-TITLE_LATIN"));
	    assert!(has(&gm, "Seal", "B-TITLE_LATIN"));
	    assert!(has(&gm, "2022", "B-TAG"));
	    assert!(has(&gm, "200", "B-EPISODE"));

	    let sky = schema_labels_for("[Skytree][海贼王][One_Piece][918][GB_JP][1080P]");
	    assert!(has(&sky, "Skytree", "B-GROUP"));
	    assert!(has(&sky, "海贼王", "B-TITLE_CHS"));
	    assert!(has(&sky, "One", "B-TITLE_LATIN"));
	    assert!(has(&sky, "Piece", "B-TITLE_LATIN"));
	    assert!(has(&sky, "918", "B-EPISODE"));

	    // Bare "title season - episode" forms with CHT and JPN titles.
	    let farming = schema_labels_for("異世界悠閒農家 2 - 06");
	    assert!(has(&farming, "異世界悠閒農家", "B-TITLE_CHT"));
	    assert!(has(&farming, "2", "B-SEASON"));
	    assert!(has(&farming, "06", "B-EPISODE"));

	    let hanako = schema_labels_for("地縛少年花子君 2 - 13");
	    assert!(has(&hanako, "地縛少年花子君", "B-TITLE_JPN"));
	    assert!(has(&hanako, "2", "B-SEASON"));
	    assert!(has(&hanako, "13", "B-EPISODE"));

	    // A four-digit trailing number is an episode, not a season.
	    let one_piece = schema_labels_for("One.Piece.1110");
	    assert!(has(&one_piece, "One", "B-TITLE_LATIN"));
	    assert!(has(&one_piece, "Piece", "B-TITLE_LATIN"));
	    assert!(has(&one_piece, "1110", "B-EPISODE"));
	    assert!(!has(&one_piece, "1110", "B-SEASON"));

	    // CJK group names and season-prefix tags.
	    let nekomoe_prefix = schema_labels_for("[喵萌奶茶屋][7月新番][Lycoris Recoil][01][1080P]");
	    assert!(has(&nekomoe_prefix, "喵萌奶茶屋", "B-GROUP"));
	    assert!(has(&nekomoe_prefix, "7月新番", "B-TAG"));
	    assert!(has(&nekomoe_prefix, "Lycoris", "B-TITLE_LATIN"));
	    let subtitle_group = schema_labels_for("[桜都字幕组][Title][01][1080P]");
	    assert!(has(&subtitle_group, "桜都字幕组", "B-GROUP"));

	    // Directory components get PATH_* entities.
	    let path = schema_labels_for("海贼王/Season 2/One Piece - 01 [1080P]");
	    assert!(has(&path, "海贼王", "B-PATH_TITLE_CHS"));
	    assert!(has(&path, "2", "B-PATH_SEASON"));
	    assert!(has(&path, "One", "B-TITLE_LATIN"));
	    assert!(has(&path, "01", "B-EPISODE"));

	    let tags = schema_labels_for("[日漫][剧场版][Movie][TV][2024][Title][01][1080P]");
	    assert!(has(&tags, "日漫", "B-TAG"));
	    assert!(has(&tags, "剧场版", "B-TAG"));
	    assert!(has(&tags, "Movie", "B-TAG"));
	    assert!(has(&tags, "TV", "B-TAG"));
	    assert!(has(&tags, "2024", "B-TAG"));
	    assert!(has(&tags, "Title", "B-TITLE_LATIN"));

	    // Music/audio collections must be flagged as noise...
	    let skipped_names = [
	        "[Group] Title OST [FLAC]",
	        "[Group] Title MUSICCLIP [BDRip]",
	        "[Group] Title Music Collection [FLAC]",
	        "[Group] Title Character Song [MP3]",
	        "[Group] Title Drama CD [FLAC]",
	        "[Group] Title CD Album [FLAC]",
	        "[Group] Title Bonus CD [FLAC]",
	        "[Group] Title Soundtrack [FLAC]",
	    ];
	    for skipped in skipped_names {
	        assert!(has_music_collection_noise(skipped), "{skipped}");
	    }
	    // ...while OP/ED/PV-style extras must not be.
	    let preserved_names = [
	        "[Group] Title OP [FLAC]",
	        "[Group] Title ED [FLAC]",
	        "[Group] Title NCOP [FLAC]",
	        "[Group] Title NCED [FLAC]",
	        "[Group] Title PV [1080P]",
	        "[Group] Title CM [1080P]",
	        "[Group] Title Menu [1080P]",
	        "[Group] Title Trailer [1080P]",
	    ];
	    for preserved in preserved_names {
	        assert!(!has_music_collection_noise(preserved), "{preserved}");
	    }
	}

	/// Regression suite over legacy-mapped labels (`labels_for`), plus one
	/// schema-level check (`schema_labels_for`).
	///
	/// Each case labels one filename and asserts that specific
	/// `(token, label)` pairs are present or absent. The final group of cases
	/// runs after an attempt to install a group whitelist in
	/// `RUNTIME_WHITELISTS`.
	#[test]
	fn required_regressions() {
	    // --- Numeric season before an explicit EP marker ---
	    let title_91 = labels_for("Title 91 EP 01 [1080p]");
	    assert!(title_91.contains(&("91".to_string(), "B-SEASON".to_string())));
	    assert!(title_91.contains(&("EP".to_string(), "O".to_string())));
	    assert!(title_91.contains(&("01".to_string(), "B-EPISODE".to_string())));

	    let event = labels_for("[HYSUB]Dragon Ball Super Broly[Theater Greeting Event][1080P]");
	    assert!(event.contains(&("Theater".to_string(), "B-SPECIAL".to_string())));
	    assert!(!event.contains(&("Theater".to_string(), "B-TITLE".to_string())));

	    let roman = labels_for("Chibi Maruko-chan I 001");
	    assert!(roman.contains(&("I".to_string(), "B-SEASON".to_string())));
	    assert!(roman.contains(&("001".to_string(), "B-EPISODE".to_string())));

	    // --- Punctuation that belongs to the title ---
	    let dxd = labels_for("High School D×D");
	    assert!(dxd.contains(&("×".to_string(), "B-TITLE".to_string())));
	    let colon_title = labels_for("Megumi no Daigo：Kyuukoku no Orange 06");
	    assert!(colon_title.contains(&("：".to_string(), "B-TITLE".to_string())));

	    // --- SxxEyy forms, EP prefixes, ranges and versioned episodes ---
	    let sxe = labels_for("S01E02");
	    assert_eq!(
	        sxe,
	        vec![
	            ("S".to_string(), "O".to_string()),
	            ("01".to_string(), "B-SEASON".to_string()),
	            ("E".to_string(), "O".to_string()),
	            ("02".to_string(), "B-EPISODE".to_string())
	        ]
	    );
	    let ep_prefix = labels_for("Toradora! EP01 [BD 1080p]");
	    assert!(ep_prefix.contains(&("EP".to_string(), "O".to_string())));
	    assert!(ep_prefix.contains(&("01".to_string(), "B-EPISODE".to_string())));
	    let bracket_sxe = labels_for("[FLsnow.feat.PO][Himitsu_no_Aipri][1080P][S2E01]");
	    assert!(bracket_sxe.contains(&("2".to_string(), "B-SEASON".to_string())));
	    assert!(bracket_sxe.contains(&("01".to_string(), "B-EPISODE".to_string())));
	    let bocchi_sxe =
	        labels_for("Bocchi the Rock! 孤獨搖滾！S01E12「早起的日頭光照佇你的身上」");
	    assert!(bocchi_sxe.contains(&("01".to_string(), "B-SEASON".to_string())));
	    assert!(bocchi_sxe.contains(&("12".to_string(), "B-EPISODE".to_string())));
	    assert!(!bocchi_sxe.contains(&("S01E12".to_string(), "O".to_string())));
	    let sxe_range = labels_for(
	        "【CXRAW】【TMNT 2012 TV series】【S5E12-S5E14】【Wanted：Bebop & Rocksteady】【DVDrip】【480p】【AVC Hi10P AAC MP4】",
	    );
	    assert!(sxe_range.contains(&("5".to_string(), "B-SEASON".to_string())));
	    assert!(sxe_range.contains(&("12".to_string(), "B-EPISODE".to_string())));
	    assert!(sxe_range.contains(&("14".to_string(), "B-EPISODE".to_string())));
	    let episode_version_title = labels_for("[DHR][Dumbbell[10v2][BIG5][720P][AVC_AAC]");
	    assert!(episode_version_title.contains(&("10v2".to_string(), "B-EPISODE".to_string())));
	    assert!(!episode_version_title.contains(&("10v2".to_string(), "B-TITLE".to_string())));
	    let episode_version_lang =
	        labels_for("[GalaxyRailroad-888] Yu-Gi-Oh! GO RUSH !! [043v2_GB]");
	    assert!(episode_version_lang.contains(&("043v2".to_string(), "B-EPISODE".to_string())));
	    assert!(episode_version_lang.contains(&("GB".to_string(), "B-SOURCE".to_string())));

	    // --- Title tokens that look like seasons, separators or specials ---
	    let cursed = labels_for("[Coalgirls]_C3-Cube_x_Cursed_x_Curious_01_[8E416230]");
	    assert!(cursed.contains(&("x".to_string(), "B-TITLE".to_string())));
	    assert!(!cursed.contains(&("x".to_string(), "B-SEASON".to_string())));
	    let beyblade = labels_for("[jibaketa]Beyblade X - 118 (WEB 1920x1080 AVC AAC)");
	    assert!(beyblade.contains(&("X".to_string(), "B-TITLE".to_string())));
	    assert!(!beyblade.contains(&("X".to_string(), "B-SEASON".to_string())));
	    let bang_title = labels_for("[Dymy][Gugure! Kokkuri-san][06][BIG5][1280X720]");
	    assert!(bang_title.contains(&("!".to_string(), "B-TITLE".to_string())));
	    let pso2 = labels_for("[Lilith-Raws] Phantasy Star Online 2 Episode Oracle - 01 [1080p]");
	    assert!(pso2.contains(&("2".to_string(), "B-TITLE".to_string())));
	    assert!(pso2.contains(&("Episode".to_string(), "B-TITLE".to_string())));
	    assert!(pso2.contains(&("Oracle".to_string(), "B-TITLE".to_string())));
	    assert!(pso2.contains(&("01".to_string(), "B-EPISODE".to_string())));
	    let aikatsu = labels_for("Aikatsu Friends! - S2E01 (BD 1920x1080 x264 FLAC)");
	    assert!(aikatsu.contains(&("!".to_string(), "B-TITLE".to_string())));
	    let intro = labels_for("[VCB-Studio] LoveLive! µ's Live Collection [01][intro][1080p]");
	    assert!(intro.contains(&("intro".to_string(), "B-SPECIAL".to_string())));
	    let hash = labels_for("[Group][Title][01][1080p][00270AC8]");
	    assert!(hash.contains(&("00270AC8".to_string(), "O".to_string())));
	    let yamato = labels_for("[1995.01] YAMATO2520 Vol.1 明日への希望-0001");
	    assert!(yamato.contains(&("YAMATO2520".to_string(), "B-TITLE".to_string())));
	    assert!(yamato.contains(&("明日への希望".to_string(), "B-TITLE".to_string())));
	    let ubw = labels_for("Fate／stay night [Unlimited Blade Works] #00 「プロローグ」");
	    assert!(ubw.contains(&("Unlimited".to_string(), "B-TITLE".to_string())));
	    assert!(!ubw.contains(&("Unlimited".to_string(), "B-GROUP".to_string())));
	    let alias_title = labels_for("[Koten_Gars] Tegami Bachi; Letter Bee - 01 [1080p]");
	    assert!(alias_title.contains(&(";".to_string(), "B-TITLE".to_string())));
	    let comma_title =
	        labels_for("[Studio GreenTea] Kamiina Botan, Yoeru Sugata wa Yuri no Hana [01]");
	    assert!(comma_title.contains(&(",".to_string(), "B-TITLE".to_string())));
	    let backtick_title =
	        labels_for("[Hayate no Gotoku! Can`t Take My Eyes Off You][01][BDrip X264 AAC 720P]");
	    assert!(backtick_title.contains(&("`".to_string(), "B-TITLE".to_string())));
	    assert!(backtick_title.contains(&("t".to_string(), "B-TITLE".to_string())));
	    let cjk_period_title =
	        labels_for("[云光字幕组]剃须。然后捡到高中生 Hige o Soru. Soshite Joshikousei o Hirou-[ 01 ][简体双语][1080p]");
	    assert!(cjk_period_title.contains(&("。".to_string(), "B-TITLE".to_string())));
	    let music_title =
	        labels_for("[アニメ BD] うたの☆プリンスさまっ♪ マジLOVE2000% 第01話「ポワゾンKISS」(1920x1080 x264 Hi10p AAC)");
	    assert!(music_title.contains(&("♪".to_string(), "B-TITLE".to_string())));
	    let cm_version =
	        labels_for("[U2-Rip]Inari, Konkon, Koi Iroha[CMv2][Hi10p_1080p][x264_flac]");
	    assert!(cm_version.contains(&("CMv2".to_string(), "B-SPECIAL".to_string())));
	    assert!(!cm_version.contains(&("CMv2".to_string(), "B-TITLE".to_string())));
	    let hdma_block = labels_for(
	        "[Niconeiko Works] Gekijouban Violet Evergarden [1080P_Ma10p_DTS-HDMA][CM01]",
	    );
	    assert!(hdma_block.contains(&("Gekijouban".to_string(), "B-TITLE".to_string())));
	    assert!(hdma_block.contains(&("1080P".to_string(), "B-RESOLUTION".to_string())));
	    assert!(hdma_block.contains(&("HDMA".to_string(), "B-SOURCE".to_string())));
	    assert!(!hdma_block.contains(&("1080P".to_string(), "B-TITLE".to_string())));
	    let extra_menu = labels_for("Extra Menu OVA");
	    assert!(extra_menu.contains(&("Extra".to_string(), "B-SPECIAL".to_string())));
	    assert!(!extra_menu.contains(&("Extra".to_string(), "B-TITLE".to_string())));
	    let eizou_tokuten = labels_for("おジャ魔女どれみ♯ 映像特典｢ともだちの唄｣(DVD 640x480 )");
	    assert!(eizou_tokuten.contains(&("映像特典".to_string(), "B-SPECIAL".to_string())));
	    let happy_lesson = labels_for("【DVD】 HAPPY☆LESSON THE TV 第01話");
	    assert!(happy_lesson.contains(&("☆".to_string(), "B-TITLE".to_string())));
	    let idolmaster = labels_for("[CASO&SumiSora][THE_IDOLM@STER_CINDERELLA_GIRLS][07.5_SP]");
	    assert!(idolmaster.contains(&("@".to_string(), "B-TITLE".to_string())));
	    let soul_taker = labels_for("[AI-Raws] THE SOUL TAKER～魂狩～ #01 (HEVC 1312x720)");
	    assert!(soul_taker.contains(&("～".to_string(), "B-TITLE".to_string())));
	    let mayoi = labels_for("[Snow-Raws] 迷家[マヨイガ] 第01話");
	    assert!(mayoi.contains(&("迷家".to_string(), "B-TITLE".to_string())));
	    assert!(mayoi.contains(&("マヨイガ".to_string(), "B-TITLE".to_string())));
	    assert!(mayoi.contains(&("]".to_string(), "B-TITLE".to_string())));

	    // --- Numbers inside episode titles / series names are not episodes ---
	    let conan_time = labels_for("【蓝色狂想】名侦探柯南TV版第036集-周一晚上7点半杀人事件");
	    assert!(conan_time.contains(&("036".to_string(), "B-EPISODE".to_string())));
	    assert!(!conan_time.contains(&("7".to_string(), "B-EPISODE".to_string())));
	    let zom =
	        labels_for("[Nekomoe kissaten&VCB-Studio] Zom 100 [Animatics02][Ma10p_1080p][x265]");
	    assert!(zom.contains(&("100".to_string(), "B-TITLE".to_string())));
	    assert!(!zom.contains(&("100".to_string(), "B-EPISODE".to_string())));
	    assert!(zom.contains(&("Animatics02".to_string(), "B-SPECIAL".to_string())));

	    // --- Schema-level check (no legacy mapping) ---
	    let sky = schema_labels_for("[Skytree][海贼王][One_Piece][918][GB_JP][1080P]");
	    assert!(sky.contains(&("海贼王".to_string(), "B-TITLE_CHS".to_string())));
	    assert!(sky.contains(&("One".to_string(), "B-TITLE_LATIN".to_string())));
	    assert!(sky.contains(&("Piece".to_string(), "B-TITLE_LATIN".to_string())));
	    assert!(sky.contains(&("918".to_string(), "B-EPISODE".to_string())));

	    let happy =
	        labels_for("My.Happy.Marriage.S01E01.The.Meeting.1080p.NF.WEB-DL.AAC2.0.H.264-VARYG");
	    assert!(happy.contains(&("01".to_string(), "B-SEASON".to_string())));
	    assert!(happy.contains(&("01".to_string(), "B-EPISODE".to_string())));
	    assert!(!happy.contains(&("0".to_string(), "B-EPISODE".to_string())));

	    let garo = labels_for("[牙狼＜GARO＞～炎の刻印～][01][1080p]");
	    assert!(garo.contains(&("牙狼".to_string(), "B-TITLE".to_string())));
	    assert!(garo.contains(&("＜".to_string(), "B-TITLE".to_string())));
	    assert!(garo.contains(&("＞".to_string(), "B-TITLE".to_string())));
	    assert!(garo.contains(&("炎の刻印".to_string(), "B-TITLE".to_string())));

	    // --- Media/source blocks next to titles ---
	    let akira = labels_for("[QYQ][AKIRA][AVC_AC3x2][1080p]");
	    assert!(akira.contains(&("AKIRA".to_string(), "B-TITLE".to_string())));
	    assert!(!akira.contains(&("AVC".to_string(), "B-TITLE".to_string())));
	    assert!(akira.contains(&("AVC".to_string(), "B-SOURCE".to_string())));

	    let doraemon = labels_for(
	        "[DORASUB][DORAEMON1979][1998.03.07][WEB][1998x1080][AVC][简日]哆啦A梦归来了",
	    );
	    assert!(doraemon.contains(&("DORAEMON1979".to_string(), "B-TITLE".to_string())));
	    assert!(doraemon.contains(&("WEB".to_string(), "B-SOURCE".to_string())));
	    assert!(!doraemon.contains(&("WEB".to_string(), "B-TITLE".to_string())));

	    let devilman = labels_for("[DBD-Raws][恶魔人][1972版][01][1080P][BDRip][HEVC-10bit][FLAC]");
	    assert!(devilman.contains(&("恶魔人".to_string(), "B-TITLE".to_string())));
	    assert!(!devilman.contains(&("1972版".to_string(), "B-TITLE".to_string())));

	    let classroom = labels_for("[Dymy][Assassination Classroom (2016)][01][BIG5][1280X720]");
	    assert!(classroom.contains(&("(".to_string(), "B-TITLE".to_string())));
	    assert!(classroom.contains(&(")".to_string(), "B-TITLE".to_string())));
	    assert!(!classroom.contains(&("]".to_string(), "B-TITLE".to_string())));

	    // --- Ordinal seasons followed by an episode ---
	    let bang_season =
	        labels_for("[LoliHouse] Bang Dream! 2nd Season - 01 [BDRip 1080p HEVC-10bit FLAC]");
	    assert!(bang_season.contains(&("Bang".to_string(), "B-TITLE".to_string())));
	    assert!(bang_season.contains(&("2nd".to_string(), "B-SEASON".to_string())));
	    assert!(bang_season.contains(&("Season".to_string(), "B-SEASON".to_string())));
	    assert!(bang_season.contains(&("01".to_string(), "B-EPISODE".to_string())));
	    assert!(!bang_season.contains(&("01".to_string(), "B-SEASON".to_string())));

	    let basket = labels_for(
	        "[Nekomoe kissaten&VCB-Studio] Fruits Basket 1st Season [24][1080p][x264_aac][sc]",
	    );
	    assert!(basket.contains(&("Fruits".to_string(), "B-TITLE".to_string())));
	    assert!(basket.contains(&("1st".to_string(), "B-SEASON".to_string())));
	    assert!(basket.contains(&("Season".to_string(), "B-SEASON".to_string())));
	    assert!(basket.contains(&("24".to_string(), "B-EPISODE".to_string())));
	    assert!(!basket.contains(&("24".to_string(), "B-SEASON".to_string())));

	    // --- Episode numbers with a suffix inside one bracket ---
	    let notice = labels_for("[KTXP][Zankyou_no_Terror][08_Notice][GB_BIG5][X264_AAC][720p]");
	    assert!(notice.contains(&("Zankyou".to_string(), "B-TITLE".to_string())));
	    assert!(notice.contains(&("08".to_string(), "B-EPISODE".to_string())));
	    assert!(!notice.contains(&("08".to_string(), "B-TITLE".to_string())));

	    let full = labels_for("[POPGO][Soukyuu_no_Fafner_Exodus][01_Full][GB][720p]");
	    assert!(full.contains(&("01".to_string(), "B-EPISODE".to_string())));
	    assert!(!full.contains(&("01".to_string(), "B-TITLE".to_string())));

	    let r18 =
	        labels_for("[HYSUB]Skirt no Naka wa Kedamono Deshita.[01_R18][BIG5_MP4][1280X720]");
	    assert!(r18.contains(&("01".to_string(), "B-EPISODE".to_string())));
	    assert!(!r18.contains(&("01".to_string(), "B-TITLE".to_string())));

	    // --- Audio channel digits must not leak into episodes ---
	    let ddp = labels_for("Akuma.Kun.S01E02.1080p.NF.WEB-DL.DDP5.1.H.264");
	    assert!(ddp.contains(&("02".to_string(), "B-EPISODE".to_string())));
	    assert!(!ddp.contains(&("1".to_string(), "B-EPISODE".to_string())));
	    assert!(ddp
	        .iter()
	        .any(|(token, label)| token.starts_with("DDP") && label == "B-SOURCE"));

	    let aac_space = labels_for("Bleach S01E02 AAC 2.0 H.264");
	    assert!(aac_space.contains(&("02".to_string(), "B-EPISODE".to_string())));
	    assert!(!aac_space.contains(&("2".to_string(), "B-EPISODE".to_string())));
	    assert!(aac_space
	        .iter()
	        .any(|(token, label)| token.starts_with("AAC") && label == "B-SOURCE"));

	    let bare_resolution = labels_for("日本桥15.03.30 720");
	    assert!(bare_resolution.contains(&("日本桥".to_string(), "B-TITLE".to_string())));
	    assert!(bare_resolution.contains(&("720".to_string(), "B-RESOLUTION".to_string())));
	    assert!(!bare_resolution.contains(&("720".to_string(), "B-EPISODE".to_string())));

	    let air_episode = labels_for("Air 01");
	    assert!(air_episode.contains(&("Air".to_string(), "B-TITLE".to_string())));
	    assert!(air_episode.contains(&("01".to_string(), "B-EPISODE".to_string())));

	    let decimal_episode =
	        labels_for("[HoneyGod] Usagi Drop [02.5][x264_10bit][粤日双语][BDrip_1080p]");
	    assert!(decimal_episode.contains(&("02".to_string(), "B-EPISODE".to_string())));
	    assert!(decimal_episode.contains(&(".".to_string(), "B-EPISODE".to_string())));
	    assert!(decimal_episode.contains(&("5".to_string(), "B-EPISODE".to_string())));

	    // --- Cases that run after trying to install a group whitelist ---
	    // The result of `set` is discarded.
	    // NOTE(review): presumably this tolerates the cell having been
	    // initialized already (e.g. by another test); in that case the group
	    // names below are not installed -- confirm this is intended.
	    let _ = RUNTIME_WHITELISTS.set(Whitelists {
	        title_phrases: Vec::new(),
	        group_names: [
	            "LowPower-Raws".to_string(),
	            "ANi".to_string(),
	            "LoliHouse".to_string(),
	            "QTS".to_string(),
	        ]
	        .into_iter()
	        .collect(),
	    });
	    let lowpower = labels_for("[LowPower-Raws] 91 Days - 01 (BD 720P x264 10bit AAC)");
	    assert!(lowpower.contains(&("LowPower".to_string(), "B-GROUP".to_string())));
	    assert!(lowpower.contains(&("91".to_string(), "B-TITLE".to_string())));
	    assert!(lowpower.contains(&("Days".to_string(), "B-TITLE".to_string())));
	    assert!(lowpower.contains(&("01".to_string(), "B-EPISODE".to_string())));

	    let ririsa = labels_for("[ANi] 2.5 次元的誘惑 - 01 [1080P][Baha][WEB-DL][AAC AVC][CHT]");
	    assert!(ririsa.contains(&("2".to_string(), "B-TITLE".to_string())));
	    assert!(ririsa.contains(&(".".to_string(), "B-TITLE".to_string())));
	    assert!(ririsa.contains(&("5".to_string(), "B-TITLE".to_string())));
	    assert!(ririsa.contains(&("次元的誘惑".to_string(), "B-TITLE".to_string())));
	    assert!(ririsa.contains(&("01".to_string(), "B-EPISODE".to_string())));

	    let nanabun = labels_for("[LoliHouse] 22-7 - 01 [WebRip 1080p HEVC-10bit AAC ASS]");
	    assert!(nanabun.contains(&("22".to_string(), "B-TITLE".to_string())));
	    assert!(nanabun.contains(&("-".to_string(), "B-TITLE".to_string())));
	    assert!(nanabun.contains(&("7".to_string(), "B-TITLE".to_string())));
	    assert!(nanabun.contains(&("01".to_string(), "B-EPISODE".to_string())));

	    let saint = labels_for("[QTS] OVA Saint Seiya The Lost Canvas Meiou Shinwa ep 01 (BD H264 1920x1080 24fps FLAC)");
	    assert!(saint.contains(&("OVA".to_string(), "B-SPECIAL".to_string())));
	    assert!(saint.contains(&("Saint".to_string(), "B-TITLE".to_string())));
	    assert!(saint.contains(&("Seiya".to_string(), "B-TITLE".to_string())));
	    assert!(saint.contains(&("01".to_string(), "B-EPISODE".to_string())));

	    let gundam = labels_for("機動戦士ガンダム00 セカンドシーズン／Ep.01 「# 天使再臨」");
	    assert!(gundam.contains(&("機動戦士ガンダム".to_string(), "B-TITLE".to_string())));
	    assert!(gundam.contains(&("00".to_string(), "B-TITLE".to_string())));
	    assert!(gundam.contains(&("01".to_string(), "B-EPISODE".to_string())));

	    let spy =
	        labels_for("[Studio GreenTea] Spy x Family [38][WebRip][HEVC-10bit 1080p AAC ASSx2]");
	    assert!(spy.contains(&("Studio".to_string(), "B-GROUP".to_string())));
	    assert!(spy.contains(&("Spy".to_string(), "B-TITLE".to_string())));
	    assert!(spy.contains(&("x".to_string(), "B-TITLE".to_string())));
	    assert!(spy.contains(&("Family".to_string(), "B-TITLE".to_string())));
	    assert!(spy.contains(&("38".to_string(), "B-EPISODE".to_string())));
	    assert!(!spy.contains(&("Spy".to_string(), "B-SPECIAL".to_string())));

	    let spy_s3 = labels_for(
	        "[Feibanyama] SPY x FAMILY S3 - 01 [IQIYI WebRip 2160p HEVC-10bit OPUS Multi-Subs]",
	    );
	    assert!(spy_s3.contains(&("Feibanyama".to_string(), "B-GROUP".to_string())));
	    assert!(spy_s3.contains(&("SPY".to_string(), "B-TITLE".to_string())));
	    assert!(spy_s3.contains(&("FAMILY".to_string(), "B-TITLE".to_string())));
	    assert!(spy_s3.contains(&("3".to_string(), "B-SEASON".to_string())));
	    assert!(spy_s3.contains(&("01".to_string(), "B-EPISODE".to_string())));

	    let slime =
	        labels_for("[Nekomoe kissaten&VCB-Studio] Slime 300 [Menu01][Ma10p_1080p][x265_flac]");
	    assert!(slime.contains(&("Slime".to_string(), "B-TITLE".to_string())));
	    assert!(
	        slime.contains(&("300".to_string(), "B-TITLE".to_string())),
	        "{slime:?}"
	    );
	    assert!(!slime.contains(&("300".to_string(), "B-EPISODE".to_string())));

	    let kamisama =
	        labels_for("[SFEO-Raws] Kamisama Hajimemashita 2 - 01 (BD 720P x264 10bit AAC)");
	    assert!(kamisama.contains(&("Kamisama".to_string(), "B-TITLE".to_string())));
	    assert!(kamisama.contains(&("2".to_string(), "B-TITLE".to_string())));
	    assert!(kamisama.contains(&("01".to_string(), "B-EPISODE".to_string())));
	}

	#[test]
	fn updated_python_alignment_regressions() {
	let original = "The New Woody Woodpecker Show (Season 1-4) (1999-2002) WEB-DL 720p [Hurtom]/Season 4/E07 - The New Woody Woodpecker Show (Season 1-4) (1999-2002) WEB-DL 720p";
	let (trimmed, was_trimmed) = training_filename_for(original);
	assert!(was_trimmed);
	assert_eq!(
	trimmed,
	"Season 4 E07 - The New Woody Woodpecker Show (Season 1-4) (1999-2002) WEB-DL 720p"
	);
	let pokemon = "Pokémon Season 2 - Orange League [Ep. 83-118]/Pokemon - 084 - OL002 - A Scare in the Air [DVD][PM-Dragon-x264-AC3][00270AC8]";
	let (trimmed_pokemon, pokemon_was_trimmed) = training_filename_for(pokemon);
	assert!(pokemon_was_trimmed);
	assert_eq!(
	trimmed_pokemon,
	"Pokemon - 084 - OL002 - A Scare in the Air [DVD][PM-Dragon-x264-AC3][00270AC8]"
	);
	let woody = labels_for(&trimmed);
	assert!(woody.contains(&("4".to_string(), "B-SEASON".to_string())));
	assert!(woody.contains(&("E".to_string(), "O".to_string())));
	assert!(woody.contains(&("07".to_string(), "B-EPISODE".to_string())));
	assert!(woody.contains(&("The".to_string(), "B-TITLE".to_string())));
	assert!(woody.contains(&("Show".to_string(), "B-TITLE".to_string())));
	assert!(!woody.contains(&("1999".to_string(), "B-EPISODE".to_string())));

	let group = labels_for("[DBD-Raws][Title][01][1080P]");
	assert!(group.contains(&("-".to_string(), "B-GROUP".to_string())));
	let amp_group = labels_for("[SumiSora&CASO][Title][01][1080P]");
	assert!(amp_group.contains(&("&".to_string(), "B-GROUP".to_string())));

	let cjk_season =
	labels_for("[DBD-Raws][魔道祖师第一季][08][1080P][BDRip][HEVC-10bit][FLAC]");
	assert!(cjk_season.contains(&("魔道祖师".to_string(), "B-TITLE".to_string())));
	assert!(cjk_season.contains(&("第一季".to_string(), "B-SEASON".to_string())));
	assert!(!cjk_season.contains(&("第一季".to_string(), "B-TITLE".to_string())));

	let (trimmed, was_trimmed) =
	training_filename_for("12/小剧场/[LKSUB][KAGE-JITSU!][01][GB][720P]");
	assert!(was_trimmed);
	assert_eq!(trimmed, "[LKSUB][KAGE-JITSU!][01][GB][720P]");
	let (key, _, _, _) = template_key_for_filename(&trimmed);
	assert_eq!(
	key,
	"BRACKET_TEXT BRACKET_TEXT BRACKET_EPISODE BRACKET_LANG BRACKET_RESOLUTION"
	);

	let short = labels_for("[Snow-Raws] R-15 CM&PV12 (BD 1920x1080 HEVC-YUV420P10 FLAC)");
	assert!(short.contains(&("R".to_string(), "B-TITLE".to_string())));
	assert!(short.contains(&("-".to_string(), "B-TITLE".to_string())));
	assert!(short.contains(&("15".to_string(), "B-TITLE".to_string())));
	assert!(!short.contains(&("15".to_string(), "B-EPISODE".to_string())));

	let short_before_episode =
	labels_for("[Snow-Raws] R-15 第01話 (BD 1920x1080 HEVC-YUV420P10 FLAC)");
	assert!(short_before_episode.contains(&("R".to_string(), "B-TITLE".to_string())));
	assert!(short_before_episode.contains(&("-".to_string(), "B-TITLE".to_string())));
	assert!(short_before_episode.contains(&("15".to_string(), "B-TITLE".to_string())));
	assert!(short_before_episode.contains(&("01".to_string(), "B-EPISODE".to_string())));
	assert!(!short_before_episode.contains(&("15".to_string(), "B-EPISODE".to_string())));

	let avatar = "Avatar The Last Airbender S2/Avatar The Last Airbender S2 14 [1080p]";
	let (trimmed, was_trimmed) = training_filename_for(avatar);
	assert!(was_trimmed);
	assert_eq!(trimmed, "Avatar The Last Airbender S2 14 [1080p]");

	let plain_season_dir =
	"Season 1/[Kamigami] Junjou Romantica 1 - 01 [BD 1280x720 x264 AAC Sub(Chs,Jap)]";
	let (trimmed, was_trimmed) = training_filename_for(plain_season_dir);
	assert!(was_trimmed);
	assert_eq!(
	trimmed,
	"Season 1 [Kamigami] Junjou Romantica 1 - 01 [BD 1280x720 x264 AAC Sub(Chs,Jap)]"
	);
	let plain_season_labels = labels_for(&trimmed);
	assert!(plain_season_labels.contains(&("1".to_string(), "B-SEASON".to_string())));
	assert!(plain_season_labels.contains(&("01".to_string(), "B-EPISODE".to_string())));

	let menu_parent =
	"[Airota&ANK-Raws] 亜人ちゃんは語りたい (BDrip 1920x1080 HEVC-YUV420P10 FLAC SUP)/Menu (Vol.1)";
	let (trimmed, was_trimmed) = training_filename_for(menu_parent);
	assert!(was_trimmed);
	assert_eq!(
	trimmed,
	"[Airota&ANK-Raws] 亜人ちゃんは語りたい Menu (Vol.1)"
	);

	assert!(has_encoding_noise(
	"[4K_SDR][DBD-Raws&HKG瀛楀箷绲刔[鏃ュ湪鏍″湌][01][2160P]"
	));
	assert!(has_encoding_noise(
	"ATRI -My Dear Moments-/娆″洖浜堝憡 EP01 Log01"
	));
	assert!(has_encoding_noise(
	"[2002-2003] Mew Mew_鏉变含鍠靛柕(鏉变含銉熴儱銈︺儫銉ャ偊)_TV"
	));
	assert!(has_encoding_noise("[DAY][Megami no Caf茅 Terrace][01]"));
	assert!(has_encoding_noise(
	"[4K_SDR][DBD-Raws][瀵掕潐楦ｆ常涔嬫椂涓歖[NCED1]"
	));
	assert!(has_non_anime_noise(
	"13-[旅游番][花丸字幕组][日本不思议铁路之旅][15.03.19-16.02.03][720&1080][中日双语]/铁道旅 15.03.19 720"
	));

	let tintin = "Adventures of Tintin (1991) Season 1-3 S01-S03 (1080p BluRay x265 HEVC 10bit EAC3 2.0 Garshasp)/Season 1/Adventures of Tintin (1991) - S01E06 - Cigars of the Pharaoh (Part 1) (1080p BluRay x265 Garshasp)";
	let (trimmed, was_trimmed) = training_filename_for(tintin);
	assert!(was_trimmed);
	assert_eq!(
	trimmed,
	"Adventures of Tintin (1991) - S01E06 - Cigars of the Pharaoh (Part 1) (1080p BluRay x265 Garshasp)"
	);
	let (key, _, _, _) = template_key_for_filename(&trimmed);
	assert_eq!(
	key,
	"TEXT SEP TEXT SEP TEXT SEP BRACKET_DATE SEP SXE SEP TEXT SEP TEXT SEP TEXT SEP TEXT SEP BRACKET_TEXT SEP BRACKET_TEXT"
	);

	let bocchi = "Bocchi the Rock S01 孤獨搖滾！第一季 [Taiwanese Hokkien Dub][臺灣閩南語配音]/Bocchi the Rock S01 孤獨搖滾！第一季 [Taiwanese Hokkien Dub][Hàn-jī Hardsub][臺灣閩南語配音][漢字字幕]/Bocchi the Rock! 孤獨搖滾！S01E01「孤獨反輾轉」";
	let (leaf_key, _, _, _) =
	template_key_for_filename("Bocchi the Rock! 孤獨搖滾！S01E01「孤獨反輾轉」");
	assert_eq!(leaf_key, "TEXT SEP TEXT SEP TEXT SEP TEXT SXE TEXT");
	assert!(filename_has_title(
	"Bocchi the Rock! 孤獨搖滾！S01E01「孤獨反輾轉」"
	));
	let (trimmed, was_trimmed) = training_filename_for(bocchi);
	assert!(was_trimmed);
	assert_eq!(trimmed, "Bocchi the Rock! 孤獨搖滾！S01E01「孤獨反輾轉」");
	let (key, _, _, _) = template_key_for_filename(&trimmed);
	assert_eq!(key, "TEXT SEP TEXT SEP TEXT SEP TEXT SXE TEXT");

	let usagi = "Gochuumon wa Usagi Desuka-60fps/Gochuumon wa Usagi Desuka S1/Usagi S1[01][60fps][8bit_1080p][x265_flac]";
	let (trimmed, was_trimmed) = training_filename_for(usagi);
	assert!(was_trimmed);
	assert_eq!(trimmed, "Usagi S1[01][60fps][8bit_1080p][x265_flac]");
	let (key, _, _, _) = template_key_for_filename(&trimmed);
	assert_eq!(
	key,
	"TEXT SEP SEASON BRACKET_EPISODE BRACKET_TEXT BRACKET_MEDIA_BLOCK BRACKET_MEDIA"
	);

	let woody_parent =
	"Season 4/E07 - The New Woody Woodpecker Show (Season 1-4) (1999-2002) WEB-DL 720p";
	let (trimmed, was_trimmed) = training_filename_for(&format!("Batch/{woody_parent}"));
	assert!(was_trimmed);
	assert_eq!(
	trimmed,
	"Season 4 E07 - The New Woody Woodpecker Show (Season 1-4) (1999-2002) WEB-DL 720p"
	);

	let najica =
	"[2001] Najica_七虹香電擊作戰(ナジカ電撃作戦)_TV/SourceUnknown.RMVB.640x480.twHard/01";
	let (trimmed, was_trimmed) = training_filename_for(najica);
	assert!(was_trimmed);
	assert_eq!(trimmed, "[2001] Najica_七虹香電擊作戰(ナジカ電撃作戦) 01");
	let najica_labels = labels_for(&trimmed);
	assert!(najica_labels.contains(&("Najica".to_string(), "B-TITLE".to_string())));
	assert!(!najica_labels.contains(&("SourceUnknown".to_string(), "B-TITLE".to_string())));
	assert!(najica_labels.contains(&("01".to_string(), "B-EPISODE".to_string())));

	let galient = "[1984-1986] Galient_機甲界(機甲界ガリアン)_TV.OVA/[1984-1985] Galient_機甲界(機甲界ガリアン)_TV/DVDRip.MKV.720x480.ruSub.左右黑邊保留/01";
	let (trimmed, was_trimmed) = training_filename_for(galient);
	assert!(was_trimmed);
	assert_eq!(trimmed, "[1984-1985] Galient_機甲界(機甲界ガリアン) 01");
	let galient_labels = labels_for(&trimmed);
	assert!(galient_labels.contains(&("Galient".to_string(), "B-TITLE".to_string())));
	assert!(!galient_labels.contains(&("TV".to_string(), "B-TITLE".to_string())));
	assert!(galient_labels.contains(&("01".to_string(), "B-EPISODE".to_string())));

	let nced = "[BDrip] Ao no Exorcist Yuki no Hate Hen S04 [343-Labs]/NCED";
	let (trimmed, was_trimmed) = training_filename_for(nced);
	assert!(was_trimmed);
	assert_eq!(
	trimmed,
	"[BDrip] Ao no Exorcist Yuki no Hate Hen S04 [343-Labs] NCED"
	);

	let sakura =
	"Card Captor Sakura Chinese/魔卡少女樱(台配国语)/第01集小樱与不可思议的魔法书";
	let (trimmed, was_trimmed) = training_filename_for(sakura);
	assert!(was_trimmed);
	assert_eq!(
	trimmed,
	"魔卡少女樱(台配国语) 第01集小樱与不可思议的魔法书"
	);
	let sakura_labels = labels_for(&trimmed);
	assert!(sakura_labels.contains(&("魔卡少女樱".to_string(), "B-TITLE".to_string())));
	assert!(sakura_labels.contains(&("01".to_string(), "B-EPISODE".to_string())));

	let volume =
	labels_for("[Snow-Raws] 生徒会役員共 Vol.01 MENU02 (BD 1920x1080 HEVC-YUV420P10 FLAC)");
	assert!(volume.contains(&("生徒会役員共".to_string(), "B-TITLE".to_string())));
	assert!(volume.contains(&("Vol".to_string(), "B-SPECIAL".to_string())));
	assert!(volume.contains(&("01".to_string(), "B-SPECIAL".to_string())));
	assert!(volume.contains(&("MENU02".to_string(), "B-SPECIAL".to_string())));
	assert!(!volume.contains(&("01".to_string(), "B-EPISODE".to_string())));

	let aria_notice = labels_for(
	"[KNA-Subs&ANK-Raws] 緋弾のアリアAA 番宣1 (BDrip 1920x1080 HEVC-YUV420P10 FLAC)",
	);
	assert!(aria_notice.contains(&("緋弾のアリア".to_string(), "B-TITLE".to_string())));
	assert!(aria_notice.contains(&("番宣".to_string(), "B-SPECIAL".to_string())));
	assert!(aria_notice.contains(&("1".to_string(), "B-SPECIAL".to_string())));
	assert!(!aria_notice.contains(&("1".to_string(), "B-EPISODE".to_string())));

	let lost_song =
	labels_for("[Snow-Raws] LOST SONG CM&PV 01(BD 1920x1080 HEVC-YUV420P10 FLAC)");
	assert!(lost_song.contains(&("LOST".to_string(), "B-TITLE".to_string())));
	assert!(lost_song.contains(&("CM".to_string(), "B-SPECIAL".to_string())));
	assert!(lost_song.contains(&("PV".to_string(), "B-SPECIAL".to_string())));
	assert!(lost_song.contains(&("01".to_string(), "B-SPECIAL".to_string())));
	assert!(!lost_song.contains(&("01".to_string(), "B-EPISODE".to_string())));

	let numeric_title =
	labels_for("3000.Leagues.in.Search.of.Mother.S01E01.1080p.WEB-DL.H.264-D00oo00M");
	assert!(numeric_title.contains(&("3000".to_string(), "B-TITLE".to_string())));
	assert!(numeric_title.contains(&("01".to_string(), "B-SEASON".to_string())));
	assert!(numeric_title.contains(&("01".to_string(), "B-EPISODE".to_string())));
	assert!(numeric_title.contains(&("1080p".to_string(), "B-RESOLUTION".to_string())));
	assert!(numeric_title.contains(&("H".to_string(), "B-SOURCE".to_string())));
	assert!(numeric_title.contains(&("264".to_string(), "B-SOURCE".to_string())));
	assert!(!numeric_title.contains(&("264".to_string(), "B-EPISODE".to_string())));

	let media_block =
	labels_for("[Kamigami] Kantai Collection - 06v2 [1920×1080 x264 AAC Sub(Chs,Cht,Jap)]");
	assert!(media_block.contains(&("1920".to_string(), "B-RESOLUTION".to_string())));
	assert!(media_block.contains(&("1080".to_string(), "B-RESOLUTION".to_string())));
	assert!(media_block.contains(&("x264".to_string(), "B-SOURCE".to_string())));
	assert!(media_block.contains(&("Chs".to_string(), "B-SOURCE".to_string())));

	let ge999 = labels_for("GE999 第024話「次元航海惑星」1979年02月22日 (720x540 x264 AAC2)");
	assert!(ge999.contains(&("GE999".to_string(), "B-TITLE".to_string())));
	assert!(ge999.contains(&("024".to_string(), "B-EPISODE".to_string())));
	assert!(!ge999.contains(&("22".to_string(), "B-EPISODE".to_string())));

	let galaxy = labels_for("銀河鉄道999 第024話「次元航海惑星」 (DVD 640x480 WMV9)");
	assert!(galaxy.contains(&("銀河鉄道".to_string(), "B-TITLE".to_string())));
	assert!(galaxy.contains(&("999".to_string(), "B-TITLE".to_string())));
	assert!(galaxy.contains(&("024".to_string(), "B-EPISODE".to_string())));

	let mahoro = labels_for("[POPGO][FREEWIND][Mahoro_Matic][Full_HD-BDRIP][01]");
	assert!(mahoro.contains(&("Mahoro".to_string(), "B-TITLE".to_string())));
	assert!(!mahoro.contains(&("Full".to_string(), "B-TITLE".to_string())));
	assert!(mahoro.contains(&("01".to_string(), "B-EPISODE".to_string())));

	let kitaro = labels_for(
	"[1985.10-1988.02] Kitaro_鬼太郎第3期(ゲゲゲの鬼太郎)_TV 036 異次元妖怪かまなり",
	);
	assert!(kitaro.contains(&("Kitaro".to_string(), "B-TITLE".to_string())));
	assert!(kitaro.contains(&("3".to_string(), "B-SEASON".to_string())));
	assert!(kitaro.contains(&("036".to_string(), "B-EPISODE".to_string())));
	assert!(!kitaro.contains(&("1985".to_string(), "B-EPISODE".to_string())));

	let urusei = labels_for("Urusei_Yatsura_DVD_Ep042.5_Simu");
	assert!(urusei.contains(&("Urusei".to_string(), "B-TITLE".to_string())));
	assert!(urusei.contains(&("042".to_string(), "B-EPISODE".to_string())));
	assert!(urusei.contains(&(".".to_string(), "B-EPISODE".to_string())));
	assert!(urusei.contains(&("5".to_string(), "B-EPISODE".to_string())));

	let lupin =
	labels_for("[Lupin The Thrid Jigen Daisuke no Bohyou][Logo][BDRIP][1080P][H264_FLAC]");
	assert!(lupin.contains(&("Lupin".to_string(), "B-TITLE".to_string())));
	assert!(!lupin.contains(&("Lupin".to_string(), "B-GROUP".to_string())));

	let mirumo = labels_for("【咪路fans】魔法咪路咪路第二季日语版 01[GB][MP4]");
	assert!(mirumo.contains(&("魔法咪路咪路".to_string(), "B-TITLE".to_string())));
	assert!(mirumo.contains(&("第二季".to_string(), "B-SEASON".to_string())));
	assert!(mirumo.contains(&("01".to_string(), "B-EPISODE".to_string())));

	let doremi_bonus = labels_for(
	"おジャ魔女どれみナ・イ・ショ特典映像07「おジャ魔女どれみナ・イ・ショエンドテロップ集｣(DVD 640x480 )",
	);
	assert!(doremi_bonus.contains(&("おジャ魔女どれみナ".to_string(), "B-TITLE".to_string())));
	assert!(doremi_bonus.contains(&("07".to_string(), "B-SPECIAL".to_string())));
	assert!(!doremi_bonus.contains(&("07".to_string(), "B-EPISODE".to_string())));

	let bd_menu =
	labels_for("[HYSUB]Kuusen Madoushi Kouhosei no Kyoukan[BDMenu][01v1][MP4][1280X720]");
	assert!(bd_menu.contains(&("BDMenu".to_string(), "B-SPECIAL".to_string())));
	assert!(bd_menu.contains(&("01v1".to_string(), "B-SPECIAL".to_string())));
	assert!(!bd_menu.contains(&("BDMenu".to_string(), "B-TITLE".to_string())));

	let ura_on = labels_for("K-ON !! (TV S2 2010). URA-ON !! 01; 1080_h264_flac");
	assert!(ura_on.contains(&("K".to_string(), "B-TITLE".to_string())));
	assert!(ura_on.contains(&("01".to_string(), "B-EPISODE".to_string())));
	assert!(ura_on.contains(&("1080".to_string(), "B-RESOLUTION".to_string())));
	assert!(!ura_on.contains(&("1080".to_string(), "B-EPISODE".to_string())));

	let machikado = labels_for("[KTXP][Machikado_Mazoku_S2][Mini][01][GB][1080p][BDrip][HEVC]");
	assert!(machikado.contains(&("Machikado".to_string(), "B-TITLE".to_string())));
	assert!(machikado.contains(&("S2".to_string(), "B-SEASON".to_string())));
	assert!(machikado.contains(&("01".to_string(), "B-EPISODE".to_string())));

	let ronin = labels_for("【蓝色狂想】魔神坛斗士国日双语第01集");
	assert!(ronin.contains(&("魔神坛斗士".to_string(), "B-TITLE".to_string())));
	assert!(ronin.contains(&("国日双语".to_string(), "B-SOURCE".to_string())));
	assert!(ronin.contains(&("01".to_string(), "B-EPISODE".to_string())));

	let ghiblies = labels_for("Ghiblies - Episode 2 op");
	assert!(ghiblies.contains(&("Ghiblies".to_string(), "B-TITLE".to_string())));
	assert!(ghiblies.contains(&("2".to_string(), "B-TITLE".to_string())));
	assert!(!ghiblies.contains(&("2".to_string(), "B-EPISODE".to_string())));

	let tv_spot =
	labels_for("[RUELL-Next] Fruits Basket TV Spot 1 (DVD 768x576 x264 AAC) [49531416]");
	assert!(tv_spot.contains(&("TV".to_string(), "B-SPECIAL".to_string())));
	assert!(tv_spot.contains(&("1".to_string(), "B-SPECIAL".to_string())));
	assert!(!tv_spot.contains(&("1".to_string(), "B-EPISODE".to_string())));

	let preview_seconds =
	labels_for("[DVD] 鋼鉄天使くるみ予告第03話 30秒バージョン (640x480 WMV9)");
	assert!(preview_seconds.contains(&("03".to_string(), "B-EPISODE".to_string())));
	assert!(!preview_seconds.contains(&("30".to_string(), "B-EPISODE".to_string())));

	let hi10_source =
	labels_for("[POPGO][Shigatsu wa Kimi no Uso] [01][Hi10][720P][GB][A964DA24]");
	assert!(hi10_source.contains(&("Hi10".to_string(), "B-SOURCE".to_string())));
	assert!(!hi10_source.contains(&("Hi10".to_string(), "B-GROUP".to_string())));

	let souten = labels_for(
	"[苍天之拳].[Fosky_Fansub][Souten_No_Ken][DVDRIP][01][H.264_FLAC][848x480][CDD495FC]",
	);
	assert!(souten.contains(&("Fosky".to_string(), "B-GROUP".to_string())));
	assert!(!souten.contains(&("苍天之拳".to_string(), "B-GROUP".to_string())));
	assert!(souten.contains(&("Souten".to_string(), "B-TITLE".to_string())));

	let bonjour = labels_for(
	"(2014Q4) Bonjour♪恋味パティスリー第01話「Lesson 1」 (1280x720 x265 10bit AAC)",
	);
	assert!(bonjour.contains(&("01".to_string(), "B-EPISODE".to_string())));
	assert!(!bonjour.contains(&("1".to_string(), "B-EPISODE".to_string())));

	let durarara =
	labels_for("[VCB-Studio] Durarara!!×2 Ketsu [Menu01][Ma10p_1080p][x265_flac]");
	assert!(durarara.contains(&("Durarara".to_string(), "B-TITLE".to_string())));
	assert!(durarara.contains(&("2".to_string(), "B-TITLE".to_string())));
	assert!(!durarara.contains(&("2".to_string(), "B-EPISODE".to_string())));

	let bd_spot =
	labels_for("[Moozzi2] Amanchu! [SP05] BD-Spot - 01 (BD 1920x1080 x.264 Flac)");
	assert!(bd_spot.contains(&("Spot".to_string(), "B-SPECIAL".to_string())));
	assert!(bd_spot.contains(&("01".to_string(), "B-SPECIAL".to_string())));
	assert!(!bd_spot.contains(&("01".to_string(), "B-EPISODE".to_string())));

	let preview_number =
	labels_for("[Snow-Raws] 刀使ノ巫女第02話予告01 (BD 1920x1080 HEVC-YUV420P10 FLAC)");
	assert!(preview_number.contains(&("02".to_string(), "B-EPISODE".to_string())));
	assert!(preview_number.contains(&("01".to_string(), "B-SPECIAL".to_string())));

	let bleach_movie = labels_for("Bleach the Movie 3 - Fade to Black, I Call Your Name");
	assert!(bleach_movie.contains(&("3".to_string(), "B-TITLE".to_string())));
	assert!(!bleach_movie.contains(&("3".to_string(), "B-EPISODE".to_string())));

	let conan_movie = labels_for(
	"[DBD-Raws][Detective Conan Movie 27 The Million-Dollar Pentagram][PV][01][1080P]",
	);
	assert!(conan_movie.contains(&("27".to_string(), "B-TITLE".to_string())));
	assert!(conan_movie.contains(&("PV".to_string(), "B-SPECIAL".to_string())));

	let madoka_movie = labels_for(
	"[DBD-Raws][Puella Magi Madoka Magica the Movie 01 Beginnings][NCED][1080P]",
	);
	assert!(madoka_movie.contains(&("01".to_string(), "B-TITLE".to_string())));
	assert!(madoka_movie.contains(&("Beginnings".to_string(), "B-TITLE".to_string())));

	let fate_first_order =
	labels_for("[DBD-Raws][Fate Grand Order ‐First Order‐][PV][01][1080P]");
	assert!(fate_first_order.contains(&("Fate".to_string(), "B-TITLE".to_string())));
	assert!(fate_first_order.contains(&("‐".to_string(), "B-TITLE".to_string())));
	assert!(fate_first_order.contains(&("First".to_string(), "B-TITLE".to_string())));

	let trillion_game = labels_for("[ANi] 一兆＄遊戲 - 03 [1080P][Baha][WEB-DL][AAC AVC][CHT]");
	assert!(trillion_game.contains(&("一兆".to_string(), "B-TITLE".to_string())));
	assert!(trillion_game.contains(&("＄".to_string(), "B-TITLE".to_string())));
	assert!(trillion_game.contains(&("遊戲".to_string(), "B-TITLE".to_string())));

	let lapis = labels_for("[Nekomoe kissaten&LoliHouse] Lapis Re꞉LiGHTs - PV01 [BDRip 1080p]");
	assert!(lapis.contains(&("Re".to_string(), "B-TITLE".to_string())));
	assert!(lapis.contains(&("꞉".to_string(), "B-TITLE".to_string())));
	assert!(lapis.contains(&("LiGHTs".to_string(), "B-TITLE".to_string())));

	let rezero =
	labels_for("TVアニメ『Re：ゼロから始める異世界生活』第10話「鬼がかったやり方」予告");
	assert!(!rezero.contains(&("TV".to_string(), "B-TITLE".to_string())));
	assert!(!rezero.contains(&("アニメ".to_string(), "B-TITLE".to_string())));
	assert!(rezero.contains(&("Re".to_string(), "B-TITLE".to_string())));
	assert!(rezero.contains(&("第".to_string(), "B-EPISODE".to_string())));
	assert!(rezero.contains(&("話".to_string(), "B-EPISODE".to_string())));

	let shark = labels_for("アニメ『おでかけ子ザメ』第10話「かじゅえん」");
	assert!(!shark.contains(&("アニメ".to_string(), "B-TITLE".to_string())));
	assert!(shark.contains(&("おでかけ子ザメ".to_string(), "B-TITLE".to_string())));

	let creditless =
	labels_for("[ANK-Raws] デート・ア・ライブⅡ Creditless ED (Bdrip 1920x1080 HEVC FLAC)");
	assert!(creditless.contains(&("Creditless".to_string(), "B-SPECIAL".to_string())));
	assert!(creditless.contains(&("ED".to_string(), "B-SPECIAL".to_string())));

	let no_number = labels_for("[甜甜圈字幕组] 小讨厌 081「爷爷的礼物 No.1」");
	assert!(no_number.contains(&("081".to_string(), "B-EPISODE".to_string())));
	assert!(!no_number.contains(&("1".to_string(), "B-EPISODE".to_string())));

	let bilingual = labels_for(
	"辉夜大小姐想让我告白~天才们的恋爱头脑战~.S2-01.中日双语.云光字幕组.[1080p]",
	);
	assert!(bilingual.contains(&("中日".to_string(), "B-SOURCE".to_string())));
	assert!(!bilingual.contains(&("中日".to_string(), "B-TITLE".to_string())));

	let nekomoe_lang = labels_for("[Nekomoe kissaten][UniteUp!][05][720p][JPTC]");
	assert!(nekomoe_lang.contains(&("JPTC".to_string(), "B-SOURCE".to_string())));
	assert!(!nekomoe_lang.contains(&("JPTC".to_string(), "B-TITLE".to_string())));

	let hayate =
	labels_for("[漏勺rip][Hayate_the_combat_butler_2nd_Season][23][BDrip X264 AAC 720P]");
	assert!(hayate.contains(&("Hayate".to_string(), "B-TITLE".to_string())));
	assert!(hayate.contains(&("2nd".to_string(), "B-SEASON".to_string())));
	assert!(hayate.contains(&("Season".to_string(), "B-SEASON".to_string())));
	assert!(hayate.contains(&("23".to_string(), "B-EPISODE".to_string())));

	let yama = labels_for("[A.I.R.nesSub][Yama_no_Susume_Second_Season][08][720p]");
	assert!(yama.contains(&("Yama".to_string(), "B-TITLE".to_string())));
	assert!(yama.contains(&("Second".to_string(), "B-SEASON".to_string())));
	assert!(yama.contains(&("Season".to_string(), "B-SEASON".to_string())));

	let one_room = labels_for("[DMG][One Room Second Season][00][1080P][BIG5]");
	assert!(one_room.contains(&("One".to_string(), "B-TITLE".to_string())));
	assert!(one_room.contains(&("Second".to_string(), "B-SEASON".to_string())));
	assert!(one_room.contains(&("Season".to_string(), "B-SEASON".to_string())));

	let jade =
	labels_for("[GM-Team][国漫][诛仙第2季][Jade Dynasty Ⅱ][2024][12][AVC][GB][1080P]");
	assert!(jade.contains(&("Jade".to_string(), "B-TITLE".to_string())));
	assert!(jade.contains(&("Dynasty".to_string(), "B-TITLE".to_string())));
	assert!(jade.contains(&("Ⅱ".to_string(), "B-SEASON".to_string())));
	assert!(jade.contains(&("12".to_string(), "B-EPISODE".to_string())));

	let yu_no = labels_for(
	"[JYFanSub][Kono_Yo_no_Hate_de_Koi_wo_Utau_Shoujo_YU-NO][23][BIG5][720P][AVC]",
	);
	assert!(yu_no.contains(&("NO".to_string(), "B-TITLE".to_string())));
	assert!(yu_no.contains(&("23".to_string(), "B-EPISODE".to_string())));

	let yu_no_dash =
	labels_for("[LowPower-Raws] この世の果てで恋を唄う少女YU-NO - 01 (BD 1080P x264 FLAC)");
	assert!(yu_no_dash.contains(&("NO".to_string(), "B-TITLE".to_string())));
	assert!(yu_no_dash.contains(&("01".to_string(), "B-EPISODE".to_string())));

	let fox = labels_for(
	"[GM-Team][国漫][狐妖小红娘尾生篇][Fox Spirit Matchmaker Ⅷ][2019][05][AVC][GB][1080P]",
	);
	assert!(fox.contains(&("Fox".to_string(), "B-TITLE".to_string())));
	assert!(fox.contains(&("Ⅷ".to_string(), "B-SEASON".to_string())));

	let kage =
	labels_for("[LKSUB][Kage no Jitsuryokusha ni Naritakute! 2nd Season][03][GB][720P]");
	assert!(kage.contains(&("2nd".to_string(), "B-SEASON".to_string())));
	assert!(kage.contains(&(" ".to_string(), "B-SEASON".to_string())));
	assert!(kage.contains(&("Season".to_string(), "B-SEASON".to_string())));

	let tiger = labels_for("[虎面人W][Tiger Mask W][01][简日][720p]");
	assert!(tiger.contains(&("Tiger".to_string(), "B-TITLE".to_string())));
	assert!(tiger.contains(&("W".to_string(), "B-TITLE".to_string())));
	assert!(tiger.contains(&("01".to_string(), "B-EPISODE".to_string())));

	let date_live_special =
	labels_for("[ANK-Raws] デート・ア・ライブⅡ CM01 (BDrip 1920x1080 HEVC-YUV420P10 FLAC)");
	assert!(date_live_special.contains(&("Ⅱ".to_string(), "B-SEASON".to_string())));
	assert!(date_live_special.contains(&("CM01".to_string(), "B-SPECIAL".to_string())));

	let lupin_part = labels_for("[SnowDream][Part 5_Lupin Sansei Part 5][01][BIG5][720P]");
	assert!(lupin_part.contains(&("Lupin".to_string(), "B-TITLE".to_string())));
	assert!(lupin_part.contains(&("Sansei".to_string(), "B-TITLE".to_string())));
	assert!(!lupin_part.contains(&("Part".to_string(), "B-TITLE".to_string())));
	assert!(lupin_part.contains(&("5".to_string(), "B-SEASON".to_string())));
	assert!(!lupin_part.contains(&("5".to_string(), "B-SPECIAL".to_string())));

	let roman_leaf = dmhy_record(
	"Ⅰ 001 魯邦燃起了鬥志",
	"tpl_test",
	&suggested_roles("TEXT SEP EPISODE SEP TEXT"),
	)
	.unwrap();
	assert!(roman_leaf
	.tokens
	.iter()
	.zip(roman_leaf.labels.iter())
	.any(\|(token, label)\| token == "Ⅰ" && label == "B-SEASON"));
	assert!(audit_warnings(&roman_leaf).contains(&"no_title".to_string()));

	let hallow = labels_for("[c.c动漫 ccwzz.cc][驱魔少年HALLOW][第09话][GB][720p]");
	assert!(hallow.contains(&("驱魔少年HALLOW".to_string(), "B-TITLE".to_string())));
	assert!(hallow.contains(&("第09话".to_string(), "B-EPISODE".to_string())));

	let fairy = labels_for("[魔導少年最終章][EP35][繁体][1080P]");
	assert!(fairy.contains(&("魔導少年".to_string(), "B-TITLE".to_string())));
	assert!(fairy.contains(&("EP35".to_string(), "B-EPISODE".to_string())));

	let mebius = labels_for("【CXRAW】【ウルトラマンメビウス】【22】【日々の未来】【DVDrip】【x264 Hi10P AAC】【MP4】");
	assert!(mebius.contains(&("ウルトラマンメビウス".to_string(), "B-TITLE".to_string())));
	assert!(mebius.contains(&("22".to_string(), "B-EPISODE".to_string())));

	let battle = labels_for("斗破苍穹三年之约第01话");
	assert!(battle.contains(&("斗破苍穹三年之约".to_string(), "B-TITLE".to_string())));
	assert!(battle.contains(&("第".to_string(), "B-EPISODE".to_string())));
	assert!(battle.contains(&("01".to_string(), "B-EPISODE".to_string())));
	assert!(battle.contains(&("话".to_string(), "B-EPISODE".to_string())));

	let hakumei = labels_for("妖精森林的小不点01");
	assert!(hakumei.contains(&("妖精森林的小不点".to_string(), "B-TITLE".to_string())));
	assert!(hakumei.contains(&("01".to_string(), "B-EPISODE".to_string())));

	let decimal_episode_title = labels_for("无限系统树：第1话可能性的起点");
	assert!(decimal_episode_title.contains(&("无限系统树".to_string(), "B-TITLE".to_string())));
	assert!(decimal_episode_title.contains(&("第".to_string(), "B-EPISODE".to_string())));
	assert!(decimal_episode_title.contains(&("1".to_string(), "B-EPISODE".to_string())));

	let hash_range = labels_for("花田少年史#1-3");
	assert!(hash_range.contains(&("花田少年史".to_string(), "B-TITLE".to_string())));
	assert!(hash_range.contains(&("1".to_string(), "B-EPISODE".to_string())));
	assert!(hash_range.contains(&("-".to_string(), "B-EPISODE".to_string())));
	assert!(hash_range.contains(&("3".to_string(), "B-EPISODE".to_string())));

	let movie_number = labels_for("[Kamigami] Haikyuu!! Movie - 01 [BD 1080p x265 Ma10p AAC]");
	assert!(movie_number.contains(&("Haikyuu".to_string(), "B-TITLE".to_string())));
	assert!(movie_number.contains(&("01".to_string(), "B-SPECIAL".to_string())));
	assert!(!movie_number.contains(&("01".to_string(), "B-EPISODE".to_string())));

	let ajin_movie = labels_for("[Moozzi2] Ajin The Movie - 01 (BD 1920x1080 x.264 FLACx2)");
	assert!(ajin_movie.contains(&("Ajin".to_string(), "B-TITLE".to_string())));
	assert!(ajin_movie.contains(&("01".to_string(), "B-SPECIAL".to_string())));

	let eien = labels_for(
	"[Nekomoe kissaten&LoliHouse] Eien no 831 [WebRip 1080p HEVC-10bit AAC ASSx2]",
	);
	assert!(eien.contains(&("Eien".to_string(), "B-TITLE".to_string())));
	assert!(eien.contains(&("831".to_string(), "B-TITLE".to_string())));

	let ep_only =
	dmhy_record("Ep.25", "tpl_test", &suggested_roles("TEXT SEP EPISODE")).unwrap();
	assert!(audit_warnings(&ep_only).contains(&"no_title".to_string()));
	}
	}