"""Node that reports character names appearing more than once in BAM.txt.

The module downloads a remote text file made of numbered entries
("0001. Name, tag, tag ..."), extracts the name of every entry and returns
the names that occur more than once.
"""

import re
from collections import Counter
from collections.abc import Iterator
from urllib.request import Request, urlopen

BAM_URL = "https://huggingface.co/saliacoel/chars/resolve/main/BAM.txt"

# Matches ID markers like:
#   1.
#   0001.
# and also works when the next entry starts after a space instead of a line
# break.
#
# NOTE(review): the original pattern was truncated in this copy of the file
# (only the leading "(?" survived). The pattern below is reconstructed from
# the description above -- confirm it against the canonical source.
#   (?<!\S)     marker must start the text or follow whitespace
#   \d+\.       the numeric ID and its dot
#   (?:\s+|$)   must be followed by whitespace or end of text, so decimals
#               such as "1.5" inside a tag list are not treated as markers
ID_MARKER_RE = re.compile(r"(?<!\S)\d+\.(?:\s+|$)")


def _download_text(url: str) -> str:
    """Fetch *url* and return the response body decoded as text.

    Undecodable bytes are replaced rather than raising. A leading UTF-8 BOM
    is dropped ("utf-8-sig"): it is not whitespace, so it would otherwise
    glue itself to the first ID marker and hide that entry from
    ``ID_MARKER_RE``.

    Raises:
        Whatever ``urllib.request.urlopen`` raises on network/HTTP failure.
    """
    req = Request(
        url,
        headers={
            "User-Agent": "Mozilla/5.0",
            "Accept": "*/*",
        },
    )
    with urlopen(req, timeout=60) as resp:
        return resp.read().decode("utf-8-sig", errors="replace")


def _iter_entry_segments(text: str) -> Iterator[str]:
    """Yield each BAM entry body found between consecutive ID markers.

    Text before the first marker is ignored; the last entry runs to the end
    of *text*. Nothing is yielded when *text* contains no marker.

    Example:
        '0001. Adali, tags... 0002. Petra, tags...'
    yields:
        'Adali, tags... '
        'Petra, tags...'
    """
    body_start = None
    for marker in ID_MARKER_RE.finditer(text):
        if body_start is not None:
            yield text[body_start:marker.start()]
        body_start = marker.end()

    if body_start is not None:
        yield text[body_start:]


def _extract_name_from_segment(segment: str) -> str:
    """Return the entry name: everything before the first comma.

    Surrounding whitespace is removed and internal whitespace runs are
    collapsed to single spaces. Returns "" for a blank segment or one that
    starts with a comma.
    """
    raw_name, _, _ = segment.partition(",")
    # split()/join both strips the ends and normalizes inner whitespace.
    return " ".join(raw_name.split())


def _find_duplicate_names(text: str) -> str:
    """Return a comma-separated string of names that appear more than once.

    Names are compared exactly (case-sensitive) after whitespace
    normalization and are listed in order of first appearance. Entries with
    an empty name are skipped. Returns "" when there are no duplicates.
    """
    names = (
        _extract_name_from_segment(segment)
        for segment in _iter_entry_segments(text)
    )
    # Counter keeps keys in first-insertion order, so no separate ordering
    # list is required.
    counts = Counter(name for name in names if name)
    return ", ".join(name for name, count in counts.items() if count > 1)


class Salia_BAM_Get_Duplicate_Names:
    """Input-less node that outputs the duplicated names found in BAM.txt.

    NOTE(review): the class attributes below look like the ComfyUI custom
    node protocol (INPUT_TYPES / RETURN_TYPES / FUNCTION / CATEGORY) --
    confirm against the host application.
    """

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the node inputs; this node takes none."""
        return {
            "required": {}
        }

    RETURN_TYPES = ("STRING",)
    RETURN_NAMES = ("duplicate_names",)
    FUNCTION = "get_duplicate_names"
    CATEGORY = "Salia"

    def get_duplicate_names(self):
        """Download BAM.txt and return its duplicated names.

        Returns:
            A 1-tuple holding the comma-separated duplicate names.

        Raises:
            ValueError: if the download fails for any reason; the original
                exception is chained as the cause.
        """
        try:
            bam_text = _download_text(BAM_URL)
        except Exception as e:
            raise ValueError(f"Failed to download BAM.txt:\n{BAM_URL}\n\n{e}") from e

        result = _find_duplicate_names(bam_text)
        return (result,)


NODE_CLASS_MAPPINGS = {
    "Salia_BAM_Get_Duplicate_Names": Salia_BAM_Get_Duplicate_Names,
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "Salia_BAM_Get_Duplicate_Names": "Salia_BAM_Get_Duplicate_Names",
}