| import re
|
| from collections import Counter
|
| from urllib.request import Request, urlopen
|
|
|
|
|
# Direct-download URL of the BAM.txt file that is scanned for duplicate names.
BAM_URL: str = "https://huggingface.co/saliacoel/chars/resolve/main/BAM.txt"


# One entry ID marker: a run of digits followed by a dot (e.g. "0001."), plus
# any whitespace right after the dot. The negative lookbehind only lets a
# marker begin at the start of the text or directly after whitespace, so
# digits glued to the end of a word are not treated as markers.
ID_MARKER_RE = re.compile(r"(?<!\S)0*\d+\.\s*")
|
|
|
|
|
| def _download_text(url: str) -> str:
|
| req = Request(
|
| url,
|
| headers={
|
| "User-Agent": "Mozilla/5.0",
|
| "Accept": "*/*",
|
| },
|
| )
|
| with urlopen(req, timeout=60) as resp:
|
| return resp.read().decode("utf-8", errors="replace")
|
|
|
|
|
def _iter_entry_segments(text: str):
    """
    Generate the body text of every BAM entry found in *text*.

    A body is whatever lies between the end of one ID marker and the start of
    the next; the last body runs to the end of *text*. Bodies are yielded
    as-is (not stripped). Text in front of the first marker is never yielded,
    and nothing is yielded when *text* contains no marker at all.

    Example:
        '0001. Adali, tags... 0002. Petra, tags...'
    yields:
        'Adali, tags... '
        'Petra, tags...'
    """
    # Offset where the current entry body begins; None until a marker is seen.
    body_start = None

    for marker in ID_MARKER_RE.finditer(text):
        if body_start is not None:
            yield text[body_start : marker.start()]
        body_start = marker.end()

    # Whatever follows the last marker is the final entry body.
    if body_start is not None:
        yield text[body_start:]
|
|
|
|
|
| def _extract_name_from_segment(segment: str) -> str:
|
| """
|
| Name = first string in the entry, until the first comma.
|
| """
|
| segment = segment.strip()
|
| if not segment:
|
| return ""
|
|
|
| comma_index = segment.find(",")
|
| if comma_index == -1:
|
| name = segment.strip()
|
| else:
|
| name = segment[:comma_index].strip()
|
|
|
|
|
| name = " ".join(name.split())
|
| return name
|
|
|
|
|
def _find_duplicate_names(text: str) -> str:
    """
    List every entry name that occurs more than once in *text*.

    Names are compared exactly as returned by ``_extract_name_from_segment``;
    entries with an empty name are ignored. The result is a single string
    with the repeated names joined by ", ", in the order in which each name
    first appears. It is "" when no name repeats.
    """
    names = (
        _extract_name_from_segment(body) for body in _iter_entry_segments(text)
    )
    occurrences = Counter(name for name in names if name)

    # Counter is a dict subclass, so it iterates in first-insertion order,
    # which is exactly the order of first appearance.
    repeated = [name for name, total in occurrences.items() if total > 1]
    return ", ".join(repeated)
|
|
|
|
|
class Salia_BAM_Get_Duplicate_Names:
    """
    Node that downloads BAM.txt and reports the names listed more than once.

    It declares no inputs and produces one STRING output named
    ``duplicate_names``.
    NOTE(review): the attribute layout looks like the ComfyUI custom-node
    convention - confirm against the host application.
    """

    RETURN_TYPES = ("STRING",)
    RETURN_NAMES = ("duplicate_names",)
    FUNCTION = "get_duplicate_names"
    CATEGORY = "Salia"

    @classmethod
    def INPUT_TYPES(cls):
        """Describe the node's inputs: an empty set of required inputs."""
        return {"required": {}}

    def get_duplicate_names(self):
        """
        Download BAM.txt and return its duplicate names.

        Returns:
            A 1-tuple holding the comma-separated duplicate names ("" when
            there are none).

        Raises:
            ValueError: if the download fails for any reason; the original
                exception is attached as the cause.
        """
        try:
            bam_text = _download_text(BAM_URL)
        except Exception as e:
            raise ValueError(f"Failed to download BAM.txt:\n{BAM_URL}\n\n{e}") from e

        return (_find_duplicate_names(bam_text),)
|
|
|
|
|
# Node identifier -> class implementing the node.
# NOTE(review): presumably read by the host application when it loads this
# module - confirm.
NODE_CLASS_MAPPINGS = {"Salia_BAM_Get_Duplicate_Names": Salia_BAM_Get_Duplicate_Names}

# Node identifier -> display label; the label is the identifier itself.
NODE_DISPLAY_NAME_MAPPINGS = {
    "Salia_BAM_Get_Duplicate_Names": "Salia_BAM_Get_Duplicate_Names"
}