x
File size: 3,038 Bytes
2e40f58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import re
from collections import Counter
from urllib.request import Request, urlopen


# Remote location of the BAM entry list that this module downloads and scans.
BAM_URL = "https://huggingface.co/saliacoel/chars/resolve/main/BAM.txt"

# Matches the numeric ID marker that introduces each entry, for example:
#   1.
#   0001.
# The (?<!\S) lookbehind accepts a marker only at the very start of the text
# or directly after whitespace, so the next entry may begin after a space
# instead of a line break. Whitespace following the period is consumed as part
# of the marker, which makes each entry body start at its first real character.
# NOTE(review): the leading 0* is redundant because \d+ already matches zeros.
# NOTE(review): any whitespace-preceded "<digits>." inside an entry body would
# also be treated as a marker - confirm the data never contains such text.
ID_MARKER_RE = re.compile(r"(?<!\S)0*\d+\.\s*")


def _download_text(url: str) -> str:
    """Fetch *url* and return the response body as text.

    The request is sent with a ``Mozilla/5.0`` ``User-Agent`` header and a
    wildcard ``Accept`` header, and is abandoned after 60 seconds.

    Args:
        url: Address of the resource to download.

    Returns:
        The body decoded as UTF-8. A leading byte-order mark is removed and
        bytes that cannot be decoded are replaced with U+FFFD instead of
        raising.

    Raises:
        urllib.error.URLError: If the resource cannot be retrieved.
    """
    req = Request(
        url,
        headers={
            "User-Agent": "Mozilla/5.0",
            "Accept": "*/*",
        },
    )
    with urlopen(req, timeout=60) as resp:
        # "utf-8-sig" drops a leading BOM. With plain "utf-8" the BOM stays in
        # the text as U+FEFF, which is not whitespace, so the (?<!\S)
        # lookbehind of the ID marker pattern would reject the first marker
        # and the first entry would be silently lost.
        return resp.read().decode("utf-8-sig", errors="replace")


def _iter_entry_segments(text: str):
    """Generate the body of every BAM entry found in *text*.

    A body is the text running from the end of one ID marker up to the start
    of the next marker, or up to the end of *text* for the last marker.
    Anything in front of the first marker is ignored, and nothing is yielded
    when *text* contains no marker. Bodies are yielded unstripped.

    Example:
        '0001. Adali, tags... 0002. Petra, tags...'
    yields:
        'Adali, tags... '
        'Petra, tags...'
    """
    markers = list(ID_MARKER_RE.finditer(text))

    # Each body begins where its own marker ends ...
    body_starts = [marker.end() for marker in markers]
    # ... and stops where the following marker begins; the last body runs to
    # the end of the text.
    body_stops = [marker.start() for marker in markers[1:]] + [len(text)]

    # With no markers body_starts is empty, so zip() produces nothing.
    for begin, stop in zip(body_starts, body_stops):
        yield text[begin:stop]


def _extract_name_from_segment(segment: str) -> str:
    """Return the entry name, i.e. the text before the first comma.

    Leading and trailing whitespace is removed and every run of whitespace
    inside the name is collapsed to a single space. A segment without a comma
    is taken to be the name in its entirety; a blank segment, or one that
    starts with a comma, gives an empty string.
    """
    # partition() returns the whole string as the head when no comma exists.
    head, _, _ = segment.strip().partition(",")

    # split() without arguments trims both ends and splits on whitespace runs,
    # so re-joining with one space normalizes the name in a single step.
    return " ".join(head.split())


def _find_duplicate_names(text: str) -> str:
    """Return the names that occur in more than one entry of *text*.

    The names are joined with ", " and listed in the order in which each one
    first appears. Entries whose extracted name is empty are skipped. The
    result is an empty string when no name is repeated.
    """
    entry_names = (
        _extract_name_from_segment(body) for body in _iter_entry_segments(text)
    )

    # Counter is a dict subclass, so its keys keep first-insertion order; that
    # gives both the tally and the first-appearance ordering in one pass.
    tally = Counter(name for name in entry_names if name)

    return ", ".join(name for name, hits in tally.items() if hits > 1)


class Salia_BAM_Get_Duplicate_Names:
    """Node that reports the names appearing more than once in BAM.txt.

    The node declares no inputs and a single STRING output called
    ``duplicate_names``; its entry point is ``get_duplicate_names``.
    """

    CATEGORY = "Salia"
    FUNCTION = "get_duplicate_names"
    RETURN_TYPES = ("STRING",)
    RETURN_NAMES = ("duplicate_names",)

    @classmethod
    def INPUT_TYPES(cls):
        """Describe the node inputs: an empty set of required inputs."""
        return {"required": {}}

    def get_duplicate_names(self):
        """Download BAM.txt and collect its duplicated names.

        Returns:
            A one-element tuple holding the duplicated names as a single
            comma-separated string.

        Raises:
            ValueError: If the download fails for any reason; the underlying
                exception is chained as the cause.
        """
        try:
            bam_text = _download_text(BAM_URL)
        except Exception as exc:
            raise ValueError(f"Failed to download BAM.txt:\n{BAM_URL}\n\n{exc}") from exc

        return (_find_duplicate_names(bam_text),)


# Maps each node identifier to the class that implements it.
# NOTE(review): these two names follow the ComfyUI custom-node convention and
# are presumably read by the host application's loader - confirm.
NODE_CLASS_MAPPINGS = {
    "Salia_BAM_Get_Duplicate_Names": Salia_BAM_Get_Duplicate_Names,
}

# Maps each node identifier to its display label; here the label is the same
# string as the identifier.
NODE_DISPLAY_NAME_MAPPINGS = {
    "Salia_BAM_Get_Duplicate_Names": "Salia_BAM_Get_Duplicate_Names",
}