MyCustomNodes / Bam_GPT_Parser.py

Upload Bam_GPT_Parser.py

b377f38 verified 3 days ago

19.5 kB

	from __future__ import annotations

	import re
	from typing import Dict, List, Tuple


	# =============================================================================
	# Outputs (order matters)
	# =============================================================================
	# Ordered (output name, type tag) pairs for every value the node returns.
	# RETURN_NAMES_TUPLE and RETURN_TYPES_TUPLE below are derived from this list,
	# so the position of an entry here is the position of its value in the result.
	# The type tag also selects the default value used by the parser:
	# 0 for "INT", "" for everything else.
	_OUTPUT_DEFS: List[Tuple[str, str]] = [
	# Core
	("gender_str", "STRING"),
	("gender_int", "INT"),
	("age_str", "STRING"),
	("age_int", "INT"),
	("identity_str", "STRING"),
	("eyecolor_str", "STRING"),
	("hairstyle_str", "STRING"),

	# Equipment
	("topwear_str", "STRING"),
	("bellywear_str", "STRING"),
	("breastwear_str", "STRING"),

	# Equipment slots that exist once per body side (left / right)
	("handwear_left_str", "STRING"),
	("handwear_right_str", "STRING"),
	("wristwear_left_str", "STRING"),
	("wristwear_right_str", "STRING"),
	("forearm_left_str", "STRING"),
	("forearm_right_str", "STRING"),
	("elbow_left_str", "STRING"),
	("elbow_right_str", "STRING"),
	("upperarm_left_str", "STRING"),
	("upperarm_right_str", "STRING"),
	("shoulder_left_str", "STRING"),
	("shoulder_right_str", "STRING"),

	("shank_left_str", "STRING"),
	("shank_right_str", "STRING"),

	("knee_left_str", "STRING"),
	("knee_right_str", "STRING"),

	("foot_left_str", "STRING"),
	("foot_right_str", "STRING"),

	("necklace_str", "STRING"),
	("earring_left_str", "STRING"),
	("earring_right_str", "STRING"),

	("kneewear_str", "STRING"),
	("headwear_str", "STRING"),
	("facemask_str", "STRING"),
	("sunglasses_str", "STRING"),
	("glasses_str", "STRING"),

	("crotch_str", "STRING"),
	("belt_str", "STRING"),
	("skirt_str", "STRING"),
	("one_piece_str", "STRING"),

	# Tags (five numbered slots per tag family)
	("aesthetic_tag1", "STRING"),
	("aesthetic_tag2", "STRING"),
	("aesthetic_tag3", "STRING"),
	("aesthetic_tag4", "STRING"),
	("aesthetic_tag5", "STRING"),

	("skin_tag1", "STRING"),
	("skin_tag2", "STRING"),
	("skin_tag3", "STRING"),
	("skin_tag4", "STRING"),
	("skin_tag5", "STRING"),

	("expression_tag1", "STRING"),
	("expression_tag2", "STRING"),
	("expression_tag3", "STRING"),
	("expression_tag4", "STRING"),
	("expression_tag5", "STRING"),

	# Unique extra headwear slot
	("headwear_str_2", "STRING"),

	# Flattened equipment values (equip.* values only, in order, unique)
	("all_equip", "STRING"),

	# Converted ancient BAM string
	("bam_ancient", "STRING"),
	]

	# Parallel tuples split out of _OUTPUT_DEFS: names and type tags, same order.
	RETURN_NAMES_TUPLE = tuple(n for n, _t in _OUTPUT_DEFS)
	RETURN_TYPES_TUPLE = tuple(_t for _n, _t in _OUTPUT_DEFS)


	# =============================================================================
	# Constants
	# =============================================================================
	# Fixed text emitted verbatim as the NEGATIVE_PROMPT_1 field of the
	# bam_ancient string. The adjacent literals are concatenated into one string.
	_NEGATIVE_PROMPT_1 = (
	"monochrome, sketch, colorless, (asymmetrical face:1.5), "
	"(asymmetrical tail-arched eyebrows:1.0), (terribly drawn eyes:1.2), "
	"(heterochromia:1.5), watermark, text, visible background objects, visible floor, "
	"(floor-effects:1.5), (background-effects:1.5), non-character, character-shadow, floor-shadow"
	)


	# =============================================================================
	# Helpers
	# =============================================================================
	def _strip_quotes(v: str) -> str:
	    """Trim *v* and remove one pair of matching surrounding quotes.

	    Only a single outer layer is removed, and only when the first and last
	    characters are the same quote character (both ``"`` or both ``'``).
	    The unquoted content is trimmed again. ``None`` or empty input gives "".
	    """
	    text = (v or "").strip()
	    # len >= 2 keeps a lone quote character from being treated as a pair.
	    if len(text) >= 2 and text[0] == text[-1] and text[0] in ('"', "'"):
	        return text[1:-1].strip()
	    return text


	def _norm_key(k: str) -> str:
	    """Normalize a field key for lookup.

	    The key is trimmed and lower-cased, then every run of spaces, hyphens
	    and underscores is collapsed into a single underscore. Dots are kept,
	    so dotted keys such as ``equip.hand.left`` keep their structure.
	    ``None`` or empty input gives "".
	    """
	    key = (k or "").strip().lower()
	    # One pass replaces "space/hyphen -> underscore" followed by
	    # "collapse repeated underscores".
	    return re.sub(r"[ \-_]+", "_", key)


	def _safe_int(s: str, default: int = 0) -> int:
	    """Parse *s* as a base-10 integer, returning *default* on failure.

	    Surrounding whitespace is ignored. Empty/``None`` input, non-numeric
	    text (including decimals such as "3.5") and non-string values all
	    yield *default* instead of raising.
	    """
	    try:
	        return int((s or "").strip())
	    except (AttributeError, TypeError, ValueError):
	        # AttributeError/TypeError: *s* was not a string;
	        # ValueError: the text is not an integer literal.
	        return default


	def _norm_spaces(s: str) -> str:
	    """Collapse every run of whitespace in *s* to one space and trim the ends.

	    Line breaks count as whitespace, so multi-line input becomes a single
	    line. ``None`` or empty input gives "".
	    """
	    # "\s" already matches "\r" and "\n", so no separate replacement is needed.
	    return re.sub(r"\s+", " ", s or "").strip()


	def _extract_gpt_bam_block(text: str) -> str:
	    """Return the payload of the first GPT_BAM block found in *text*.

	    The payload is whatever sits between ``GPT_BAM_START###`` and
	    ``###GPT_BAM_END``. If the markers are missing, the whole text is
	    returned so the caller can still attempt key=value parsing.
	    ``None`` is treated as an empty string.
	    """
	    text = text or ""
	    # re.S lets the payload span line breaks; re.I accepts markers in any case.
	    # The lazy group stops at the first end marker.
	    m = re.search(r"GPT_BAM_START###(.*?)###GPT_BAM_END", text, flags=re.S | re.I)
	    return m.group(1) if m else text


	# =============================================================================
	# Ancient conversion cleaners (used when building the bam_ancient string)
	# - Non-equipment text fields: underscores are replaced by spaces
	# - Equipment values are emitted as provided (underscores and inner commas kept)
	# =============================================================================
	def _clean_identity_like(s: str) -> str:
	    """Turn an underscore-separated value into plain, single-spaced text.

	    Underscores become spaces, whitespace runs are collapsed and the ends
	    are trimmed. ``None`` or empty input gives "".
	    """
	    return _norm_spaces((s or "").replace("_", " "))


	def _clean_eyes(s: str) -> str:
	    """Clean an eye-colour value for the ancient string.

	    Every ``_eyes`` marker is dropped, remaining underscores become spaces
	    and whitespace is normalized (e.g. ``dark_blue_eyes`` -> ``dark blue``).
	    ``None`` or empty input gives "".
	    """
	    text = (s or "").replace("_eyes", "")
	    return _norm_spaces(text.replace("_", " "))


	def _clean_hair(s: str) -> str:
	    """Clean a comma-separated hairstyle value for the ancient string.

	    Each comma-separated part has its ``_hairstyle`` / ``_hairsyle`` /
	    ``_hair`` markers removed wherever they occur, underscores turned into
	    spaces and whitespace normalized. Parts that end up empty are dropped;
	    the rest are joined with ", ".
	    """
	    # Longer markers first: removing "_hair" first would turn "_hairstyle"
	    # into "style". "_hairsyle" is a misspelled variant handled on purpose.
	    markers = ("_hairstyle", "_hairsyle", "_hair")

	    cleaned: List[str] = []
	    for part in (s or "").split(","):
	        # Removing every occurrence also covers a marker at the end of the
	        # part, so no separate suffix check is required.
	        for marker in markers:
	            part = part.replace(marker, "")
	        part = _norm_spaces(part.replace("_", " "))
	        if part:
	            cleaned.append(part)
	    return ", ".join(cleaned)


	def _clean_tag(s: str, kind: str) -> str:
	    """Clean one tag value of the given *kind* for the ancient string.

	    For the kinds "aesthetic", "skin" and "expression", the kind word is
	    removed from the value together with an adjoining underscore
	    (``_kind``, ``kind_``, then any bare ``kind`` left over). Any other
	    *kind* removes nothing. Afterwards underscores become spaces and
	    whitespace is normalized.
	    """
	    text = (s or "").strip()
	    if kind in ("aesthetic", "skin", "expression"):
	        # Order matters: strip the underscore-attached forms before the bare
	        # word so no stray underscore is left behind.
	        for token in (f"_{kind}", f"{kind}_", kind):
	            text = text.replace(token, "")
	    return _norm_spaces(text.replace("_", " "))


	def _zero_if_empty(s: str) -> str:
	    """Return *s* with whitespace normalized, or "0" if nothing is left.

	    "0" is the placeholder used for an empty field in the ancient string.
	    """
	    return _norm_spaces(s) or "0"


	# =============================================================================
	# Equipment mapping
	# =============================================================================
	# Alias table: normalized equip key base (side suffix already removed, dots
	# turned into underscores) -> canonical slot name. The canonical names are
	# the keys of _SIDE_FIELDS and _SINGLE_FIELDS. A base that is not listed here
	# is looked up under its own name by _assign_equip.
	_KEY_CANONICAL: Dict[str, str] = {
	"topwear": "topwear",
	"belly": "bellywear",
	"bellywear": "bellywear",
	"breast": "breastwear",
	"breastwear": "breastwear",

	"hand": "handwear",
	"handwear": "handwear",
	"wrist": "wristwear",
	"wristwear": "wristwear",

	"forearm": "forearm",
	"elbow": "elbow",
	"upperarm": "upperarm",
	"upper_arm": "upperarm",
	"shoulder": "shoulder",

	"shank": "shank",
	"knee": "knee",

	"foot": "foot",
	"footwear": "foot",
	"shoe": "foot",
	"shoes": "foot",

	"necklace": "necklace",

	"earring": "earring",
	"earrings": "earring",

	"kneewear": "kneewear",
	"headwear": "headwear",
	"headwear2": "headwear2",

	"facemask": "facemask",
	"face_mask": "facemask",
	"mask": "facemask",

	"sunglasses": "sunglasses",
	"glasses": "glasses",

	"crotch": "crotch",
	"belt": "belt",
	"skirt": "skirt",

	"onepiece": "one_piece",
	"one_piece": "one_piece",
	"one_piecewear": "one_piece",
	}

	# Canonical slot name -> (left output name, right output name) for equipment
	# that exists once per body side.
	_SIDE_FIELDS: Dict[str, Tuple[str, str]] = {
	    "handwear": ("handwear_left_str", "handwear_right_str"),
	    "wristwear": ("wristwear_left_str", "wristwear_right_str"),
	    "forearm": ("forearm_left_str", "forearm_right_str"),
	    "elbow": ("elbow_left_str", "elbow_right_str"),
	    "upperarm": ("upperarm_left_str", "upperarm_right_str"),
	    "shoulder": ("shoulder_left_str", "shoulder_right_str"),
	    "shank": ("shank_left_str", "shank_right_str"),
	    "knee": ("knee_left_str", "knee_right_str"),
	    "foot": ("foot_left_str", "foot_right_str"),
	    "earring": ("earring_left_str", "earring_right_str"),
	}

	# Canonical slot name -> output name for equipment that has a single slot.
	_SINGLE_FIELDS: Dict[str, str] = {
	    "topwear": "topwear_str",
	    "bellywear": "bellywear_str",
	    "breastwear": "breastwear_str",
	    "necklace": "necklace_str",
	    "kneewear": "kneewear_str",
	    "headwear": "headwear_str",
	    "facemask": "facemask_str",
	    "sunglasses": "sunglasses_str",
	    "glasses": "glasses_str",
	    "crotch": "crotch_str",
	    "belt": "belt_str",
	    "skirt": "skirt_str",
	    "one_piece": "one_piece_str",
	    "headwear2": "headwear_str_2",
	}

	# Every output name that an equip key can write to: all single-slot outputs
	# plus both sides of every sided slot.
	_ALL_EQUIP_OUTPUTS = set(_SINGLE_FIELDS.values())
	for lf, rf in _SIDE_FIELDS.values():
	    _ALL_EQUIP_OUTPUTS.update((lf, rf))


	def _assign_equip(
	    out: Dict[str, object],
	    equip_values_in_order: List[str],
	    raw_key: str,
	    val: str,
	) -> None:
	    """Store one equipment value in the structured outputs.

	    Args:
	        out: Output dict, updated in place.
	        equip_values_in_order: Running list of equip values; every non-empty
	            value is appended, including values of unrecognized keys.
	        raw_key: Equip key without its ``equip.`` prefix, e.g. ``hand.left``,
	            ``shoes`` or ``upper-arm_right``.
	        val: The value for that key; it is trimmed before use.

	    Key handling:
	        The key is normalized with ``_norm_key``. A trailing ``.left`` /
	        ``.right`` and then a trailing ``_left`` / ``_right`` is taken as the
	        side. The remaining base is mapped through ``_KEY_CANONICAL``.

	    Precedence for sided slots:
	        - a key with a side overwrites that side (even with an empty value);
	        - a key without a side fills only sides that are still empty, so an
	          explicit side wins regardless of the order of the keys.

	    Single-slot equipment is simply overwritten; any side on the key is
	    ignored. Unrecognized keys change nothing in *out*.
	    """
	    val = (val or "").strip()
	    base = _norm_key(raw_key)

	    # Detect the side: dotted form first, then underscore form. Both checks
	    # run, so the underscore form decides if a key carries both.
	    side = None
	    for sep in (".", "_"):
	        for candidate in ("left", "right"):
	            suffix = sep + candidate
	            if base.endswith(suffix):
	                side = candidate
	                base = base[: -len(suffix)]
	                break

	    # Remaining dots are treated like underscores for the alias lookup.
	    base = base.strip("._").replace(".", "_")
	    canonical = _KEY_CANONICAL.get(base, base)

	    # Recorded before the slot lookup so unknown keys still reach all_equip.
	    if val:
	        equip_values_in_order.append(val)

	    if canonical in _SIDE_FIELDS:
	        left_name, right_name = _SIDE_FIELDS[canonical]
	        if side == "left":
	            out[left_name] = val
	        elif side == "right":
	            out[right_name] = val
	        else:
	            # No side given: fill only the sides that are still empty.
	            for name in (left_name, right_name):
	                if not out.get(name, ""):
	                    out[name] = val
	    elif canonical in _SINGLE_FIELDS:
	        out[_SINGLE_FIELDS[canonical]] = val


	# =============================================================================
	# GPT_BAM parsing + ancient conversion
	# =============================================================================
	# Tag families that have numbered slots (<kind>_tag1 .. <kind>_tag5).
	_TAG_KINDS = ("aesthetic", "skin", "expression")

	# Non-equip key spellings that fill the extra headwear slot.
	_HEADWEAR2_ALIASES = ("headwear2", "headwear_tag2", "headwear_str_2", "equip_headwear2")


	def _tag_slot_number(key: str, kind: str) -> int:
	    """Return the tag slot (1..5) that *key* names for *kind*, or 0 if none.

	    Two spellings are understood: ``<kind>.<n>`` and ``<kind>_tag<n>``.
	    """
	    if key.startswith(kind + "."):
	        suffix = key.split(".", 1)[1]
	        # isdecimal(), not isdigit(): isdigit() also accepts characters such
	        # as "²", which int() rejects with ValueError.
	        num = int(suffix) if suffix.isdecimal() else 0
	    else:
	        m = re.search(kind + r"_tag(\d+)", key)
	        num = int(m.group(1)) if m else 0
	    return num if 1 <= num <= 5 else 0


	def _parse_gpt_bam(text: str) -> Dict[str, object]:
	    """Parse a GPT_BAM string into a dict with one entry per node output.

	    The payload (see ``_extract_gpt_bam_block``) is split on ``###``. Each
	    segment is read as ``key=value``, or ``key:value`` when it contains no
	    ``=``; segments with neither are skipped. Keys are normalized with
	    ``_norm_key`` and values have surrounding quotes removed.

	    Recognized keys:
	        - gender / sex / gender_int / gender_num: "1", boy, male, m -> 1;
	          "2", girl, female, f -> 2. Anything else is ignored.
	        - age, identity (job, role), eye colour and hairstyle aliases.
	        - ``equip.<slot>`` / ``equipment.<slot>``: routed via ``_assign_equip``.
	        - ``<kind>.<n>`` / ``<kind>_tag<n>`` for aesthetic, skin, expression
	          with n in 1..5.
	        - the headwear2 aliases.
	    Every other key (for example ``name``) is ignored.

	    Returns:
	        Dict keyed by the names in ``_OUTPUT_DEFS``. Unset INT outputs are 0,
	        unset STRING outputs are "". ``gender_int`` defaults to 2 ("girl")
	        when no valid gender was given. ``all_equip`` holds the unique equip
	        values in first-seen order; ``bam_ancient`` holds the converted string.
	    """
	    payload = _extract_gpt_bam_block(text)

	    # Defaults for every declared output, then for every equip slot.
	    out: Dict[str, object] = {name: (0 if t == "INT" else "") for name, t in _OUTPUT_DEFS}
	    for name in _ALL_EQUIP_OUTPUTS:
	        out[name] = ""

	    equip_values_in_order: List[str] = []
	    g_int = None

	    for seg in payload.split("###"):
	        seg = seg.strip()
	        if "=" in seg:
	            k, v = seg.split("=", 1)
	        elif ":" in seg:
	            k, v = seg.split(":", 1)
	        else:
	            # Empty segments and free text carry no field.
	            continue

	        k = _norm_key(k)
	        v = _strip_quotes(v).strip()

	        # Core fields
	        if k in ("gender", "sex", "gender_int", "gender_num"):
	            vv = v.lower()
	            if vv in ("1", "boy", "male", "m"):
	                g_int = 1
	            elif vv in ("2", "girl", "female", "f"):
	                g_int = 2

	        elif k in ("age", "age_str"):
	            out["age_str"] = v
	            out["age_int"] = _safe_int(v, 0)

	        elif k in ("identity", "identity_str", "job", "role"):
	            out["identity_str"] = v

	        elif k in ("eyecolor", "eye_color", "eye", "eyecolor_str"):
	            out["eyecolor_str"] = v

	        elif k in ("hairstyle", "hair", "hairstyle_str"):
	            out["hairstyle_str"] = v

	        # Equipment
	        elif k.startswith(("equip.", "equipment.")):
	            raw_equip_key = k.split(".", 1)[1]  # drop the "equip." prefix
	            _assign_equip(out, equip_values_in_order, raw_equip_key, v)

	        else:
	            # Tag slots, then headwear2 aliases; anything else is ignored.
	            kind = next(
	                (kd for kd in _TAG_KINDS if k.startswith((kd + ".", kd + "_tag"))),
	                None,
	            )
	            if kind is not None:
	                slot = _tag_slot_number(k, kind)
	                if slot:
	                    out[f"{kind}_tag{slot}"] = v
	            elif k in _HEADWEAR2_ALIASES:
	                out["headwear_str_2"] = v

	    # Finalize gender: a missing or unrecognized value counts as 2 ("girl").
	    if g_int is None:
	        g_int = 2
	    out["gender_int"] = int(g_int)
	    out["gender_str"] = "boy" if g_int == 1 else "girl"

	    # all_equip: unique non-empty equip values in first-seen order
	    # (values of unknown equip keys are included).
	    seen = set()
	    equip_unique: List[str] = []
	    for value in equip_values_in_order:
	        value = (value or "").strip()
	        if value and value not in seen:
	            seen.add(value)
	            equip_unique.append(value)
	    out["all_equip"] = ", ".join(equip_unique)

	    out["bam_ancient"] = _convert_to_ancient(out, equip_unique)

	    return out


	def _convert_to_ancient(parsed: Dict[str, object], equip_unique: List[str]) -> str:
	    """Build the single-line "ancient" BAM string from parsed outputs.

	    Args:
	        parsed: Output dict as produced by ``_parse_gpt_bam``.
	        equip_unique: Unique equip values in the order they should appear.
	            The list is copied, not modified.

	    Field order, joined by ``###`` with a trailing ``###``:
	        START, gender_int, age, identity, eyes, hair, equipment,
	        aesthetic 1-5, skin 1-5, expression 1-5, extra headwear,
	        "0", "0", "0", the constant negative prompt, "0", END.
	    Empty fields are written as "0".

	    Equipment additions (each appended once, after the parsed values):
	        - no footwear on either side        -> "bare foot"
	        - no handwear on either side        -> "bare hands"
	        - no topwear, breastwear, one_piece -> "naked breasts"
	        - no topwear, one_piece, crotch, skirt -> gender-specific
	          "naked crotch ..." entry
	    """

	    def field(name: str) -> str:
	        # Parsed value as trimmed text; missing/None/0 all become "".
	        return str(parsed.get(name, "") or "").strip()

	    gender_int = int(parsed.get("gender_int", 2) or 2)

	    # Fall back to the numeric age (possibly "0") when no age text exists.
	    age_str = field("age_str") or str(parsed.get("age_int", 0) or 0)

	    identity = _clean_identity_like(field("identity_str"))
	    eyes = _clean_eyes(field("eyecolor_str"))
	    hair = _clean_hair(field("hairstyle_str"))

	    equip_list: List[str] = list(equip_unique)

	    def add_unique(val: str) -> None:
	        val = (val or "").strip()
	        if val and val not in equip_list:
	            equip_list.append(val)

	    if not (field("foot_left_str") or field("foot_right_str")):
	        add_unique("bare foot")

	    if not (field("handwear_left_str") or field("handwear_right_str")):
	        add_unique("bare hands")

	    top = field("topwear_str")
	    one_piece = field("one_piece_str")
	    if not (top or field("breastwear_str") or one_piece):
	        add_unique("naked breasts")

	    if not (top or one_piece or field("crotch_str") or field("skirt_str")):
	        if gender_int == 1:
	            add_unique("naked crotch exposed penis")
	        else:
	            add_unique("naked crotch exposed vagina")

	    equip_str = ", ".join(e for e in equip_list if (e or "").strip())

	    # Fifteen tag fields: aesthetic 1-5, skin 1-5, expression 1-5.
	    tag_fields = [
	        _zero_if_empty(_clean_tag(field(f"{kind}_tag{i}"), kind))
	        for kind in ("aesthetic", "skin", "expression")
	        for i in range(1, 6)
	    ]

	    hw_extra = _clean_identity_like(field("headwear_str_2"))

	    fields = [
	        "START",
	        str(gender_int),
	        _zero_if_empty(age_str),
	        _zero_if_empty(identity),
	        _zero_if_empty(eyes),
	        _zero_if_empty(hair),
	        _zero_if_empty(equip_str),
	        *tag_fields,
	        _zero_if_empty(hw_extra),
	        "0",  # POSITIVE_PROMPT_0
	        "0",  # POSITIVE_PROMPT_1
	        "0",  # NEGATIVE_PROMPT_0
	        _NEGATIVE_PROMPT_1,  # NEGATIVE_PROMPT_1 (constant)
	        "0",  # NEGATIVE_PROMPT_2
	        "END",
	    ]

	    # Exactly START###...###END### on one line, without doubled spaces.
	    return _norm_spaces("###".join(fields) + "###")


	# =============================================================================
	# ComfyUI Node
	# =============================================================================
	class BAMParser_Ancestral:
	    """Node that parses a GPT_BAM v1 string into individual outputs.

	    The input is a string of key=value fields separated by ``###``. The
	    node returns one value per entry of ``_OUTPUT_DEFS``, in that order,
	    including the converted ``bam_ancient`` string.
	    """

	    @classmethod
	    def INPUT_TYPES(cls):
	        """Declare the single required multiline string input."""
	        return {
	            "required": {
	                "gpt_bam_string": ("STRING", {"multiline": True, "default": ""}),
	            }
	        }

	    RETURN_TYPES = RETURN_TYPES_TUPLE
	    RETURN_NAMES = RETURN_NAMES_TUPLE
	    FUNCTION = "parse"  # name of the method below
	    CATEGORY = "BAM"

	    def parse(self, gpt_bam_string: str):
	        """Parse *gpt_bam_string* and return the outputs as an ordered tuple.

	        Any output missing from the parsed dict falls back to 0 for "INT"
	        outputs and "" for all others.
	        """
	        parsed = _parse_gpt_bam(gpt_bam_string)
	        return tuple(
	            parsed.get(name, 0 if type_tag == "INT" else "")
	            for name, type_tag in _OUTPUT_DEFS
	        )


	# Node identifier -> implementing class.
	# NOTE(review): presumably the table the ComfyUI loader reads to register
	# the node — confirm against the host application's loader.
	NODE_CLASS_MAPPINGS = {
	"BAMParser_Ancestral": BAMParser_Ancestral,
	}

	# Node identifier -> display label (here identical to the identifier).
	NODE_DISPLAY_NAME_MAPPINGS = {
	"BAMParser_Ancestral": "BAMParser_Ancestral",
	}