"""Persona JSON parsing — ported from woid's agent-sandbox/woid-core/persona/parse.js. LLMs wrap persona JSON in noise (code fences, preambles, trailing prose, multi-object emissions). These helpers defensively extract the first bracket-balanced JSON object and sanitize the standard fields. `about` is load-bearing; the rest are optional. """ import json import re _NAME_TRIM = re.compile(r'^[\s"\'“”‘’`]+|[\s"\'“”‘’`]+$') _NAME_KV = re.compile(r'^(name|character|persona)\s*[:=]', re.I) _FENCE = re.compile(r'```(?:json)?\s*([\s\S]*?)```', re.I) def sanitize_name(raw): s = re.sub(r'\s+', ' ', _NAME_TRIM.sub('', str(raw or ''))).strip() if len(s) < 2 or len(s) > 40: return '' if _NAME_KV.match(s): return '' return s def trim_tag(raw): if not isinstance(raw, str): return None s = re.sub(r'\.\s*$', '', raw.strip()) if not s: return None return (s[:46].strip() + '…') if len(s) > 48 else s def extract_first_json_object(raw): """Walk forward from each `{` until a bracket-balanced, string-aware `}`. First successful parse wins — tolerates trailing prose and `}` inside string literals.""" n = len(raw) for i in range(n): if raw[i] != '{': continue depth = 0 in_str = False esc = False for j in range(i, n): ch = raw[j] if in_str: if esc: esc = False elif ch == '\\': esc = True elif ch == '"': in_str = False continue if ch == '"': in_str = True elif ch == '{': depth += 1 elif ch == '}': depth -= 1 if depth == 0: try: return json.loads(raw[i:j + 1]) except Exception: break return None def parse_persona_json(raw): """Strip ```json fences, bracket-balance-extract, sanitize. Raises ValueError if no parseable JSON or no `about`.""" raw = str(raw or '') m = _FENCE.search(raw) candidate = (m.group(1) if m else raw).strip() parsed = extract_first_json_object(candidate) if not isinstance(parsed, dict): raise ValueError('model did not return a parseable JSON object') name = sanitize_name(parsed.get('name') or parsed.get('callSign') or '') about_raw = parsed.get('about') about = (about_raw.strip() if isinstance(about_raw, str) else '')[:1000] if not about: raise ValueError('model did not return an about') return { 'name': name or None, 'about': about, 'avatar_hint': str(parsed.get('avatar_hint') or parsed.get('avatarHint') or '')[:200], 'vibe': str(parsed.get('vibe') or '')[:40], 'specialty': trim_tag(parsed.get('specialty') or parsed.get('role') or parsed.get('job')), 'personality': trim_tag(parsed.get('personality') or parsed.get('personalityTag')), }