tiny-army / persona_parse.py
polats's picture
Personas + war-diary via llama.cpp (reusing woid's persona SSE protocol)
67f4321
"""Persona JSON parsing — ported from woid's agent-sandbox/woid-core/persona/parse.js.
LLMs wrap persona JSON in noise (code fences, preambles, trailing prose, multi-object
emissions). These helpers defensively extract the first bracket-balanced JSON object
and sanitize the standard fields. `about` is load-bearing; the rest are optional.
"""
import json
import re
# Leading or trailing run of whitespace and straight, curly, or backtick quote
# characters; sanitize_name() substitutes it away to unwrap a quoted name.
_NAME_TRIM = re.compile(r'^[\s"\'“”‘’`]+|[\s"\'“”‘’`]+$')
# Case-insensitive "name:", "character =", "persona:" style prefix;
# sanitize_name() rejects a candidate that still starts with such a label.
_NAME_KV = re.compile(r'^(name|character|persona)\s*[:=]', re.I)
# A ``` fenced block, optionally tagged "json" (any case). Group 1 is the
# fence's inner text, matched lazily up to the next ```.
_FENCE = re.compile(r'```(?:json)?\s*([\s\S]*?)```', re.I)
def sanitize_name(raw):
    """Clean a candidate persona name; return ``''`` when it is unusable.

    The value is stringified (falsy values become the empty string), stripped
    of surrounding whitespace/quote characters, and has internal whitespace
    runs collapsed to single spaces. The result is rejected when it is shorter
    than 2 or longer than 40 characters, or when it still begins with a
    ``name:`` / ``character=`` / ``persona:`` style label.
    """
    text = str(raw or '')
    unquoted = _NAME_TRIM.sub('', text)
    collapsed = re.sub(r'\s+', ' ', unquoted).strip()
    if not 2 <= len(collapsed) <= 40:
        return ''
    # A leftover "key: value" label means the model echoed the field name.
    return '' if _NAME_KV.match(collapsed) else collapsed
def trim_tag(raw):
    """Normalize a short descriptive tag, or return ``None`` if unusable.

    Args:
        raw: Candidate tag; anything that is not a ``str`` is rejected.

    Returns:
        The tag with surrounding whitespace and one trailing period removed.
        Tags longer than 48 characters are cut to their first 46 characters
        (re-stripped) followed by an ellipsis. ``None`` is returned for
        non-string input or when nothing is left after cleaning.
    """
    if not isinstance(raw, str):
        return None
    # Remove one trailing period, then any whitespace that was sitting in
    # front of it ("Scout ." -> "Scout" rather than "Scout ").
    tag = re.sub(r'\.\s*$', '', raw.strip()).rstrip()
    if not tag:
        return None
    if len(tag) <= 48:
        return tag
    return tag[:46].strip() + '…'
def extract_first_json_object(raw):
    """Return the first JSON object embedded in ``raw``, or ``None``.

    Each ``{`` in the text is tried, left to right, as the start of a JSON
    document. The first position that decodes cleanly wins, so leading
    chatter, trailing prose, and ``}`` characters inside string literals are
    all tolerated.

    Args:
        raw: Text that may contain a JSON object surrounded by other output.

    Returns:
        The decoded object, or ``None`` when ``raw`` is not a string or no
        ``{`` in it starts a decodable JSON object.
    """
    if not isinstance(raw, str):
        return None
    decoder = json.JSONDecoder()
    start = raw.find('{')
    while start != -1:
        try:
            # raw_decode parses exactly one JSON value beginning at `start`
            # and ignores any text that follows it, which replaces manual
            # brace balancing and string/escape tracking.
            obj, _end = decoder.raw_decode(raw, start)
        except (ValueError, RecursionError):
            # Not a valid object from this brace (json.JSONDecodeError is a
            # ValueError; RecursionError covers absurdly deep nesting).
            # Fall through and try the next opening brace.
            pass
        else:
            return obj
        start = raw.find('{', start + 1)
    return None
def parse_persona_json(raw):
    """Parse a model reply into a sanitized persona dict.

    The inner text of the reply's first ``` fence (if any) is searched for a
    JSON object first. When there is no fence, or the fence holds no
    parseable object, the whole reply is searched instead, so a fenced note
    ahead of bare JSON does not cause a spurious failure.

    Args:
        raw: Model output; falsy values are treated as an empty string.

    Returns:
        A dict with keys ``name`` (sanitized string or ``None``), ``about``
        (stripped, at most 1000 characters), ``avatar_hint`` (at most 200
        characters), ``vibe`` (at most 40 characters), and ``specialty`` /
        ``personality`` (``trim_tag`` results, possibly ``None``).

    Raises:
        ValueError: If no JSON object can be parsed, or the parsed object has
            no non-empty string ``about``.
    """
    text = str(raw or '')
    fenced = _FENCE.search(text)
    parsed = extract_first_json_object(fenced.group(1).strip()) if fenced else None
    if not isinstance(parsed, dict):
        # No fence, or the fence did not contain the persona object: scan the
        # full reply rather than giving up.
        parsed = extract_first_json_object(text.strip())
    if not isinstance(parsed, dict):
        raise ValueError('model did not return a parseable JSON object')

    # `about` is the one required field; everything else degrades gracefully.
    about_raw = parsed.get('about')
    about = (about_raw.strip() if isinstance(about_raw, str) else '')[:1000]
    if not about:
        raise ValueError('model did not return an about')

    name = sanitize_name(parsed.get('name') or parsed.get('callSign') or '')
    return {
        'name': name or None,
        'about': about,
        'avatar_hint': str(parsed.get('avatar_hint') or parsed.get('avatarHint') or '')[:200],
        'vibe': str(parsed.get('vibe') or '')[:40],
        'specialty': trim_tag(parsed.get('specialty') or parsed.get('role') or parsed.get('job')),
        'personality': trim_tag(parsed.get('personality') or parsed.get('personalityTag')),
    }