File size: 3,091 Bytes
67f4321
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
"""Persona JSON parsing — ported from woid's agent-sandbox/woid-core/persona/parse.js.

LLMs wrap persona JSON in noise (code fences, preambles, trailing prose, multi-object
emissions). These helpers defensively extract the first bracket-balanced JSON object
and sanitize the standard fields. `about` is load-bearing; the rest are optional.
"""
import json
import re

# Leading or trailing run of whitespace, straight/curly quotes and backticks;
# substituted with '' to peel wrapping punctuation off a candidate name.
_NAME_TRIM = re.compile(r'^[\s"\'“”‘’`]+|[\s"\'“”‘’`]+$')
# A value that itself starts like a key/value pair ("name:", "persona =", ...),
# matched case-insensitively; sanitize_name rejects such values.
_NAME_KV = re.compile(r'^(name|character|persona)\s*[:=]', re.I)
# Triple-backtick fence with an optional `json` tag. Group 1 is the fenced
# content, matched lazily up to the next closing fence.
_FENCE = re.compile(r'```(?:json)?\s*([\s\S]*?)```', re.I)


def sanitize_name(raw):
    """Return a cleaned display name, or '' when `raw` is not usable as one.

    Falsy input is treated as an empty string and anything else is coerced
    with `str`. Wrapping whitespace/quotes/backticks are removed and internal
    whitespace runs collapse to a single space. The result is rejected (''
    is returned) when it is shorter than 2 or longer than 40 characters, or
    when it starts like a `name:` / `character=` / `persona:` pair.
    """
    unwrapped = _NAME_TRIM.sub('', str(raw or ''))
    candidate = re.sub(r'\s+', ' ', unwrapped).strip()
    acceptable = 2 <= len(candidate) <= 40 and not _NAME_KV.match(candidate)
    return candidate if acceptable else ''


def trim_tag(raw):
    """Normalize a short free-text tag; return None when nothing usable remains.

    Non-string input yields None. Surrounding whitespace and a single trailing
    period are removed, together with any whitespace that preceded that
    period (so "chef ." becomes "chef", not "chef "). Results longer than 48
    characters are cut to their first 46 characters (stripped) plus an
    ellipsis; shorter results are returned unchanged.
    """
    if not isinstance(raw, str):
        return None
    s = raw.strip()
    if s.endswith('.'):
        # Drop the period and whatever whitespace sat in front of it.
        s = s[:-1].rstrip()
    if not s:
        return None
    return (s[:46].strip() + '…') if len(s) > 48 else s


def extract_first_json_object(raw):
    """Return the first JSON object embedded in `raw`, or None if there is none.

    Every `{` in the text is tried in order as the start of an object; the
    first one from which a complete JSON object decodes wins. Text before the
    object, trailing prose after it, later objects, and `}` characters inside
    string literals are all tolerated.

    Args:
        raw: Text that may contain a JSON object. Non-string input (including
            None) yields None instead of raising.

    Returns:
        The decoded dict, or None when no `{` starts a decodable object.
    """
    if not isinstance(raw, str):
        return None
    decoder = json.JSONDecoder()
    start = raw.find('{')
    while start != -1:
        try:
            # raw_decode parses exactly one JSON value beginning at `start`
            # and ignores whatever follows it, so no manual brace balancing
            # is needed. A value starting at `{` is always an object.
            return decoder.raw_decode(raw, start)[0]
        except (ValueError, RecursionError):
            # Not a valid object from this brace (JSONDecodeError is a
            # ValueError; RecursionError covers pathological nesting).
            # Move on to the next candidate brace.
            start = raw.find('{', start + 1)
    return None


def parse_persona_json(raw):
    """Extract and sanitize a persona dict from raw model output.

    Candidate texts are tried in order: the content of each ``` / ```json
    fence, then the whole text. Falling back past the first fence matters
    when that fence holds something other than the persona JSON. The first
    candidate that yields a JSON object is used.

    Args:
        raw: Model output; falsy values are treated as an empty string and
            anything else is coerced with `str`.

    Returns:
        A dict with keys:
        - 'name': sanitized `name` (or `callSign`), or None if unusable.
        - 'about': stripped `about` text, capped at 1000 characters.
        - 'avatar_hint': `avatar_hint` (or `avatarHint`) as a string, capped
          at 200 characters.
        - 'vibe': `vibe` as a string, capped at 40 characters.
        - 'specialty': trimmed `specialty` (or `role`, or `job`), or None.
        - 'personality': trimmed `personality` (or `personalityTag`), or None.

    Raises:
        ValueError: If no JSON object can be parsed, or if `about` is missing,
            not a string, or blank.
    """
    text = str(raw or '')
    candidates = [m.group(1) for m in _FENCE.finditer(text)]
    candidates.append(text)
    parsed = None
    for candidate in candidates:
        parsed = extract_first_json_object(candidate.strip())
        if isinstance(parsed, dict):
            break
    if not isinstance(parsed, dict):
        raise ValueError('model did not return a parseable JSON object')
    name = sanitize_name(parsed.get('name') or parsed.get('callSign') or '')
    about_raw = parsed.get('about')
    about = (about_raw.strip() if isinstance(about_raw, str) else '')[:1000]
    if not about:
        # `about` is the one required field; everything else is optional.
        raise ValueError('model did not return an about')
    return {
        'name': name or None,
        'about': about,
        'avatar_hint': str(parsed.get('avatar_hint') or parsed.get('avatarHint') or '')[:200],
        'vibe': str(parsed.get('vibe') or '')[:40],
        'specialty': trim_tag(parsed.get('specialty') or parsed.get('role') or parsed.get('job')),
        'personality': trim_tag(parsed.get('personality') or parsed.get('personalityTag')),
    }