|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -268,19 +268,14 @@ |
|
|
|
|
|
return self.rng.choice(elements) if elements else "empty_prompt" |
|
|
return f"[{' | '.join(elements)}]" |
|
|
|
|
|
+ |
|
|
def emphasized(self, args): |
|
|
- # args may include literal punctuation in some parser configurations; |
|
|
- # extract inner prompt and weight robustly. |
|
|
import lark as _l |
|
|
prompt_text = "" |
|
|
weight = 1.1 |
|
|
- # Try typical forms: [prompt], [prompt, NUMBER], or with literals mixed in |
|
|
- # Collect prompt-like parts |
|
|
prompt_parts = [a for a in args if isinstance(a, _l.Tree) or isinstance(a, str)] |
|
|
if prompt_parts: |
|
|
- # first prompt-like resolves to inner text |
|
|
prompt_text = resolve_tree(prompt_parts[0], keep_spacing=True) |
|
|
- # Try to detect a numeric token in args |
|
|
for a in args[::-1]: |
|
|
if isinstance(a, _l.Token) and getattr(a, "type", "") == "NUMBER": |
|
|
try: |
|
|
@@ -289,7 +284,6 @@ |
|
|
|
|
|
weight = 1.1 |
|
|
break |
|
|
else: |
|
|
- # Fallback: if second prompt-like exists, try to parse it as a number |
|
|
if len(prompt_parts) > 1: |
|
|
wt_str = resolve_tree(prompt_parts[1], keep_spacing=True) |
|
|
try: |
|
|
@@ -738,27 +732,29 @@ |
|
|
|
|
|
return [[self.steps, self.prefix + choice + self.suffix]] |
|
|
|
|
|
|
|
|
+ |
|
|
def visit_alternate_distinct(self, tree): |
|
|
+ import lark as _l |
|
|
options = [] |
|
|
for child in tree.children: |
|
|
- if isinstance(child, lark.Token) and child.type == "WHITESPACE": |
|
|
+ if isinstance(child, _l.Token): |
|
|
+ tok = str(child).strip() |
|
|
+ if tok and all(ch in "[]!" for ch in tok): |
|
|
+ continue |
|
|
+ if tok.strip(" ,|"): |
|
|
+ options.append([tok.strip(" ,|")]) |
|
|
continue |
|
|
child_schedules = self.visit(child) |
|
|
- child_options = [ |
|
|
- sched[1].strip(" ,|") |
|
|
- for sched in child_schedules |
|
|
- if sched[1].strip(" ,|") |
|
|
- ] |
|
|
- options.append( |
|
|
- child_options |
|
|
- or [resolve_tree(child, keep_spacing=True).strip(" ,|")] |
|
|
- ) |
|
|
- |
|
|
- # сплющиваем все варианты |
|
|
+ child_options = [sched[1].strip(" ,|") for sched in child_schedules if sched[1].strip(" ,|")] |
|
|
+ if child_options: |
|
|
+ options.append(child_options) |
|
|
+ else: |
|
|
+ txt = resolve_tree(child, keep_spacing=True).strip(" ,|") |
|
|
+ if txt: |
|
|
+ options.append([txt]) |
|
|
flat = [opt for group in options for opt in group] |
|
|
if not flat: |
|
|
return [[self.steps, self.prefix + "empty_prompt" + self.suffix]] |
|
|
- |
|
|
selected = self.rng.choice(flat) |
|
|
return [[self.steps, self.prefix + selected + self.suffix]] |
|
|
|
|
|
@@ -873,17 +869,16 @@ |
|
|
|
|
|
text = " and ".join(resolve_tree(c, keep_spacing=True) for c in tree.children if resolve_tree(c, keep_spacing=True)) |
|
|
return [[self.steps, self.prefix + text + self.suffix]] |
|
|
|
|
|
+ |
|
|
def visit_emphasized(self, tree): |
|
|
- # Robust parse of "(text[:weight])" regardless of how Earley grouped children |
|
|
import lark as _l |
|
|
children = list(tree.children) |
|
|
prompt_text = "" |
|
|
weight = 1.1 |
|
|
|
|
|
- # Case A: compact form -> children like [prompt, NUMBER] (no literal tokens) |
|
|
+ # компактная форма: [prompt, NUMBER] или [prompt, prompt-as-number] |
|
|
if len(children) == 2: |
|
|
prompt_text = resolve_tree(children[0], keep_spacing=True) |
|
|
- # second can be NUMBER token or prompt with digits |
|
|
if isinstance(children[1], _l.Token) and getattr(children[1], "type", "") == "NUMBER": |
|
|
try: |
|
|
weight = float(children[1]) |
|
|
@@ -895,14 +890,11 @@ |
|
|
|
|
|
weight = float(wt_str) |
|
|
except Exception: |
|
|
weight = 1.1 |
|
|
- |
|
|
- # Case B: verbose form -> children like ['(', prompt, ':', prompt/NUMBER, ')'] |
|
|
else: |
|
|
- # pick first prompt as inner text |
|
|
+ # подробная форма с литералами: '(', prompt, ':', prompt/NUMBER, ')' |
|
|
prompts = [ch for ch in children if isinstance(ch, _l.Tree) and getattr(ch, "data", None) == "prompt"] |
|
|
if prompts: |
|
|
prompt_text = resolve_tree(prompts[0], keep_spacing=True) |
|
|
- # pick second prompt (if any) as weight, else NUMBER token after ':' |
|
|
wt_node = prompts[1] if len(prompts) > 1 else next((ch for ch in children if isinstance(ch, _l.Token) and getattr(ch, "type", "") == "NUMBER"), None) |
|
|
if wt_node is not None: |
|
|
wt_str = resolve_tree(wt_node, keep_spacing=True) if isinstance(wt_node, _l.Tree) else str(wt_node) |
|
|
@@ -1051,7 +1043,6 @@ |
|
|
|
|
|
boundary = clamp(boundary_f if boundary_f > 1.0 else boundary_f * steps) |
|
|
if len(prompts) == 1: |
|
|
return [[boundary, f"{pre}{post}"], [steps, f"{pre}{prompts[0]}{post}"]] |
|
|
- # Special-case: exactly two prompts -> use boundary as first segment end directly |
|
|
if len(prompts) == 2: |
|
|
return [[boundary, f"{pre}{prompts[0]}{post}"], [steps, f"{pre}{prompts[1]}{post}"]] |
|
|
if prompts: |