--- prompt_parser_patched (4).py +++ prompt_parser_patched (5).py @@ -268,19 +268,14 @@ return self.rng.choice(elements) if elements else "empty_prompt" return f"[{' | '.join(elements)}]" + def emphasized(self, args): - # args may include literal punctuation in some parser configurations; - # extract inner prompt and weight robustly. import lark as _l prompt_text = "" weight = 1.1 - # Try typical forms: [prompt], [prompt, NUMBER], or with literals mixed in - # Collect prompt-like parts prompt_parts = [a for a in args if isinstance(a, _l.Tree) or isinstance(a, str)] if prompt_parts: - # first prompt-like resolves to inner text prompt_text = resolve_tree(prompt_parts[0], keep_spacing=True) - # Try to detect a numeric token in args for a in args[::-1]: if isinstance(a, _l.Token) and getattr(a, "type", "") == "NUMBER": try: @@ -289,7 +284,6 @@ weight = 1.1 break else: - # Fallback: if second prompt-like exists, try to parse it as a number if len(prompt_parts) > 1: wt_str = resolve_tree(prompt_parts[1], keep_spacing=True) try: @@ -738,27 +732,29 @@ return [[self.steps, self.prefix + choice + self.suffix]] + def visit_alternate_distinct(self, tree): + import lark as _l options = [] for child in tree.children: - if isinstance(child, lark.Token) and child.type == "WHITESPACE": + if isinstance(child, _l.Token): + tok = str(child).strip() + if tok and all(ch in "[]!" for ch in tok): + continue + if tok.strip(" ,|"): + options.append([tok.strip(" ,|")]) continue child_schedules = self.visit(child) - child_options = [ - sched[1].strip(" ,|") - for sched in child_schedules - if sched[1].strip(" ,|") - ] - options.append( - child_options - or [resolve_tree(child, keep_spacing=True).strip(" ,|")] - ) - - # сплющиваем все варианты + child_options = [sched[1].strip(" ,|") for sched in child_schedules if sched[1].strip(" ,|")] + if child_options: + options.append(child_options) + else: + txt = resolve_tree(child, keep_spacing=True).strip(" ,|") + if txt: + options.append([txt]) flat = [opt for group in options for opt in group] if not flat: return [[self.steps, self.prefix + "empty_prompt" + self.suffix]] - selected = self.rng.choice(flat) return [[self.steps, self.prefix + selected + self.suffix]] @@ -873,17 +869,16 @@ text = " and ".join(resolve_tree(c, keep_spacing=True) for c in tree.children if resolve_tree(c, keep_spacing=True)) return [[self.steps, self.prefix + text + self.suffix]] + def visit_emphasized(self, tree): - # Robust parse of "(text[:weight])" regardless of how Earley grouped children import lark as _l children = list(tree.children) prompt_text = "" weight = 1.1 - # Case A: compact form -> children like [prompt, NUMBER] (no literal tokens) + # компактная форма: [prompt, NUMBER] или [prompt, prompt-as-number] if len(children) == 2: prompt_text = resolve_tree(children[0], keep_spacing=True) - # second can be NUMBER token or prompt with digits if isinstance(children[1], _l.Token) and getattr(children[1], "type", "") == "NUMBER": try: weight = float(children[1]) @@ -895,14 +890,11 @@ weight = float(wt_str) except Exception: weight = 1.1 - - # Case B: verbose form -> children like ['(', prompt, ':', prompt/NUMBER, ')'] else: - # pick first prompt as inner text + # подробная форма с литералами: '(', prompt, ':', prompt/NUMBER, ')' prompts = [ch for ch in children if isinstance(ch, _l.Tree) and getattr(ch, "data", None) == "prompt"] if prompts: prompt_text = resolve_tree(prompts[0], keep_spacing=True) - # pick second prompt (if any) as weight, else NUMBER token after ':' wt_node = prompts[1] if len(prompts) > 1 else next((ch for ch in children if isinstance(ch, _l.Token) and getattr(ch, "type", "") == "NUMBER"), None) if wt_node is not None: wt_str = resolve_tree(wt_node, keep_spacing=True) if isinstance(wt_node, _l.Tree) else str(wt_node) @@ -1051,7 +1043,6 @@ boundary = clamp(boundary_f if boundary_f > 1.0 else boundary_f * steps) if len(prompts) == 1: return [[boundary, f"{pre}{post}"], [steps, f"{pre}{prompts[0]}{post}"]] - # Special-case: exactly two prompts -> use boundary as first segment end directly if len(prompts) == 2: return [[boundary, f"{pre}{prompts[0]}{post}"], [steps, f"{pre}{prompts[1]}{post}"]] if prompts: