File size: 5,492 Bytes
cb099ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
--- prompt_parser_patched (4).py

+++ prompt_parser_patched (5).py

@@ -268,19 +268,14 @@

             return self.rng.choice(elements) if elements else "empty_prompt"
         return f"[{' | '.join(elements)}]"
     
+
     def emphasized(self, args):
-        # args may include literal punctuation in some parser configurations;
-        # extract inner prompt and weight robustly.
         import lark as _l
         prompt_text = ""
         weight = 1.1
-        # Try typical forms: [prompt], [prompt, NUMBER], or with literals mixed in
-        # Collect prompt-like parts
         prompt_parts = [a for a in args if isinstance(a, _l.Tree) or isinstance(a, str)]
         if prompt_parts:
-            # first prompt-like resolves to inner text
             prompt_text = resolve_tree(prompt_parts[0], keep_spacing=True)
-        # Try to detect a numeric token in args
         for a in args[::-1]:
             if isinstance(a, _l.Token) and getattr(a, "type", "") == "NUMBER":
                 try:
@@ -289,7 +284,6 @@

                     weight = 1.1
                 break
         else:
-            # Fallback: if second prompt-like exists, try to parse it as a number
             if len(prompt_parts) > 1:
                 wt_str = resolve_tree(prompt_parts[1], keep_spacing=True)
                 try:
@@ -738,27 +732,29 @@

             return [[self.steps, self.prefix + choice + self.suffix]]
 
 
+
     def visit_alternate_distinct(self, tree):
+        import lark as _l
         options = []
         for child in tree.children:
-            if isinstance(child, lark.Token) and child.type == "WHITESPACE":
+            if isinstance(child, _l.Token):
+                tok = str(child).strip()
+                if tok and all(ch in "[]!" for ch in tok):
+                    continue
+                if tok.strip(" ,|"):
+                    options.append([tok.strip(" ,|")])
                 continue
             child_schedules = self.visit(child)
-            child_options = [
-                sched[1].strip(" ,|")
-                for sched in child_schedules
-                if sched[1].strip(" ,|")
-            ]
-            options.append(
-                child_options
-                or [resolve_tree(child, keep_spacing=True).strip(" ,|")]
-            )
-
-        # сплющиваем все варианты
+            child_options = [sched[1].strip(" ,|") for sched in child_schedules if sched[1].strip(" ,|")]
+            if child_options:
+                options.append(child_options)
+            else:
+                txt = resolve_tree(child, keep_spacing=True).strip(" ,|")
+                if txt:
+                    options.append([txt])
         flat = [opt for group in options for opt in group]
         if not flat:
             return [[self.steps, self.prefix + "empty_prompt" + self.suffix]]
-
         selected = self.rng.choice(flat)
         return [[self.steps, self.prefix + selected + self.suffix]]
 
@@ -873,17 +869,16 @@

         text = " and ".join(resolve_tree(c, keep_spacing=True) for c in tree.children if resolve_tree(c, keep_spacing=True))
         return [[self.steps, self.prefix + text + self.suffix]]
 
+
     def visit_emphasized(self, tree):
-        # Robust parse of "(text[:weight])" regardless of how Earley grouped children
         import lark as _l
         children = list(tree.children)
         prompt_text = ""
         weight = 1.1
 
-        # Case A: compact form -> children like [prompt, NUMBER] (no literal tokens)
+        # компактная форма: [prompt, NUMBER] или [prompt, prompt-as-number]
         if len(children) == 2:
             prompt_text = resolve_tree(children[0], keep_spacing=True)
-            # second can be NUMBER token or prompt with digits
             if isinstance(children[1], _l.Token) and getattr(children[1], "type", "") == "NUMBER":
                 try:
                     weight = float(children[1])
@@ -895,14 +890,11 @@

                     weight = float(wt_str)
                 except Exception:
                     weight = 1.1
-
-        # Case B: verbose form -> children like ['(', prompt, ':', prompt/NUMBER, ')']
         else:
-            # pick first prompt as inner text
+            # подробная форма с литералами: '(', prompt, ':', prompt/NUMBER, ')'
             prompts = [ch for ch in children if isinstance(ch, _l.Tree) and getattr(ch, "data", None) == "prompt"]
             if prompts:
                 prompt_text = resolve_tree(prompts[0], keep_spacing=True)
-            # pick second prompt (if any) as weight, else NUMBER token after ':'
             wt_node = prompts[1] if len(prompts) > 1 else next((ch for ch in children if isinstance(ch, _l.Token) and getattr(ch, "type", "") == "NUMBER"), None)
             if wt_node is not None:
                 wt_str = resolve_tree(wt_node, keep_spacing=True) if isinstance(wt_node, _l.Tree) else str(wt_node)
@@ -1051,7 +1043,6 @@

                 boundary = clamp(boundary_f if boundary_f > 1.0 else boundary_f * steps)
                 if len(prompts) == 1:
                     return [[boundary, f"{pre}{post}"], [steps, f"{pre}{prompts[0]}{post}"]]
-                # Special-case: exactly two prompts -> use boundary as first segment end directly
                 if len(prompts) == 2:
                     return [[boundary, f"{pre}{prompts[0]}{post}"], [steps, f"{pre}{prompts[1]}{post}"]]
                 if prompts: