File size: 7,306 Bytes
f4c5608
 
 
 
05a7853
f4c5608
05a7853
f4c5608
 
 
4abfd20
 
 
 
f4c5608
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4abfd20
 
 
 
 
 
 
f4c5608
 
 
 
4abfd20
 
 
 
 
 
f4c5608
 
 
 
 
 
 
 
 
54e6475
 
 
f4c5608
54e6475
 
 
f4c5608
 
 
54e6475
4abfd20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54e6475
4abfd20
 
 
f4c5608
4abfd20
 
 
 
 
f4c5608
4abfd20
 
 
 
 
f4c5608
4abfd20
 
 
 
f4c5608
4abfd20
 
 
 
f4c5608
4abfd20
 
 
 
 
f4c5608
4abfd20
 
 
 
 
 
 
 
 
 
 
 
 
f4c5608
4abfd20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f4c5608
 
4abfd20
 
54e6475
4abfd20
 
f4c5608
 
 
 
 
4abfd20
 
f4c5608
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import re
from typing import Dict, Optional
from dataclasses import dataclass
from enum import Enum
# from prompt_configs import *

# SAMPLE
class ModelType(Enum):
    """Enumeration used as the type of ``PromptConfig.model``.

    Each member's value is the same string as its name.
    """

    O1_MINI = "O1_MINI"
    GPT_4O_MINI = "GPT_4O_MINI"
class UIComponentType(Enum):
    """Enumeration of UI component kinds, keyed by upper-case name.

    Member values are the lower-case string form of the name.
    ``ConfigParser.clean_enum_values`` rewrites ``<NAME: 'value'>`` text
    into ``UIComponentType.NAME`` references to members of this enum.
    """

    TEXTBOX = "textbox"
    MARKDOWN = "markdown"
    DATAFRAME = "dataframe"

@dataclass
class UIConfig:
    """Plain record of settings for a single UI component.

    Every field is required: none declares a default, including the
    ``Optional`` ones, so ``None`` must be passed explicitly. No validation
    is performed on construction.
    """

    # NOTE(review): annotated as str, but presumably receives
    # UIComponentType members when built from parsed config text -- confirm.
    component_type: str
    label: str
    default_value: Optional[str]
    visible: bool
    interactive: bool
    lines: Optional[int]
    description: str
    show_copy_button: bool
    # Presumably CSS class names for the rendered element -- TODO confirm.
    elem_classes: Optional[list]

@dataclass
class PromptConfig:
    """Plain record describing one prompt and its associated metadata.

    Every field is required: none declares a default, so ``thoughts`` must
    be passed explicitly even when it is ``None``. No validation is
    performed on construction.
    """

    prompt: str
    description: str
    step: str
    sub_step: str
    # Element types of these lists are not declared here.
    inputs: list
    outputs: list
    model: ModelType
    thoughts: Optional[list]
    # Maps a string key to the UIConfig for that key.
    ui: Dict[str, UIConfig]

# Names exposed to config text when ConfigParser evaluates it. Each class is
# registered under its own class name.
# NOTE: passing a globals dict to eval() does not by itself remove access to
# Python builtins, so this mapping only adds names; it is not a sandbox.
safe_globals = {
    cls.__name__: cls
    for cls in (PromptConfig, ModelType, UIConfig, UIComponentType)
}

class ConfigParser:
    """Parse a text dump of prompt configs into a dict of config objects.

    The input file must contain blocks delimited by ``$$`` markers. The text
    between a pair of markers is treated as the *inside* of a dict literal
    (typically ``"key": PromptConfig(...)`` entries), wrapped in braces and
    evaluated with the names in the module-level ``safe_globals``.

    Blocks are located purely by their ``$$`` delimiters; matching configs
    without markers was found to be very unstable.
    """

    # Text between two ``$$`` markers (non-greedy, may span lines).
    _BLOCK_PATTERN = re.compile(r"\$\$(.*?)\$\$", re.DOTALL)

    # Enum reprs with a single-quoted string value, either bare
    # (``<TEXTBOX: 'textbox'>``) or in Python's standard qualified form
    # (``<UIComponentType.TEXTBOX: 'textbox'>``).
    _ENUM_REPR_PATTERN = re.compile(r"<(?:(\w+)\.)?(\w+):\s*'[^']+'>")

    # Enum class assumed when the repr carries no class name.
    _DEFAULT_ENUM_CLASS = "UIComponentType"

    # File (relative to the working directory) that receives per-block errors.
    _ERROR_LOG_PATH = 'config_parser_errors.txt'

    def __init__(self, file_path: str):
        """Remember the path of the config dump; nothing is read yet."""
        self.file_path = file_path

    def clean_enum_values(self, text: str) -> str:
        """Rewrite enum reprs in *text* into evaluatable attribute references.

        ``<NAME: 'value'>`` becomes ``UIComponentType.NAME`` and
        ``<Class.NAME: 'value'>`` becomes ``Class.NAME``. Text that contains
        no such pattern is returned unchanged.
        """
        def to_reference(match):
            # Group 1 is the optional class qualifier, group 2 the member.
            enum_class = match.group(1) or self._DEFAULT_ENUM_CLASS
            return f"{enum_class}.{match.group(2)}"

        return self._ENUM_REPR_PATTERN.sub(to_reference, text)

    def parse_config(self) -> "Dict[str, PromptConfig]":
        """Parse the config file and return a dictionary of parsed entries.

        Each ``$$``-delimited block is evaluated independently. A block that
        fails does not abort the run: the error and the block text are
        appended to ``config_parser_errors.txt``, which is overwritten on
        every call, and parsing continues with the next block.

        Returns:
            The merged entries of all successfully evaluated blocks. Later
            blocks overwrite earlier ones on duplicate keys.

        Raises:
            OSError: if the config file or the error log cannot be opened.
        """
        print("Starting parse_config...")
        with open(self.file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        print(f"Read {len(content)} characters from file")

        blocks = self._BLOCK_PATTERN.findall(content)
        print(f"Found {len(blocks)} config blocks.")

        configs = {}
        # Explicit UTF-8: the block text was read as UTF-8, and writing it
        # back with a narrower locale encoding could raise inside the
        # error handler and abort the whole parse.
        with open(self._ERROR_LOG_PATH, 'w', encoding='utf-8') as error_log:
            for i, block in enumerate(blocks, 1):
                dict_str = "{" + block.strip() + "}"
                try:
                    config_dict = self._evaluate_block(dict_str)
                    configs.update(config_dict)
                    keys = ", ".join(map(str, config_dict))
                    print(f"Block {i}: Successfully parsed config for key: {keys}")
                except Exception as e:
                    # Deliberately broad: evaluating arbitrary text can raise
                    # anything (SyntaxError, NameError, TypeError, ...), and a
                    # bad block must not stop the remaining ones.
                    error_log.write(f"Block {i}: Error evaluating block: {e}\n")
                    error_log.write("Block content:\n")
                    error_log.write(dict_str + "\n")
                    error_log.write("-" * 40 + "\n")
        print(f"\nFinished parsing {len(configs)} configs")
        return configs

    def _evaluate_block(self, dict_str: str) -> dict:
        """Evaluate one brace-wrapped block and return it as a non-empty dict.

        Raises:
            TypeError: if the text evaluates to something other than a dict
                (for example ``{"a"}`` is a set literal).
            ValueError: if the block contains no entries.
        """
        cleaned_dict_str = self.clean_enum_values(dict_str)
        # SECURITY: eval() executes arbitrary Python, and supplying a globals
        # dict does not remove access to builtins. Only use this on config
        # dumps from a trusted source.
        # A copy is passed because eval() inserts ``__builtins__`` into the
        # globals mapping it is given; the shared dict stays untouched.
        config_dict = eval(cleaned_dict_str, dict(safe_globals))
        if not isinstance(config_dict, dict):
            raise TypeError(
                f"block evaluated to {type(config_dict).__name__}, expected dict"
            )
        if not config_dict:
            raise ValueError("block contains no config entries")
        return config_dict

# Example usage. Guarded so that it runs only when this file is executed as a
# script; importing ConfigParser from another module no longer reads the
# hard-coded log file or rewrites the error log as a side effect.
if __name__ == "__main__":
    # Option 1 : with $$ markers
    parser = ConfigParser('db_config_log_20250326_195828.txt')

    # Option 2 : without $$ markers
    # parser = ConfigParser('db_config_log_20250326_193344.txt')
    try:
        config = parser.parse_config()
        # Print total number of configs parsed
        print(f"Total configs parsed: {len(config)}")
        # .get() yields None when the key is absent, so a missing entry is
        # printed as "None" and does not raise.
        questioning_agent = config.get('questioning_agent')
        print(f"Questioning Agent: {questioning_agent}")
    except Exception as e:
        # Top-level boundary of the demo: report the failure, no traceback.
        print(f"Error parsing config: {str(e)}")