import re
from typing import Dict, Optional
from dataclasses import dataclass
from enum import Enum

# from prompt_configs import *


# SAMPLE
# NOTE(review): the definitions below look like local stand-ins for the names
# the commented-out `prompt_configs` import would provide -- confirm.
class ModelType(Enum):
    """Model identifiers that a PromptConfig can reference."""

    O1_MINI = "O1_MINI"
    GPT_4O_MINI = "GPT_4O_MINI"


class UIComponentType(Enum):
    """UI component kinds that enum reprs in the config file resolve to."""

    TEXTBOX = "textbox"
    MARKDOWN = "markdown"
    DATAFRAME = "dataframe"


@dataclass
class UIConfig:
    """Settings of a single UI component attached to a prompt."""

    # Annotated as str, but configs parsed by ConfigParser may carry a
    # UIComponentType member here (see ConfigParser.clean_enum_values).
    component_type: str
    label: str
    default_value: Optional[str]
    visible: bool
    interactive: bool
    lines: Optional[int]
    description: str
    show_copy_button: bool
    elem_classes: Optional[list]


@dataclass
class PromptConfig:
    """One prompt definition together with its metadata and UI components."""

    prompt: str
    description: str
    step: str
    sub_step: str
    inputs: list
    outputs: list
    model: ModelType
    thoughts: Optional[list]
    ui: Dict[str, UIConfig]


# Names made available to the expressions evaluated by ConfigParser.
# NOTE: despite the name this is not a sandbox -- eval() adds `__builtins__`
# to this dict, so evaluated text can still reach every builtin.
safe_globals = {
    "PromptConfig": PromptConfig,
    "ModelType": ModelType,
    "UIConfig": UIConfig,
    "UIComponentType": UIComponentType,
}

# A config block is everything between a pair of `$$` markers.
_BLOCK_PATTERN = re.compile(r"\$\$(.*?)\$\$", re.DOTALL)

# Enum repr with a quoted value: `<MEMBER: 'value'>` or the standard
# qualified form `<EnumClass.MEMBER: 'value'>`.
_ENUM_REPR_PATTERN = re.compile(r"<(?:(\w+)\.)?(\w+):\s*'[^']+'>")


class ConfigParser:
    """Parse `$$`-delimited PromptConfig blocks out of a text file.

    Earlier regex-only approaches that tried to match a whole
    ``PromptConfig(...)`` expression without `$$` markers were unstable and
    have been removed; each marked block is now evaluated as a Python dict
    literal instead.
    """

    def __init__(self, file_path: str,
                 error_log_path: str = 'config_parser_errors.txt'):
        """Store the paths used by parse_config.

        Args:
            file_path: Path of the config text file to parse.
            error_log_path: File that receives details of blocks that failed
                to parse. It is overwritten on every parse_config call.
        """
        self.file_path = file_path
        self.error_log_path = error_log_path

    def clean_enum_values(self, text: str) -> str:
        """Replace enum reprs in *text* with evaluatable enum references.

        ``<TEXTBOX: 'textbox'>`` becomes ``UIComponentType.TEXTBOX`` and a
        qualified repr such as ``<ModelType.O1_MINI: 'O1_MINI'>`` becomes
        ``ModelType.O1_MINI``. Text without such patterns is returned
        unchanged.
        """
        def to_reference(match: "re.Match") -> str:
            # An unqualified repr carries no class name; those are treated
            # as UIComponentType members.
            enum_class = match.group(1) or "UIComponentType"
            return f"{enum_class}.{match.group(2)}"

        return _ENUM_REPR_PATTERN.sub(to_reference, text)

    def _evaluate_block(self, dict_str: str) -> dict:
        """Evaluate one `{...}` block and return the resulting config dict.

        Raises:
            ValueError: If the block evaluates to something other than a
                non-empty dict.
            Exception: Whatever evaluating the block text raises
                (SyntaxError, NameError, TypeError, ...).
        """
        cleaned_dict_str = self.clean_enum_values(dict_str)
        # SECURITY: eval() executes arbitrary expressions taken from the
        # file, and `safe_globals` does not remove builtins. Only parse
        # files from trusted sources.
        config_dict = eval(cleaned_dict_str, safe_globals)
        # Validate before merging: e.g. `{"ab"}` is a set that dict.update()
        # would otherwise silently accept as the pair 'a' -> 'b'.
        if not isinstance(config_dict, dict) or not config_dict:
            raise ValueError(
                "block did not evaluate to a non-empty dict of configs"
            )
        return config_dict

    def parse_config(self) -> Dict[str, PromptConfig]:
        """Parse the config file and return a dictionary of PromptConfig objects.

        Every `$$ ... $$` block is wrapped in braces and evaluated as a dict
        literal; the resulting entries are merged into one dict (later blocks
        override earlier ones on duplicate keys). Blocks that fail are
        skipped and described in the error log file.

        Raises:
            OSError: If the config file or the error log cannot be opened.
        """
        print("Starting parse_config...")

        with open(self.file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        print(f"Read {len(content)} characters from file")

        blocks = _BLOCK_PATTERN.findall(content)
        print(f"Found {len(blocks)} config blocks.")

        configs: Dict[str, PromptConfig] = {}
        # UTF-8 to match the input encoding: failing blocks are echoed into
        # the log and may contain non-ASCII text.
        with open(self.error_log_path, 'w', encoding='utf-8') as error_log:
            for i, block in enumerate(blocks, 1):
                dict_str = "{" + block.strip() + "}"
                try:
                    config_dict = self._evaluate_block(dict_str)
                except Exception as e:
                    # Best effort: evaluated text can raise anything, so
                    # record the failure and continue with the next block.
                    error_log.write(f"Block {i}: Error evaluating block: {e}\n")
                    error_log.write("Block content:\n")
                    error_log.write(dict_str + "\n")
                    error_log.write("-" * 40 + "\n")
                    print(f"Block {i}: Failed to parse, "
                          f"see {self.error_log_path}")
                    continue

                configs.update(config_dict)
                key = next(iter(config_dict))
                print(f"Block {i}: Successfully parsed config for key: {key}")

        print(f"\nFinished parsing {len(configs)} configs")
        return configs


def main() -> None:
    """Example usage: parse a config log and print a short summary."""
    # Log written with $$ markers around each config.
    parser = ConfigParser('db_config_log_20250326_195828.txt')
    # The older log without $$ markers
    # ('db_config_log_20250326_193344.txt') contains no blocks the
    # marker-based parser can find.

    try:
        config = parser.parse_config()
        print(f"Total configs parsed: {len(config)}")

        questioning_agent = config.get('questioning_agent')
        print(f"Questioning Agent: {questioning_agent}")
    except Exception as e:
        print(f"Error parsing config: {str(e)}")


if __name__ == "__main__":
    main()