# Source: Hugging Face Space "jesshewyz/QuotationChatbot_v5"
# Space status when captured: Runtime error
# File size: 7,306 Bytes

import re
from typing import Dict, Optional
from dataclasses import dataclass
from enum import Enum
# from prompt_configs import *

# SAMPLE
class ModelType(Enum):
    """Model identifiers usable as the ``model`` field of a ``PromptConfig``.

    Each member's value is the same string as its name.
    """
    O1_MINI = "O1_MINI"
    GPT_4O_MINI = "GPT_4O_MINI"
class UIComponentType(Enum):
    """Kinds of UI component that a config can name.

    ``ConfigParser.clean_enum_values`` rewrites enum reprs found in the config
    text (e.g. ``<TEXTBOX: 'textbox'>``) into ``UIComponentType.<NAME>``
    references before the text is evaluated.
    """
    TEXTBOX = "textbox"
    MARKDOWN = "markdown"
    DATAFRAME = "dataframe"

@dataclass
class UIConfig:
    """Settings for one UI component belonging to a prompt (see ``PromptConfig.ui``).

    No field has a default, so every value must be supplied on construction.
    """
    # NOTE(review): annotated ``str``, but ConfigParser.clean_enum_values rewrites
    # enum reprs to ``UIComponentType.<NAME>``, so parsed configs presumably carry
    # a UIComponentType member here rather than a plain string — confirm.
    component_type: str
    # NOTE(review): the remaining fields look like display options for a UI
    # widget (label, visibility, number of lines, CSS classes, ...); the code
    # that consumes them is not visible in this file — confirm their meaning there.
    label: str
    default_value: Optional[str]
    visible: bool
    interactive: bool
    lines: Optional[int]
    description: str
    show_copy_button: bool
    elem_classes: Optional[list]

@dataclass
class PromptConfig:
    """One prompt definition: the prompt text plus its metadata, model and UI settings.

    No field has a default, so every value must be supplied on construction.
    """
    # The prompt text itself.
    prompt: str
    description: str
    # NOTE(review): step / sub_step presumably locate this prompt within a
    # multi-step workflow — confirm against the code that consumes them.
    step: str
    sub_step: str
    # Plain lists; the annotations do not constrain the element type.
    inputs: list
    outputs: list
    # Which model this prompt is meant for.
    model: ModelType
    # May be None.
    thoughts: Optional[list]
    # UI component settings, keyed by a string name.
    ui: Dict[str, UIConfig]

# Project types that config blocks may reference by name; this mapping is
# passed as the globals of eval() in ConfigParser.parse_config.
# NOTE: eval() adds ``__builtins__`` to a globals dict that lacks it, so this
# mapping does not sandbox the evaluated text — only parse trusted files.
safe_globals = {
    exposed.__name__: exposed
    for exposed in (PromptConfig, ModelType, UIConfig, UIComponentType)
}

class ConfigParser:
    """Load ``PromptConfig`` entries from a text dump delimited by ``$$`` markers.

    Every ``$$ ... $$`` span in the file is treated as the inside of a dict
    literal (``"key": PromptConfig(...), ...``): it is wrapped in braces,
    enum reprs are rewritten into evaluatable names, and the result is
    evaluated.  Earlier regex-only extraction that did not rely on the ``$$``
    markers was unstable and has been dropped in favour of this approach.

    SECURITY: blocks are evaluated with ``eval()``.  The namespace limits
    which project names are visible but is not a sandbox (builtins remain
    reachable), so only parse files you trust.
    """

    # Enum reprs, either bare (<TEXTBOX: 'textbox'>) or qualified
    # (<ModelType.O1_MINI: 'O1_MINI'>).  Group 1 is the optional class name,
    # group 2 the member name.
    _ENUM_REPR = re.compile(r"<(?:(\w+)\.)?(\w+):\s*'[^']+'>")
    # Non-greedy so each $$...$$ pair yields its own block; DOTALL lets a
    # block span several lines.
    _BLOCK = re.compile(r"\$\$(.*?)\$\$", re.DOTALL)

    def __init__(
        self,
        file_path: str,
        error_log_path: str = 'config_parser_errors.txt',
        namespace: Optional[dict] = None,
    ):
        """Remember where to read configs from and where to report failures.

        Args:
            file_path: Path of the UTF-8 text file containing ``$$`` blocks.
            error_log_path: File that receives details of blocks that could
                not be evaluated.  It is overwritten on every parse.
            namespace: Names visible to evaluated blocks.  ``None`` (the
                default) uses the module-level ``safe_globals``.
        """
        self.file_path = file_path
        self.error_log_path = error_log_path
        self.namespace = namespace

    def clean_enum_values(self, text: str) -> str:
        """Rewrite enum reprs in *text* into names that ``eval`` can resolve.

        ``<Class.MEMBER: 'value'>`` becomes ``Class.MEMBER``.  A bare
        ``<MEMBER: 'value'>`` carries no class name and is attributed to
        ``UIComponentType``.  Text without such patterns is returned unchanged.
        """
        def qualify(match):
            enum_class = match.group(1) or 'UIComponentType'
            return f"{enum_class}.{match.group(2)}"

        return self._ENUM_REPR.sub(qualify, text)

    def _evaluate_block(self, dict_str: str, namespace: dict) -> dict:
        """Evaluate one brace-wrapped block and return the dict it describes.

        Raises whatever ``eval`` raises, or ``TypeError`` when the block does
        not evaluate to a dict (e.g. ``{1, 2}`` is a set).
        """
        cleaned_dict_str = self.clean_enum_values(dict_str)
        # SECURITY: eval() executes arbitrary expressions from the file.
        # A copy is passed because eval() inserts ``__builtins__`` into the
        # globals dict it receives, which would otherwise mutate shared state.
        result = eval(cleaned_dict_str, dict(namespace))
        if not isinstance(result, dict):
            raise TypeError(
                f"expected a dict of configs, got {type(result).__name__}"
            )
        return result

    # The return annotation is a string so that defining this class does not
    # require PromptConfig to be resolvable at definition time.
    def parse_config(self) -> "Dict[str, PromptConfig]":
        """Parse the config file and return a dictionary of parsed entries.

        Blocks that fail to evaluate are skipped; their error and content are
        written to ``self.error_log_path`` and a notice is printed.  When the
        same key appears in several blocks, the later block wins.

        Returns:
            Mapping of config key to the evaluated value (normally a
            ``PromptConfig``).

        Raises:
            OSError: If the input file or the error log cannot be opened.
        """
        print("Starting parse_config...")
        with open(self.file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        print(f"Read {len(content)} characters from file")

        blocks = self._BLOCK.findall(content)
        print(f"Found {len(blocks)} config blocks.")

        namespace = safe_globals if self.namespace is None else self.namespace

        configs = {}
        failed = 0
        # UTF-8 to match the input: a failed block is echoed into the log and
        # may contain characters the platform default encoding cannot write.
        with open(self.error_log_path, 'w', encoding='utf-8') as error_log:
            for i, block in enumerate(blocks, 1):
                dict_str = "{" + block.strip() + "}"
                try:
                    config_dict = self._evaluate_block(dict_str, namespace)
                except Exception as e:
                    # Best effort: one bad block must not abort the others.
                    failed += 1
                    error_log.write(f"Block {i}: Error evaluating block: {e}\n")
                    error_log.write("Block content:\n")
                    error_log.write(dict_str + "\n")
                    error_log.write("-" * 40 + "\n")
                    print(f"Block {i}: Failed to parse (see {self.error_log_path})")
                    continue

                configs.update(config_dict)
                keys = list(config_dict)
                if not keys:
                    print(f"Block {i}: No config entries found")
                elif len(keys) == 1:
                    print(f"Block {i}: Successfully parsed config for key: {keys[0]}")
                else:
                    print(f"Block {i}: Successfully parsed configs for keys: {', '.join(map(str, keys))}")

        print(f"\nFinished parsing {len(configs)} configs")
        if failed:
            print(f"{failed} block(s) failed; details in {self.error_log_path}")
        return configs

# Example usage. These statements sit at module top level, so they run
# whenever the module is executed or imported.
# NOTE(review): consider moving this under an `if __name__ == "__main__":`
# guard so that importing the module does not trigger parsing — first confirm
# that nothing imports `parser` / `config` from here.
# Option 1: input file whose config blocks are wrapped in $$ markers
parser = ConfigParser('db_config_log_20250326_195828.txt')

# Option 2: input file without $$ markers
# parser = ConfigParser('db_config_log_20250326_193344.txt')
try:
    config = parser.parse_config()
    # print(f"Config: {config}")
    # Print total number of configs parsed
    print(f"Total configs parsed: {len(config)}")
    # .get() yields None when no 'questioning_agent' entry was parsed.
    questioning_agent = config.get('questioning_agent')
    print(f"Questioning Agent: {questioning_agent}")
    # if questioning_agent:
    #     print("Prompt:", questioning_agent.prompt[:100])
    #     print("Model:", questioning_agent.model)
    #     print("Inputs:", questioning_agent.inputs)

# parse_config handles per-block evaluation errors itself, so this catches
# failures outside that handling, e.g. the input file cannot be opened.
except Exception as e:
    print(f"Error parsing config: {str(e)}")