QuotationChatbot_v5 / test_config_parser.py
jess
add: WORKING load_config_from_db, ConfigParser that uses variables instead of text
05a7853
import re
from typing import Dict, Optional
from dataclasses import dataclass
from enum import Enum
# from prompt_configs import *
# SAMPLE
class ModelType(Enum):
    """Closed set of model identifiers usable as ``PromptConfig.model``.

    Each member's value is the same string as its name.
    """

    O1_MINI = "O1_MINI"
    GPT_4O_MINI = "GPT_4O_MINI"
class UIComponentType(Enum):
    """Kinds of UI component a config entry can refer to.

    Member values are the lowercase form of the member names.
    """

    TEXTBOX = "textbox"
    MARKDOWN = "markdown"
    DATAFRAME = "dataframe"
@dataclass
class UIConfig:
    """Display settings for a single UI component.

    All fields are required (no defaults), and positional construction
    follows the declaration order below.
    """

    # Annotated as ``str``; NOTE(review): parsed configs may carry a
    # ``UIComponentType`` member here instead -- confirm against callers.
    component_type: str
    label: str
    default_value: Optional[str]
    visible: bool
    interactive: bool
    # Presumably the number of text rows for text-like components; may be None.
    lines: Optional[int]
    description: str
    show_copy_button: bool
    # Presumably CSS class names attached to the component; may be None.
    elem_classes: Optional[list]
@dataclass
class PromptConfig:
    """One prompt definition together with its metadata and UI settings.

    All fields are required (no defaults), and positional construction
    follows the declaration order below.
    """

    prompt: str
    description: str
    # ``step`` / ``sub_step`` presumably locate the prompt within a larger
    # workflow -- TODO confirm against the code that consumes them.
    step: str
    sub_step: str
    inputs: list
    outputs: list
    model: ModelType
    thoughts: Optional[list]
    # UI settings keyed by string; what the keys name is not visible here.
    ui: Dict[str, UIConfig]
# Names made available to the expressions evaluated by ConfigParser: each
# class is exposed under its own class name.
# NOTE: this mapping has no "__builtins__" entry, so eval() still grants
# access to Python's builtins -- it limits which project names resolve, it
# does not sandbox the evaluated text.
safe_globals = {
    cls.__name__: cls
    for cls in (PromptConfig, ModelType, UIConfig, UIComponentType)
}
class ConfigParser:
    """Parse a text dump of prompt configurations into ``PromptConfig`` objects.

    The dump is expected to contain one or more blocks delimited by ``$$``
    markers. Each block holds the inside of a dict literal (for example
    ``"some_key": PromptConfig(...)``); it is wrapped in braces and evaluated
    with the names listed in ``safe_globals``.
    """

    # Enum reprs embedded in the dump: the bare form ``<TEXTBOX: 'textbox'>``
    # and the class-qualified form ``<UIComponentType.TEXTBOX: 'textbox'>``.
    _ENUM_REPR = re.compile(r"<(?:(\w+)\.)?(\w+):\s*'[^']+'>")

    # Non-greedy so that each ``$$ ... $$`` pair yields its own block; DOTALL
    # because a block spans multiple lines.
    _BLOCK = re.compile(r"\$\$(.*?)\$\$", re.DOTALL)

    def __init__(self, file_path: str):
        """Remember the path of the dump file; nothing is read until parsing."""
        self.file_path = file_path

    def clean_enum_values(self, text: str) -> str:
        """Rewrite enum reprs in *text* into evaluatable attribute accesses.

        ``<TEXTBOX: 'textbox'>`` becomes ``UIComponentType.TEXTBOX`` (bare
        reprs are assumed to be ``UIComponentType`` members), while a
        class-qualified repr such as ``<ModelType.O1_MINI: 'O1_MINI'>`` keeps
        its own class and becomes ``ModelType.O1_MINI``.

        The substitution is purely textual, so a matching pattern inside a
        prompt string is rewritten as well.
        """
        def _to_expression(match):
            enum_class = match.group(1) or "UIComponentType"
            return f"{enum_class}.{match.group(2)}"

        return self._ENUM_REPR.sub(_to_expression, text)

    def parse_config(
        self, error_log_path: str = 'config_parser_errors.txt'
    ) -> "Dict[str, PromptConfig]":
        """Parse the config file and return a dictionary of PromptConfig objects.

        Args:
            error_log_path: File that receives details of every block that
                could not be evaluated. It is created (or truncated) on each
                call, even when no block fails.

        Returns:
            The merged contents of all successfully evaluated blocks. When
            several blocks define the same key, the later block wins.

        Raises:
            OSError: If the config file cannot be read or the error log
                cannot be opened. Failures in individual blocks are logged
                and skipped, not raised.
        """
        print("Starting parse_config...")
        with open(self.file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        print(f"Read {len(content)} characters from file")

        # Blocks are located via their $$ markers and then evaluated.
        # Earlier attempts that matched the PromptConfig(...) fields directly
        # with one large regex were dropped: without the markers they were
        # very unstable.
        blocks = self._BLOCK.findall(content)
        print(f"Found {len(blocks)} config blocks.")

        configs = {}
        failures = 0
        # Written as UTF-8 to match the input: the log echoes block content,
        # which a narrower locale encoding might not be able to represent.
        with open(error_log_path, 'w', encoding='utf-8') as error_log:
            for i, block in enumerate(blocks, 1):
                dict_str = "{" + block.strip() + "}"
                try:
                    # Clean the enum values before evaluation
                    cleaned_dict_str = self.clean_enum_values(dict_str)
                    # SECURITY: eval() executes arbitrary Python. The globals
                    # mapping has no "__builtins__" key, so builtins remain
                    # reachable -- only parse files from a trusted source.
                    # A copy is passed because eval() inserts "__builtins__"
                    # into the mapping it is given.
                    config_dict = eval(cleaned_dict_str, dict(safe_globals))
                    if not isinstance(config_dict, dict) or not config_dict:
                        raise ValueError(
                            "block did not evaluate to a non-empty dict"
                        )
                    configs.update(config_dict)
                    key = next(iter(config_dict))
                    print(f"Block {i}: Successfully parsed config for key: {key}")
                except Exception as e:
                    # Best effort: record the failing block and keep going.
                    failures += 1
                    error_log.write(f"Block {i}: Error evaluating block: {e}\n")
                    error_log.write("Block content:\n")
                    error_log.write(dict_str + "\n")
                    error_log.write("-" * 40 + "\n")

        if failures:
            print(f"{failures} block(s) failed to parse; see {error_log_path}")
        print(f"\nFinished parsing {len(configs)} configs")
        return configs
# Example usage.
# This dump wraps every config block in $$ markers (option 1).
parser = ConfigParser('db_config_log_20250326_195828.txt')
# A dump written without $$ markers (option 2) is
# 'db_config_log_20250326_193344.txt'.
try:
    config = parser.parse_config()
    # Report how many configs were parsed in total.
    print(f"Total configs parsed: {len(config)}")
    # Look up one known key; .get() yields None when it is absent.
    questioning_agent = config.get('questioning_agent')
    print(f"Questioning Agent: {questioning_agent}")
except Exception as exc:
    print(f"Error parsing config: {exc!s}")