Spaces:

jesshewyz
/

QuotationChatbot_v5

Runtime error

QuotationChatbot_v5 / test_config_parser.py

jess

add: WORKING load_config_from_db, ConfigParser that uses variables instead of text

05a7853 about 1 year ago

7.31 kB

	import re
	from typing import Dict, Optional
	from dataclasses import dataclass
	from enum import Enum
	# from prompt_configs import *

	# SAMPLE
	class ModelType(Enum):
	    """Model identifiers accepted by ``PromptConfig.model`` (sample subset).

	    Each member's value is the same string as its name, so a member can be
	    looked up either by name (``ModelType["O1_MINI"]``) or by value
	    (``ModelType("O1_MINI")``).
	    """

	    O1_MINI = "O1_MINI"
	    GPT_4O_MINI = "GPT_4O_MINI"
	class UIComponentType(Enum):
	    """Kinds of UI component a config can refer to.

	    Member values are the lower-case component names; member names are the
	    upper-case form that ``ConfigParser.clean_enum_values`` emits as
	    ``UIComponentType.<NAME>``.
	    """

	    TEXTBOX = "textbox"
	    MARKDOWN = "markdown"
	    DATAFRAME = "dataframe"

	@dataclass
	class UIConfig:
	    """Settings for a single UI component attached to a prompt config.

	    No field declares a default, so every one of them must be supplied when
	    constructing an instance; the ``Optional`` fields accept ``None`` but
	    still have to be passed explicitly.
	    """

	    # NOTE(review): annotated as ``str``, but parsed configs presumably carry
	    # a ``UIComponentType`` member here — confirm against the callers.
	    component_type: str
	    label: str
	    default_value: Optional[str]
	    visible: bool
	    interactive: bool
	    lines: Optional[int]
	    description: str
	    show_copy_button: bool
	    elem_classes: Optional[list]

	@dataclass
	class PromptConfig:
	    """One prompt definition together with its metadata and UI settings.

	    No field declares a default, so all of them are required at construction
	    time; ``thoughts`` accepts ``None`` but must still be passed.
	    """

	    prompt: str
	    description: str
	    step: str
	    sub_step: str
	    inputs: list
	    outputs: list
	    model: ModelType
	    thoughts: Optional[list]
	    # Maps a name to the UIConfig for that component.
	    # NOTE(review): what the keys identify is not visible here — confirm.
	    ui: Dict[str, UIConfig]

	# Names that config text may reference when ConfigParser evaluates it.
	# NOTE: this mapping carries no ``__builtins__`` entry of its own, so it does
	# not by itself restrict which built-ins are reachable from eval().
	safe_globals = {
	    exposed.__name__: exposed
	    for exposed in (PromptConfig, ModelType, UIConfig, UIComponentType)
	}

	class ConfigParser:
	    """Parse a text dump of prompt configs into a ``{key: config}`` mapping.

	    The input file is expected to contain one or more blocks delimited by
	    ``$$`` markers. Each block holds the *inside* of a dict literal (the
	    surrounding braces are added by the parser), e.g.
	    ``"some_key": PromptConfig(...)``. Text outside the markers is ignored.

	    Earlier regex-only attempts that matched ``PromptConfig(...)`` without
	    the ``$$`` markers were unstable and have been dropped in favour of this
	    marker-plus-``eval`` approach.
	    """

	    # Non-greedy so that every ``$$ ... $$`` pair yields its own block;
	    # DOTALL because blocks span multiple lines.
	    _BLOCK_PATTERN = re.compile(r"\$\$(.*?)\$\$", re.DOTALL)
	    # Matches reprs such as ``<TEXTBOX: 'textbox'>``; group 1 is the member name.
	    _ENUM_PATTERN = re.compile(r'<(\w+):\s*\'[^\']+\'>')
	    # Blocks that fail to evaluate are reported here. The file is created in
	    # the current working directory and truncated on every parse.
	    ERROR_LOG_PATH = 'config_parser_errors.txt'

	    def __init__(self, file_path: str):
	        """Remember the path of the config dump; nothing is read yet."""
	        self.file_path = file_path

	    def clean_enum_values(self, text: str) -> str:
	        """Rewrite enum reprs in *text* so that they can be evaluated.

	        ``<TEXTBOX: 'textbox'>`` becomes ``UIComponentType.TEXTBOX``. Every
	        match is mapped onto ``UIComponentType``, whatever enum produced it.
	        """
	        return self._ENUM_PATTERN.sub(r'UIComponentType.\1', text)

	    def _eval_block(self, dict_str: str) -> dict:
	        """Evaluate one brace-wrapped block and return the resulting dict.

	        Raises:
	            TypeError: the block did not evaluate to a dict.
	            ValueError: the block evaluated to an empty dict.
	            Exception: anything raised while evaluating the block's text.
	        """
	        # SECURITY: eval() runs arbitrary expressions taken from the file, so
	        # only feed this parser files you trust. Built-ins are blanked out to
	        # narrow what a block can reach, but this is NOT a real sandbox.
	        # A copy is passed because eval() would otherwise insert
	        # ``__builtins__`` into the shared ``safe_globals`` mapping.
	        eval_globals = {**safe_globals, "__builtins__": {}}
	        parsed = eval(self.clean_enum_values(dict_str), eval_globals)
	        if not isinstance(parsed, dict):
	            raise TypeError(
	                f"expected a dict of configs, got {type(parsed).__name__}"
	            )
	        if not parsed:
	            raise ValueError("block contains no config entries")
	        return parsed

	    def parse_config(self) -> "Dict[str, PromptConfig]":
	        """Parse the config file and return the configs keyed by name.

	        Blocks that cannot be evaluated are skipped and described in
	        ``ERROR_LOG_PATH``; they never abort the whole parse. When several
	        blocks define the same key, the later block wins.

	        Returns:
	            Mapping of config key to the evaluated config object.

	        Raises:
	            OSError: the input file cannot be read or the error log cannot
	                be written.
	        """
	        print("Starting parse_config...")
	        with open(self.file_path, 'r', encoding='utf-8') as f:
	            content = f.read()
	        print(f"Read {len(content)} characters from file")

	        blocks = self._BLOCK_PATTERN.findall(content)
	        print(f"Found {len(blocks)} config blocks.")

	        configs = {}
	        # UTF-8 so that non-ASCII block content can always be written out,
	        # independent of the platform's default encoding.
	        with open(self.ERROR_LOG_PATH, 'w', encoding='utf-8') as error_log:
	            for i, block in enumerate(blocks, 1):
	                dict_str = "{" + block.strip() + "}"
	                try:
	                    config_dict = self._eval_block(dict_str)
	                except Exception as e:
	                    # Best effort: record the failure and move on.
	                    error_log.write(f"Block {i}: Error evaluating block: {e}\n")
	                    error_log.write("Block content:\n")
	                    error_log.write(dict_str + "\n")
	                    error_log.write("-" * 40 + "\n")
	                    continue
	                configs.update(config_dict)
	                key = ", ".join(str(k) for k in config_dict)
	                print(f"Block {i}: Successfully parsed config for key: {key}")
	        print(f"\nFinished parsing {len(configs)} configs")
	        return configs

	# Example usage
	# NOTE(review): this runs whenever the module is imported; consider moving it
	# under an ``if __name__ == "__main__":`` guard.

	# Option 1 : with $$ markers
	parser = ConfigParser('db_config_log_20250326_195828.txt')

	# Option 2 : without $$ markers
	# parser = ConfigParser('db_config_log_20250326_193344.txt')
	try:
	    config = parser.parse_config()
	    # Print total number of configs parsed
	    print(f"Total configs parsed: {len(config)}")
	    questioning_agent = config.get('questioning_agent')
	    print(f"Questioning Agent: {questioning_agent}")
	except Exception as e:
	    # Top-level boundary of the example: report the failure instead of
	    # letting a traceback escape.
	    print(f"Error parsing config: {str(e)}")