Spaces:
Runtime error
Runtime error
jess committed on
Commit ·
4abfd20
1
Parent(s): 54e6475
add: WORKING config parser
Browse files- config_parser.py +104 -78
- config_parser_errors.txt +0 -0
config_parser.py
CHANGED
|
@@ -6,6 +6,10 @@ from enum import Enum
|
|
| 6 |
class ModelType(Enum):
|
| 7 |
O1_MINI = "O1_MINI"
|
| 8 |
GPT_4O_MINI = "GPT_4O_MINI"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
@dataclass
|
| 11 |
class UIConfig:
|
|
@@ -31,6 +35,13 @@ class PromptConfig:
|
|
| 31 |
thoughts: Optional[list]
|
| 32 |
ui: Dict[str, UIConfig]
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
class ConfigParser:
|
| 35 |
def __init__(self, file_path: str):
|
| 36 |
self.file_path = file_path
|
|
@@ -68,6 +79,12 @@ class ConfigParser:
|
|
| 68 |
|
| 69 |
return ui_configs
|
| 70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
def parse_config(self) -> Dict[str, PromptConfig]:
|
| 72 |
"""Parse the config file and return a dictionary of PromptConfig objects"""
|
| 73 |
print("Starting parse_config...")
|
|
@@ -77,21 +94,6 @@ class ConfigParser:
|
|
| 77 |
|
| 78 |
# Split content into individual prompt configs
|
| 79 |
# prompt_pattern = r'"([^"]+)":\s*PromptConfig\(\s*prompt="""([^"]*?)""",\s*description="([^"]*)",\s*step="([^"]*)",\s*sub_step="([^"]*)",\s*inputs=\[([^\]]*)\],\s*outputs=\[([^\]]*)\],\s*model=([^,]*),\s*thoughts=\[([^\]]*)\],\s*ui=({[^}]+})\s*\)'
|
| 80 |
-
# Build regex pattern to match PromptConfig structure with optional fields
|
| 81 |
-
prompt_pattern = (
|
| 82 |
-
r'"([^"]+)":\s*' # Key
|
| 83 |
-
r'PromptConfig\(\s*' # PromptConfig start
|
| 84 |
-
r'prompt="""(.*?)""",\s*' # Prompt text (non-greedy match)
|
| 85 |
-
r'(?:description="([^"]*)",\s*)?' # Optional description
|
| 86 |
-
r'(?:step="([^"]*)",\s*)?' # Optional step
|
| 87 |
-
r'(?:sub_step="([^"]*)",\s*)?' # Optional sub_step
|
| 88 |
-
r'(?:inputs=\[(.*?)\],\s*)?' # Optional inputs list
|
| 89 |
-
r'(?:outputs=\[(.*?)\],\s*)?' # Optional outputs list
|
| 90 |
-
r'(?:model=([^,\s]*),\s*)?' # Optional model
|
| 91 |
-
r'(?:thoughts=\[(.*?)\],\s*)?' # Optional thoughts list
|
| 92 |
-
r'(?:ui=({[^}]+})\s*)?' # Optional UI config
|
| 93 |
-
r'\)' # PromptConfig end
|
| 94 |
-
)
|
| 95 |
|
| 96 |
# # Option 1 : Match all configs by $$ marker
|
| 97 |
# regex_pattern = r"\$\$(.*?)\$\$"
|
|
@@ -103,85 +105,109 @@ class ConfigParser:
|
|
| 103 |
# print("Using regex pattern to find prompt configs...")
|
| 104 |
|
| 105 |
# Option 2 : Matches configs, without $$ markers therefor very unstable
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
-
for match in re.finditer(prompt_pattern, content, re.DOTALL):
|
| 109 |
-
|
| 110 |
-
|
| 111 |
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
return configs
|
| 153 |
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
# Example usage
|
| 174 |
-
# $$
|
| 175 |
-
|
| 176 |
|
| 177 |
-
|
|
|
|
| 178 |
try:
|
| 179 |
config = parser.parse_config()
|
| 180 |
# print(f"Config: {config}")
|
| 181 |
# Print total number of configs parsed
|
| 182 |
print(f"Total configs parsed: {len(config)}")
|
| 183 |
-
|
| 184 |
-
|
| 185 |
# if questioning_agent:
|
| 186 |
# print("Prompt:", questioning_agent.prompt[:100])
|
| 187 |
# print("Model:", questioning_agent.model)
|
|
|
|
| 6 |
class ModelType(Enum):
|
| 7 |
O1_MINI = "O1_MINI"
|
| 8 |
GPT_4O_MINI = "GPT_4O_MINI"
|
| 9 |
+
class UIComponentType(Enum):
|
| 10 |
+
TEXTBOX = "textbox"
|
| 11 |
+
MARKDOWN = "markdown"
|
| 12 |
+
DATAFRAME = "dataframe"
|
| 13 |
|
| 14 |
@dataclass
|
| 15 |
class UIConfig:
|
|
|
|
| 35 |
thoughts: Optional[list]
|
| 36 |
ui: Dict[str, UIConfig]
|
| 37 |
|
| 38 |
+
safe_globals = {
|
| 39 |
+
"PromptConfig": PromptConfig,
|
| 40 |
+
"ModelType": ModelType,
|
| 41 |
+
"UIConfig": UIConfig,
|
| 42 |
+
"UIComponentType": UIComponentType,
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
class ConfigParser:
|
| 46 |
def __init__(self, file_path: str):
|
| 47 |
self.file_path = file_path
|
|
|
|
| 79 |
|
| 80 |
return ui_configs
|
| 81 |
|
| 82 |
+
def clean_enum_values(self, text: str) -> str:
|
| 83 |
+
"""Clean enum values in the text to make them evaluatable"""
|
| 84 |
+
# Replace enum patterns like <TEXTBOX: 'textbox'> with just the enum name
|
| 85 |
+
enum_pattern = r'<(\w+):\s*\'[^\']+\'>'
|
| 86 |
+
return re.sub(enum_pattern, r'UIComponentType.\1', text)
|
| 87 |
+
|
| 88 |
def parse_config(self) -> Dict[str, PromptConfig]:
|
| 89 |
"""Parse the config file and return a dictionary of PromptConfig objects"""
|
| 90 |
print("Starting parse_config...")
|
|
|
|
| 94 |
|
| 95 |
# Split content into individual prompt configs
|
| 96 |
# prompt_pattern = r'"([^"]+)":\s*PromptConfig\(\s*prompt="""([^"]*?)""",\s*description="([^"]*)",\s*step="([^"]*)",\s*sub_step="([^"]*)",\s*inputs=\[([^\]]*)\],\s*outputs=\[([^\]]*)\],\s*model=([^,]*),\s*thoughts=\[([^\]]*)\],\s*ui=({[^}]+})\s*\)'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
|
| 98 |
# # Option 1 : Match all configs by $$ marker
|
| 99 |
# regex_pattern = r"\$\$(.*?)\$\$"
|
|
|
|
| 105 |
# print("Using regex pattern to find prompt configs...")
|
| 106 |
|
| 107 |
# Option 2 : Matches configs, without $$ markers therefor very unstable
|
| 108 |
+
# prompt_pattern = (
|
| 109 |
+
# r'"([^"]+)":\s*' # Key
|
| 110 |
+
# r'PromptConfig\(\s*' # PromptConfig start
|
| 111 |
+
# r'prompt="""(.*?)""",\s*' # Prompt text (non-greedy match)
|
| 112 |
+
# r'(?:description="([^"]*)",\s*)?' # Optional description
|
| 113 |
+
# r'(?:step="([^"]*)",\s*)?' # Optional step
|
| 114 |
+
# r'(?:sub_step="([^"]*)",\s*)?' # Optional sub_step
|
| 115 |
+
# r'(?:inputs=\[(.*?)\],\s*)?' # Optional inputs list
|
| 116 |
+
# r'(?:outputs=\[(.*?)\],\s*)?' # Optional outputs list
|
| 117 |
+
# r'(?:model=([^,\s]*),\s*)?' # Optional model
|
| 118 |
+
# r'(?:thoughts=\[(.*?)\],\s*)?' # Optional thoughts list
|
| 119 |
+
# r'(?:ui=({[^}]+})\s*)?' # Optional UI config
|
| 120 |
+
# r'\)' # PromptConfig end
|
| 121 |
+
# )
|
| 122 |
+
# configs = {}
|
| 123 |
|
| 124 |
+
# for match in re.finditer(prompt_pattern, content, re.DOTALL):
|
| 125 |
+
# key = match.group(1)
|
| 126 |
+
# print(f"\nProcessing config for key: {key}")
|
| 127 |
|
| 128 |
+
# prompt = match.group(2).strip()
|
| 129 |
+
# description = match.group(3)
|
| 130 |
+
# step = match.group(4)
|
| 131 |
+
# sub_step = match.group(5)
|
| 132 |
+
# print(f"Found description: {description}, step: {step}, sub_step: {sub_step}")
|
| 133 |
|
| 134 |
+
# # Parse lists
|
| 135 |
+
# inputs = [x.strip("'") for x in match.group(6).split(",") if x.strip()]
|
| 136 |
+
# outputs = [x.strip("'") for x in match.group(7).split(",") if x.strip()]
|
| 137 |
+
# print(f"Parsed inputs: {inputs}")
|
| 138 |
+
# print(f"Parsed outputs: {outputs}")
|
| 139 |
|
| 140 |
+
# # Parse model type
|
| 141 |
+
# model_str = match.group(8).strip()
|
| 142 |
+
# model = ModelType[model_str.split(".")[-1]] if model_str else ModelType.O1_MINI
|
| 143 |
+
# print(f"Using model: {model}")
|
| 144 |
|
| 145 |
+
# # Parse thoughts
|
| 146 |
+
# thoughts_str = match.group(9)
|
| 147 |
+
# thoughts = [x.strip("'") for x in thoughts_str.split(",") if x.strip()] if thoughts_str else None
|
| 148 |
+
# print(f"Found {len(thoughts) if thoughts else 0} thoughts")
|
| 149 |
|
| 150 |
+
# # Parse UI config
|
| 151 |
+
# ui_text = match.group(10)
|
| 152 |
+
# print("Parsing UI config...")
|
| 153 |
+
# ui = self.parse_ui_config(ui_text)
|
| 154 |
+
# print(f"Found {len(ui)} UI components")
|
| 155 |
|
| 156 |
+
# configs[key] = PromptConfig(
|
| 157 |
+
# prompt=prompt,
|
| 158 |
+
# description=description,
|
| 159 |
+
# step=step,
|
| 160 |
+
# sub_step=sub_step,
|
| 161 |
+
# inputs=inputs,
|
| 162 |
+
# outputs=outputs,
|
| 163 |
+
# model=model,
|
| 164 |
+
# thoughts=thoughts,
|
| 165 |
+
# ui=ui
|
| 166 |
+
# )
|
| 167 |
+
# print(f"Successfully created PromptConfig for {key}")
|
| 168 |
+
# return configs
|
| 169 |
|
| 170 |
+
# Option 3, Block and ast method
|
| 171 |
+
|
| 172 |
+
# Extract each block wrapped by $$ markers.
|
| 173 |
+
block_pattern = r"\$\$(.*?)\$\$"
|
| 174 |
+
blocks = re.findall(block_pattern, content, re.DOTALL)
|
| 175 |
+
print(f"Found {len(blocks)} config blocks.")
|
| 176 |
+
|
| 177 |
+
configs = {}
|
| 178 |
+
error_log_path = 'config_parser_errors.txt'
|
| 179 |
+
with open(error_log_path, 'w') as error_log:
|
| 180 |
+
for i, block in enumerate(blocks, 1):
|
| 181 |
+
block = block.strip()
|
| 182 |
+
dict_str = "{" + block + "}"
|
| 183 |
+
try:
|
| 184 |
+
# Clean the enum values before evaluation
|
| 185 |
+
cleaned_dict_str = self.clean_enum_values(dict_str)
|
| 186 |
+
config_dict = eval(cleaned_dict_str, safe_globals)
|
| 187 |
+
configs.update(config_dict)
|
| 188 |
+
key = list(config_dict.keys())[0]
|
| 189 |
+
print(f"Block {i}: Successfully parsed config for key: {key}")
|
| 190 |
+
except Exception as e:
|
| 191 |
+
error_log.write(f"Block {i}: Error evaluating block: {e}\n")
|
| 192 |
+
error_log.write("Block content:\n")
|
| 193 |
+
error_log.write(dict_str + "\n")
|
| 194 |
+
error_log.write("-" * 40 + "\n")
|
| 195 |
+
print(f"\nFinished parsing {len(configs)} configs")
|
| 196 |
+
return configs
|
| 197 |
|
| 198 |
# Example usage
|
| 199 |
+
# Option 1 : with $$ markers
|
| 200 |
+
parser = ConfigParser('db_config_log_20250326_195828.txt')
|
| 201 |
|
| 202 |
+
# Option 2 : without $$ markers
|
| 203 |
+
# parser = ConfigParser('db_config_log_20250326_193344.txt')
|
| 204 |
try:
|
| 205 |
config = parser.parse_config()
|
| 206 |
# print(f"Config: {config}")
|
| 207 |
# Print total number of configs parsed
|
| 208 |
print(f"Total configs parsed: {len(config)}")
|
| 209 |
+
questioning_agent = config.get('questioning_agent')
|
| 210 |
+
print(f"Questioning Agent: {questioning_agent}")
|
| 211 |
# if questioning_agent:
|
| 212 |
# print("Prompt:", questioning_agent.prompt[:100])
|
| 213 |
# print("Model:", questioning_agent.model)
|
config_parser_errors.txt
ADDED
|
File without changes
|