JatinAutonomousLabs committed on
Commit
06de9e0
·
verified ·
1 Parent(s): 99b1c2e

Update graph.py

Browse files
Files changed (1) hide show
  1. graph.py +78 -67
graph.py CHANGED
@@ -87,43 +87,47 @@ llm = ChatOpenAI(model="gpt-4o", temperature=0.1, max_retries=3, request_timeout
87
 
88
  def parse_json_from_llm(llm_output: str) -> Optional[dict]:
89
  """
90
- Robust JSON extraction from LLM text.
91
- Tries these strategies in order:
92
- 1. extract explicit ```json ... ``` fenced block
93
- 2. extract best-balanced {...} substring
94
- 3. json.loads()
95
- 4. ast.literal_eval() -> convert to JSON
96
- 5. conservative single-quote to double-quote + remove trailing commas -> json.loads()
97
-
98
- Returns parsed dict or None on failure. Logs the original content for debugging.
99
  """
100
- import json
101
  import re
 
102
  import ast
103
  from logging import getLogger
104
 
105
  logger = getLogger(__name__)
106
 
107
- if not llm_output or not llm_output.strip():
108
  return None
109
 
110
  text = llm_output.strip()
111
 
112
- # 1) Try explicit ```json ... ``` codeblock first (best)
113
- m = re.search(r"```json\s*(\{.*?\})\s*```", text, re.DOTALL | re.IGNORECASE)
114
- if m:
115
- candidate = m.group(1).strip()
116
  try:
117
  return json.loads(candidate)
118
  except Exception as e:
119
  logger.debug(f"json.loads failed on triple-backtick json block: {e}")
120
 
121
- # 2) Try to extract a balanced JSON-like substring (first balanced {...})
122
- # This is safer than taking first '{' to last '}'.
 
 
 
 
 
 
 
 
123
  def find_balanced_brace_substring(s: str):
124
  start_idx = None
125
  depth = 0
126
- best = None
127
  for i, ch in enumerate(s):
128
  if ch == '{':
129
  if start_idx is None:
@@ -133,19 +137,12 @@ def parse_json_from_llm(llm_output: str) -> Optional[dict]:
133
  if depth > 0:
134
  depth -= 1
135
  if depth == 0 and start_idx is not None:
136
- best = s[start_idx:i+1]
137
- return best
138
  return None
139
 
140
- candidate = None
141
- # Try extracting from ```...``` blocks as fallback (any code fences)
142
- m2 = re.search(r"```(?:json|python|text)?\s*(\{.*?\})\s*```", text, re.DOTALL | re.IGNORECASE)
143
- if m2:
144
- candidate = m2.group(1).strip()
145
- if not candidate:
146
- candidate = find_balanced_brace_substring(text)
147
 
148
- # If still nothing, as last resort take substring between first { and last }
149
  if not candidate:
150
  first = text.find('{')
151
  last = text.rfind('}')
@@ -159,45 +156,39 @@ def parse_json_from_llm(llm_output: str) -> Optional[dict]:
159
  except Exception as e:
160
  logger.debug(f"json.loads failed on candidate substring: {e}")
161
 
162
- # try ast.literal_eval (handles single quotes, Python tuples, etc.)
163
  try:
164
  parsed = ast.literal_eval(candidate)
165
- # ast.literal_eval may return non-dict (list etc.). Coerce to dict if needed.
166
  if isinstance(parsed, (dict, list)):
 
167
  return json.loads(json.dumps(parsed))
168
- else:
169
- logger.debug("ast.literal_eval produced non-dict result; returning None.")
170
  except Exception as e:
171
  logger.debug(f"ast.literal_eval failed: {e}")
172
 
173
- # final attempt: conservative cleanup for common LLM deviations:
174
- # - replace single quotes with double quotes only where safe-ish
175
- # - remove trailing commas before } or ]
176
  cleaned = candidate
177
-
178
- # Remove JavaScript/JSON-like line comments (// ...) and block comments (/* ... */)
179
- cleaned = re.sub(r"//.*?$", "", cleaned, flags=re.MULTILINE)
180
- cleaned = re.sub(r"/\*.*?\*/", "", cleaned, flags=re.DOTALL)
181
-
182
- # Remove trailing commas (",\s*}" or ",\s*]")
183
- cleaned = re.sub(r",\s*([}\]])", r"\1", cleaned)
184
-
185
- # Replace single-quoted strings with double-quoted strings where it looks like a string token
186
- # This is conservative: replace only when a single quote is preceded by : or [, or { or start-of-string.
187
- def single_to_double(match):
188
- inner = match.group(1)
189
- inner_escaped = inner.replace('"', '\\"')
190
- return f'"{inner_escaped}"'
191
- cleaned = re.sub(r"(?<=[:\{\[,]\s*)'([^']*?)'", single_to_double, cleaned)
192
-
193
- # Try json.loads again
194
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  return json.loads(cleaned)
196
  except Exception as e:
197
  logger.debug(f"json.loads still failed after cleanup: {e}")
198
 
199
- # As last safety, log the original LLM output for inspection (do not raise)
200
- logger.error("parse_json_from_llm failed to parse LLM output. LLM output preview (200 chars): %s", text[:200])
201
  return None
202
 
203
 
@@ -830,7 +821,6 @@ def run_qa_agent(state: AgentState):
830
  if state.get('experimentResults'):
831
  qa_context.append(f"\n=== ARTIFACTS ===\n{json.dumps(state.get('experimentResults', {}).get('paths', {}), indent=2)}")
832
 
833
- # MODIFIED PROMPT: Encourage convergence
834
  prompt = f"""You are a QA reviewer. Review the draft response against the user's objective.
835
  {chr(10).join(qa_context)}
836
 
@@ -842,24 +832,45 @@ def run_qa_agent(state: AgentState):
842
 
843
  If this is a re-submission (rework cycle > 1), has the previous feedback been successfully addressed?
844
 
845
- Response Format:
846
 
847
- If the work is complete and high-quality, respond ONLY with the word 'APPROVED'.
 
848
 
849
- Otherwise, provide SPECIFIC, ACTIONABLE, and NOVEL feedback on what must be changed. Do not repeat previous feedback if it has already been actioned.
 
 
 
 
 
850
  """
851
 
852
- response = llm.invoke(prompt)
853
- content = getattr(response, "content", "") or ""
 
 
 
 
 
854
 
855
- if "APPROVED" in content.upper():
 
856
  return {"approved": True, "qaFeedback": None, "execution_path": path, "status_update": "Approved"}
857
- else:
858
- # Sanitize feedback to ensure it's a useful string
859
- feedback = content.replace("APPROVED", "").strip()
860
- if not feedback:
861
- feedback = "General quality improvements required."
862
- return {"approved": False, "qaFeedback": feedback, "execution_path": path, "status_update": "Needs improvement"}
 
 
 
 
 
 
 
 
 
863
 
864
  def run_archivist_agent(state: AgentState):
865
  log.info("--- ARCHIVIST ---")
 
87
 
88
  def parse_json_from_llm(llm_output: str) -> Optional[dict]:
89
  """
90
+ More robust JSON extraction:
91
+ - Looks for explicit ```json {} ``` blocks
92
+ - Falls back to the first balanced {...} substring in the output
93
+ - Tries ast.literal_eval for Python-like dicts
94
+ - Performs conservative cleanup (remove trailing commas, comments, safe single->double quote) and retries
95
+ Returns dict or None. Logs failures for debugging.
 
 
 
96
  """
 
97
  import re
98
+ import json
99
  import ast
100
  from logging import getLogger
101
 
102
  logger = getLogger(__name__)
103
 
104
+ if not llm_output or not isinstance(llm_output, str) or not llm_output.strip():
105
  return None
106
 
107
  text = llm_output.strip()
108
 
109
+ # 1) explicit fenced JSON block
110
+ match = re.search(r"```json\s*({.*?})\s*```", text, re.DOTALL | re.IGNORECASE)
111
+ if match:
112
+ candidate = match.group(1).strip()
113
  try:
114
  return json.loads(candidate)
115
  except Exception as e:
116
  logger.debug(f"json.loads failed on triple-backtick json block: {e}")
117
 
118
+ # 2) any code-fence containing a JSON-like object
119
+ match2 = re.search(r"```(?:json|python|text)?\s*({.*?})\s*```", text, re.DOTALL | re.IGNORECASE)
120
+ if match2:
121
+ candidate = match2.group(1).strip()
122
+ try:
123
+ return json.loads(candidate)
124
+ except Exception as e:
125
+ logger.debug(f"json.loads failed on fenced candidate: {e}")
126
+
127
+ # 3) find first balanced {...} substring
128
  def find_balanced_brace_substring(s: str):
129
  start_idx = None
130
  depth = 0
 
131
  for i, ch in enumerate(s):
132
  if ch == '{':
133
  if start_idx is None:
 
137
  if depth > 0:
138
  depth -= 1
139
  if depth == 0 and start_idx is not None:
140
+ return s[start_idx:i+1]
 
141
  return None
142
 
143
+ candidate = find_balanced_brace_substring(text)
 
 
 
 
 
 
144
 
145
+ # 4) fallback: heuristically take the span from the first '{' to the last '}'
146
  if not candidate:
147
  first = text.find('{')
148
  last = text.rfind('}')
 
156
  except Exception as e:
157
  logger.debug(f"json.loads failed on candidate substring: {e}")
158
 
159
+ # try ast.literal_eval (handles single quotes & Python literals)
160
  try:
161
  parsed = ast.literal_eval(candidate)
 
162
  if isinstance(parsed, (dict, list)):
163
+ # convert to a strict JSON-compatible dict/list
164
  return json.loads(json.dumps(parsed))
 
 
165
  except Exception as e:
166
  logger.debug(f"ast.literal_eval failed: {e}")
167
 
168
+ # conservative cleanup: remove comments, trailing commas, and handle simple single-quote strings
 
 
169
  cleaned = candidate
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  try:
171
+ # remove line comments //...
172
+ cleaned = re.sub(r"//.*?$", "", cleaned, flags=re.MULTILINE)
173
+ # remove block comments /* ... */
174
+ cleaned = re.sub(r"/\*.*?\*/", "", cleaned, flags=re.DOTALL)
175
+ # remove trailing commas before } or ]
176
+ cleaned = re.sub(r",\s*([}\]])", r"\1", cleaned)
177
+
178
+ # replace single-quoted strings with double quotes when likely safe
179
+ def _single_to_double(m):
180
+ inner = m.group(1)
181
+ inner_escaped = inner.replace('"', '\\"')
182
+ return f'"{inner_escaped}"'
183
+ cleaned = re.sub(r"(?<=[:\{\[,]\s*)'([^']*?)'", _single_to_double, cleaned)
184
+
185
+ # final attempt
186
  return json.loads(cleaned)
187
  except Exception as e:
188
  logger.debug(f"json.loads still failed after cleanup: {e}")
189
 
190
+ # nothing parsed: log a preview of the output and return None
191
+ logger.error("parse_json_from_llm failed to parse LLM output. LLM output preview (200 chars): %s", text[:200].replace("\n","\\n"))
192
  return None
193
 
194
 
 
821
  if state.get('experimentResults'):
822
  qa_context.append(f"\n=== ARTIFACTS ===\n{json.dumps(state.get('experimentResults', {}).get('paths', {}), indent=2)}")
823
 
 
824
  prompt = f"""You are a QA reviewer. Review the draft response against the user's objective.
825
  {chr(10).join(qa_context)}
826
 
 
832
 
833
  If this is a re-submission (rework cycle > 1), has the previous feedback been successfully addressed?
834
 
835
+ Response Format (required JSON or a single word 'APPROVED'):
836
 
837
+ Either return EXACTLY the single word:
838
+ APPROVED
839
 
840
+ Or return JSON like:
841
+ {{
842
+ "approved": false,
843
+ "feedback": "Specific, actionable items to fix (bullet list or numbered).",
844
+ "required_changes": ["..."]
845
+ }}
846
  """
847
 
848
+ try:
849
+ response = llm.invoke(prompt)
850
+ content = getattr(response, "content", "") or ""
851
+ except Exception as e:
852
+ log.exception("QA LLM call failed: %s", e)
853
+ # Fail-safe: mark as not approved with conservative feedback
854
+ return {"approved": False, "qaFeedback": "QA LLM failed; manual review required.", "execution_path": path, "status_update": "QA failed"}
855
 
856
+ # If LLM returned APPROVED word, treat as approved
857
+ if "APPROVED" in content.strip().upper() and len(content.strip()) <= 20:
858
  return {"approved": True, "qaFeedback": None, "execution_path": path, "status_update": "Approved"}
859
+
860
+ # Else try JSON parse
861
+ parsed = parse_json_from_llm(content)
862
+ if isinstance(parsed, dict):
863
+ approved = bool(parsed.get("approved", False))
864
+ feedback = parsed.get("feedback") or parsed.get("qaFeedback") or parsed.get("required_changes") or ""
865
+ # Normalize feedback to string
866
+ if isinstance(feedback, list):
867
+ feedback = "\n".join([str(x) for x in feedback])
868
+ elif not isinstance(feedback, str):
869
+ feedback = str(feedback)
870
+ return {"approved": approved, "qaFeedback": feedback if not approved else None, "execution_path": path, "status_update": "QA completed"}
871
+ # Fallback: return raw text as feedback (not approved)
872
+ safe_feedback = content.strip()[:2000] or "QA produced no actionable output."
873
+ return {"approved": False, "qaFeedback": safe_feedback, "execution_path": path, "status_update": "QA needs rework"}
874
 
875
  def run_archivist_agent(state: AgentState):
876
  log.info("--- ARCHIVIST ---")