JatinAutonomousLabs committed on
Commit
01801a1
·
verified ·
1 Parent(s): 8a812fa

Update graph.py

Browse files
Files changed (1) hide show
  1. graph.py +84 -77
graph.py CHANGED
@@ -1,4 +1,4 @@
1
- # graph.py (patched: artifact generation using nbformat, python-docx, pandas/openpyxl, reportlab)
2
  import json
3
  import re
4
  import math
@@ -13,7 +13,7 @@ from memory_manager import memory_manager
13
  from code_executor import execute_python_code
14
  from logging_config import setup_logging, get_logger
15
 
16
- # External artifact libs
17
  import nbformat
18
  from nbformat.v4 import new_notebook, new_markdown_cell, new_code_cell
19
  import pandas as pd
@@ -23,6 +23,7 @@ from reportlab.lib.styles import getSampleStyleSheet
23
 
24
  # --- Helpers ---
25
  def ensure_list(state, key):
 
26
  v = state.get(key) if state else None
27
  if v is None:
28
  return []
@@ -33,6 +34,7 @@ def ensure_list(state, key):
33
  return [v]
34
 
35
  def ensure_int(state, key, default=0):
 
36
  try:
37
  v = state.get(key) if state else None
38
  if v is None:
@@ -42,10 +44,10 @@ def ensure_int(state, key, default=0):
42
  return default
43
 
44
  def sanitize_path(path: str) -> str:
45
- # On HF Spaces you may want to move to a served directory. Keep as-is here.
46
- return path
47
 
48
- # --- Logging & constants ---
49
  setup_logging()
50
  log = get_logger(__name__)
51
  INITIAL_MAX_REWORK_CYCLES = 3
@@ -74,8 +76,11 @@ class AgentState(TypedDict):
74
  llm = ChatOpenAI(model="gpt-4o", temperature=0.1, max_retries=3, request_timeout=60)
75
 
76
  def parse_json_from_llm(llm_output: str) -> Optional[dict]:
 
77
  try:
78
- match = re.search(r"```json\n({.*?})\n```", llm_output, re.DOTALL)
 
 
79
  if match:
80
  json_str = match.group(1)
81
  else:
@@ -86,11 +91,14 @@ def parse_json_from_llm(llm_output: str) -> Optional[dict]:
86
  json_str = llm_output[start:end+1]
87
  return json.loads(json_str)
88
  except Exception as e:
89
- log.error(f"JSON parsing failed. Error: {e}. Raw: {llm_output[:300]}")
90
  return None
91
 
92
- # --- Artifact detection ---
 
 
93
  def detect_requested_output_types(text: str) -> Dict:
 
94
  if not text:
95
  return {"requires_artifact": False, "artifact_type": None, "artifact_hint": None}
96
  t = text.lower()
@@ -106,23 +114,42 @@ def detect_requested_output_types(text: str) -> Dict:
106
  return {"requires_artifact": True, "artifact_type": "image", "artifact_hint": "image/plot"}
107
  if any(k in t for k in ["repo", "repository", "app repo", "dockerfile", "requirements.txt", "package.json"]):
108
  return {"requires_artifact": True, "artifact_type": "repo", "artifact_hint": "application repository (zip)"}
109
- # scripts for languages
110
  if any(k in t for k in [".py", "python script", "r script", ".R", ".r", "java", ".java", "javascript", ".js"]):
111
- # heuristic: choose 'script' and later infer language
112
  return {"requires_artifact": True, "artifact_type": "script", "artifact_hint": "language script (py/r/java/js/etc.)"}
113
  return {"requires_artifact": False, "artifact_type": None, "artifact_hint": None}
114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  # --- Notebook & artifact builders ---
116
  def write_notebook_from_text(llm_text: str, out_dir: str="/tmp") -> str:
117
- """
118
- Build a notebook via nbformat from llm_text using fenced python code blocks as code cells and other text as markdown.
119
- """
120
- code_blocks = re.findall(r"```python\n(.*?)\n```", llm_text, re.DOTALL)
121
- # fallback to any fenced blocks
122
  if not code_blocks:
123
- code_blocks = re.findall(r"```\n(.*?)\n```", llm_text, re.DOTALL)
124
- # split markdown by removing code blocks
125
- md_parts = re.split(r"```(?:python)?\n.*?\n```", llm_text, flags=re.DOTALL)
126
  nb = new_notebook()
127
  cells = []
128
  max_len = max(len(md_parts), len(code_blocks))
@@ -145,11 +172,11 @@ def write_script(code_text: str, language_hint: Optional[str]=None, out_dir: str
145
  l = language_hint.lower()
146
  if "python" in l or ".py" in l:
147
  ext = ".py"
148
- elif l in ("r", ".r"):
149
  ext = ".R"
150
  elif "java" in l or ".java" in l:
151
  ext = ".java"
152
- elif "javascript" in l or "node" in l or ".js" in l:
153
  ext = ".js"
154
  elif "bash" in l or "sh" in l:
155
  ext = ".sh"
@@ -161,7 +188,6 @@ def write_script(code_text: str, language_hint: Optional[str]=None, out_dir: str
161
 
162
  def write_docx_from_text(text: str, out_dir: str="/tmp") -> str:
163
  doc = Document()
164
- # naive: split into paragraphs on double-newline
165
  for para in [p.strip() for p in text.split("\n\n") if p.strip()]:
166
  doc.add_paragraph(para)
167
  uid = uuid.uuid4().hex[:10]
@@ -170,27 +196,18 @@ def write_docx_from_text(text: str, out_dir: str="/tmp") -> str:
170
  return filename
171
 
172
  def write_excel_from_tables(maybe_table_text: str, out_dir: str="/tmp") -> str:
173
- """
174
- Heuristic: If LLM returns a JSON-convertible table or CSV snippet, attempt to form a DataFrame.
175
- Otherwise write a small DataFrame with the provided text.
176
- """
177
  uid = uuid.uuid4().hex[:10]
178
  filename = os.path.join(out_dir, f"generated_excel_{uid}.xlsx")
179
  try:
180
- # try JSON parse
181
- parsed = None
182
  try:
183
  parsed = json.loads(maybe_table_text)
184
- # if parsed is list of dicts
185
  if isinstance(parsed, list):
186
  df = pd.DataFrame(parsed)
187
  elif isinstance(parsed, dict):
188
- # dict of lists or single mapping
189
  df = pd.DataFrame([parsed])
190
  else:
191
  df = pd.DataFrame({"content":[str(maybe_table_text)]})
192
  except Exception:
193
- # fallback: look for CSV text
194
  if "," in maybe_table_text or "\t" in maybe_table_text:
195
  from io import StringIO
196
  df = pd.read_csv(StringIO(maybe_table_text))
@@ -200,7 +217,6 @@ def write_excel_from_tables(maybe_table_text: str, out_dir: str="/tmp") -> str:
200
  return filename
201
  except Exception as e:
202
  log.error(f"Excel creation failed: {e}")
203
- # write fallback docx with text
204
  return write_docx_from_text(f"Failed to create excel. Error: {e}\n\nOriginal:\n{maybe_table_text}", out_dir=out_dir)
205
 
206
  def write_pdf_from_text(text: str, out_dir: str="/tmp") -> str:
@@ -217,14 +233,9 @@ def write_pdf_from_text(text: str, out_dir: str="/tmp") -> str:
217
  return filename
218
  except Exception as e:
219
  log.error(f"PDF creation failed: {e}")
220
- # fallback to docx
221
  return write_docx_from_text(f"Failed to create PDF. Error: {e}\n\nOriginal:\n{text}", out_dir=out_dir)
222
 
223
  def build_repo_zip(files_map: Dict[str,str], repo_name: str="generated_app", out_dir: str="/tmp") -> str:
224
- """
225
- files_map: dict of relative path -> absolute local file path/content.
226
- If the value is a string and exists as a path, include file. If not a path, create a file with that content.
227
- """
228
  uid = uuid.uuid4().hex[:8]
229
  repo_dir = os.path.join(out_dir, f"{repo_name}_{uid}")
230
  os.makedirs(repo_dir, exist_ok=True)
@@ -234,7 +245,6 @@ def build_repo_zip(files_map: Dict[str,str], repo_name: str="generated_app", out
234
  if isinstance(content, str) and os.path.exists(content):
235
  shutil.copyfile(content, dest)
236
  else:
237
- # treat content as file content
238
  with open(dest, "w", encoding="utf-8") as fh:
239
  fh.write(str(content))
240
  zip_path = os.path.join(out_dir, f"{repo_name}_{uid}.zip")
@@ -246,14 +256,13 @@ def build_repo_zip(files_map: Dict[str,str], repo_name: str="generated_app", out
246
  zf.write(full, arc)
247
  return zip_path
248
 
249
- # --- Node functions (triage/planner/memory/intent/pm/experimenter/synthesis/qa/archivist/disclaimer) ---
250
- # For brevity reuse earlier implementations but with artifact creation in experimenter
251
-
252
  def run_triage_agent(state: AgentState):
253
  log.info("--- triage ---")
254
  prompt = f"Analyze the user input. Is it a simple conversational greeting or a task? Respond with 'greeting' or 'task'.\n\nUser Input: \"{state.get('userInput','')}\""
255
  response = llm.invoke(prompt)
256
- if 'greeting' in response.content.lower():
 
257
  log.info("Triage result: Simple Greeting.")
258
  return {"draftResponse": "Hello! How can I help you today?", "execution_path": ["Triage Agent"], "status_update": "Responding to greeting."}
259
  else:
@@ -268,7 +277,7 @@ def run_planner_agent(state: AgentState):
268
  f"User Request: \"{state.get('userInput','')}\". Respond in JSON with keys: 'plan' (list of strings), 'estimated_llm_calls_per_loop' (integer)."
269
  )
270
  response = llm.invoke(prompt)
271
- plan_data = parse_json_from_llm(response.content)
272
  if not plan_data:
273
  return {"pmPlan": {"error": "Failed to create a valid plan."}, "execution_path": path, "status_update": "Error: Could not create a plan."}
274
  calls_per_loop = plan_data.get('estimated_llm_calls_per_loop', 3)
@@ -302,7 +311,7 @@ def run_intent_agent(state: AgentState):
302
  path = ensure_list(state, 'execution_path') + ["Intent Agent"]
303
  prompt = (f"Refine the user's request into a clear, actionable 'core objective prompt'.\n\nRelevant Memory:\n{state.get('retrievedMemory')}\n\nUser Request: \"{state.get('userInput','')}\"\n\nCore Objective:")
304
  response = llm.invoke(prompt)
305
- core_obj = response.content
306
  detection = detect_requested_output_types(core_obj or state.get('userInput',''))
307
  extras = {}
308
  if detection.get('requires_artifact'):
@@ -322,25 +331,28 @@ def run_pm_agent(state: AgentState):
322
  f"Respond in JSON with keys: 'plan_steps' (list), 'experiment_needed' (bool), 'experiment_type' (optional string), and 'experiment_goal' (str if needed)."
323
  )
324
  response = llm.invoke(prompt)
325
- plan = parse_json_from_llm(response.content)
326
  if not plan:
327
  log.warning("PM Agent did not produce JSON — applying heuristic fallback.")
328
  plan = {"plan_steps": ["Analyze files", "Create notebook if requested", "Synthesize answers"], "experiment_needed": False}
329
- intent_detector = state.get('artifact_detection') or {}
330
- if intent_detector.get('requires_artifact'):
331
- plan['experiment_needed'] = True
332
- plan['experiment_type'] = intent_detector.get('artifact_type')
333
- plan['experiment_goal'] = f"Produce an artifact: {intent_detector.get('artifact_hint')}. Use document reading and test edge cases for messy files in the folder. {state.get('userInput','')}"
334
- if plan.get('experiment_needed') and not plan.get('experiment_type'):
335
- detection = detect_requested_output_types(state.get('coreObjectivePrompt','') or state.get('userInput',''))
336
- if detection.get('requires_artifact'):
337
- plan['experiment_type'] = detection.get('artifact_type')
338
- plan['experiment_goal'] = plan.get('experiment_goal') or f"Produce an artifact: {detection.get('artifact_hint')}."
339
  log.info(f"Generated Plan: Experiment Needed = {plan.get('experiment_needed', False)}, Type = {plan.get('experiment_type')}")
340
  return {"pmPlan": plan, "execution_path": path, "rework_cycles": current_cycles, "status_update": "Breaking down the objective into a detailed plan..."}
341
 
342
- def _extract_python_blocks(text: str) -> List[str]:
343
- return re.findall(r"```python\n(.*?)\n```", text, re.DOTALL) or re.findall(r"```\n(.*?)\n```", text, re.DOTALL)
 
 
 
 
 
 
344
 
345
  def run_experimenter_agent(state: AgentState):
346
  log.info("--- 🔬 Running Experimenter Agent ---")
@@ -348,20 +360,20 @@ def run_experimenter_agent(state: AgentState):
348
  pm = state.get('pmPlan', {}) or {}
349
  if not pm.get('experiment_needed'):
350
  return {"experimentCode": None, "experimentResults": None, "execution_path": path, "status_update": "Proceeding without a code experiment."}
351
- exp_type = pm.get('experiment_type') or 'notebook'
352
  goal = pm.get('experiment_goal', 'No goal specified.')
353
  response = llm.invoke(
354
  f"Produce content for artifact type '{exp_type}' to achieve: {goal}\n"
355
- "Return runnable code in fenced code blocks where appropriate, and explanatory text in plaintext."
356
  )
357
- llm_text = response.content or ""
358
  out_dir = "/tmp"
359
  results = {"success": False, "paths": {}, "stderr": "", "stdout": ""}
360
  try:
361
  if exp_type == 'notebook':
362
  nb_path = write_notebook_from_text(llm_text, out_dir=out_dir)
363
  results.update({"success": True, "paths": {"notebook": sanitize_path(nb_path)}})
364
- return {"experimentCode": None, "experimentResults": results, "experiment_llm_text": llm_text, "execution_path": path, "status_update": f"Notebook generated at {nb_path}"}
365
  elif exp_type == 'excel':
366
  excel_path = write_excel_from_tables(llm_text, out_dir=out_dir)
367
  results.update({"success": True, "paths": {"excel": sanitize_path(excel_path)}})
@@ -375,40 +387,34 @@ def run_experimenter_agent(state: AgentState):
375
  results.update({"success": True, "paths": {"pdf": sanitize_path(pdf_path)}})
376
  return {"experimentCode": None, "experimentResults": results, "execution_path": path, "status_update": f"PDF generated at {pdf_path}"}
377
  elif exp_type == 'script':
378
- # pick a language hint from plan or goal
379
  lang_hint = pm.get('experiment_language') or ("python" if ".py" in goal.lower() else None)
380
- # extract code blocks
381
- code_blocks = _extract_python_blocks(llm_text)
382
  if not code_blocks:
383
- # fallback: entire content
384
  code_text = llm_text
385
  else:
386
  code_text = "\n\n# === BLOCK ===\n\n".join(code_blocks)
387
  script_path = write_script(code_text, language_hint=lang_hint, out_dir=out_dir)
388
- # optionally execute python scripts
389
  exec_results = {}
390
  if script_path.endswith(".py"):
391
- exec_results = execute_python_code(open(script_path,"r",encoding="utf-8").read())
 
 
 
392
  results.update({"success": True, "paths": {"script": sanitize_path(script_path)}, "stdout": exec_results.get("stdout",""), "stderr": exec_results.get("stderr","")})
393
  return {"experimentCode": code_text, "experimentResults": results, "execution_path": path, "status_update": f"Script generated at {script_path}"}
394
  elif exp_type == 'repo':
395
- # build a minimal repo by calling LLM for file suggestions or using code blocks
396
- # Heuristic: create a simple app repo containing a notebook and README and requirements.txt
397
  repo_files = {}
398
- # README from first 400 chars as text
399
  readme = (llm_text[:1000] + "\n\n") if llm_text else "Generated repo"
400
  repo_files["README.md"] = readme
401
- # include generated notebook
402
  nb_path = write_notebook_from_text(llm_text, out_dir=out_dir)
403
  repo_files["analysis.ipynb"] = nb_path
404
- # requirements: keep minimal
405
  reqs = "nbformat\npandas\nopenpyxl\npython-docx\nreportlab"
406
  repo_files["requirements.txt"] = reqs
407
  zip_path = build_repo_zip(repo_files, repo_name="generated_app", out_dir=out_dir)
408
  results.update({"success": True, "paths": {"repo_zip": sanitize_path(zip_path)}})
409
  return {"experimentCode": None, "experimentResults": results, "execution_path": path, "status_update": f"Repository ZIP created at {zip_path}"}
410
  else:
411
- # fallback: create docx with llm_text
412
  fallback = write_docx_from_text(llm_text, out_dir=out_dir)
413
  results.update({"success": True, "paths": {"docx": sanitize_path(fallback)}})
414
  return {"experimentCode": None, "experimentResults": results, "execution_path": path, "status_update": f"Fallback DOCX generated at {fallback}"}
@@ -438,7 +444,7 @@ def run_synthesis_agent(state: AgentState):
438
  f"Plan: {state.get('pmPlan', {}).get('plan_steps')}\n\n{results_summary}\n\nFinal Response:"
439
  )
440
  response = llm.invoke(prompt)
441
- final_text = response.content or ""
442
  if artifact_message:
443
  final_text = final_text + "\n\n" + artifact_message
444
  return {"draftResponse": final_text, "execution_path": path, "status_update": "Putting together the final response..."}
@@ -449,10 +455,11 @@ def run_qa_agent(state: AgentState):
449
  prompt = (f"Review the draft response based on the core objective. Respond ONLY with 'APPROVED' or provide concise feedback for rework.\n\n"
450
  f"Core Objective: {state.get('coreObjectivePrompt')}\n\nDraft: {state.get('draftResponse')}")
451
  response = llm.invoke(prompt)
452
- if "APPROVED" in response.content.upper():
 
453
  return {"approved": True, "qaFeedback": None, "execution_path": path, "status_update": "Response approved!"}
454
  else:
455
- return {"approved": False, "qaFeedback": response.content or "No specific feedback.", "execution_path": path, "status_update": "Response needs improvement. Reworking..."}
456
 
457
  def run_archivist_agent(state: AgentState):
458
  log.info("--- 💾 Running Archivist Agent ---")
@@ -460,7 +467,7 @@ def run_archivist_agent(state: AgentState):
460
  summary_prompt = (f"Create a concise summary of this successful task for long-term memory.\n\n"
461
  f"Core Objective: {state.get('coreObjectivePrompt')}\n\nFinal Response: {state.get('draftResponse')}\n\nMemory Summary:")
462
  response = llm.invoke(summary_prompt)
463
- memory_manager.add_to_memory(response.content, {"objective": state.get('coreObjectivePrompt')})
464
  return {"execution_path": path, "status_update": "Saving key learnings for future reference..."}
465
 
466
  def run_disclaimer_agent(state: AgentState):
@@ -487,7 +494,7 @@ def should_run_experiment(state: AgentState):
487
  pm = state.get('pmPlan', {}) or {}
488
  return "experimenter_agent" if pm.get('experiment_needed') else "synthesis_agent"
489
 
490
- # --- Build graphs (same as before) ---
491
  triage_workflow = StateGraph(AgentState)
492
  triage_workflow.add_node("triage", run_triage_agent)
493
  triage_workflow.set_entry_point("triage")
 
1
+ # graph.py (final patched)
2
  import json
3
  import re
4
  import math
 
13
  from code_executor import execute_python_code
14
  from logging_config import setup_logging, get_logger
15
 
16
+ # Artifact libs
17
  import nbformat
18
  from nbformat.v4 import new_notebook, new_markdown_cell, new_code_cell
19
  import pandas as pd
 
23
 
24
  # --- Helpers ---
25
  def ensure_list(state, key):
26
+ """Return a list from state[key], default [] if missing/None/not-list."""
27
  v = state.get(key) if state else None
28
  if v is None:
29
  return []
 
34
  return [v]
35
 
36
  def ensure_int(state, key, default=0):
37
+ """Return an int from state[key], default if missing/invalid."""
38
  try:
39
  v = state.get(key) if state else None
40
  if v is None:
 
44
  return default
45
 
46
def sanitize_path(path: str) -> str:
    """Normalize *path* to an absolute path suitable for returning to the UI.

    Relative paths are resolved against the current working directory;
    absolute paths are normalized (redundant separators and ``..`` collapsed).
    """
    absolute = os.path.abspath(path)
    return absolute
49
 
50
+ # --- Setup & constants ---
51
  setup_logging()
52
  log = get_logger(__name__)
53
  INITIAL_MAX_REWORK_CYCLES = 3
 
76
  llm = ChatOpenAI(model="gpt-4o", temperature=0.1, max_retries=3, request_timeout=60)
77
 
78
  def parse_json_from_llm(llm_output: str) -> Optional[dict]:
79
+ """Robustly try to extract JSON object from LLM text."""
80
  try:
81
+ if not llm_output:
82
+ return None
83
+ match = re.search(r"```json\s*({.*?})\s*```", llm_output, re.DOTALL)
84
  if match:
85
  json_str = match.group(1)
86
  else:
 
91
  json_str = llm_output[start:end+1]
92
  return json.loads(json_str)
93
  except Exception as e:
94
+ log.error(f"JSON parsing failed. Error: {e}. Raw head: {llm_output[:300]}")
95
  return None
96
 
97
+ # --- Artifact detection & normalization ---
98
+ KNOWN_ARTIFACT_TYPES = {"notebook","excel","word","pdf","image","repo","script"}
99
+
100
  def detect_requested_output_types(text: str) -> Dict:
101
+ """Heuristic detect requested artifact type from text."""
102
  if not text:
103
  return {"requires_artifact": False, "artifact_type": None, "artifact_hint": None}
104
  t = text.lower()
 
114
  return {"requires_artifact": True, "artifact_type": "image", "artifact_hint": "image/plot"}
115
  if any(k in t for k in ["repo", "repository", "app repo", "dockerfile", "requirements.txt", "package.json"]):
116
  return {"requires_artifact": True, "artifact_type": "repo", "artifact_hint": "application repository (zip)"}
 
117
  if any(k in t for k in [".py", "python script", "r script", ".R", ".r", "java", ".java", "javascript", ".js"]):
 
118
  return {"requires_artifact": True, "artifact_type": "script", "artifact_hint": "language script (py/r/java/js/etc.)"}
119
  return {"requires_artifact": False, "artifact_type": None, "artifact_hint": None}
120
 
121
def normalize_experiment_type(exp_type: Optional[str], goal_text: str) -> str:
    """Coerce an LLM-supplied experiment_type into one of the known artifact
    types, falling back to heuristic keyword detection on *goal_text*.

    Returns "docx" when nothing can be inferred from either argument.
    """
    def infer_from_goal() -> str:
        # Delegate to the keyword detector; "docx" is the safe default artifact.
        detected = detect_requested_output_types(goal_text or "")
        return detected.get("artifact_type") or "docx"

    if not exp_type:
        return infer_from_goal()

    normalized = exp_type.strip().lower()
    # Already one of the canonical types: pass it through unchanged.
    if normalized in KNOWN_ARTIFACT_TYPES:
        return normalized

    # Ordered synonym table: the first entry whose keyword appears wins,
    # matching the original if-chain order exactly.
    synonym_table = (
        (("notebook", "ipynb"), "notebook"),
        (("excel", "xlsx", "spreadsheet"), "excel"),
        (("word", "docx"), "word"),
        (("pdf",), "pdf"),
        (("repo", "repository", "app"), "repo"),
        (("script", "python", ".py"), "script"),
    )
    for keywords, canonical in synonym_table:
        if any(keyword in normalized for keyword in keywords):
            return canonical

    # Unknown label: fall back to detection from the goal text.
    return infer_from_goal()
146
+
147
  # --- Notebook & artifact builders ---
148
  def write_notebook_from_text(llm_text: str, out_dir: str="/tmp") -> str:
149
+ code_blocks = re.findall(r"```python\s*(.*?)\s*```", llm_text, re.DOTALL)
 
 
 
 
150
  if not code_blocks:
151
+ code_blocks = re.findall(r"```\s*(.*?)\s*```", llm_text, re.DOTALL)
152
+ md_parts = re.split(r"```(?:python)?\s*.*?\s*```", llm_text, flags=re.DOTALL)
 
153
  nb = new_notebook()
154
  cells = []
155
  max_len = max(len(md_parts), len(code_blocks))
 
172
  l = language_hint.lower()
173
  if "python" in l or ".py" in l:
174
  ext = ".py"
175
+ elif l == "r" or l == ".r":
176
  ext = ".R"
177
  elif "java" in l or ".java" in l:
178
  ext = ".java"
179
+ elif "javascript" in l or ".js" in l:
180
  ext = ".js"
181
  elif "bash" in l or "sh" in l:
182
  ext = ".sh"
 
188
 
189
  def write_docx_from_text(text: str, out_dir: str="/tmp") -> str:
190
  doc = Document()
 
191
  for para in [p.strip() for p in text.split("\n\n") if p.strip()]:
192
  doc.add_paragraph(para)
193
  uid = uuid.uuid4().hex[:10]
 
196
  return filename
197
 
198
  def write_excel_from_tables(maybe_table_text: str, out_dir: str="/tmp") -> str:
 
 
 
 
199
  uid = uuid.uuid4().hex[:10]
200
  filename = os.path.join(out_dir, f"generated_excel_{uid}.xlsx")
201
  try:
 
 
202
  try:
203
  parsed = json.loads(maybe_table_text)
 
204
  if isinstance(parsed, list):
205
  df = pd.DataFrame(parsed)
206
  elif isinstance(parsed, dict):
 
207
  df = pd.DataFrame([parsed])
208
  else:
209
  df = pd.DataFrame({"content":[str(maybe_table_text)]})
210
  except Exception:
 
211
  if "," in maybe_table_text or "\t" in maybe_table_text:
212
  from io import StringIO
213
  df = pd.read_csv(StringIO(maybe_table_text))
 
217
  return filename
218
  except Exception as e:
219
  log.error(f"Excel creation failed: {e}")
 
220
  return write_docx_from_text(f"Failed to create excel. Error: {e}\n\nOriginal:\n{maybe_table_text}", out_dir=out_dir)
221
 
222
  def write_pdf_from_text(text: str, out_dir: str="/tmp") -> str:
 
233
  return filename
234
  except Exception as e:
235
  log.error(f"PDF creation failed: {e}")
 
236
  return write_docx_from_text(f"Failed to create PDF. Error: {e}\n\nOriginal:\n{text}", out_dir=out_dir)
237
 
238
  def build_repo_zip(files_map: Dict[str,str], repo_name: str="generated_app", out_dir: str="/tmp") -> str:
 
 
 
 
239
  uid = uuid.uuid4().hex[:8]
240
  repo_dir = os.path.join(out_dir, f"{repo_name}_{uid}")
241
  os.makedirs(repo_dir, exist_ok=True)
 
245
  if isinstance(content, str) and os.path.exists(content):
246
  shutil.copyfile(content, dest)
247
  else:
 
248
  with open(dest, "w", encoding="utf-8") as fh:
249
  fh.write(str(content))
250
  zip_path = os.path.join(out_dir, f"{repo_name}_{uid}.zip")
 
256
  zf.write(full, arc)
257
  return zip_path
258
 
259
+ # --- Node functions ---
 
 
260
  def run_triage_agent(state: AgentState):
261
  log.info("--- triage ---")
262
  prompt = f"Analyze the user input. Is it a simple conversational greeting or a task? Respond with 'greeting' or 'task'.\n\nUser Input: \"{state.get('userInput','')}\""
263
  response = llm.invoke(prompt)
264
+ content = getattr(response, "content", "") or ""
265
+ if 'greeting' in content.lower():
266
  log.info("Triage result: Simple Greeting.")
267
  return {"draftResponse": "Hello! How can I help you today?", "execution_path": ["Triage Agent"], "status_update": "Responding to greeting."}
268
  else:
 
277
  f"User Request: \"{state.get('userInput','')}\". Respond in JSON with keys: 'plan' (list of strings), 'estimated_llm_calls_per_loop' (integer)."
278
  )
279
  response = llm.invoke(prompt)
280
+ plan_data = parse_json_from_llm(getattr(response, "content", "") or "")
281
  if not plan_data:
282
  return {"pmPlan": {"error": "Failed to create a valid plan."}, "execution_path": path, "status_update": "Error: Could not create a plan."}
283
  calls_per_loop = plan_data.get('estimated_llm_calls_per_loop', 3)
 
311
  path = ensure_list(state, 'execution_path') + ["Intent Agent"]
312
  prompt = (f"Refine the user's request into a clear, actionable 'core objective prompt'.\n\nRelevant Memory:\n{state.get('retrievedMemory')}\n\nUser Request: \"{state.get('userInput','')}\"\n\nCore Objective:")
313
  response = llm.invoke(prompt)
314
+ core_obj = getattr(response, "content", "") or ""
315
  detection = detect_requested_output_types(core_obj or state.get('userInput',''))
316
  extras = {}
317
  if detection.get('requires_artifact'):
 
331
  f"Respond in JSON with keys: 'plan_steps' (list), 'experiment_needed' (bool), 'experiment_type' (optional string), and 'experiment_goal' (str if needed)."
332
  )
333
  response = llm.invoke(prompt)
334
+ plan = parse_json_from_llm(getattr(response, "content", "") or "")
335
  if not plan:
336
  log.warning("PM Agent did not produce JSON — applying heuristic fallback.")
337
  plan = {"plan_steps": ["Analyze files", "Create notebook if requested", "Synthesize answers"], "experiment_needed": False}
338
+ # normalize experiment type
339
+ exp_type_raw = plan.get('experiment_type') or ""
340
+ plan_goal = plan.get('experiment_goal') or state.get('userInput','') or state.get('coreObjectivePrompt','')
341
+ normalized = normalize_experiment_type(exp_type_raw, plan_goal)
342
+ plan['experiment_type'] = normalized
343
+ if plan.get('experiment_needed') and not plan.get('experiment_goal'):
344
+ plan['experiment_goal'] = plan_goal
 
 
 
345
  log.info(f"Generated Plan: Experiment Needed = {plan.get('experiment_needed', False)}, Type = {plan.get('experiment_type')}")
346
  return {"pmPlan": plan, "execution_path": path, "rework_cycles": current_cycles, "status_update": "Breaking down the objective into a detailed plan..."}
347
 
348
+ def _extract_code_blocks(text: str, lang_hint: Optional[str]=None) -> List[str]:
349
+ # prefer specific language fences, fallback to generic fenced blocks
350
+ if lang_hint and "python" in (lang_hint or "").lower():
351
+ blocks = re.findall(r"```python\s*(.*?)\s*```", text, re.DOTALL)
352
+ if blocks:
353
+ return blocks
354
+ blocks = re.findall(r"```(?:\w+)?\s*(.*?)\s*```", text, re.DOTALL)
355
+ return blocks
356
 
357
  def run_experimenter_agent(state: AgentState):
358
  log.info("--- 🔬 Running Experimenter Agent ---")
 
360
  pm = state.get('pmPlan', {}) or {}
361
  if not pm.get('experiment_needed'):
362
  return {"experimentCode": None, "experimentResults": None, "execution_path": path, "status_update": "Proceeding without a code experiment."}
363
+ exp_type = normalize_experiment_type(pm.get('experiment_type'), pm.get('experiment_goal',''))
364
  goal = pm.get('experiment_goal', 'No goal specified.')
365
  response = llm.invoke(
366
  f"Produce content for artifact type '{exp_type}' to achieve: {goal}\n"
367
+ "Return runnable code in fenced code blocks where appropriate, and explanatory text otherwise."
368
  )
369
+ llm_text = getattr(response, "content", "") or ""
370
  out_dir = "/tmp"
371
  results = {"success": False, "paths": {}, "stderr": "", "stdout": ""}
372
  try:
373
  if exp_type == 'notebook':
374
  nb_path = write_notebook_from_text(llm_text, out_dir=out_dir)
375
  results.update({"success": True, "paths": {"notebook": sanitize_path(nb_path)}})
376
+ return {"experimentCode": None, "experimentResults": results, "execution_path": path, "status_update": f"Notebook generated at {nb_path}"}
377
  elif exp_type == 'excel':
378
  excel_path = write_excel_from_tables(llm_text, out_dir=out_dir)
379
  results.update({"success": True, "paths": {"excel": sanitize_path(excel_path)}})
 
387
  results.update({"success": True, "paths": {"pdf": sanitize_path(pdf_path)}})
388
  return {"experimentCode": None, "experimentResults": results, "execution_path": path, "status_update": f"PDF generated at {pdf_path}"}
389
  elif exp_type == 'script':
 
390
  lang_hint = pm.get('experiment_language') or ("python" if ".py" in goal.lower() else None)
391
+ code_blocks = _extract_code_blocks(llm_text, lang_hint)
 
392
  if not code_blocks:
 
393
  code_text = llm_text
394
  else:
395
  code_text = "\n\n# === BLOCK ===\n\n".join(code_blocks)
396
  script_path = write_script(code_text, language_hint=lang_hint, out_dir=out_dir)
 
397
  exec_results = {}
398
  if script_path.endswith(".py"):
399
+ try:
400
+ exec_results = execute_python_code(open(script_path,"r",encoding="utf-8").read())
401
+ except Exception as e:
402
+ exec_results = {"stdout":"","stderr":str(e),"success":False}
403
  results.update({"success": True, "paths": {"script": sanitize_path(script_path)}, "stdout": exec_results.get("stdout",""), "stderr": exec_results.get("stderr","")})
404
  return {"experimentCode": code_text, "experimentResults": results, "execution_path": path, "status_update": f"Script generated at {script_path}"}
405
  elif exp_type == 'repo':
 
 
406
  repo_files = {}
 
407
  readme = (llm_text[:1000] + "\n\n") if llm_text else "Generated repo"
408
  repo_files["README.md"] = readme
 
409
  nb_path = write_notebook_from_text(llm_text, out_dir=out_dir)
410
  repo_files["analysis.ipynb"] = nb_path
 
411
  reqs = "nbformat\npandas\nopenpyxl\npython-docx\nreportlab"
412
  repo_files["requirements.txt"] = reqs
413
  zip_path = build_repo_zip(repo_files, repo_name="generated_app", out_dir=out_dir)
414
  results.update({"success": True, "paths": {"repo_zip": sanitize_path(zip_path)}})
415
  return {"experimentCode": None, "experimentResults": results, "execution_path": path, "status_update": f"Repository ZIP created at {zip_path}"}
416
  else:
417
+ # safe fallback: write docx
418
  fallback = write_docx_from_text(llm_text, out_dir=out_dir)
419
  results.update({"success": True, "paths": {"docx": sanitize_path(fallback)}})
420
  return {"experimentCode": None, "experimentResults": results, "execution_path": path, "status_update": f"Fallback DOCX generated at {fallback}"}
 
444
  f"Plan: {state.get('pmPlan', {}).get('plan_steps')}\n\n{results_summary}\n\nFinal Response:"
445
  )
446
  response = llm.invoke(prompt)
447
+ final_text = getattr(response, "content", "") or ""
448
  if artifact_message:
449
  final_text = final_text + "\n\n" + artifact_message
450
  return {"draftResponse": final_text, "execution_path": path, "status_update": "Putting together the final response..."}
 
455
  prompt = (f"Review the draft response based on the core objective. Respond ONLY with 'APPROVED' or provide concise feedback for rework.\n\n"
456
  f"Core Objective: {state.get('coreObjectivePrompt')}\n\nDraft: {state.get('draftResponse')}")
457
  response = llm.invoke(prompt)
458
+ content = getattr(response, "content", "") or ""
459
+ if "APPROVED" in content.upper():
460
  return {"approved": True, "qaFeedback": None, "execution_path": path, "status_update": "Response approved!"}
461
  else:
462
+ return {"approved": False, "qaFeedback": content or "No specific feedback.", "execution_path": path, "status_update": "Response needs improvement. Reworking..."}
463
 
464
  def run_archivist_agent(state: AgentState):
465
  log.info("--- 💾 Running Archivist Agent ---")
 
467
  summary_prompt = (f"Create a concise summary of this successful task for long-term memory.\n\n"
468
  f"Core Objective: {state.get('coreObjectivePrompt')}\n\nFinal Response: {state.get('draftResponse')}\n\nMemory Summary:")
469
  response = llm.invoke(summary_prompt)
470
+ memory_manager.add_to_memory(getattr(response,"content",""), {"objective": state.get('coreObjectivePrompt')})
471
  return {"execution_path": path, "status_update": "Saving key learnings for future reference..."}
472
 
473
  def run_disclaimer_agent(state: AgentState):
 
494
  pm = state.get('pmPlan', {}) or {}
495
  return "experimenter_agent" if pm.get('experiment_needed') else "synthesis_agent"
496
 
497
+ # --- Build graphs ---
498
  triage_workflow = StateGraph(AgentState)
499
  triage_workflow.add_node("triage", run_triage_agent)
500
  triage_workflow.set_entry_point("triage")