JatinAutonomousLabs commited on
Commit
16ed480
·
verified ·
1 Parent(s): 414b83b

Update graph.py

Browse files
Files changed (1) hide show
  1. graph.py +596 -528
graph.py CHANGED
@@ -1,4 +1,5 @@
1
- # graph.py - Domain-Agnostic Production Implementation
 
2
  import json
3
  import re
4
  import math
@@ -8,6 +9,7 @@ import shutil
8
  import zipfile
9
  import operator
10
  from typing import TypedDict, List, Dict, Optional, Annotated
 
11
  from langchain_openai import ChatOpenAI
12
  from langgraph.graph import StateGraph, END
13
  from memory_manager import memory_manager
@@ -18,13 +20,40 @@ from logging_config import setup_logging, get_logger
18
  import nbformat
19
  from nbformat.v4 import new_notebook, new_markdown_cell, new_code_cell
20
  import pandas as pd
21
- from docx import Document as DocxDocument
22
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
23
  from reportlab.lib.styles import getSampleStyleSheet
24
 
 
25
  OUT_DIR = os.environ.get("OUT_DIR", "/tmp")
26
  os.makedirs(OUT_DIR, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
 
 
 
 
28
  setup_logging()
29
  log = get_logger(__name__)
30
  INITIAL_MAX_REWORK_CYCLES = 3
@@ -32,6 +61,7 @@ GPT4O_INPUT_COST_PER_1K_TOKENS = 0.005
32
  GPT4O_OUTPUT_COST_PER_1K_TOKENS = 0.015
33
  AVG_TOKENS_PER_CALL = 2.0
34
 
 
35
  class AgentState(TypedDict):
36
  userInput: str
37
  chatHistory: List[str]
@@ -47,22 +77,13 @@ class AgentState(TypedDict):
47
  rework_cycles: int
48
  max_loops: int
49
  status_update: str
 
 
 
50
 
51
- llm = ChatOpenAI(model="gpt-4o", temperature=0.1, max_retries=3, request_timeout=60)
52
 
53
- def ensure_list(state, key):
54
- v = state.get(key) if state else None
55
- return [] if v is None else (list(v) if isinstance(v, (list, tuple)) else [v])
56
-
57
- def ensure_int(state, key, default=0):
58
- try:
59
- v = state.get(key) if state else None
60
- return int(v) if v is not None else default
61
- except Exception:
62
- return default
63
-
64
- def sanitize_path(path: str) -> str:
65
- return os.path.abspath(path)
66
 
67
  def parse_json_from_llm(llm_output: str) -> Optional[dict]:
68
  try:
@@ -82,375 +103,149 @@ def parse_json_from_llm(llm_output: str) -> Optional[dict]:
82
  log.error(f"JSON parsing failed: {e}")
83
  return None
84
 
85
- KNOWN_ARTIFACT_TYPES = {"notebook", "excel", "word", "pdf", "repo", "script"}
 
86
 
87
  def detect_requested_output_types(text: str) -> Dict:
88
- """Detect artifact type from user request - domain agnostic."""
89
  if not text:
90
- return {"requires_artifact": False, "artifact_type": None}
91
  t = text.lower()
92
-
93
- # Repository indicators
94
- if any(k in t for k in ["repo", "repository", "application", "app", "codebase", "project", "package"]):
95
- return {"requires_artifact": True, "artifact_type": "repo"}
96
-
97
- # Notebook indicators
98
- if any(k in t for k in ["jupyter", "notebook", "ipynb", "analysis", "exploration"]):
99
- return {"requires_artifact": True, "artifact_type": "notebook"}
100
-
101
- # Document indicators
102
- if any(k in t for k in ["document", "report", "documentation", ".docx", "word"]):
103
- return {"requires_artifact": True, "artifact_type": "word"}
104
-
105
- # Spreadsheet indicators
106
- if any(k in t for k in ["spreadsheet", "excel", ".xlsx", "table", "csv"]):
107
- return {"requires_artifact": True, "artifact_type": "excel"}
108
-
109
- # Script indicators
110
- if any(k in t for k in ["script", ".py", "automation", "tool"]):
111
- return {"requires_artifact": True, "artifact_type": "script"}
112
-
113
- return {"requires_artifact": False, "artifact_type": None}
114
 
115
  def normalize_experiment_type(exp_type: Optional[str], goal_text: str) -> str:
116
  if not exp_type:
117
  detection = detect_requested_output_types(goal_text or "")
118
  return detection.get("artifact_type") or "word"
119
  s = exp_type.strip().lower()
120
- return s if s in KNOWN_ARTIFACT_TYPES else "word"
121
-
122
- def generate_generic_sample_data(format_type: str, num_records: int = 5) -> str:
123
- """Generate domain-neutral sample data."""
124
- if format_type == 'csv':
125
- rows = [f"{i},Item {i},{100 * i},Category {(i % 3) + 1}" for i in range(1, num_records + 1)]
126
- return "id,name,value,category\n" + "\n".join(rows)
127
- else: # json
128
- items = [{"id": i, "name": f"Item {i}", "value": 100 * i, "category": f"Category {(i % 3) + 1}"} for i in range(1, num_records + 1)]
129
- return json.dumps({"data": items, "metadata": {"total": num_records, "generated": True}}, indent=2)
130
-
131
- def generate_default_readme(goal: str) -> str:
132
- return f"""# Generated Application
133
-
134
- ## Overview
135
- {goal}
136
-
137
- ## Quick Start
138
-
139
- ```bash
140
- # Install dependencies
141
- pip install -r requirements.txt
142
-
143
- # Run demonstration
144
- python main.py --demo
145
-
146
- # Run tests
147
- pytest tests/ -v
148
- ```
149
-
150
- ## Project Structure
151
-
152
- ```
153
- .
154
- ├── main.py # Entry point
155
- ├── src/ # Source modules
156
- ├── tests/ # Test suite
157
- ├── data/ # Sample data
158
- ├── docs/ # Documentation
159
- └── requirements.txt # Dependencies
160
- ```
161
-
162
- ## Testing
163
-
164
- ```bash
165
- pytest tests/ -v --cov=src
166
- ```
167
-
168
- ## Documentation
169
-
170
- See docs/ directory for detailed documentation.
171
- """
172
-
173
- def generate_default_requirements() -> str:
174
- return """python-dotenv>=1.0.0
175
- pytest>=7.0.0
176
- pytest-cov>=4.0.0
177
- """
178
-
179
- def generate_default_main() -> str:
180
- return """#!/usr/bin/env python3
181
- \"\"\"Main entry point.\"\"\"
182
- import argparse
183
- import logging
184
 
185
- logging.basicConfig(level=logging.INFO)
186
- logger = logging.getLogger(__name__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
- def run_demo():
189
- \"\"\"Run demonstration.\"\"\"
190
- logger.info("Running demo...")
191
- logger.info("Demo completed successfully")
192
- return 0
 
 
 
 
 
193
 
194
- def main():
195
- parser = argparse.ArgumentParser(description="Application")
196
- parser.add_argument('--demo', action='store_true', help='Run demo mode')
197
- parser.add_argument('--input', type=str, help='Input file')
198
-
199
- args = parser.parse_args()
200
-
201
  try:
202
- if args.demo:
203
- return run_demo()
204
- elif args.input:
205
- logger.info(f"Processing: {args.input}")
206
- return 0
207
- else:
208
- parser.print_help()
209
- return 1
 
 
 
 
 
 
 
 
210
  except Exception as e:
211
- logger.error(f"Error: {e}", exc_info=True)
212
- return 1
213
-
214
- if __name__ == "__main__":
215
- exit(main())
216
- """
217
-
218
- def generate_default_test() -> str:
219
- return """\"\"\"Test suite.\"\"\"
220
- import pytest
221
- from pathlib import Path
222
-
223
- def test_main_imports():
224
- \"\"\"Verify main module imports.\"\"\"
225
- import main
226
- assert hasattr(main, 'main')
227
-
228
- def test_sample_data_exists():
229
- \"\"\"Verify sample data present.\"\"\"
230
- data_path = Path("data/sample_data.csv")
231
- assert data_path.exists(), "Sample data file missing"
232
-
233
- def test_requirements_file():
234
- \"\"\"Verify requirements.txt exists.\"\"\"
235
- assert Path("requirements.txt").exists()
236
- """
237
-
238
- def generate_architecture_doc(goal: str) -> str:
239
- return f"""# Architecture Documentation
240
-
241
- ## System Overview
242
-
243
- **Purpose:** {goal}
244
-
245
- ## Design Principles
246
-
247
- 1. **Modularity**: Clear separation of concerns
248
- 2. **Testability**: Comprehensive test coverage
249
- 3. **Local-First**: No external dependencies for demo
250
- 4. **Documentation**: Self-documenting code
251
-
252
- ## Technology Stack
253
-
254
- - Python 3.8+
255
- - pytest for testing
256
- - Standard library focused
257
-
258
- ## Component Structure
259
-
260
- Components are organized by responsibility with clear interfaces between modules.
261
-
262
- ## Testing Strategy
263
-
264
- - Unit tests for individual functions
265
- - Integration tests for workflows
266
- - Sample data for validation
267
- """
268
-
269
- def generate_evaluation_guide() -> str:
270
- return """# Evaluation Guide
271
-
272
- ## Quick Evaluation (5 minutes)
273
-
274
- ### Step 1: Install
275
- ```bash
276
- pip install -r requirements.txt
277
- ```
278
-
279
- ### Step 2: Run Demo
280
- ```bash
281
- python main.py --demo
282
- ```
283
-
284
- ### Step 3: Run Tests
285
- ```bash
286
- pytest tests/ -v
287
- ```
288
-
289
- ## Quality Checklist
290
-
291
- - [ ] All dependencies install successfully
292
- - [ ] Demo runs without errors
293
- - [ ] All tests pass
294
- - [ ] Sample data is present
295
- - [ ] Documentation is complete
296
-
297
- ## Expected Results
298
-
299
- - Demo should complete successfully
300
- - Tests should show 100% pass rate
301
- - Output files should be generated
302
- """
303
-
304
- def generate_caveats_doc() -> str:
305
- return """# Caveats and Limitations
306
-
307
- ## General Limitations
308
-
309
- - Sample data for demonstration only
310
- - Not optimized for production scale
311
- - Manual review recommended for critical use
312
-
313
- ## Scalability Considerations
314
-
315
- Current implementation suitable for:
316
- - Development and testing
317
- - Small to medium datasets
318
- - Single-machine execution
319
-
320
- For production scale:
321
- - Consider distributed processing
322
- - Implement proper monitoring
323
- - Add comprehensive error handling
324
 
325
- ## Best Practices
326
-
327
- 1. Test on small dataset first
328
- 2. Implement proper logging
329
- 3. Regular quality audits
330
- 4. Maintain documentation
331
- """
332
-
333
- def validate_repository_quality(repo_files: Dict[str, str]) -> Dict:
334
- """Domain-agnostic quality validation."""
335
- score = 0
336
- issues = []
337
- warnings = []
338
- successes = []
339
-
340
- # Critical files (30 points)
341
- critical = {'README.md': 10, 'requirements.txt': 8, 'main.py': 12}
342
- for filename, points in critical.items():
343
- if any(filename in path for path in repo_files.keys()):
344
- score += points
345
- successes.append(f"✓ {filename}")
346
- else:
347
- issues.append(f"Missing {filename} (-{points})")
348
-
349
- # Documentation (20 points)
350
- doc_files = {'ARCHITECTURE.md': 7, 'EVALUATION_GUIDE.md': 7, 'CAVEATS.md': 6}
351
- for filename, points in doc_files.items():
352
- if any(filename in path for path in repo_files.keys()):
353
- score += points
354
- successes.append(f"✓ {filename}")
355
- else:
356
- warnings.append(f"Missing {filename}")
357
-
358
- # Sample data (15 points)
359
- has_data = any('data/' in f or 'sample' in f.lower() for f in repo_files.keys())
360
- if has_data:
361
- score += 15
362
- successes.append("✓ Sample data")
363
- else:
364
- issues.append("No sample data (-15)")
365
-
366
- # Tests (15 points)
367
- test_files = [f for f in repo_files.keys() if 'test' in f.lower()]
368
- if test_files:
369
- score += 15
370
- successes.append(f"✓ {len(test_files)} test(s)")
371
- else:
372
- issues.append("No tests (-15)")
373
-
374
- # Code quality (20 points)
375
- all_code = "\n".join(str(v) for v in repo_files.values())
376
-
377
- if 'def ' in all_code or 'class ' in all_code:
378
- score += 5
379
- if '"""' in all_code:
380
- score += 3
381
- if 'logging' in all_code.lower():
382
- score += 3
383
- if 'try:' in all_code and 'except' in all_code:
384
- score += 4
385
- if '->' in all_code:
386
- score += 3
387
- if '.env' in all_code.lower() or 'dotenv' in all_code.lower():
388
- score += 2
389
-
390
- # Anti-patterns (penalties)
391
- if '# TODO' in all_code or '#TODO' in all_code:
392
- penalty = min(10, all_code.count('# TODO') * 2)
393
- score -= penalty
394
- issues.append(f"TODO placeholders (-{penalty})")
395
-
396
- if 'example.com' in all_code.lower():
397
- score -= 5
398
- issues.append("Dummy URLs (-5)")
399
-
400
- if 'pass # implement' in all_code:
401
- score -= 5
402
- issues.append("Placeholder code (-5)")
403
-
404
- # Wrong file types
405
- doc_in_py = [f for f in repo_files.keys() if f.endswith('.py') and any(
406
- kw in f.lower() for kw in ['readme', 'doc', 'guide', 'architecture']
407
- )]
408
- if doc_in_py:
409
- score -= 5
410
- issues.append(f"Docs in .py files (-5)")
411
-
412
- final_score = max(0, min(100, score))
413
-
414
- if final_score >= 90:
415
- grade = "A"
416
- elif final_score >= 80:
417
- grade = "B"
418
- elif final_score >= 70:
419
- grade = "C"
420
- elif final_score >= 60:
421
- grade = "D"
422
- else:
423
- grade = "F"
424
-
425
- return {
426
- "score": final_score,
427
- "grade": grade,
428
- "issues": issues,
429
- "warnings": warnings,
430
- "successes": successes,
431
- "file_count": len(repo_files),
432
- "has_tests": bool(test_files),
433
- "has_data": has_data
434
- }
435
-
436
- def extract_files_from_llm(llm_text: str) -> Dict[str, str]:
437
- """Extract files from LLM response."""
438
- files = {}
439
-
440
- # Pattern: ### filename
441
- for match in re.finditer(r"###\s+([\w\/_\-\.]+)\s*\n```(?:\w+)?\s*\n(.*?)\n```", llm_text, re.DOTALL):
442
- files[match.group(1).strip()] = match.group(2).strip()
443
-
444
- # Pattern: **filename**
445
- for match in re.finditer(r"\*\*([\w\/_\-\.]+)\*\*\s*\n```(?:\w+)?\s*\n(.*?)\n```", llm_text, re.DOTALL):
446
- fp = match.group(1).strip()
447
- if fp not in files:
448
- files[fp] = match.group(2).strip()
449
-
450
- return files
451
 
452
  def build_repo_zip(files_map: Dict[str,str], repo_name: str="generated_app", out_dir: Optional[str]=None) -> str:
453
- """Build repository ZIP file."""
454
  out_dir = out_dir or OUT_DIR
455
  os.makedirs(out_dir, exist_ok=True)
456
  uid = uuid.uuid4().hex[:8]
@@ -460,8 +255,12 @@ def build_repo_zip(files_map: Dict[str,str], repo_name: str="generated_app", out
460
  for rel_path, content in files_map.items():
461
  dest = os.path.join(repo_dir, rel_path)
462
  os.makedirs(os.path.dirname(dest), exist_ok=True)
463
- with open(dest, "w", encoding="utf-8") as fh:
464
- fh.write(str(content))
 
 
 
 
465
 
466
  zip_path = os.path.join(out_dir, f"{repo_name}_{uid}.zip")
467
  with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
@@ -473,93 +272,91 @@ def build_repo_zip(files_map: Dict[str,str], repo_name: str="generated_app", out
473
 
474
  return zip_path
475
 
476
- def write_notebook_from_text(llm_text: str, out_dir: Optional[str]=None) -> str:
477
- out_dir = out_dir or OUT_DIR
478
- os.makedirs(out_dir, exist_ok=True)
479
- nb = new_notebook()
480
- nb['cells'] = [new_markdown_cell("# Generated Notebook\n\nContent generated by AI system.")]
481
- uid = uuid.uuid4().hex[:10]
482
- filename = os.path.join(out_dir, f"notebook_{uid}.ipynb")
483
- nbformat.write(nb, filename)
484
- return filename
485
-
486
- def write_docx_from_text(text: str, out_dir: Optional[str]=None) -> str:
487
- out_dir = out_dir or OUT_DIR
488
- os.makedirs(out_dir, exist_ok=True)
489
- doc = DocxDocument()
490
- for para in [p.strip() for p in text.split("\n\n") if p.strip()]:
491
- doc.add_paragraph(para)
492
- uid = uuid.uuid4().hex[:10]
493
- filename = os.path.join(out_dir, f"document_{uid}.docx")
494
- doc.save(filename)
495
- return filename
496
-
497
- # ===================================================================
498
- # AGENTS
499
- # ===================================================================
500
-
501
  def run_triage_agent(state: AgentState):
502
  log.info("--- TRIAGE ---")
503
- prompt = f"Is this a greeting or task request? '{state.get('userInput','')}' Respond: 'greeting' or 'task'"
504
  response = llm.invoke(prompt)
505
  content = getattr(response, "content", "") or ""
506
  if 'greeting' in content.lower():
507
- return {"draftResponse": "Hello! How can I assist you today?", "execution_path": ["Triage"], "status_update": "Greeting"}
508
- return {"execution_path": ["Triage"], "status_update": "Task"}
509
 
510
  def run_planner_agent(state: AgentState):
511
  log.info("--- PLANNER ---")
512
  path = ensure_list(state, 'execution_path') + ["Planner"]
513
- prompt = f"Create execution plan for: '{state.get('userInput','')}'. JSON with 'plan' (list), 'estimated_llm_calls_per_loop' (int)"
514
  response = llm.invoke(prompt)
515
  plan_data = parse_json_from_llm(getattr(response, "content", "") or "")
516
-
517
  if not plan_data:
518
- return {"pmPlan": {"error": "Planning failed"}, "execution_path": path}
519
 
520
  calls = plan_data.get('estimated_llm_calls_per_loop', 3)
521
- cost = calls * AVG_TOKENS_PER_CALL * (GPT4O_INPUT_COST_PER_1K_TOKENS + GPT4O_OUTPUT_COST_PER_1K_TOKENS) / 2
522
- plan_data['estimated_cost_usd'] = round(cost * (INITIAL_MAX_REWORK_CYCLES + 1), 2)
 
 
523
 
524
  detection = detect_requested_output_types(state.get('userInput',''))
525
  if detection.get('requires_artifact'):
526
- plan_data['experiment_needed'] = True
527
- plan_data['experiment_type'] = detection.get('artifact_type')
 
528
 
529
- return {"pmPlan": plan_data, "execution_path": path, "status_update": "Planned"}
530
 
531
  def run_memory_retrieval(state: AgentState):
532
  log.info("--- MEMORY ---")
533
  path = ensure_list(state, 'execution_path') + ["Memory"]
534
  mems = memory_manager.retrieve_relevant_memories(state.get('userInput',''))
535
- context = "\n".join([m.page_content for m in mems]) if mems else "No prior context"
536
- return {"retrievedMemory": context, "execution_path": path, "status_update": "Retrieved"}
537
 
538
  def run_intent_agent(state: AgentState):
539
  log.info("--- INTENT ---")
540
  path = ensure_list(state, 'execution_path') + ["Intent"]
541
- prompt = f"Clarify core objective.\n\nContext: {state.get('retrievedMemory', '')}\n\nRequest: {state.get('userInput','')}\n\nObjective:"
542
  response = llm.invoke(prompt)
543
- return {"coreObjectivePrompt": getattr(response, "content", ""), "execution_path": path, "status_update": "Clarified"}
 
544
 
545
  def run_pm_agent(state: AgentState):
546
  log.info("--- PM ---")
547
  current_cycles = ensure_int(state, 'rework_cycles', 0) + 1
 
548
  path = ensure_list(state, 'execution_path') + ["PM"]
549
 
550
- context = f"User Request: {state.get('userInput', '')}\n\nObjective: {state.get('coreObjectivePrompt', '')}"
 
 
 
 
 
551
  if state.get('qaFeedback'):
552
- context += f"\n\nQA Feedback: {state.get('qaFeedback')}"
 
 
 
553
 
554
- prompt = f"""{context}
 
 
555
 
556
- Create detailed execution plan. JSON format:
 
 
 
 
 
557
  {{
558
- "plan_steps": ["Specific step 1", "Specific step 2"],
559
  "experiment_needed": true/false,
560
- "experiment_type": "repo|notebook|word|excel|script",
561
- "experiment_goal": "What to create"
562
- }}"""
 
 
 
563
 
564
  response = llm.invoke(prompt)
565
  plan = parse_json_from_llm(getattr(response, "content", "") or "")
@@ -567,14 +364,26 @@ Create detailed execution plan. JSON format:
567
  if not plan:
568
  detection = detect_requested_output_types(state.get('userInput', ''))
569
  plan = {
570
- "plan_steps": ["Analyze requirements", "Implement solution", "Validate output"],
571
  "experiment_needed": detection.get('requires_artifact', False),
572
- "experiment_type": detection.get('artifact_type', 'word')
 
573
  }
574
 
575
- plan['experiment_type'] = normalize_experiment_type(plan.get('experiment_type'), plan.get('experiment_goal', ''))
 
576
 
577
- return {"pmPlan": plan, "execution_path": path, "rework_cycles": current_cycles, "status_update": "Planned"}
 
 
 
 
 
 
 
 
 
 
578
 
579
  def run_experimenter_agent(state: AgentState):
580
  log.info("--- EXPERIMENTER ---")
@@ -582,140 +391,400 @@ def run_experimenter_agent(state: AgentState):
582
  pm = state.get('pmPlan', {}) or {}
583
 
584
  if not pm.get('experiment_needed'):
585
- return {"experimentCode": None, "experimentResults": None, "execution_path": path, "status_update": "Skipped"}
586
 
587
  exp_type = normalize_experiment_type(pm.get('experiment_type'), pm.get('experiment_goal',''))
588
- goal = pm.get('experiment_goal', state.get('userInput', ''))
 
 
 
 
 
 
 
 
 
 
 
589
 
 
 
 
 
 
 
590
  if exp_type == 'repo':
591
- repo_prompt = f"""Create complete, functional repository.
592
-
593
- Goal: {goal}
594
-
595
- Requirements:
596
- - README.md with usage instructions
597
- - requirements.txt with dependencies
598
- - main.py entry point with --demo flag
599
- - tests/ directory with pytest tests
600
- - data/ directory with sample data
601
- - docs/ with ARCHITECTURE.md, EVALUATION_GUIDE.md, CAVEATS.md
602
- - .env.example for configuration
603
- - NO TODO comments or placeholders
604
- - NO example.com URLs
605
- - Documentation in .md files (not .py)
606
-
607
- Format each file:
608
- ### path/to/file.ext
609
- ```language
610
- [complete code]
611
- ```"""
612
-
613
- response = llm.invoke(repo_prompt)
614
- llm_text = getattr(response, "content", "") or ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
615
 
616
- repo_files = extract_files_from_llm(llm_text)
 
 
 
617
 
618
- # Ensure critical files
619
- if not any('README' in f.upper() for f in repo_files.keys()):
620
- repo_files['README.md'] = generate_default_readme(goal)
621
- if 'requirements.txt' not in repo_files:
622
- repo_files['requirements.txt'] = generate_default_requirements()
623
- if not any('main.py' in f for f in repo_files.keys()):
624
- repo_files['main.py'] = generate_default_main()
625
- if not any('test' in f.lower() for f in repo_files.keys()):
626
- repo_files['tests/test_main.py'] = generate_default_test()
627
- if not any('data/' in f or 'sample' in f.lower() for f in repo_files.keys()):
628
- repo_files['data/sample_data.csv'] = generate_generic_sample_data('csv')
629
- if not any('ARCHITECTURE' in f.upper() for f in repo_files.keys()):
630
- repo_files['docs/ARCHITECTURE.md'] = generate_architecture_doc(goal)
631
- if not any('EVALUATION' in f.upper() for f in repo_files.keys()):
632
- repo_files['docs/EVALUATION_GUIDE.md'] = generate_evaluation_guide()
633
- if not any('CAVEAT' in f.upper() for f in repo_files.keys()):
634
- repo_files['docs/CAVEATS.md'] = generate_caveats_doc()
635
 
636
- zip_path = build_repo_zip(repo_files)
637
- validation = validate_repository_quality(repo_files)
 
 
638
 
639
- results = {
640
- "success": True,
641
- "paths": {"repo_zip": sanitize_path(zip_path)},
642
- "files_created": len(repo_files),
643
- "validation": validation
644
- }
645
 
646
- return {"experimentCode": None, "experimentResults": results, "execution_path": path, "status_update": f"Repository ({validation['score']}/100)"}
647
-
648
- elif exp_type == 'notebook':
649
- nb_path = write_notebook_from_text("", out_dir=OUT_DIR)
650
- return {"experimentCode": None, "experimentResults": {"success": True, "paths": {"notebook": sanitize_path(nb_path)}}, "execution_path": path, "status_update": "Notebook created"}
 
 
 
 
 
 
 
 
 
 
 
651
 
652
- else:
653
- doc_path = write_docx_from_text("Generated content", out_dir=OUT_DIR)
654
- return {"experimentCode": None, "experimentResults": {"success": True, "paths": {"docx": sanitize_path(doc_path)}}, "execution_path": path, "status_update": "Document created"}
655
-
 
 
 
 
 
656
  def run_synthesis_agent(state: AgentState):
657
  log.info("--- SYNTHESIS ---")
658
- path = ensure_list(state, 'execution_path') + ["Synthesis"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
659
 
660
- exp_results = state.get('experimentResults', {})
661
- validation = exp_results.get('validation', {})
 
 
 
 
 
 
662
 
663
- context = f"Request: {state.get('userInput', '')}\n\nObjective: {state.get('coreObjectivePrompt', '')}"
664
- if validation:
665
- context += f"\n\nQuality: {validation.get('score', 0)}/100 (Grade {validation.get('grade', 'N/A')})"
666
 
667
- prompt = f"""{context}
 
668
 
669
- Create comprehensive final response explaining what was delivered and how to use it."""
670
-
671
- response = llm.invoke(prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
672
  final_text = getattr(response, "content", "") or ""
673
-
674
- # Add artifacts
675
- if exp_results and exp_results.get("paths"):
676
- artifacts = [f"- {k.title()}: {os.path.basename(v)}" for k, v in exp_results["paths"].items()]
677
- final_text += "\n\n**Generated Artifacts:**\n" + "\n".join(artifacts)
678
-
679
- # Add quality note
680
- if validation.get('score', 100) < 70:
681
- final_text += f"\n\n⚠️ Quality: {validation['score']}/100 - Review recommended"
682
-
683
- return {"draftResponse": final_text, "execution_path": path, "status_update": "Synthesized"}
684
 
685
  def run_qa_agent(state: AgentState):
686
  log.info("--- QA ---")
687
  path = ensure_list(state, 'execution_path') + ["QA"]
688
-
689
- exp_results = state.get('experimentResults', {})
690
- validation = exp_results.get('validation', {})
691
-
692
- if validation.get('score', 100) < 70:
693
- issues_str = ', '.join(validation.get('issues', [])[:3])
694
- feedback = f"Quality insufficient ({validation['score']}/100). Issues: {issues_str}"
695
- return {"approved": False, "qaFeedback": feedback, "execution_path": path, "status_update": "Failed QA"}
696
-
697
- prompt = f"Review response: {state.get('draftResponse', '')[:300]}... Respond 'APPROVED' or provide feedback"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
698
  response = llm.invoke(prompt)
699
  content = getattr(response, "content", "") or ""
700
-
701
  if "APPROVED" in content.upper():
702
  return {"approved": True, "qaFeedback": None, "execution_path": path, "status_update": "Approved"}
703
- return {"approved": False, "qaFeedback": content, "execution_path": path, "status_update": "Revision needed"}
 
 
 
 
 
704
 
705
  def run_archivist_agent(state: AgentState):
706
  log.info("--- ARCHIVIST ---")
707
  path = ensure_list(state, 'execution_path') + ["Archivist"]
708
- summary = state.get('coreObjectivePrompt', '')[:200]
709
- memory_manager.add_to_memory(summary, {"objective": state.get('coreObjectivePrompt')})
710
- return {"execution_path": path, "status_update": "Archived"}
 
 
 
711
 
712
  def run_disclaimer_agent(state: AgentState):
713
  log.warning("--- DISCLAIMER ---")
714
  path = ensure_list(state, 'execution_path') + ["Disclaimer"]
715
- disclaimer = "**Budget limit reached. Response may be incomplete.**\n\n"
716
- return {"draftResponse": disclaimer + state.get('draftResponse', ""), "execution_path": path, "status_update": "Limited"}
 
 
 
 
717
 
718
  def should_continue(state: AgentState):
 
 
 
719
  if state.get("approved"):
720
  return "archivist_agent"
721
  if ensure_int(state, "rework_cycles", 0) > ensure_int(state, "max_loops", 0):
@@ -726,10 +795,7 @@ def should_run_experiment(state: AgentState):
726
  pm = state.get('pmPlan', {}) or {}
727
  return "experimenter_agent" if pm.get('experiment_needed') else "synthesis_agent"
728
 
729
- # ===================================================================
730
- # WORKFLOW GRAPHS
731
- # ===================================================================
732
-
733
  triage_workflow = StateGraph(AgentState)
734
  triage_workflow.add_node("triage", run_triage_agent)
735
  triage_workflow.set_entry_point("triage")
@@ -762,9 +828,11 @@ main_workflow.add_edge("disclaimer_agent", END)
762
 
763
  main_workflow.add_conditional_edges("pm_agent", should_run_experiment)
764
  main_workflow.add_conditional_edges("qa_agent", should_continue, {
765
- "archivist_agent": "archivist_agent",
766
- "pm_agent": "pm_agent",
767
- "disclaimer_agent": "disclaimer_agent"
768
  })
769
 
770
- main_app = main_workflow.compile()
 
 
 
1
+ # graph.py - Enhanced with better loop control and cost tracking
2
+
3
  import json
4
  import re
5
  import math
 
9
  import zipfile
10
  import operator
11
  from typing import TypedDict, List, Dict, Optional, Annotated
12
+ from datetime import datetime
13
  from langchain_openai import ChatOpenAI
14
  from langgraph.graph import StateGraph, END
15
  from memory_manager import memory_manager
 
20
  import nbformat
21
  from nbformat.v4 import new_notebook, new_markdown_cell, new_code_cell
22
  import pandas as pd
23
+ from docx import Document
24
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
25
  from reportlab.lib.styles import getSampleStyleSheet
26
 
27
+ # --- Configuration ---
28
  OUT_DIR = os.environ.get("OUT_DIR", "/tmp")
29
  os.makedirs(OUT_DIR, exist_ok=True)
30
+ EXPORTS_DIR = os.path.join(OUT_DIR, "exports")
31
+ os.makedirs(EXPORTS_DIR, exist_ok=True)
32
+
33
+ # --- Helpers ---
34
+ def ensure_list(state, key):
35
+ v = state.get(key) if state else None
36
+ if v is None:
37
+ return []
38
+ if isinstance(v, list):
39
+ return v
40
+ if isinstance(v, tuple):
41
+ return list(v)
42
+ return [v]
43
+
44
+ def ensure_int(state, key, default=0):
45
+ try:
46
+ v = state.get(key) if state else None
47
+ if v is None:
48
+ return default
49
+ return int(v)
50
+ except Exception:
51
+ return default
52
 
53
+ def sanitize_path(path: str) -> str:
54
+ return os.path.abspath(path)
55
+
56
+ # --- Setup ---
57
  setup_logging()
58
  log = get_logger(__name__)
59
  INITIAL_MAX_REWORK_CYCLES = 3
 
61
  GPT4O_OUTPUT_COST_PER_1K_TOKENS = 0.015
62
  AVG_TOKENS_PER_CALL = 2.0
63
 
64
+ # --- State ---
65
  class AgentState(TypedDict):
66
  userInput: str
67
  chatHistory: List[str]
 
77
  rework_cycles: int
78
  max_loops: int
79
  status_update: str
80
+ # NEW: For real-time cost tracking
81
+ current_cost: float
82
+ budget_exceeded: bool
83
 
 
84
 
85
+ # --- LLM ---
86
+ llm = ChatOpenAI(model="gpt-4o", temperature=0.1, max_retries=3, request_timeout=60)
 
 
 
 
 
 
 
 
 
 
 
87
 
88
  def parse_json_from_llm(llm_output: str) -> Optional[dict]:
89
  try:
 
103
  log.error(f"JSON parsing failed: {e}")
104
  return None
105
 
106
+ # --- Artifact detection ---
107
+ KNOWN_ARTIFACT_TYPES = {"notebook","excel","word","pdf","image","repo","script"}
108
 
109
  def detect_requested_output_types(text: str) -> Dict:
 
110
  if not text:
111
+ return {"requires_artifact": False, "artifact_type": None, "artifact_hint": None}
112
  t = text.lower()
113
+ if any(k in t for k in ["jupyter notebook", "jupyter", "notebook", "ipynb"]):
114
+ return {"requires_artifact": True, "artifact_type": "notebook", "artifact_hint": "jupyter notebook"}
115
+ if any(k in t for k in ["excel", ".xlsx", "spreadsheet", "csv"]):
116
+ return {"requires_artifact": True, "artifact_type": "excel", "artifact_hint": "Excel file"}
117
+ if any(k in t for k in ["word document", ".docx", "docx"]):
118
+ return {"requires_artifact": True, "artifact_type": "word", "artifact_hint": "Word document"}
119
+ if any(k in t for k in ["pdf", "pdf file"]):
120
+ return {"requires_artifact": True, "artifact_type": "pdf", "artifact_hint": "PDF document"}
121
+ if any(k in t for k in ["repo", "repository", "app repo", "backend", "codebase"]):
122
+ return {"requires_artifact": True, "artifact_type": "repo", "artifact_hint": "application repository"}
123
+ if any(k in t for k in [".py", "python script", "script"]):
124
+ return {"requires_artifact": True, "artifact_type": "script", "artifact_hint": "Python script"}
125
+ return {"requires_artifact": False, "artifact_type": None, "artifact_hint": None}
 
 
 
 
 
 
 
 
 
126
 
127
  def normalize_experiment_type(exp_type: Optional[str], goal_text: str) -> str:
128
  if not exp_type:
129
  detection = detect_requested_output_types(goal_text or "")
130
  return detection.get("artifact_type") or "word"
131
  s = exp_type.strip().lower()
132
+ if s in KNOWN_ARTIFACT_TYPES:
133
+ return s
134
+ if "notebook" in s or "ipynb" in s:
135
+ return "notebook"
136
+ if "excel" in s or "xlsx" in s:
137
+ return "excel"
138
+ if "word" in s or "docx" in s:
139
+ return "word"
140
+ if "pdf" in s:
141
+ return "pdf"
142
+ if "repo" in s or "repository" in s or "backend" in s:
143
+ return "repo"
144
+ if "script" in s or "python" in s:
145
+ return "script"
146
+ detection = detect_requested_output_types(goal_text or "")
147
+ return detection.get("artifact_type") or "word"
148
+
149
+ # --- Artifact builders ---
150
+ def write_notebook_from_text(llm_text: str, out_dir: Optional[str]=None) -> str:
151
+ out_dir = out_dir or OUT_DIR
152
+ os.makedirs(out_dir, exist_ok=True)
153
+ code_blocks = re.findall(r"```python\s*(.*?)\s*```", llm_text, re.DOTALL)
154
+ if not code_blocks:
155
+ code_blocks = re.findall(r"```\s*(.*?)\s*```", llm_text, re.DOTALL)
156
+ md_parts = re.split(r"```(?:python)?\s*.*?\s*```", llm_text, flags=re.DOTALL)
157
+ nb = new_notebook()
158
+ cells = []
159
+ max_len = max(len(md_parts), len(code_blocks))
160
+ for i in range(max_len):
161
+ if i < len(md_parts) and md_parts[i].strip():
162
+ cells.append(new_markdown_cell(md_parts[i].strip()))
163
+ if i < len(code_blocks) and code_blocks[i].strip():
164
+ cells.append(new_code_cell(code_blocks[i].strip()))
165
+ if not cells:
166
+ cells = [new_markdown_cell("# Notebook\n\nNo content generated.")]
167
+ nb['cells'] = cells
168
+ uid = uuid.uuid4().hex[:10]
169
+ filename = os.path.join(out_dir, f"generated_notebook_{uid}.ipynb")
170
+ nbformat.write(nb, filename)
171
+ return filename
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
+ def write_script(code_text: str, language_hint: Optional[str]=None, out_dir: Optional[str]=None) -> str:
174
+ out_dir = out_dir or OUT_DIR
175
+ os.makedirs(out_dir, exist_ok=True)
176
+ ext = ".txt"
177
+ if language_hint:
178
+ l = language_hint.lower()
179
+ if "python" in l:
180
+ ext = ".py"
181
+ elif "r" in l:
182
+ ext = ".R"
183
+ elif "java" in l:
184
+ ext = ".java"
185
+ elif "javascript" in l:
186
+ ext = ".js"
187
+ uid = uuid.uuid4().hex[:10]
188
+ filename = os.path.join(out_dir, f"generated_script_{uid}{ext}")
189
+ with open(filename, "w", encoding="utf-8") as f:
190
+ f.write(code_text)
191
+ return filename
192
 
193
+ def write_docx_from_text(text: str, out_dir: Optional[str]=None) -> str:
194
+ out_dir = out_dir or OUT_DIR
195
+ os.makedirs(out_dir, exist_ok=True)
196
+ doc = Document()
197
+ for para in [p.strip() for p in text.split("\n\n") if p.strip()]:
198
+ doc.add_paragraph(para)
199
+ uid = uuid.uuid4().hex[:10]
200
+ filename = os.path.join(out_dir, f"generated_doc_{uid}.docx")
201
+ doc.save(filename)
202
+ return filename
203
 
204
+ def write_excel_from_tables(maybe_table_text: str, out_dir: Optional[str]=None) -> str:
205
+ out_dir = out_dir or OUT_DIR
206
+ os.makedirs(out_dir, exist_ok=True)
207
+ uid = uuid.uuid4().hex[:10]
208
+ filename = os.path.join(out_dir, f"generated_excel_{uid}.xlsx")
 
 
209
  try:
210
+ try:
211
+ parsed = json.loads(maybe_table_text)
212
+ if isinstance(parsed, list):
213
+ df = pd.DataFrame(parsed)
214
+ elif isinstance(parsed, dict):
215
+ df = pd.DataFrame([parsed])
216
+ else:
217
+ df = pd.DataFrame({"content":[str(maybe_table_text)]})
218
+ except Exception:
219
+ if "," in maybe_table_text:
220
+ from io import StringIO
221
+ df = pd.read_csv(StringIO(maybe_table_text))
222
+ else:
223
+ df = pd.DataFrame({"content":[maybe_table_text]})
224
+ df.to_excel(filename, index=False, engine="openpyxl")
225
+ return filename
226
  except Exception as e:
227
+ log.error(f"Excel creation failed: {e}")
228
+ return write_docx_from_text(f"Excel error: {e}\n\n{maybe_table_text}", out_dir=out_dir)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
 
230
+ def write_pdf_from_text(text: str, out_dir: Optional[str]=None) -> str:
231
+ out_dir = out_dir or OUT_DIR
232
+ os.makedirs(out_dir, exist_ok=True)
233
+ uid = uuid.uuid4().hex[:10]
234
+ filename = os.path.join(out_dir, f"generated_doc_{uid}.pdf")
235
+ try:
236
+ doc = SimpleDocTemplate(filename)
237
+ styles = getSampleStyleSheet()
238
+ flowables = []
239
+ for para in [p.strip() for p in text.split("\n\n") if p.strip()]:
240
+ flowables.append(Paragraph(para.replace("\n","<br/>"), styles["Normal"]))
241
+ flowables.append(Spacer(1, 8))
242
+ doc.build(flowables)
243
+ return filename
244
+ except Exception as e:
245
+ log.error(f"PDF creation failed: {e}")
246
+ return write_docx_from_text(f"PDF error: {e}\n\n{text}", out_dir=out_dir)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
 
248
  def build_repo_zip(files_map: Dict[str,str], repo_name: str="generated_app", out_dir: Optional[str]=None) -> str:
 
249
  out_dir = out_dir or OUT_DIR
250
  os.makedirs(out_dir, exist_ok=True)
251
  uid = uuid.uuid4().hex[:8]
 
255
  for rel_path, content in files_map.items():
256
  dest = os.path.join(repo_dir, rel_path)
257
  os.makedirs(os.path.dirname(dest), exist_ok=True)
258
+
259
+ if isinstance(content, str) and os.path.exists(content):
260
+ shutil.copyfile(content, dest)
261
+ else:
262
+ with open(dest, "w", encoding="utf-8") as fh:
263
+ fh.write(str(content))
264
 
265
  zip_path = os.path.join(out_dir, f"{repo_name}_{uid}.zip")
266
  with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
 
272
 
273
  return zip_path
274
 
275
+ # --- Nodes ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  def run_triage_agent(state: AgentState):
277
  log.info("--- TRIAGE ---")
278
+ prompt = f"Is this a greeting or a task? '{state.get('userInput','')}' Reply: 'greeting' or 'task'"
279
  response = llm.invoke(prompt)
280
  content = getattr(response, "content", "") or ""
281
  if 'greeting' in content.lower():
282
+ return {"draftResponse": "Hello! How can I help?", "execution_path": ["Triage"], "status_update": "Greeting"}
283
+ return {"execution_path": ["Triage"], "status_update": "Task detected"}
284
 
285
  def run_planner_agent(state: AgentState):
286
  log.info("--- PLANNER ---")
287
  path = ensure_list(state, 'execution_path') + ["Planner"]
288
+ prompt = f"Create a plan for: '{state.get('userInput','')}'. JSON with 'plan' (list), 'estimated_llm_calls_per_loop' (int)"
289
  response = llm.invoke(prompt)
290
  plan_data = parse_json_from_llm(getattr(response, "content", "") or "")
 
291
  if not plan_data:
292
+ return {"pmPlan": {"error": "Planning failed"}, "execution_path": path, "status_update": "Error"}
293
 
294
  calls = plan_data.get('estimated_llm_calls_per_loop', 3)
295
+ cost_per_loop = (calls * AVG_TOKENS_PER_CALL) * ((GPT4O_INPUT_COST_PER_1K_TOKENS + GPT4O_OUTPUT_COST_PER_1K_TOKENS) / 2)
296
+ plan_data['max_loops_initial'] = INITIAL_MAX_REWORK_CYCLES
297
+ plan_data['estimated_cost_usd'] = round(cost_per_loop * (INITIAL_MAX_REWORK_CYCLES + 1), 2)
298
+ plan_data['cost_per_loop_usd'] = max(0.01, round(cost_per_loop, 3))
299
 
300
  detection = detect_requested_output_types(state.get('userInput',''))
301
  if detection.get('requires_artifact'):
302
+ plan_data.setdefault('experiment_needed', True)
303
+ plan_data.setdefault('experiment_type', detection.get('artifact_type'))
304
+ plan_data.setdefault('experiment_goal', state.get('userInput',''))
305
 
306
+ return {"pmPlan": plan_data, "execution_path": path, "status_update": "Plan created"}
307
 
308
  def run_memory_retrieval(state: AgentState):
309
  log.info("--- MEMORY ---")
310
  path = ensure_list(state, 'execution_path') + ["Memory"]
311
  mems = memory_manager.retrieve_relevant_memories(state.get('userInput',''))
312
+ context = "\n".join([f"Memory: {m.page_content}" for m in mems]) if mems else "No memories"
313
+ return {"retrievedMemory": context, "execution_path": path, "status_update": "Memory retrieved"}
314
 
315
  def run_intent_agent(state: AgentState):
316
  log.info("--- INTENT ---")
317
  path = ensure_list(state, 'execution_path') + ["Intent"]
318
+ prompt = f"Refine into clear objective.\n\nMemory: {state.get('retrievedMemory')}\n\nRequest: {state.get('userInput','')}\n\nCore Objective:"
319
  response = llm.invoke(prompt)
320
+ core_obj = getattr(response, "content", "") or ""
321
+ return {"coreObjectivePrompt": core_obj, "execution_path": path, "status_update": "Objective clarified"}
322
 
323
  def run_pm_agent(state: AgentState):
324
  log.info("--- PM ---")
325
  current_cycles = ensure_int(state, 'rework_cycles', 0) + 1
326
+ max_loops_val = ensure_int(state, 'max_loops', 0)
327
  path = ensure_list(state, 'execution_path') + ["PM"]
328
 
329
+ context_parts = [
330
+ f"=== USER REQUEST ===\n{state.get('userInput', '')}",
331
+ f"\n=== OBJECTIVE ===\n{state.get('coreObjectivePrompt', '')}",
332
+ f"\n=== MEMORY ===\n{state.get('retrievedMemory', 'None')}",
333
+ ]
334
+
335
  if state.get('qaFeedback'):
336
+ context_parts.append(f"\n=== QA FEEDBACK (MUST FIX) ===\n{state.get('qaFeedback')}")
337
+ context_parts.append(f"\n=== PREVIOUS PLAN ===\n{json.dumps(state.get('pmPlan', {}).get('plan_steps', []), indent=2)}")
338
+
339
+ full_context = "\n".join(context_parts)
340
 
341
+ prompt = f"""Create DETAILED, EXECUTABLE plan.
342
+
343
+ {full_context}
344
 
345
+ Each step must be SPECIFIC and ACTIONABLE:
346
+ - State EXACTLY what will be created/analyzed
347
+ - Specify WHAT data/information will be used
348
+ - Define WHAT methods will be applied
349
+
350
+ JSON format:
351
  {{
352
+ "plan_steps": ["Specific step 1...", "Specific step 2..."],
353
  "experiment_needed": true/false,
354
+ "experiment_type": "notebook|script|excel|word|pdf|repo",
355
+ "experiment_goal": "Detailed artifact description",
356
+ "key_requirements": ["Critical requirements"]
357
+ }}
358
+
359
+ Be specific about using uploaded files, implementing algorithms, creating schemas."""
360
 
361
  response = llm.invoke(prompt)
362
  plan = parse_json_from_llm(getattr(response, "content", "") or "")
 
364
  if not plan:
365
  detection = detect_requested_output_types(state.get('userInput', ''))
366
  plan = {
367
+ "plan_steps": ["Analyze request", "Process information", "Create deliverable", "Review"],
368
  "experiment_needed": detection.get('requires_artifact', False),
369
+ "experiment_type": detection.get('artifact_type', 'word'),
370
+ "experiment_goal": state.get('coreObjectivePrompt', state.get('userInput', ''))
371
  }
372
 
373
+ exp_type = normalize_experiment_type(plan.get('experiment_type'), plan.get('experiment_goal',''))
374
+ plan['experiment_type'] = exp_type
375
 
376
+ if plan.get('experiment_needed') and not plan.get('experiment_goal'):
377
+ plan['experiment_goal'] = state.get('userInput','')
378
+
379
+ return {"pmPlan": plan, "execution_path": path, "rework_cycles": current_cycles, "status_update": f"Plan created ({len(plan.get('plan_steps', []))} steps)"}
380
+
381
+ def _extract_code_blocks(text: str, lang_hint: Optional[str]=None) -> List[str]:
382
+ if lang_hint and "python" in (lang_hint or "").lower():
383
+ blocks = re.findall(r"```python\s*(.*?)\s*```", text, re.DOTALL)
384
+ if blocks:
385
+ return blocks
386
+ return re.findall(r"```(?:\w+)?\s*(.*?)\s*```", text, re.DOTALL)
387
 
388
  def run_experimenter_agent(state: AgentState):
389
  log.info("--- EXPERIMENTER ---")
 
391
  pm = state.get('pmPlan', {}) or {}
392
 
393
  if not pm.get('experiment_needed'):
394
+ return {"experimentCode": None, "experimentResults": None, "execution_path": path, "status_update": "No experiment needed"}
395
 
396
  exp_type = normalize_experiment_type(pm.get('experiment_type'), pm.get('experiment_goal',''))
397
+ goal = pm.get('experiment_goal', 'No goal')
398
+
399
+ # BUILD RICH CONTEXT
400
+ context_parts = [
401
+ f"=== USER REQUEST ===\n{state.get('userInput', '')}",
402
+ f"\n=== OBJECTIVE ===\n{state.get('coreObjectivePrompt', '')}",
403
+ f"\n=== PLAN ===\n{json.dumps(pm.get('plan_steps', []), indent=2)}",
404
+ f"\n=== REQUIREMENTS ===\n{json.dumps(pm.get('key_requirements', []), indent=2)}",
405
+ ]
406
+
407
+ if state.get('retrievedMemory'):
408
+ context_parts.append(f"\n=== CONTEXT ===\n{state.get('retrievedMemory', '')}")
409
 
410
+ if state.get('qaFeedback'):
411
+ context_parts.append(f"\n=== FEEDBACK TO ADDRESS ===\n{state.get('qaFeedback', '')}")
412
+
413
+ full_context = "\n".join(context_parts)
414
+
415
+ # REPO REQUIRES SPECIAL HANDLING
416
  if exp_type == 'repo':
417
+ repo_prompt = f"""Create COMPLETE, PRODUCTION-READY application repository.
418
+
419
+ {full_context}
420
+
421
+ GOAL: {goal}
422
+
423
+ CRITICAL REQUIREMENTS:
424
+
425
+ 1. ACTUAL WORKING CODE - Not templates, not documentation, not examples. REAL production code.
426
+
427
+ 2. FILE STRUCTURE - Indicate each file clearly:
428
+ ### path/to/file.py
429
+ ```python
430
+ [Complete working code]
431
+ MUST INCLUDE:
432
+
433
+ Complete API clients with error handling, retries, rate limiting
434
+
435
+ Database schema with CREATE TABLE statements
436
+
437
+ Data processing with real transformation logic
438
+
439
+ Config management (.env handling)
440
+
441
+ requirements.txt with ALL dependencies
442
+
443
+ main.py entry point
444
+
445
+ Comprehensive README
446
+
447
+ CODE QUALITY:
448
+
449
+ Environment variables for secrets
450
+
451
+ Error handling and logging
452
+
453
+ Docstrings and comments
454
+
455
+ Real business logic based on request
456
+
457
+ RUNNABLE out of the box
458
+
459
+ SPECIFIC TO REQUEST:
460
+
461
+ Use EXACT APIs mentioned (e.g., CricAPI, SportsRadar)
462
+
463
+ Implement SPECIFIC algorithms (e.g., batting avg, strike rate)
464
+
465
+ Create EXACT database tables needed
466
+
467
+ Process SPECIFIC data formats
468
+
469
+ NO placeholders like "# TODO"
470
+ NO dummy data - implement REAL logic
471
+ NO documentation-style code - PRODUCTION code only
472
+
473
+ Format each file:
474
+
475
+ path/to/file.py
476
+ # Complete code here
477
+ Generate complete repository:"""
478
+
479
+ response = llm.invoke(repo_prompt)
480
+ llm_text = getattr(response, "content", "") or ""
481
+
482
+ # Parse files from response
483
+ repo_files = {}
484
+
485
+ # Extract with ### headers
486
+ file_pattern = r"###\s+([\w\/_\-\.]+)\s*\n```(?:\w+)?\s*\n(.*?)\n```"
487
+ matches = re.finditer(file_pattern, llm_text, re.DOTALL)
488
+
489
+ for match in matches:
490
+ filepath = match.group(1).strip()
491
+ content = match.group(2).strip()
492
+ repo_files[filepath] = content
493
+
494
+ # Fallback: extract code blocks
495
+ if not repo_files:
496
+ code_blocks = re.findall(r"```(?:python|sql)?\s*\n(.*?)\n```", llm_text, re.DOTALL)
497
+ if code_blocks:
498
+ for i, block in enumerate(code_blocks):
499
+ if len(block) > 50: # Skip tiny blocks
500
+ repo_files[f"module_{i}.py"] = block
501
+
502
+ # Add README if missing
503
+ if not any('README' in f.upper() for f in repo_files):
504
+ repo_files["README.md"] = f"""# Generated Application
505
+ Overview
506
+ {goal}
507
+
508
+ Files
509
+ {chr(10).join(f'- {f}' for f in sorted(repo_files.keys()))}
510
+
511
+ Setup
512
+ pip install -r requirements.txt
513
+
514
+ Copy .env.example to .env and configure
515
+
516
+ Run: python main.py
517
+ """
518
+
519
+ # Add requirements.txt
520
+ if "requirements.txt" not in repo_files:
521
+ all_code = " ".join(repo_files.values()).lower()
522
+ deps = []
523
+ if 'requests' in all_code: deps.append('requests')
524
+ if 'pandas' in all_code: deps.append('pandas')
525
+ if 'numpy' in all_code: deps.append('numpy')
526
+ if 'sqlalchemy' in all_code: deps.append('sqlalchemy')
527
+ if 'postgresql' in all_code or 'psycopg2' in all_code: deps.append('psycopg2-binary')
528
+ if 'flask' in all_code: deps.append('flask')
529
+ if 'fastapi' in all_code:
530
+ deps.append('fastapi')
531
+ deps.append('uvicorn')
532
+ if 'dotenv' in all_code: deps.append('python-dotenv')
533
+
534
+ repo_files["requirements.txt"] = "\n".join(deps) if deps else "# Dependencies"
535
+
536
+ # Add .env.example
537
+ if ".env.example" not in repo_files:
538
+ repo_files[".env.example"] = """# Configuration
539
+ API_KEY=your_key_here
540
+ DATABASE_URL=postgresql://user:pass@localhost/db
541
+ DEBUG=False
542
+ """
543
+
544
+ # Add main.py if missing
545
+ if not any('main.py' in f for f in repo_files):
546
+ repo_files["main.py"] = """#!/usr/bin/env python3
547
+ import os
548
+ from dotenv import load_dotenv
549
+
550
+ load_dotenv()
551
+
552
+ def main():
553
+ print("Application starting...")
554
+ # Add your logic here
555
+ pass
556
+
557
+ if name == "main":
558
+ main()
559
+ """
560
+
561
+ # Build zip
562
+ zip_path = build_repo_zip(repo_files, repo_name="generated_app", out_dir=OUT_DIR)
563
+
564
+ results = {
565
+ "success": True,
566
+ "paths": {"repo_zip": sanitize_path(zip_path)},
567
+ "files_created": len(repo_files),
568
+ "context_used": len(full_context)
569
+ }
570
+
571
+ return {
572
+ "experimentCode": None,
573
+ "experimentResults": results,
574
+ "execution_path": path,
575
+ "status_update": f"Repository created ({len(repo_files)} files)"
576
+ }
577
+
578
+ # OTHER ARTIFACT TYPES
579
+ enhanced_prompt = f"""Create HIGH-QUALITY {exp_type} artifact.
580
+ {full_context}
581
+
582
+ GOAL: {goal}
583
+
584
+ REQUIREMENTS:
585
+
586
+ Use ALL specific details from request
587
+
588
+ PRODUCTION-READY, COMPLETE content (NO placeholders)
589
+
590
+ ACTUAL data, REALISTIC examples, WORKING code
591
+
592
+ For notebooks: markdown + executable code + visualizations
593
+
594
+ For scripts: error handling + docs + real logic
595
+
596
+ For documents: substantive detailed content
597
+
598
+ Generate complete content for '{exp_type}' with proper code fences."""
599
+
600
+ response = llm.invoke(enhanced_prompt)
601
+ llm_text = getattr(response, "content", "") or ""
602
+ results = {"success": False, "paths": {}, "stderr": "", "stdout": "", "context_used": len(full_context)}
603
+
604
+ try:
605
+ if exp_type == 'notebook':
606
+ nb_path = write_notebook_from_text(llm_text, out_dir=OUT_DIR)
607
+ results.update({"success": True, "paths": {"notebook": sanitize_path(nb_path)}})
608
+ return {"experimentCode": None, "experimentResults": results, "execution_path": path, "status_update": "Notebook created"}
609
 
610
+ elif exp_type == 'excel':
611
+ excel_path = write_excel_from_tables(llm_text, out_dir=OUT_DIR)
612
+ results.update({"success": True, "paths": {"excel": sanitize_path(excel_path)}})
613
+ return {"experimentCode": None, "experimentResults": results, "execution_path": path, "status_update": "Excel created"}
614
 
615
+ elif exp_type == 'word':
616
+ docx_path = write_docx_from_text(llm_text, out_dir=OUT_DIR)
617
+ results.update({"success": True, "paths": {"docx": sanitize_path(docx_path)}})
618
+ return {"experimentCode": None, "experimentResults": results, "execution_path": path, "status_update": "DOCX created"}
 
 
 
 
 
 
 
 
 
 
 
 
 
619
 
620
+ elif exp_type == 'pdf':
621
+ pdf_path = write_pdf_from_text(llm_text, out_dir=OUT_DIR)
622
+ results.update({"success": True, "paths": {"pdf": sanitize_path(pdf_path)}})
623
+ return {"experimentCode": None, "experimentResults": results, "execution_path": path, "status_update": "PDF created"}
624
 
625
+ elif exp_type == 'script':
626
+ lang_hint = pm.get('experiment_language') or "python"
627
+ code_blocks = _extract_code_blocks(llm_text, lang_hint)
628
+ code_text = "\n\n# === BLOCK ===\n\n".join(code_blocks) if code_blocks else llm_text
 
 
629
 
630
+ script_path = write_script(code_text, language_hint=lang_hint, out_dir=OUT_DIR)
631
+ exec_results = {}
632
+
633
+ if script_path.endswith(".py"):
634
+ try:
635
+ exec_results = execute_python_code(code_text)
636
+ except Exception as e:
637
+ exec_results = {"stdout":"","stderr":str(e),"success":False}
638
+
639
+ results.update({
640
+ "success": True,
641
+ "paths": {"script": sanitize_path(script_path)},
642
+ "stdout": exec_results.get("stdout",""),
643
+ "stderr": exec_results.get("stderr","")
644
+ })
645
+ return {"experimentCode": code_text, "experimentResults": results, "execution_path": path, "status_update": "Script created"}
646
 
647
+ else:
648
+ fallback = write_docx_from_text(llm_text, out_dir=OUT_DIR)
649
+ results.update({"success": True, "paths": {"docx": sanitize_path(fallback)}})
650
+ return {"experimentCode": None, "experimentResults": results, "execution_path": path, "status_update": "Document created"}
651
+
652
+ except Exception as e:
653
+ log.error(f"Experimenter failed: {e}")
654
+ results.update({"success": False, "stderr": str(e)})
655
+ return {"experimentCode": None, "experimentResults": results, "execution_path": path, "status_update": "Error"}
656
  def run_synthesis_agent(state: AgentState):
657
  log.info("--- SYNTHESIS ---")
658
+ _state = state or {}
659
+ path = ensure_list(_state, 'execution_path') + ["Synthesis"]
660
+
661
+ exp_results = _state.get('experimentResults')
662
+ pm_plan = _state.get('pmPlan', {}) or {}
663
+
664
+ synthesis_context = [
665
+ f"=== USER REQUEST ===\n{_state.get('userInput', '')}",
666
+ f"\n=== OBJECTIVE ===\n{_state.get('coreObjectivePrompt', '')}",
667
+ f"\n=== PLAN ===\n{json.dumps(pm_plan.get('plan_steps', []), indent=2)}",
668
+ ]
669
+
670
+ artifact_details = []
671
+ artifact_message = ""
672
+
673
+ if exp_results and isinstance(exp_results, dict):
674
+ paths = exp_results.get("paths") or {}
675
 
676
+ if paths:
677
+ artifact_lines = []
678
+ for artifact_type, artifact_path in paths.items():
679
+ artifact_lines.append(f"- **{artifact_type.title()}**: `{os.path.basename(artifact_path)}`")
680
+ artifact_details.append(f"{artifact_type}: {artifact_path}")
681
+
682
+ artifact_message = "\n\n**Artifacts Generated:**\n" + "\n".join(artifact_lines)
683
+ synthesis_context.append(f"\n=== ARTIFACTS ===\n" + "\n".join(artifact_details))
684
 
685
+ if exp_results.get('stdout'):
686
+ synthesis_context.append(f"\n=== OUTPUT ===\n{exp_results.get('stdout', '')}")
 
687
 
688
+ if exp_results.get('stderr'):
689
+ synthesis_context.append(f"\n=== ERRORS ===\n{exp_results.get('stderr', '')}")
690
 
691
+ full_context = "\n".join(synthesis_context)
692
+
693
+ synthesis_prompt = f"""Create FINAL RESPONSE after executing user's request.
694
+ {full_context}
695
+
696
+ Create comprehensive response that:
697
+
698
+ Directly addresses original request
699
+
700
+ Explains what was accomplished and HOW
701
+
702
+ References specific artifacts and explains PURPOSE
703
+
704
+ Provides context on how to USE deliverables
705
+
706
+ Highlights KEY INSIGHTS
707
+
708
+ Suggests NEXT STEPS if relevant
709
+
710
+ Be SPECIFIC about what was created."""
711
+
712
+ response = llm.invoke(synthesis_prompt)
713
  final_text = getattr(response, "content", "") or ""
714
+
715
+ if artifact_message:
716
+ final_text = final_text + "\n\n---\n" + artifact_message
717
+
718
+ return {"draftResponse": final_text, "execution_path": path, "status_update": "Response synthesized"}
 
 
 
 
 
 
719
 
720
  def run_qa_agent(state: AgentState):
721
  log.info("--- QA ---")
722
  path = ensure_list(state, 'execution_path') + ["QA"]
723
+
724
+ qa_context = [
725
+ f"=== REQUEST ===\n{state.get('userInput', '')}",
726
+ f"\n=== OBJECTIVE ===\n{state.get('coreObjectivePrompt', '')}",
727
+ f"\n=== DRAFT ===\n{state.get('draftResponse', '')}",
728
+ ]
729
+
730
+ if state.get('experimentResults'):
731
+ qa_context.append(f"\n=== ARTIFACTS ===\n{json.dumps(state.get('experimentResults', {}).get('paths', {}), indent=2)}")
732
+
733
+ # MODIFIED PROMPT: Encourage convergence
734
+ prompt = f"""You are a QA reviewer. Review the draft response against the user's objective.
735
+ {chr(10).join(qa_context)}
736
+
737
+ Review Instructions:
738
+
739
+ Does the draft and its artifacts COMPLETELY satisfy ALL parts of the user's request?
740
+
741
+ Is the quality of the work high?
742
+
743
+ If this is a re-submission (rework cycle > 1), has the previous feedback been successfully addressed?
744
+
745
+ Response Format:
746
+
747
+ If the work is complete and high-quality, respond ONLY with the word 'APPROVED'.
748
+
749
+ Otherwise, provide SPECIFIC, ACTIONABLE, and NOVEL feedback on what must be changed. Do not repeat previous feedback if it has already been actioned.
750
+ """
751
+
752
  response = llm.invoke(prompt)
753
  content = getattr(response, "content", "") or ""
754
+
755
  if "APPROVED" in content.upper():
756
  return {"approved": True, "qaFeedback": None, "execution_path": path, "status_update": "Approved"}
757
+ else:
758
+ # Sanitize feedback to ensure it's a useful string
759
+ feedback = content.replace("APPROVED", "").strip()
760
+ if not feedback:
761
+ feedback = "General quality improvements required."
762
+ return {"approved": False, "qaFeedback": feedback, "execution_path": path, "status_update": "Needs improvement"}
763
 
764
  def run_archivist_agent(state: AgentState):
765
  log.info("--- ARCHIVIST ---")
766
  path = ensure_list(state, 'execution_path') + ["Archivist"]
767
+
768
+ summary_prompt = f"Summarize for memory.\n\nObjective: {state.get('coreObjectivePrompt')}\n\nResponse: {state.get('draftResponse')}\n\nSummary:"
769
+ response = llm.invoke(summary_prompt)
770
+ memory_manager.add_to_memory(getattr(response,"content",""), {"objective": state.get('coreObjectivePrompt')})
771
+
772
+ return {"execution_path": path, "status_update": "Saved to memory"}
773
 
774
  def run_disclaimer_agent(state: AgentState):
775
  log.warning("--- DISCLAIMER ---")
776
  path = ensure_list(state, 'execution_path') + ["Disclaimer"]
777
+
778
+ reason = "Budget limit reached." if state.get('budget_exceeded') else "Rework limit reached."
779
+ disclaimer = f"**DISCLAIMER: {reason} Draft may be incomplete.**\n\n---\n\n"
780
+ final_response = disclaimer + state.get('draftResponse', "No response")
781
+
782
+ return {"draftResponse": final_response, "execution_path": path, "status_update": reason}
783
 
784
  def should_continue(state: AgentState):
785
+ # NEW: Check for budget excess first
786
+ if state.get("budget_exceeded"):
787
+ return "disclaimer_agent"
788
  if state.get("approved"):
789
  return "archivist_agent"
790
  if ensure_int(state, "rework_cycles", 0) > ensure_int(state, "max_loops", 0):
 
795
  pm = state.get('pmPlan', {}) or {}
796
  return "experimenter_agent" if pm.get('experiment_needed') else "synthesis_agent"
797
 
798
+ #--- Build graphs ---
 
 
 
799
  triage_workflow = StateGraph(AgentState)
800
  triage_workflow.add_node("triage", run_triage_agent)
801
  triage_workflow.set_entry_point("triage")
 
828
 
829
  main_workflow.add_conditional_edges("pm_agent", should_run_experiment)
830
  main_workflow.add_conditional_edges("qa_agent", should_continue, {
831
+ "archivist_agent": "archivist_agent",
832
+ "pm_agent": "pm_agent",
833
+ "disclaimer_agent": "disclaimer_agent"
834
  })
835
 
836
+ main_app = main_workflow.compile()
837
+
838
+