Spaces:
Paused
Paused
Commit ·
906d397
1
Parent(s): a186460
Upload autonomous lab application
Browse files- SPOC_V1/.gitattributes +35 -0
- SPOC_V1/README.md +12 -0
- __pycache__/code_executor.cpython-311.pyc +0 -0
- __pycache__/graph.cpython-311.pyc +0 -0
- __pycache__/graph.cpython-39.pyc +0 -0
- __pycache__/logging_config.cpython-311.pyc +0 -0
- __pycache__/memory_manager.cpython-311.pyc +0 -0
- __pycache__/memory_manager.cpython-39.pyc +0 -0
- __pycache__/utils.cpython-311.pyc +0 -0
- app.py +98 -0
- app_gradio.py +227 -0
- code_executor.py +44 -0
- graph.py +275 -0
- logging_config.py +35 -0
- logs/ai_lab_run.log +0 -0
- memory/faiss.faiss +0 -0
- memory/faiss.pkl +3 -0
- memory_manager.py +55 -0
- outputs/last_run_flow.md +10 -0
- requirements.txt +15 -0
- static/script.js +152 -0
- static/style.css +26 -0
- templates/index.html +33 -0
- test_agent.py +35 -0
- utils.py +13 -0
SPOC_V1/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
SPOC_V1/README.md
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: SPOC V1
|
| 3 |
+
emoji: 📉
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: indigo
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 5.47.2
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
__pycache__/code_executor.cpython-311.pyc
ADDED
|
Binary file (2.2 kB). View file
|
|
|
__pycache__/graph.cpython-311.pyc
ADDED
|
Binary file (20.5 kB). View file
|
|
|
__pycache__/graph.cpython-39.pyc
ADDED
|
Binary file (6.13 kB). View file
|
|
|
__pycache__/logging_config.cpython-311.pyc
ADDED
|
Binary file (2.02 kB). View file
|
|
|
__pycache__/memory_manager.cpython-311.pyc
ADDED
|
Binary file (4.33 kB). View file
|
|
|
__pycache__/memory_manager.cpython-39.pyc
ADDED
|
Binary file (2.44 kB). View file
|
|
|
__pycache__/utils.cpython-311.pyc
ADDED
|
Binary file (990 Bytes). View file
|
|
|
app.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py (Updated with Triage Orchestration)
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import math
|
| 5 |
+
from flask import Flask, render_template, request, jsonify
|
| 6 |
+
from dotenv import load_dotenv
|
| 7 |
+
from graph import triage_app, planner_app, main_app # Import all three compiled apps
|
| 8 |
+
from utils import generate_mermaid_diagram
|
| 9 |
+
|
| 10 |
+
load_dotenv()
|
| 11 |
+
app = Flask(__name__)
|
| 12 |
+
|
| 13 |
+
# Create necessary directories on startup
|
| 14 |
+
os.makedirs("outputs", exist_ok=True)
|
| 15 |
+
os.makedirs("uploads", exist_ok=True)
|
| 16 |
+
os.makedirs("memory", exist_ok=True)
|
| 17 |
+
os.makedirs("logs", exist_ok=True)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@app.route('/')
def index():
    """Serve the chat front-end by rendering the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
@app.route('/estimate', methods=['POST'])
def estimate():
    """Triage the user's message and, for real tasks, return a plan estimate.

    Expects a JSON body containing a ``message`` string.

    Returns:
        * ``400`` with ``{"error": ...}`` when the message is missing or blank.
        * ``{"is_greeting": True, "response": ...}`` when the triage graph
          produced a direct reply (``draftResponse``).
        * Otherwise the planner's ``pmPlan`` dict with ``is_greeting`` set to
          ``False``.
        * ``500`` with ``{"error": ...}`` when triage or planning raises.
    """
    # silent=True turns a missing/invalid JSON body into None so that it is
    # reported through the JSON 400 below instead of crashing on `.get`.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    raw_message = data.get('message', '')
    user_input = raw_message.strip() if isinstance(raw_message, str) else ''
    if not user_input:
        return jsonify({"error": "Message cannot be empty."}), 400

    try:
        # --- Triage step: a non-empty draftResponse means "simple greeting" ---
        triage_result = triage_app.invoke({"userInput": user_input})
        if triage_result.get("draftResponse"):
            # `is_greeting` tells the frontend to just display the message.
            return jsonify({"is_greeting": True, "response": triage_result["draftResponse"]})

        # --- Not a greeting: ask the planner for a plan and cost estimate ---
        estimate_result = planner_app.invoke({"userInput": user_input})
        plan = estimate_result.get('pmPlan') or {}
        plan['is_greeting'] = False
        return jsonify(plan)
    except Exception:
        # Top-level boundary: record the traceback, return a generic message.
        app.logger.exception("Unhandled error while producing an estimate")
        return jsonify({"error": "An unexpected error occurred during planning."}), 500
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
@app.route('/chat', methods=['POST'])
def chat():
    """Run the main agent graph for a message within the user's budget.

    Expects a JSON body with ``message`` (string) and optionally
    ``user_budget`` and ``cost_per_loop`` (numbers; defaults 0.0 and 0.05).

    The budget is converted into ``max_loops``: the number of affordable runs
    (at least one) minus one. A non-positive ``cost_per_loop`` yields
    ``max_loops = 0``.

    Returns:
        * ``400`` with ``{"error": ...}`` for a blank message or an invalid
          (non-numeric / non-finite) budget or cost.
        * ``500`` with ``{"response": ...}`` when the graph raises.
        * ``{"response": <draftResponse>}`` on success. When the run recorded
          an execution path, a Mermaid diagram of it is also written to
          ``outputs/last_run_flow.md``.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    raw_message = data.get('message', '')
    user_input = raw_message.strip() if isinstance(raw_message, str) else ''

    try:
        user_budget = float(data.get('user_budget', 0.0))
        cost_per_loop = float(data.get('cost_per_loop', 0.05))
    except (ValueError, TypeError):
        return jsonify({"error": "Invalid budget or cost format."}), 400
    # float() happily parses "nan"/"inf", which would make math.floor raise.
    if not (math.isfinite(user_budget) and math.isfinite(cost_per_loop)):
        return jsonify({"error": "Invalid budget or cost format."}), 400

    if not user_input:
        return jsonify({"error": "Message cannot be empty."}), 400

    if cost_per_loop > 0:
        affordable_ratio = user_budget / cost_per_loop
        # A tiny cost can overflow the ratio to infinity.
        if not math.isfinite(affordable_ratio):
            return jsonify({"error": "Invalid budget or cost format."}), 400
        total_runs_affordable = max(1, math.floor(affordable_ratio))
        max_loops_calibrated = total_runs_affordable - 1
    else:
        max_loops_calibrated = 0

    initial_state = {
        "userInput": user_input, "chatHistory": [], "coreObjectivePrompt": "",
        "retrievedMemory": "", "pmPlan": {}, "experimentCode": None,
        "experimentResults": None, "draftResponse": "", "qaFeedback": None,
        "approved": False, "execution_path": [], "rework_cycles": 0,
        "max_loops": max_loops_calibrated
    }

    try:
        final_state = main_app.invoke(initial_state)
    except Exception:
        app.logger.exception("Unhandled error while executing the main graph")
        return jsonify({"response": "An unexpected error occurred during execution. Please check the logs."}), 500

    run_path = final_state.get('execution_path', [])
    if run_path:
        mermaid_syntax = generate_mermaid_diagram(run_path)
        try:
            with open("outputs/last_run_flow.md", "w", encoding="utf-8") as f:
                f.write("# Last Run Execution Flow\n\n")
                f.write("```mermaid\n")
                f.write(mermaid_syntax)
                f.write("```\n")
        except OSError:
            # The diagram is a side artifact; failing to write it must not
            # discard the response the user is waiting for.
            app.logger.exception("Could not write outputs/last_run_flow.md")

    response = final_state.get('draftResponse', "An error occurred, and no response was generated.")
    return jsonify({"response": response})
|
| 96 |
+
|
| 97 |
+
if __name__ == '__main__':
|
| 98 |
+
app.run(debug=True, port=5001)
|
app_gradio.py
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app_gradio.py (Feature-Complete with Authentication)
|
| 2 |
+
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import math
|
| 5 |
+
import os
|
| 6 |
+
import json
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
|
| 10 |
+
# Import your existing agentic logic
|
| 11 |
+
from graph import triage_app, planner_app, main_app
|
| 12 |
+
from logging_config import get_logger
|
| 13 |
+
|
| 14 |
+
# --- Setup & Configuration ---
|
| 15 |
+
load_dotenv()
|
| 16 |
+
log = get_logger(__name__)
|
| 17 |
+
|
| 18 |
+
# Create necessary directories on startup
|
| 19 |
+
os.makedirs("outputs", exist_ok=True)
|
| 20 |
+
os.makedirs("uploads", exist_ok=True)
|
| 21 |
+
os.makedirs("conversations", exist_ok=True)
|
| 22 |
+
|
| 23 |
+
# --- FEATURE: Basic & Secure Authentication ---
|
| 24 |
+
# A simple dictionary of allowed users.
|
| 25 |
+
# In a real product, this would come from a secure database.
|
| 26 |
+
USERS = {
|
| 27 |
+
"tester1": "pass123",
|
| 28 |
+
"researcher": "lab456",
|
| 29 |
+
"admin": "admin789"
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
# --- State Management ---
|
| 33 |
+
def get_default_state():
    """Return a brand-new session-state dict with every field at its default."""
    fresh_state = dict(
        original_user_message="",
        estimate={},
        current_conversation_file=None,
    )
    return fresh_state
|
| 40 |
+
|
| 41 |
+
# --- Backend I/O Functions ---
|
| 42 |
+
|
| 43 |
+
# --- FEATURE: Conversation Management ---
|
| 44 |
+
def save_conversation(history, state):
    """Save the current chat history to a timestamped JSON file.

    The file is written to ``conversations/conv_<YYYYmmdd_HHMMSS>.json`` and
    its path is recorded in ``state["current_conversation_file"]``.

    Args:
        history: The chat messages to serialise (must be JSON-serialisable).
        state: The session-state dict; mutated in place.

    Returns:
        A ``(status_message, dropdown_update)`` pair. The click handler that
        calls this function is wired to two output components, so both
        values are returned on every path, including "nothing to save".
    """
    if not history:
        # gr.update() with no arguments leaves the dropdown untouched.
        return "Nothing to save.", gr.update()

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"conv_{timestamp}.json"
    filepath = os.path.join("conversations", filename)

    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(history, f, indent=2)

    state["current_conversation_file"] = filepath
    log.info(f"Conversation saved to {filepath}")
    # Refresh the dropdown so the newly written file becomes selectable.
    return f"Saved to {filename}", gr.update(choices=get_saved_conversations())
|
| 60 |
+
|
| 61 |
+
def load_conversation(filepath, state):
    """Read a saved chat history back from a JSON file.

    Returns ``(history, state)``. A falsy ``filepath`` yields an empty
    history and leaves ``state`` untouched; otherwise the loaded path is
    stored in ``state["current_conversation_file"]``.
    """
    if filepath:
        with open(filepath, 'r', encoding='utf-8') as saved_file:
            messages = json.load(saved_file)

        state["current_conversation_file"] = filepath
        log.info(f"Conversation loaded from {filepath}")
        return messages, state

    return [], state
|
| 72 |
+
|
| 73 |
+
def get_saved_conversations():
    """Return the paths of saved conversation files, sorted by name.

    Only ``.json`` entries of the ``conversations`` directory are included.
    ``os.listdir`` returns entries in arbitrary order, so the names are
    sorted to keep the dropdown stable; the timestamped file names make this
    chronological as well.
    """
    return [
        os.path.join("conversations", name)
        for name in sorted(os.listdir("conversations"))
        if name.endswith(".json")
    ]
|
| 76 |
+
|
| 77 |
+
def clear_conversation():
    """Start over: return an empty chat history and a fresh default state."""
    empty_history = []
    return empty_history, get_default_state()
|
| 80 |
+
|
| 81 |
+
# --- FEATURE: Artifact Management ---
|
| 82 |
+
def handle_file_upload(file, state):
    """Move an uploaded file into the ``uploads`` directory.

    Args:
        file: The uploaded file as provided by the upload component: either
            an object exposing the temporary path as ``.name`` or the path
            itself. ``None`` means nothing was uploaded.
        state: The session-state dict (unused; kept for the handler signature).

    Returns:
        A status message for display.
    """
    import shutil  # local import: shutil is not among this module's imports

    if file is None:
        return "No file uploaded."

    # Accept both an object with `.name` and a plain path string.
    source_path = getattr(file, "name", file)
    basename = os.path.basename(source_path)
    destination_path = os.path.join("uploads", basename)

    # shutil.move rather than os.rename: the temporary upload may live on a
    # different filesystem, where a plain rename fails.
    shutil.move(source_path, destination_path)

    log.info(f"File uploaded to {destination_path}")
    return f"Uploaded: {basename}"
|
| 96 |
+
|
| 97 |
+
def update_artifact_list():
    """Return a Markdown listing of the files in the ``outputs`` directory.

    Entries are sorted by name so repeated refreshes render identically
    (``os.listdir`` order is arbitrary). When the directory is empty a
    placeholder sentence is returned instead.
    """
    output_files = sorted(os.listdir("outputs"))
    if not output_files:
        return "No artifacts generated yet."

    # Files are only named, not linked: the listing points the user at the
    # local 'outputs' folder.
    entries = [f"- `{name}` (in your 'outputs' folder)\n" for name in output_files]
    return "### Generated Artifacts:\n" + "".join(entries)
|
| 110 |
+
|
| 111 |
+
# --- Core Logic Functions (Unchanged) ---
|
| 112 |
+
def start_estimation(message, history, state):
    """Triage a new user message and, for tasks, show a plan for approval.

    Generator used as a Gradio event handler. Every yield is a 5-tuple:
    ``(history, state, textbox_update, approval_box_update, budget_update)``.

    Flow:
        1. Echo the user message and lock the textbox.
        2. Run the triage graph; a ``draftResponse`` is shown directly.
        3. Otherwise run the planner and either report its error or reveal
           the approval box with the plan and estimated cost.

    Any exception from the graphs is logged and reported in the chat, and the
    textbox is re-enabled so the session is never left locked.
    """
    log.info(f"Starting estimation for: '{message}'")

    # Nothing to do for blank input; do not spend an LLM call on it.
    if not message or not message.strip():
        yield history, state, gr.update(interactive=True), gr.update(visible=False), gr.update()
        return

    state["original_user_message"] = message
    history.append({"role": "user", "content": message})
    yield history, state, gr.update(value="", interactive=False), gr.update(visible=False), gr.update()

    try:
        triage_result = triage_app.invoke({"userInput": message})
        direct_reply = triage_result.get("draftResponse")
        # Only consult the planner when triage did not answer directly.
        estimate_result = None if direct_reply else planner_app.invoke({"userInput": message})
    except Exception:
        log.error("Estimation failed", exc_info=True)
        history.append({"role": "assistant", "content": "An unexpected error occurred during planning."})
        yield history, state, gr.update(interactive=True), gr.update(visible=False), gr.update()
        return

    if direct_reply:
        history.append({"role": "assistant", "content": direct_reply})
        yield history, state, gr.update(interactive=True), gr.update(visible=False), gr.update()
        return

    estimate = estimate_result.get('pmPlan') or {}
    state["estimate"] = estimate

    if estimate.get("error"):
        history.append({"role": "assistant", "content": f"Error during planning: {estimate['error']}"})
        yield history, state, gr.update(interactive=True), gr.update(visible=False), gr.update()
    else:
        plan_text = "\n".join([f"- {step}" for step in estimate.get('plan', [])])
        approval_text_md = (f"**Here is my plan:**\n{plan_text}\n\nThis may require up to **{estimate.get('max_loops_initial', 0) + 1} attempts** and could cost approximately **${estimate.get('estimated_cost_usd', 0.0)}**.")
        # The textbox stays locked while the approval box is visible.
        yield history, state, gr.update(interactive=False), gr.update(visible=True, value=approval_text_md), gr.update(value=estimate.get('estimated_cost_usd', 0.10))
|
| 138 |
+
|
| 139 |
+
def execute_main_task(history, state, budget):
    """Run the main agent graph for the approved task within ``budget``.

    Generator used as a Gradio event handler. Every yield is a 4-tuple:
    ``(history, state, approval_box_update, textbox_update)``.

    The budget is converted into ``max_loops``: affordable runs (at least
    one) minus one, using the planner's ``cost_per_loop_usd`` (default 0.05).
    A missing budget is treated as 0, i.e. a single run.

    Any exception is logged and reported in the chat, and the textbox is
    always re-enabled at the end so the session is never left locked.
    """
    log.info(f"Executing main task with budget: ${budget}")
    yield history, state, gr.update(visible=False), gr.update(value="", interactive=False)

    # Placeholder message; replaced with the final response below.
    history.append({"role": "assistant", "content": "Executing task within budget..."})
    yield history, state, gr.update(visible=False), gr.update(interactive=False)

    try:
        cost_per_loop = state["estimate"].get('cost_per_loop_usd', 0.05)
        # The numeric field yields None when cleared by the user.
        budget_value = float(budget) if budget is not None else 0.0
        if cost_per_loop > 0:
            total_runs_affordable = max(1, math.floor(budget_value / cost_per_loop))
            max_loops_calibrated = total_runs_affordable - 1
        else:
            max_loops_calibrated = 0

        initial_state = {
            "userInput": state["original_user_message"], "chatHistory": [],
            "coreObjectivePrompt": "", "retrievedMemory": "", "pmPlan": {},
            "experimentCode": None, "experimentResults": None,
            "draftResponse": "", "qaFeedback": None, "approved": False,
            "execution_path": [], "rework_cycles": 0,
            "max_loops": max_loops_calibrated,
        }

        final_state = main_app.invoke(initial_state)
        final_response = final_state.get('draftResponse', "An error occurred.")
    except Exception:
        log.error("Main task execution failed", exc_info=True)
        final_response = "An unexpected error occurred during execution. Please check the logs."

    history[-1] = {"role": "assistant", "content": final_response}

    yield history, state, gr.update(visible=False), gr.update(interactive=True)
|
| 161 |
+
|
| 162 |
+
def cancel_task(history, state):
    """Record the cancellation in the chat, hide the approval box, unlock input.

    Returns ``(history, state, approval_box_update, textbox_update)``.
    """
    cancellation_notice = {"role": "assistant", "content": "Task cancelled."}
    history.append(cancellation_notice)

    hide_approval_box = gr.update(visible=False)
    unlock_textbox = gr.update(interactive=True)
    return history, state, hide_approval_box, unlock_textbox
|
| 165 |
+
|
| 166 |
+
# --- Gradio UI Definition ---
|
| 167 |
+
with gr.Blocks(theme=gr.themes.Soft(), title="Autonomous AI Lab") as demo:
|
| 168 |
+
state = gr.State(value=get_default_state())
|
| 169 |
+
|
| 170 |
+
gr.Markdown("# Autonomous AI Lab")
|
| 171 |
+
|
| 172 |
+
with gr.Row():
|
| 173 |
+
with gr.Column(scale=1):
|
| 174 |
+
# --- FEATURE: Sidebar controls ---
|
| 175 |
+
gr.Markdown("### Controls")
|
| 176 |
+
clear_btn = gr.Button("🗑️ New Conversation")
|
| 177 |
+
|
| 178 |
+
gr.Markdown("### Saved Chats")
|
| 179 |
+
saved_chats_dropdown = gr.Dropdown(label="Load a past conversation", choices=get_saved_conversations(), interactive=True)
|
| 180 |
+
save_chat_btn = gr.Button("💾 Save Current Chat")
|
| 181 |
+
save_status_text = gr.Textbox(label="Save Status", interactive=False)
|
| 182 |
+
|
| 183 |
+
gr.Markdown("### Artifacts")
|
| 184 |
+
file_uploader = gr.File(label="Upload a file")
|
| 185 |
+
upload_status_text = gr.Textbox(label="Upload Status", interactive=False)
|
| 186 |
+
|
| 187 |
+
refresh_artifacts_btn = gr.Button("🔄 Refresh Artifacts List")
|
| 188 |
+
artifact_list_display = gr.Markdown("No artifacts generated yet.")
|
| 189 |
+
|
| 190 |
+
with gr.Column(scale=4):
|
| 191 |
+
chatbot = gr.Chatbot(label="Conversation", height=600, type="messages", avatar_images=(None, "https://i.imgur.com/b5OqI32.png"))
|
| 192 |
+
|
| 193 |
+
# --- FEATURE: Approval box is now part of the main layout ---
|
| 194 |
+
with gr.Group(visible=False) as approval_box:
|
| 195 |
+
approval_text = gr.Markdown()
|
| 196 |
+
with gr.Row():
|
| 197 |
+
budget_input = gr.Number(label="Set your maximum budget ($)", value=0.10, minimum=0.01, step=0.05)
|
| 198 |
+
with gr.Row():
|
| 199 |
+
proceed_btn = gr.Button("✅ Approve & Proceed", variant="primary")
|
| 200 |
+
cancel_btn = gr.Button("❌ Cancel")
|
| 201 |
+
|
| 202 |
+
with gr.Row():
|
| 203 |
+
msg_textbox = gr.Textbox(label="Your Message", placeholder="Ask a question or describe a task...", scale=7)
|
| 204 |
+
submit_btn = gr.Button("Send", variant="primary", scale=1)
|
| 205 |
+
|
| 206 |
+
# --- Event Handlers ---
|
| 207 |
+
# Main chat flow
|
| 208 |
+
msg_textbox.submit(fn=start_estimation, inputs=[msg_textbox, chatbot, state], outputs=[chatbot, state, msg_textbox, approval_box, budget_input])
|
| 209 |
+
submit_btn.click(fn=start_estimation, inputs=[msg_textbox, chatbot, state], outputs=[chatbot, state, msg_textbox, approval_box, budget_input])
|
| 210 |
+
proceed_btn.click(fn=execute_main_task, inputs=[chatbot, state, budget_input], outputs=[chatbot, state, approval_box, msg_textbox])
|
| 211 |
+
cancel_btn.click(fn=cancel_task, inputs=[chatbot, state], outputs=[chatbot, state, approval_box, msg_textbox])
|
| 212 |
+
|
| 213 |
+
# Sidebar controls
|
| 214 |
+
clear_btn.click(fn=clear_conversation, inputs=[], outputs=[chatbot, state])
|
| 215 |
+
save_chat_btn.click(fn=save_conversation, inputs=[chatbot, state], outputs=[save_status_text, saved_chats_dropdown])
|
| 216 |
+
saved_chats_dropdown.change(fn=load_conversation, inputs=[saved_chats_dropdown, state], outputs=[chatbot, state])
|
| 217 |
+
file_uploader.upload(fn=handle_file_upload, inputs=[file_uploader, state], outputs=[upload_status_text])
|
| 218 |
+
refresh_artifacts_btn.click(fn=update_artifact_list, inputs=[], outputs=[artifact_list_display])
|
| 219 |
+
|
| 220 |
+
if __name__ == "__main__":
|
| 221 |
+
# --- FEATURE: Add authentication to the launch() method ---
|
| 222 |
+
demo.launch(
|
| 223 |
+
share=True,
|
| 224 |
+
debug=True,
|
| 225 |
+
auth=[(user, pwd) for user, pwd in USERS.items()],
|
| 226 |
+
auth_message="Enter your credentials to access the Autonomous AI Lab."
|
| 227 |
+
)
|
code_executor.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import subprocess
|
| 2 |
+
import sys
|
| 3 |
+
import os
|
| 4 |
+
import tempfile
|
| 5 |
+
import shutil
|
| 6 |
+
|
| 7 |
+
def execute_python_code(code: str, timeout: int = 30) -> dict:
    """Run a Python snippet in a throwaway directory and capture its output.

    The code is written to ``script.py`` inside a fresh temporary directory
    and executed with the current interpreter, using that directory as the
    working directory. The directory is removed afterwards in all cases.

    Args:
        code: Python source to execute.
        timeout: Maximum run time in seconds before the process is stopped.

    Returns:
        ``{"stdout": str, "stderr": str, "success": bool}``. ``success`` is
        True only when the process exits with code 0. Timeouts and
        unexpected failures (including failing to write the script) are
        reported through ``stderr`` with ``success`` False; nothing is raised.

    NOTE(security): this is not a sandbox. The snippet runs with the same
    privileges, environment variables and network access as the calling
    process; only the working directory is isolated.
    """
    temp_dir = tempfile.mkdtemp()
    script_path = os.path.join(temp_dir, "script.py")

    try:
        # Written inside the try so the temp dir is cleaned up even if the
        # write fails. UTF-8 is Python's default source encoding, so
        # non-ASCII code survives regardless of the platform locale.
        with open(script_path, "w", encoding="utf-8") as f:
            f.write(code)

        print("🧪 Executing code in a temporary environment...")
        process = subprocess.run(
            [sys.executable, script_path],
            capture_output=True,
            text=True,
            timeout=timeout,
            cwd=temp_dir,
        )

        stdout = process.stdout
        stderr = process.stderr
        success = process.returncode == 0

    except subprocess.TimeoutExpired:
        stdout = ""
        stderr = f"Execution timed out after {timeout} seconds."
        success = False
    except Exception as e:
        stdout = ""
        stderr = f"An unexpected error occurred: {str(e)}"
        success = False
    finally:
        # Best-effort cleanup: a failure to delete must not mask the result.
        shutil.rmtree(temp_dir, ignore_errors=True)

    return {"stdout": stdout, "stderr": stderr, "success": success}
|
graph.py
ADDED
|
@@ -0,0 +1,275 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# graph.py (Updated with Triage Agent)
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import re
|
| 5 |
+
import math
|
| 6 |
+
from typing import TypedDict, List, Dict, Optional
|
| 7 |
+
from langchain_openai import ChatOpenAI
|
| 8 |
+
from langgraph.graph import StateGraph, END, START
|
| 9 |
+
from memory_manager import memory_manager
|
| 10 |
+
from code_executor import execute_python_code
|
| 11 |
+
from logging_config import setup_logging, get_logger
|
| 12 |
+
|
| 13 |
+
# --- Setup Logging & Constants ---
|
| 14 |
+
setup_logging()
|
| 15 |
+
log = get_logger(__name__)
|
| 16 |
+
INITIAL_MAX_REWORK_CYCLES = 3
|
| 17 |
+
GPT4O_INPUT_COST_PER_1K_TOKENS = 0.005
|
| 18 |
+
GPT4O_OUTPUT_COST_PER_1K_TOKENS = 0.015
|
| 19 |
+
AVG_TOKENS_PER_CALL = 2.0
|
| 20 |
+
|
| 21 |
+
# --- Agent State Definition ---
|
| 22 |
+
class AgentState(TypedDict):
    """Shared state dict passed between the graph's agent nodes."""

    # --- Request context ---
    userInput: str                       # raw message from the user
    chatHistory: List[str]               # NOTE(review): initialised to [] by callers; usage not visible here
    coreObjectivePrompt: str             # refined objective written by the intent agent
    retrievedMemory: Optional[str]       # memory context text written by the memory retriever

    # --- Planning and experiment artefacts ---
    pmPlan: Dict                         # plan dict from the planner / PM agent
    experimentCode: Optional[str]        # None when no experiment is needed
    experimentResults: Optional[Dict]    # None when no experiment is needed

    # --- Response and review ---
    draftResponse: str                   # a non-empty value from triage ends the flow early
    qaFeedback: Optional[str]            # fed back into the PM agent's prompt when set
    approved: bool                       # NOTE(review): presumably the QA verdict; confirm

    # --- Bookkeeping ---
    execution_path: List[str]            # agent names, appended by each node that runs
    rework_cycles: int                   # incremented by the PM agent on each work cycle
    max_loops: int                       # rework budget derived from the user's budget
|
| 36 |
+
|
| 37 |
+
# --- Helper for Robust JSON Parsing ---
|
| 38 |
+
def parse_json_from_llm(llm_output: str) -> Optional[dict]:
    """Extract and parse the JSON object embedded in an LLM reply.

    Two candidates are tried in order:
        1. the object inside a ```json fenced block (any whitespace,
           including CRLF line endings, is tolerated around the braces);
        2. the text from the first ``{`` to the last ``}`` of the reply.

    Args:
        llm_output: Raw text returned by the model.

    Returns:
        The parsed JSON value, or ``None`` (after logging an error) when the
        input is not a string or no candidate parses.
    """
    if not isinstance(llm_output, str):
        # e.g. None or structured content: re.search would raise TypeError.
        log.error(f"JSON parsing failed. Raw output: '{llm_output}'. Error: output is not a string")
        return None

    candidates = []
    fenced = re.search(r"```json\s*(\{.*?\})\s*```", llm_output, re.DOTALL)
    if fenced:
        candidates.append(fenced.group(1))
    start, end = llm_output.find('{'), llm_output.rfind('}')
    if start != -1 and end > start:
        candidates.append(llm_output[start:end + 1])

    last_error = "no JSON object found"
    for candidate in candidates:
        try:
            return json.loads(candidate)
        except json.JSONDecodeError as e:
            last_error = e

    log.error(f"JSON parsing failed. Raw output: '{llm_output}'. Error: {last_error}")
    return None
|
| 50 |
+
|
| 51 |
+
# --- LLM Initialization ---
|
| 52 |
+
llm = ChatOpenAI(model="gpt-4o", temperature=0.1, max_retries=3, request_timeout=60)
|
| 53 |
+
|
| 54 |
+
# --- NEW: Triage Agent ---
|
| 55 |
+
def run_triage_agent(state: AgentState):
    """Classify the user input as a simple greeting or a real task.

    The LLM is asked to answer with 'greeting' or 'task'. For a greeting a
    canned ``draftResponse`` is returned so the main workflow can be
    bypassed; for a task only the execution path is returned.

    Returns:
        A partial state update containing ``execution_path`` and, for
        greetings, ``draftResponse``.
    """
    log.info("--- triage ---")
    prompt = f"Analyze the user input. Is it a simple conversational greeting (e.g., 'hello', 'hi', 'how are you?') that doesn't require a complex plan, or is it a task/question that needs to be decomposed and executed? Respond with 'greeting' or 'task'.\n\nUser Input: \"{state['userInput']}\""
    response = llm.invoke(prompt)

    verdict = response.content.lower()
    # A bare substring test would treat "task, not a greeting" as a greeting.
    # When both labels appear the answer is ambiguous, so fall back to the
    # full workflow rather than answering a real task with a canned hello.
    is_greeting = 'greeting' in verdict and 'task' not in verdict

    if is_greeting:
        log.info("Triage result: Simple Greeting. Bypassing main workflow.")
        # Set a direct, simple response
        return {
            "draftResponse": "Hello! How can I help you today?",
            "execution_path": ["Triage Agent"]
        }

    log.info("Triage result: Complex Task. Proceeding to planner.")
    # Let the main workflow handle it
    return {"execution_path": ["Triage Agent"]}
|
| 71 |
+
|
| 72 |
+
def should_start_full_workflow(state: AgentState):
    """Route after triage: 'end_early' if a response exists, else 'planner'."""
    # A response already set by the triage agent means there is nothing
    # left to plan.
    already_answered = bool(state.get("draftResponse"))
    return "end_early" if already_answered else "planner"
|
| 79 |
+
|
| 80 |
+
# --- Agent Node Functions (Planner, PM, etc. are unchanged) ---
|
| 81 |
+
def run_planner_agent(state: AgentState):
    """Produce a high-level plan and a cost estimate for the user's request.

    The LLM is asked for a JSON object with 'plan' and
    'estimated_llm_calls_per_loop'. The per-loop cost is
    ``calls * AVG_TOKENS_PER_CALL * mean(input price, output price)`` and the
    total estimate covers ``INITIAL_MAX_REWORK_CYCLES + 1`` loops.

    Returns:
        A partial state update with ``execution_path`` and ``pmPlan``.
        ``pmPlan`` is ``{"error": ...}`` when no valid JSON was produced;
        otherwise it is the LLM's plan extended with 'max_loops_initial',
        'estimated_cost_usd' and 'cost_per_loop_usd' (floored at 0.01).
    """
    log.info("--- ✈️ Running Planner Agent for Pre-flight Check ---")
    path = state.get('execution_path', []) + ["Planner Agent"]
    prompt = f"Analyze the user's request and provide a high-level plan and cost estimate. 1. Decompose the request into high-level steps. 2. Count the likely number of specialist LLM calls needed for ONE successful loop. User Request: \"{state['userInput']}\". Provide the output ONLY as a valid JSON object within ```json code fences with keys: 'plan' (list of strings), 'estimated_llm_calls_per_loop' (integer)."

    response = llm.invoke(prompt)
    plan_data = parse_json_from_llm(response.content)

    if not plan_data:
        return {"pmPlan": {"error": "Failed to create a valid plan."}, "execution_path": path}

    # The count comes straight from the model: it may be a string, null, or
    # a non-positive number. Coerce it, falling back to the default of 3.
    try:
        calls_per_loop = max(1, int(plan_data.get('estimated_llm_calls_per_loop', 3)))
    except (TypeError, ValueError, OverflowError):
        calls_per_loop = 3

    cost_per_loop = (calls_per_loop * AVG_TOKENS_PER_CALL) * ((GPT4O_INPUT_COST_PER_1K_TOKENS + GPT4O_OUTPUT_COST_PER_1K_TOKENS) / 2)
    estimated_cost = cost_per_loop * (INITIAL_MAX_REWORK_CYCLES + 1)

    plan_data['max_loops_initial'] = INITIAL_MAX_REWORK_CYCLES
    plan_data['estimated_cost_usd'] = round(estimated_cost, 2)
    plan_data['cost_per_loop_usd'] = max(0.01, round(cost_per_loop, 3))

    log.info(f"Pre-flight Estimate: {plan_data}")
    return {"pmPlan": plan_data, "execution_path": path}
|
| 102 |
+
|
| 103 |
+
# --- All other agent functions remain the same ---
|
| 104 |
+
# ... (run_pm_agent, run_memory_retrieval, etc.)
|
| 105 |
+
def run_memory_retrieval(state: AgentState):
    """Look up memories relevant to the user input and format them as context.

    Returns a partial state update with ``retrievedMemory`` (one
    "Memory: ..." line per hit, or a fixed "none found" sentence) and the
    extended ``execution_path``.
    """
    log.info("--- 🧠 Accessing Long-Term Memory ---")
    hits = memory_manager.retrieve_relevant_memories(state['userInput'])

    if not hits:
        summary = "No relevant memories found."
        log.info(summary)
    else:
        summary = "\n".join(f"Memory: {hit.page_content}" for hit in hits)
        log.info(f"Found {len(hits)} relevant memories.")

    trail = state.get('execution_path', []) + ["Memory Retriever"]
    return {"retrievedMemory": summary, "execution_path": trail}
|
| 116 |
+
|
| 117 |
+
def run_intent_agent(state: AgentState):
    """Ask the LLM to refine the request into a 'core objective prompt'.

    The prompt combines the retrieved memory context with the raw user
    request. Returns a partial state update with ``coreObjectivePrompt`` and
    the extended ``execution_path``.
    """
    log.info("--- 🎯 Running Intent Agent ---")
    memory_context = state.get('retrievedMemory')
    request_text = state.get('userInput')

    prompt = (
        "As an expert prompt engineer, analyze the user's request, conversation history, and retrieved memories. "
        "Refine it into a clear, actionable 'core objective prompt'."
        f"\n\nRelevant Memory:\n{memory_context}"
        f"\n\nUser Request: \"{request_text}\""
        "\n\nCore Objective:"
    )
    refined = llm.invoke(prompt)

    trail = state.get('execution_path', []) + ["Intent Agent"]
    return {"coreObjectivePrompt": refined.content, "execution_path": trail}
|
| 123 |
+
|
| 124 |
+
def run_pm_agent(state: AgentState):
    """Ask the LLM for a JSON work plan and count the work cycle.

    Increments ``rework_cycles`` (each pass through this node is one cycle),
    folds any outstanding ``qaFeedback`` into the prompt, and parses the
    model's reply with ``parse_json_from_llm``.

    Returns a partial state update with ``pmPlan`` (a dict with at least
    ``plan_steps`` and ``experiment_needed``), ``execution_path`` and the new
    ``rework_cycles``. If the reply cannot be turned into a JSON *object*, a
    placeholder error plan with ``experiment_needed = False`` is used.
    """
    log.info("--- 👷 Running PM Agent ---")
    current_cycles = state.get('rework_cycles', 0) + 1
    log.info(f"Starting work cycle {current_cycles}/{state.get('max_loops', 0) + 1}")

    path = state.get('execution_path', []) + ["PM Agent"]
    qa_feedback = state.get('qaFeedback')
    feedback = f"QA Feedback (must be addressed): {qa_feedback}" if qa_feedback else ""
    prompt = (
        "As the Project Manager, decompose the core objective into a plan. Identify if code execution is needed. If so, define the goal of the experiment.\n\n"
        f"Core Objective: {state.get('coreObjectivePrompt')}\n\n"
        f"{feedback}\n\n"
        "Provide your plan ONLY as a valid JSON object within ```json code fences with keys: 'plan_steps' (list of strings) and 'experiment_needed' (boolean). If 'experiment_needed' is true, also include 'experiment_goal' (string)."
    )

    response = llm.invoke(prompt)
    plan = parse_json_from_llm(response.content)

    # Downstream nodes call plan.get(...), so anything that is not a non-empty
    # dict (parse failure, or valid JSON that is a list/string/number) must be
    # replaced by the fallback plan rather than passed along.
    if not isinstance(plan, dict) or not plan:
        log.error("PM Agent failed to produce a valid JSON plan.")
        plan = {"plan_steps": ["Error: The Project Manager failed to create a valid plan."], "experiment_needed": False}

    log.info(f"Generated Plan: Experiment Needed = {plan.get('experiment_needed', False)}")
    return {"pmPlan": plan, "execution_path": path, "rework_cycles": current_cycles}
|
| 142 |
+
|
| 143 |
+
def run_experimenter_agent(state: AgentState):
    """Generate and run a Python experiment when the PM plan asks for one.

    If ``pmPlan['experiment_needed']`` is falsy the node is a pass-through
    that clears ``experimentCode`` / ``experimentResults``. Otherwise the LLM
    is asked for a script, the first fenced ``python`` block is extracted and
    handed to ``execute_python_code``; its result is returned unchanged as
    ``experimentResults``.
    """
    log.info("--- 🔬 Running Experimenter Agent ---")
    trail = state.get('execution_path', []) + ["Experimenter Agent"]
    plan = state.get('pmPlan', {})

    if not plan.get('experiment_needed'):
        return {"experimentCode": None, "experimentResults": None, "execution_path": trail}

    goal = plan.get('experiment_goal', 'No goal specified.')
    request = (
        "Write a Python script to achieve this goal. Do not require user input. Print results to standard output.\n\n"
        f"Goal: {goal}\n\n"
        "Python Code:\n```python\n# Your code here\n```"
    )
    reply = llm.invoke(request)

    fenced = re.search(r"```python\n(.*?)\n```", reply.content, re.DOTALL)
    if fenced is None:
        log.error("Experimenter failed to generate valid Python code within fences.")
        failure = {"success": False, "stderr": "No valid Python code block was generated."}
        return {"experimentCode": "# ERROR: No code generated", "experimentResults": failure, "execution_path": trail}

    # NOTE(review): the script below is model-generated; how safely it runs
    # depends entirely on execute_python_code, which is defined elsewhere.
    script = fenced.group(1).strip()
    outcome = execute_python_code(script)
    return {"experimentCode": script, "experimentResults": outcome, "execution_path": trail}
|
| 160 |
+
|
| 161 |
+
def run_synthesis_agent(state: AgentState):
    """Combine objective, plan and experiment output into a draft answer.

    Returns a partial state update with the LLM's text as ``draftResponse``
    and the extended ``execution_path``.
    """
    log.info("--- ✍️ Running Synthesis Agent ---")
    trail = state.get('execution_path', []) + ["Synthesis Agent"]

    outcome = state.get('experimentResults')
    if outcome:
        experiment_note = (
            "An experiment was conducted. Output:\n"
            f"STDOUT:\n{outcome.get('stdout', '')}\n"
            f"STDERR:\n{outcome.get('stderr', '')}"
        )
    else:
        experiment_note = "No experiment was conducted."

    request = (
        "Synthesize all information into a final response.\n\n"
        f"Core Objective: {state.get('coreObjectivePrompt')}\n\n"
        f"Plan: {state.get('pmPlan', {}).get('plan_steps')}\n\n"
        f"{experiment_note}\n\n"
        "Final Response:"
    )
    reply = llm.invoke(request)
    return {"draftResponse": reply.content, "execution_path": trail}
|
| 172 |
+
|
| 173 |
+
def run_qa_agent(state: AgentState):
    """Have the LLM review the draft and decide between approval and rework.

    The reviewer is told to answer with the single word ``APPROVED`` or with
    feedback. The draft counts as approved only when the reply *starts* with
    that word (ignoring case and leading quotes/markdown/whitespace). A plain
    substring test would also accept replies such as "NOT APPROVED" or
    "this cannot be approved", sending a rejected draft down the success path.

    Returns a partial state update with ``approved``, ``qaFeedback`` (``None``
    on approval, otherwise the reviewer's text or a stock sentence when the
    reply is empty) and the extended ``execution_path``.
    """
    import re  # local import keeps this block self-contained

    log.info("--- ✅ Running QA Agent ---")
    path = state.get('execution_path', []) + ["QA Agent"]
    prompt = f"Review the draft response based on the core objective. Respond ONLY with 'APPROVED' if it is complete and accurate. Otherwise, provide concise feedback for rework.\n\nCore Objective: {state.get('coreObjectivePrompt')}\n\nDraft Response: {state.get('draftResponse')}"
    response = llm.invoke(prompt)

    # An empty/None reply is treated as "no verdict", i.e. not approved.
    verdict_text = response.content or ""
    if re.match(r"\W*APPROVED\b", verdict_text, re.IGNORECASE):
        return {"approved": True, "qaFeedback": None, "execution_path": path}

    return {"approved": False, "qaFeedback": verdict_text or "No specific feedback provided.", "execution_path": path}
|
| 182 |
+
|
| 183 |
+
def run_archivist_agent(state: AgentState):
    """Store a short LLM-written summary of the finished task in memory.

    The summary is saved through ``memory_manager.add_to_memory`` with the
    core objective attached as metadata. Only ``execution_path`` is returned.
    """
    log.info("--- 💾 Running Archivist Agent ---")
    trail = state.get('execution_path', []) + ["Archivist Agent"]

    request = (
        "Create a concise summary of this successful task for long-term memory. Focus on key insights and results.\n\n"
        f"Core Objective: {state.get('coreObjectivePrompt')}\n\n"
        f"Final Response: {state.get('draftResponse')}\n\n"
        "Memory Summary:"
    )
    digest = llm.invoke(request)

    metadata = {"objective": state.get('coreObjectivePrompt')}
    memory_manager.add_to_memory(digest.content, metadata)
    return {"execution_path": trail}
|
| 193 |
+
|
| 194 |
+
def run_disclaimer_agent(state: AgentState):
    """Prefix the best available draft with a budget-exhausted disclaimer.

    Returns a partial state update whose ``draftResponse`` is the disclaimer
    followed by the current draft, plus the extended ``execution_path``.
    """
    log.warning("--- ⚠️ Running Disclaimer Agent ---")
    path = state.get('execution_path', []) + ["Disclaimer Agent"]
    disclaimer = "**DISCLAIMER: The process was stopped after exhausting the user-defined budget. The following response is the best available draft but may be incomplete, unverified, or contain errors. Please review it carefully.**\n\n---\n\n"
    # `or` rather than a .get() default: the key can be present but None/empty,
    # in which case the default would not apply and `str + None` would raise.
    draft = state.get('draftResponse') or "No response was generated before the budget was exhausted."
    final_response = disclaimer + draft
    return {"draftResponse": final_response, "execution_path": path}
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
# --- Conditional Edges & Graph Definition ---
|
| 203 |
+
# ... (should_continue and should_run_experiment are unchanged)
|
| 204 |
+
def should_continue(state: AgentState):
    """Route after QA: archive on approval, otherwise rework or give up.

    Returns the name of the next node: ``"archivist_agent"`` when the draft
    is approved, ``"disclaimer_agent"`` once ``rework_cycles`` exceeds
    ``max_loops``, and ``"pm_agent"`` for another rework pass otherwise.
    """
    log.info("--- 🤔 Decision: Is the response QA approved? ---")

    if state.get("approved"):
        log.info("Routing to: Archivist (Success Path)")
        return "archivist_agent"

    cycles_used = state.get("rework_cycles", 0)
    budget_left = cycles_used <= state.get("max_loops", 0)
    if budget_left:
        log.info("Routing to: PM Agent for rework")
        return "pm_agent"

    # The first pass is not a *re*work, hence the minus one in the message.
    log.error(f"BUDGET LIMIT REACHED. Aborting task after {cycles_used-1} rework cycles.")
    return "disclaimer_agent"
|
| 216 |
+
|
| 217 |
+
def should_run_experiment(state: AgentState):
    """Route after planning: run the experimenter only if the plan asks for it."""
    plan = state.get('pmPlan', {})
    if plan.get('experiment_needed'):
        return "experimenter_agent"
    return "synthesis_agent"
|
| 220 |
+
|
| 221 |
+
# --- Build the Graph (Updated with Triage) ---
|
| 222 |
+
def _register_nodes(graph, nodes):
    """Add each ``(name, callable)`` pair to *graph*, preserving order."""
    for node_name, node_fn in nodes:
        graph.add_node(node_name, node_fn)


def _connect(graph, edges):
    """Add each ``(source, target)`` pair to *graph* as a fixed edge."""
    for source, target in edges:
        graph.add_edge(source, target)


# Nodes of the full execution pipeline, in registration order.
_EXECUTION_NODES = (
    ("memory_retriever", run_memory_retrieval),
    ("intent_agent", run_intent_agent),
    ("pm_agent", run_pm_agent),
    ("experimenter_agent", run_experimenter_agent),
    ("synthesis_agent", run_synthesis_agent),
    ("qa_agent", run_qa_agent),
    ("archivist_agent", run_archivist_agent),
    ("disclaimer_agent", run_disclaimer_agent),
)

# Combined graph: nodes are registered, but no entry point or edges are set
# and it is not compiled in this module.
workflow = StateGraph(AgentState)
_register_nodes(workflow, (("triage", run_triage_agent),) + _EXECUTION_NODES)

# NEW Triage Logic
# Three separate graphs are compiled; the app decides which one to invoke.

# 1. A simple Triage graph
triage_workflow = StateGraph(AgentState)
_register_nodes(triage_workflow, (("triage", run_triage_agent),))
triage_workflow.set_entry_point("triage")
triage_app = triage_workflow.compile()

# 2. Planner-only graph
planner_workflow = StateGraph(AgentState)
_register_nodes(planner_workflow, (("planner", run_planner_agent),))
planner_workflow.set_entry_point("planner")
planner_app = planner_workflow.compile()

# 3. Full execution graph
main_workflow = StateGraph(AgentState)
_register_nodes(main_workflow, _EXECUTION_NODES)

main_workflow.set_entry_point("memory_retriever")
_connect(main_workflow, (
    ("memory_retriever", "intent_agent"),
    ("intent_agent", "pm_agent"),
    ("experimenter_agent", "synthesis_agent"),
    ("synthesis_agent", "qa_agent"),
    ("archivist_agent", END),
    ("disclaimer_agent", END),
))

# pm_agent branches on whether an experiment is needed; qa_agent branches on
# approval / remaining budget.
main_workflow.add_conditional_edges("pm_agent", should_run_experiment)
main_workflow.add_conditional_edges("qa_agent", should_continue, {
    "archivist_agent": "archivist_agent",
    "pm_agent": "pm_agent",
    "disclaimer_agent": "disclaimer_agent"
})
main_app = main_workflow.compile()
|
logging_config.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# logging_config.py (Corrected with UTF-8 encoding)
|
| 2 |
+
|
| 3 |
+
import logging
|
| 4 |
+
import sys
|
| 5 |
+
import os
|
| 6 |
+
|
| 7 |
+
def setup_logging():
    """Configure root logging for the AI Lab: a UTF-8 log file plus stdout.

    Creates ``logs/`` if needed, truncates ``logs/ai_lab_run.log`` on every
    call, and attaches a file handler and a console handler to the root logger
    with one shared format. As with any ``logging.basicConfig`` call, the
    handlers are only installed if the root logger has none yet.

    Log messages contain emoji, so the console must be able to encode them.
    ``StreamHandler`` has no encoding setting of its own (assigning an
    ``encoding`` attribute to it has no effect); the stream itself is switched
    to UTF-8 instead, on a best-effort basis.
    """
    os.makedirs("logs", exist_ok=True)

    try:
        sys.stdout.reconfigure(encoding="utf-8")
    except (AttributeError, ValueError, OSError):
        # stdout was replaced by an object that cannot be reconfigured
        # (e.g. a capture buffer); keep whatever encoding it has.
        pass

    file_handler = logging.FileHandler("logs/ai_lab_run.log", mode='w', encoding='utf-8')
    console_handler = logging.StreamHandler(sys.stdout)

    # basicConfig applies format/datefmt to both handlers, so file and console
    # lines look the same; no handler is looked up by position afterwards.
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] [%(name)s]: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        handlers=[file_handler, console_handler],
    )

    # Silence overly verbose libraries to keep the log clean
    for noisy_logger in ("httpx", "urllib3", "sentence_transformers"):
        logging.getLogger(noisy_logger).setLevel(logging.WARNING)
|
| 32 |
+
|
| 33 |
+
def get_logger(name):
    """Return the ``logging`` logger registered under *name*."""
    module_logger = logging.getLogger(name)
    return module_logger
|
logs/ai_lab_run.log
ADDED
|
File without changes
|
memory/faiss.faiss
ADDED
|
Binary file (4.65 kB). View file
|
|
|
memory/faiss.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3bede98be9f381efd5413f20d96dbea5e53ca588b2811ead62079ea6a3880816
|
| 3 |
+
size 3104
|
memory_manager.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import shutil
|
| 3 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 4 |
+
from langchain_community.vectorstores import FAISS
|
| 5 |
+
from langchain.docstore.document import Document
|
| 6 |
+
|
| 7 |
+
# --- Configuration ---
|
| 8 |
+
MEMORY_DIR = "memory"
|
| 9 |
+
INDEX_NAME = "faiss"
|
| 10 |
+
MODEL_NAME = "all-MiniLM-L6-v2"
|
| 11 |
+
|
| 12 |
+
class MemoryManager:
    """Long-term memory kept in a FAISS vector store saved under ``MEMORY_DIR``.

    The store is loaded from disk when an index file exists and created
    otherwise. Every ``add_to_memory`` call saves the index back to disk.
    """

    # Text of the placeholder document used to bootstrap a new index. It is
    # not a real memory and is filtered out of search results.
    _SEED_TEXT = "Initial memory entry."

    def __init__(self):
        self.embeddings = HuggingFaceEmbeddings(model_name=MODEL_NAME)
        self.vector_store = self._load_or_create_vector_store()

    def _load_or_create_vector_store(self):
        """Loads FAISS index or creates a new one, handling potential corruption."""
        index_path = os.path.join(MEMORY_DIR, f"{INDEX_NAME}.faiss")
        if not os.path.exists(index_path):
            print("🧠 No existing memory found. Creating a new one.")
            return self._create_new_index()

        try:
            print("🧠 Loading existing memory from disk...")
            # NOTE(review): allow_dangerous_deserialization lets the loader
            # unpickle the sidecar .pkl file; only safe while the memory
            # directory is written exclusively by this application.
            return FAISS.load_local(
                folder_path=MEMORY_DIR,
                embeddings=self.embeddings,
                index_name=INDEX_NAME,
                allow_dangerous_deserialization=True
            )
        except Exception as e:
            # Deliberate best-effort recovery: any load failure discards the
            # whole memory directory and starts over with an empty index.
            print(f"⚠️ Error loading memory index: {e}. Rebuilding index.")
            shutil.rmtree(MEMORY_DIR)
            os.makedirs(MEMORY_DIR, exist_ok=True)
            return self._create_new_index()

    def _create_new_index(self):
        """Create, save and return an index holding only the seed placeholder."""
        dummy_doc = [Document(page_content=self._SEED_TEXT)]
        vs = FAISS.from_documents(dummy_doc, self.embeddings)
        vs.save_local(folder_path=MEMORY_DIR, index_name=INDEX_NAME)
        return vs

    def add_to_memory(self, text_to_add: str, metadata: dict):
        """Embed *text_to_add* with *metadata*, add it to the store and save."""
        print(f"📝 Adding new memory: {text_to_add[:100]}...")
        doc = Document(page_content=text_to_add, metadata=metadata)
        self.vector_store.add_documents([doc])
        self.vector_store.save_local(folder_path=MEMORY_DIR, index_name=INDEX_NAME)

    def retrieve_relevant_memories(self, query: str, k: int = 5) -> "list[Document]":
        """Return up to *k* stored memories most similar to *query*.

        The seed placeholder is never returned, so an index that holds no
        real memories yields an empty list.
        """
        print(f"🔍 Searching memory for: {query}")
        # Ask for one extra hit so that dropping the placeholder can still
        # leave k results.
        candidates = self.vector_store.similarity_search(query, k=k + 1)
        real_memories = [doc for doc in candidates if doc.page_content != self._SEED_TEXT]
        return real_memories[:k]
|
| 54 |
+
|
| 55 |
+
# Module-level instance; it is constructed when this module is imported.
memory_manager = MemoryManager()
|
outputs/last_run_flow.md
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Last Run Execution Flow
|
| 2 |
+
|
| 3 |
+
```mermaid
|
| 4 |
+
graph TD;
|
| 5 |
+
Memory_Retriever --> Intent_Agent;
|
| 6 |
+
Intent_Agent --> PM_Agent;
|
| 7 |
+
PM_Agent --> Synthesis_Agent;
|
| 8 |
+
Synthesis_Agent --> QA_Agent;
|
| 9 |
+
QA_Agent --> Archivist_Agent;
|
| 10 |
+
```
|
requirements.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Web Framework & Server
|
| 2 |
+
flask
|
| 3 |
+
|
| 4 |
+
# Core LangChain & Agent Framework
|
| 5 |
+
langchain
|
| 6 |
+
langgraph
|
| 7 |
+
langchain_openai
|
| 8 |
+
|
| 9 |
+
# Vector Store & Embeddings
|
| 10 |
+
faiss-cpu
|
| 11 |
+
langchain-huggingface
|
| 12 |
+
sentence-transformers
|
| 13 |
+
|
| 14 |
+
# Environment Variables
|
| 15 |
+
python-dotenv
|
static/script.js
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// static/script.js (Updated to handle greeting triage)
|
| 2 |
+
|
| 3 |
+
document.addEventListener('DOMContentLoaded', () => {
    const sendBtn = document.getElementById('send-btn');
    const messageInput = document.getElementById('message-input');
    const chatWindow = document.getElementById('chat-window');
    let originalUserMessage = ''; // last message sent; replayed to /chat after approval
    let currentEstimate = {};     // last /estimate payload; supplies cost_per_loop_usd

    // Escape arbitrary text so it can be interpolated into an HTML template.
    const escapeHtml = (text) => {
        const probe = document.createElement('div');
        probe.textContent = String(text);
        return probe.innerHTML;
    };

    // Append one chat bubble and return its root element so callers can
    // remove it later (e.g. the "Thinking..." placeholder).
    const appendMessage = (sender, message, isHtml = false) => {
        const senderClass = sender === 'You' ? 'user-message' : 'agent-message';
        const messageDiv = document.createElement('div');
        messageDiv.classList.add('message', senderClass);
        const contentDiv = document.createElement('div');
        const senderStrong = document.createElement('strong');
        senderStrong.textContent = `${sender}:`;
        contentDiv.appendChild(senderStrong);
        const messageP = document.createElement('p');
        if (isHtml) {
            messageP.innerHTML = message;
        } else {
            messageP.textContent = message;
        }
        contentDiv.appendChild(messageP);
        messageDiv.appendChild(contentDiv);
        chatWindow.appendChild(messageDiv);
        chatWindow.scrollTop = chatWindow.scrollHeight;
        return messageDiv;
    };

    // Each helper is declared exactly once: a second `const` with the same
    // name in this scope is a SyntaxError that prevents the script loading.
    const disableInput = () => {
        messageInput.disabled = true;
        sendBtn.disabled = true;
    };

    const enableInput = () => {
        messageInput.disabled = false;
        sendBtn.disabled = false;
        messageInput.focus();
    };

    // Run the approved task via /chat with the user's budget.
    const executeMainTask = async (userBudget) => {
        disableInput();
        const thinkingMessage = appendMessage('Agent', 'Executing task within budget...');

        try {
            const response = await fetch('/chat', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({
                    message: originalUserMessage,
                    user_budget: userBudget,
                    cost_per_loop: currentEstimate.cost_per_loop_usd
                })
            });
            const data = await response.json();
            thinkingMessage.remove();
            // NOTE(review): the server response is inserted as HTML. Confirm
            // that /chat returns sanitized markup, not raw model output.
            appendMessage('Agent', data.response, true);
        } catch (error) {
            console.error('Error:', error);
            thinkingMessage.remove();
            appendMessage('Agent', 'Sorry, I encountered an error during execution.');
        } finally {
            enableInput();
        }
    };

    // Show the plan and cost estimate and let the user set a budget or cancel.
    const showApprovalDialog = (estimate) => {
        currentEstimate = estimate;
        // Plan steps are free text; escape them and tolerate a missing list.
        const steps = Array.isArray(estimate.plan) ? estimate.plan : [];
        const planHtml = `
            <p>Here is my plan to address your request:</p>
            <ul>${steps.map(step => `<li>${escapeHtml(step)}</li>`).join('')}</ul>
            <p>I estimate this will cost ~<strong>$${estimate.estimated_cost_usd}</strong> (for ${estimate.max_loops_initial + 1} attempts).</p>
            <div class="approval-form">
                <label for="budget-input">Set your maximum budget ($):</label>
                <input type="number" id="budget-input" step="0.05" min="0.05" value="${estimate.estimated_cost_usd}">
                <div class="approval-buttons">
                    <button id="proceed-btn">✅ Proceed with Budget</button>
                    <button id="cancel-btn">❌ Cancel</button>
                </div>
            </div>
        `;
        const dialog = appendMessage('Agent', planHtml, true);

        // Look the controls up inside this dialog rather than document-wide.
        dialog.querySelector('#proceed-btn').addEventListener('click', () => {
            const userBudget = dialog.querySelector('#budget-input').value;
            dialog.remove();
            appendMessage('Agent', `Budget of $${userBudget} approved. Starting the main process...`);
            executeMainTask(userBudget);
        });

        dialog.querySelector('#cancel-btn').addEventListener('click', () => {
            dialog.remove();
            appendMessage('Agent', 'Task cancelled.');
            enableInput();
        });
    };

    // Send the message to /estimate; greetings are answered directly,
    // anything else goes through the approval dialog first.
    const startEstimation = async () => {
        originalUserMessage = messageInput.value.trim();
        if (!originalUserMessage) return;

        appendMessage('You', originalUserMessage);
        messageInput.value = '';
        disableInput();

        const thinkingMessage = appendMessage('Agent', 'Thinking...');

        try {
            const response = await fetch('/estimate', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ message: originalUserMessage })
            });
            const result = await response.json();

            thinkingMessage.remove();

            if (result.error) {
                appendMessage('Agent', `Error during planning: ${result.error}`);
                enableInput();
            } else if (result.is_greeting) {
                appendMessage('Agent', result.response);
                enableInput();
            } else {
                // It's a complex task, show the approval dialog
                showApprovalDialog(result);
            }
        } catch (error) {
            console.error('Error:', error);
            thinkingMessage.remove();
            appendMessage('Agent', 'Sorry, I encountered an error during estimation.');
            enableInput();
        }
    };

    sendBtn.addEventListener('click', startEstimation);
    messageInput.addEventListener('keydown', (event) => {
        // Enter sends; Shift+Enter keeps the default newline.
        if (event.key === 'Enter' && !event.shiftKey) {
            event.preventDefault();
            startEstimation();
        }
    });
});
|
static/style.css
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif; display: flex; height: 100vh; margin: 0; background-color: #f8f9fa; }
|
| 2 |
+
.container { display: flex; width: 100%; height: 100%; }
|
| 3 |
+
.sidebar { width: 250px; background-color: #e9ecef; padding: 15px; border-right: 1px solid #dee2e6; display: flex; flex-direction: column; }
|
| 4 |
+
.sidebar h3 { font-size: 1rem; color: #495057; margin-top: 0; }
|
| 5 |
+
.sidebar button { width: 100%; padding: 10px; border: none; border-radius: 5px; background-color: #007bff; color: white; cursor: pointer; margin-bottom: 15px; }
|
| 6 |
+
.sidebar hr { border: none; border-top: 1px solid #dee2e6; margin: 15px 0; }
|
| 7 |
+
.main-chat { flex-grow: 1; display: flex; flex-direction: column; background-color: #ffffff; }
|
| 8 |
+
#chat-window { flex-grow: 1; padding: 20px; overflow-y: auto; }
|
| 9 |
+
.chat-input { display: flex; padding: 10px; border-top: 1px solid #dee2e6; background-color: #f1f3f5; }
|
| 10 |
+
textarea { flex-grow: 1; padding: 10px; border-radius: 8px; border: 1px solid #ced4da; resize: none; font-size: 1rem; line-height: 1.5; }
|
| 11 |
+
textarea:focus { outline: none; border-color: #80bdff; box-shadow: 0 0 0 0.2rem rgba(0,123,255,.25); }
|
| 12 |
+
#send-btn { padding: 10px 15px; margin-left: 10px; border: none; background-color: #007bff; color: white; border-radius: 8px; cursor: pointer; font-weight: bold; }
|
| 13 |
+
#send-btn:disabled { background-color: #6c757d; cursor: not-allowed; }
|
| 14 |
+
.message { margin-bottom: 15px; max-width: 80%; }
|
| 15 |
+
.message strong { display: block; margin-bottom: 5px; color: #212529; }
|
| 16 |
+
.message p { margin: 0; padding: 10px 15px; border-radius: 12px; line-height: 1.6; }
|
| 17 |
+
.user-message { align-self: flex-end; margin-left: auto; }
|
| 18 |
+
.user-message p { background-color: #007bff; color: white; }
|
| 19 |
+
.agent-message { align-self: flex-start; }
|
| 20 |
+
.agent-message p { background-color: #e9ecef; color: #212529; }
|
| 21 |
+
.approval-form { background-color: #f8f9fa; border: 1px solid #dee2e6; border-radius: 8px; padding: 15px; margin-top: 10px; }
|
| 22 |
+
.approval-form label { display: block; margin-bottom: 5px; font-weight: bold; }
|
| 23 |
+
.approval-form input { width: 100px; padding: 5px; border-radius: 5px; border: 1px solid #ced4da; margin-bottom: 10px; }
|
| 24 |
+
.approval-buttons button { padding: 8px 12px; border: none; border-radius: 5px; cursor: pointer; margin-right: 10px; }
|
| 25 |
+
#proceed-btn { background-color: #28a745; color: white; }
|
| 26 |
+
#cancel-btn { background-color: #dc3545; color: white; }
|
templates/index.html
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<title>Autonomous AI Lab</title>
|
| 6 |
+
<link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
|
| 7 |
+
</head>
|
| 8 |
+
<body>
|
| 9 |
+
<div class="container">
|
| 10 |
+
<div class="sidebar">
|
| 11 |
+
<h3>Conversations</h3>
|
| 12 |
+
<button id="new-chat-btn">+ New Chat</button>
|
| 13 |
+
<ul id="chat-list"></ul>
|
| 14 |
+
<hr>
|
| 15 |
+
<h3>Artifacts</h3>
|
| 16 |
+
<input type="file" id="file-upload">
|
| 17 |
+
<div id="file-list"></div>
|
| 18 |
+
</div>
|
| 19 |
+
<div class="main-chat">
|
| 20 |
+
<div id="chat-window">
|
| 21 |
+
<div class="message agent-message">
|
| 22 |
+
<div><strong>Agent:</strong><p>Welcome to the Autonomous AI Lab. How can I help you today?</p></div>
|
| 23 |
+
</div>
|
| 24 |
+
</div>
|
| 25 |
+
<div class="chat-input">
|
| 26 |
+
<textarea id="message-input" placeholder="Ask a question or describe a task..."></textarea>
|
| 27 |
+
<button id="send-btn">Send</button>
|
| 28 |
+
</div>
|
| 29 |
+
</div>
|
| 30 |
+
</div>
|
| 31 |
+
<script src="{{ url_for('static', filename='script.js') }}"></script>
|
| 32 |
+
</body>
|
| 33 |
+
</html>
|
test_agent.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
from graph import AgentState, run_pm_agent, run_synthesis_agent
|
| 4 |
+
|
| 5 |
+
# Load environment variables from .env file
|
| 6 |
+
load_dotenv()

print("---🔬 Interactive Agent Test Bed ---")

# 1. Create a mock state to test the PM Agent
pm_state_fields = {
    "userInput": "How do I fine-tune a Llama-3 model on a custom dataset?",
    "coreObjectivePrompt": "Provide a detailed, step-by-step guide for fine-tuning a Llama-3 model on a custom dataset, including code examples and best practices.",
    "retrievedMemory": "Memory: Fine-tuning requires a powerful GPU and careful data preparation.",
    "qaFeedback": None,
    "execution_path": [],
}
mock_state_for_pm = AgentState(**pm_state_fields)

print("\n--- Testing PM Agent ---")
pm_output = run_pm_agent(mock_state_for_pm)
pm_plan = pm_output.get('pmPlan', {})
print(f"PM Plan Generated: {pm_plan}")

# 2. Feed the PM agent's plan into the Synthesis Agent (skipped if no plan came back)
if pm_plan:
    mock_state_for_synthesis = AgentState(
        coreObjectivePrompt=mock_state_for_pm['coreObjectivePrompt'],
        pmPlan=pm_plan,
        experimentResults=None,  # no experiment is simulated
    )

    print("\n--- Testing Synthesis Agent ---")
    synthesis_output = run_synthesis_agent(mock_state_for_synthesis)
    draft_preview = synthesis_output['draftResponse'][:300]
    print(f"Synthesized Draft (first 300 chars): {draft_preview}...")
|
utils.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def generate_mermaid_diagram(path: list) -> str:
    """Generate Mermaid.js flowchart syntax for an execution path.

    Args:
        path: Node labels in the order they were visited. Spaces are replaced
            with underscores to form the Mermaid node ids.

    Returns:
        ``""`` for an empty path. Otherwise a ``graph TD;`` header followed by
        one ``A --> B;`` line per consecutive pair, or by a single node line
        when the path has only one step (so the node is not lost).
    """
    if not path:
        return ""

    nodes = [step.replace(' ', '_') for step in path]

    if len(nodes) == 1:
        body_lines = [f" {nodes[0]};\n"]
    else:
        body_lines = [f" {src} --> {dst};\n" for src, dst in zip(nodes, nodes[1:])]

    return "graph TD;\n" + "".join(body_lines)
|