davidleocadio94
committed on
Commit
·
c8a4550
0
Parent(s):
feat: data analyzer agent with gradio interface
Browse files- Gradio chat interface with Data Analyzer Agent integration
- E2B Code Interpreter sandbox for secure code execution
- Image display for matplotlib visualizations
- Lazy sandbox initialization pattern
- .gitignore +28 -0
- app.py +149 -0
- requirements.txt +10 -0
- src/__init__.py +8 -0
- src/agent.py +94 -0
- src/tools.py +82 -0
.gitignore
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Development files
|
| 2 |
+
project_untracked.ipynb
|
| 3 |
+
e2b_course*/
|
| 4 |
+
TEST/
|
| 5 |
+
.claude/
|
| 6 |
+
|
| 7 |
+
# Markdown files (except docs.md)
|
| 8 |
+
*.md
|
| 9 |
+
!docs.md
|
| 10 |
+
|
| 11 |
+
# Environment and secrets
|
| 12 |
+
.env
|
| 13 |
+
|
| 14 |
+
# Student-generated files (created during notebook usage)
|
| 15 |
+
agent_files/
|
| 16 |
+
visualizations/
|
| 17 |
+
charts/
|
| 18 |
+
webapp/
|
| 19 |
+
games/
|
| 20 |
+
outputs/
|
| 21 |
+
|
| 22 |
+
# Python cache
|
| 23 |
+
__pycache__/
|
| 24 |
+
*.pyc
|
| 25 |
+
.ipynb_checkpoints/
|
| 26 |
+
|
| 27 |
+
# E2B cache
|
| 28 |
+
sbx.cache
|
app.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Gradio Chat Interface for Data Analyzer Agent.
|
| 2 |
+
|
| 3 |
+
Interactive chat UI for non-technical users to ask data analysis questions
|
| 4 |
+
and see visualizations generated by the agent.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import base64
import io
import os
import tempfile

import gradio as gr
from openai import OpenAI
from e2b_code_interpreter import Sandbox
from PIL import Image

from src import coding_agent, execute_code_schema, tools
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
# Load environment variables
|
| 20 |
+
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 21 |
+
E2B_API_KEY = os.getenv("E2B_API_KEY")
|
| 22 |
+
|
| 23 |
+
# Initialize OpenAI client
|
| 24 |
+
client = OpenAI() if OPENAI_API_KEY else None
|
| 25 |
+
|
| 26 |
+
# Global sandbox reference (created lazily on first use)
|
| 27 |
+
sbx = None
|
| 28 |
+
|
| 29 |
+
# System prompt for data analysis agent
|
| 30 |
+
SYSTEM_PROMPT = """You are a data analysis agent. Generate Python code to analyze data, perform statistical analysis, and create visualizations using matplotlib, pandas, numpy, and seaborn. Always use these libraries for professional data analysis."""
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def chat_handler(message: str, history: list):
    """Handle a chat message and return the updated history plus image paths.

    Args:
        message: User's input message.
        history: Chat history (list of [user_msg, bot_msg] pairs); mutated
            in place and also returned.

    Returns:
        Tuple of (updated history, list of PNG file paths for the gallery).
    """
    global sbx

    try:
        # Both API keys are required: OpenAI for the LLM, E2B for the sandbox.
        if not client or not E2B_API_KEY:
            error_msg = "Error: Environment variables not set. Please set OPENAI_API_KEY and E2B_API_KEY."
            history.append([message, error_msg])
            return history, []

        # Create sandbox on first use (lazy initialization).
        if sbx is None:
            sbx = Sandbox.create(timeout=3600)

        # Run the agent loop on the current message.
        messages, metadata = coding_agent(
            client=client,
            query=message,
            system=SYSTEM_PROMPT,
            tools=tools,
            tools_schemas=[execute_code_schema],
            sbx=sbx,
            messages=None,
            max_steps=5
        )

        # Extract the final assistant text from the conversation.
        # NOTE(review): with the Responses API, "content" may be a list of
        # content parts rather than a plain string — handle both shapes.
        response_text = ""
        for msg in reversed(messages):
            if isinstance(msg, dict) and msg.get("type") == "message":
                content = msg.get("content", "")
                if isinstance(content, list):
                    response_text = "".join(
                        part.get("text", "")
                        for part in content
                        if isinstance(part, dict)
                    )
                else:
                    response_text = content
                break

        # If no message found, use a default.
        if not response_text:
            response_text = "Analysis complete."

        # Decode base64 PNGs into unique temp files. (The previous
        # hard-coded /tmp/plot_{i}.png overwrote plots from earlier turns
        # and was not portable off POSIX systems.)
        image_paths = []
        for png_data in metadata.get("images", []):
            img = Image.open(io.BytesIO(base64.b64decode(png_data)))
            fd, temp_path = tempfile.mkstemp(prefix="plot_", suffix=".png")
            os.close(fd)  # PIL reopens the path itself; release the raw fd
            img.save(temp_path)
            image_paths.append(temp_path)

        # Append to history and hand both outputs back to the UI.
        history.append([message, response_text])
        return history, image_paths

    except Exception as e:
        # Surface any failure in the chat rather than crashing the UI.
        error_msg = f"Error: {str(e)}"
        history.append([message, error_msg])
        return history, []
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
# Create Gradio interface using Blocks for image support
|
| 103 |
+
# Build the Gradio UI. Blocks (rather than ChatInterface) is used so a
# separate Gallery can display matplotlib figures alongside the chat.
with gr.Blocks(title="Data Analyzer Agent") as demo:
    gr.Markdown("# Data Analyzer Agent")
    gr.Markdown("Ask me to analyze data and create visualizations!")

    chatbot = gr.Chatbot(label="Chat", height=400, type="tuples")
    gallery = gr.Gallery(label="Visualizations", columns=2, height=300)

    with gr.Row():
        msg = gr.Textbox(
            label="Message",
            placeholder="Ask me to analyze data...",
            scale=4
        )
        submit = gr.Button("Send", scale=1)

    clear = gr.Button("Clear Chat")

    def _on_send(user_text, chat_state):
        """Run the agent on the new message and clear the input box."""
        updated_history, plot_paths = chat_handler(user_text, chat_state)
        return updated_history, plot_paths, ""

    def _on_clear():
        """Reset chat, gallery, and input box to their empty states."""
        return [], [], ""

    # Both the Send button and pressing Enter in the textbox submit.
    submit.click(
        _on_send,
        inputs=[msg, chatbot],
        outputs=[chatbot, gallery, msg]
    )
    msg.submit(
        _on_send,
        inputs=[msg, chatbot],
        outputs=[chatbot, gallery, msg]
    )
    clear.click(
        _on_clear,
        inputs=[],
        outputs=[chatbot, gallery, msg]
    )


if __name__ == "__main__":
    demo.launch(share=False)
|
requirements.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
e2b-code-interpreter==2.2.0
|
| 2 |
+
openai==2.4.0
|
| 3 |
+
python-dotenv>=1.1.1
|
| 4 |
+
jupyter>=1.1.1
|
| 5 |
+
jupyter-ai>=3.0.0b9,<3.1
|
| 6 |
+
# Pin working versions to avoid aiosqlite/langgraph compatibility issues
|
| 7 |
+
aiosqlite==0.21.0
|
| 8 |
+
langgraph-checkpoint-sqlite==3.0.0
|
| 9 |
+
gradio>=4.15.0
|
| 10 |
+
pillow>=10.0.0
|
src/__init__.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Data Analyzer Agent - E2B Code Interpreter with OpenAI function calling."""
|
| 2 |
+
|
| 3 |
+
__version__ = "0.1.0"
|
| 4 |
+
|
| 5 |
+
from .tools import execute_code, execute_code_schema, execute_tool, tools
|
| 6 |
+
from .agent import coding_agent
|
| 7 |
+
|
| 8 |
+
__all__ = ["coding_agent", "execute_code", "execute_code_schema", "execute_tool", "tools"]
|
src/agent.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Coding agent implementation with iterative LLM loop.
|
| 2 |
+
|
| 3 |
+
Main agent loop that orchestrates LLM calls, tool execution,
|
| 4 |
+
and conversation memory management.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import json
|
| 8 |
+
from openai import OpenAI
|
| 9 |
+
from e2b_code_interpreter import Sandbox
|
| 10 |
+
from .tools import execute_tool
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def coding_agent(
    client: OpenAI,
    query: str,
    system: str,
    tools: dict,
    tools_schemas: list,
    sbx: Sandbox,
    messages: list = None,
    max_steps: int = 5
):
    """Run coding agent with iterative tool calling loop.

    Args:
        client: OpenAI client instance
        query: User query/prompt
        system: System prompt defining agent behavior
        tools: Dict mapping tool names to functions
        tools_schemas: List of OpenAI function schemas
        sbx: E2B Sandbox instance for code execution
        messages: Optional existing message history (mutated in place)
        max_steps: Maximum iteration steps (default 5)

    Returns:
        Tuple of (messages list, metadata dict)
        - messages: Full conversation history
        - metadata: Accumulated metadata (especially images)
    """
    if messages is None:
        messages = []
    messages.append({"role": "user", "content": query})

    metadata = {}
    steps = 0

    while steps < max_steps:
        # Call LLM with current conversation state; the system prompt is
        # re-sent each turn as a "developer" message rather than stored
        # in the history.
        response = client.responses.create(
            model="gpt-4.1-mini",
            input=[
                {"role": "developer", "content": system},
                *messages
            ],
            tools=tools_schemas
        )

        has_function_call = False

        # Append every output part to the history so the model sees its
        # own prior messages and tool calls on the next turn.
        # (Removed a leftover debug print of message content — a library
        # function should not write to stdout.)
        for part in response.output:
            messages.append(part.to_dict())

            if part.type == "function_call":
                has_function_call = True

                # Execute the tool and get results.
                result, tool_metadata = execute_tool(
                    part.name, part.arguments, tools, sbx=sbx
                )

                # Accumulate metadata (especially images). A later error
                # overwrites an earlier one; images are appended.
                if "images" in tool_metadata:
                    metadata.setdefault("images", []).extend(tool_metadata["images"])
                if "error" in tool_metadata:
                    metadata["error"] = tool_metadata["error"]

                # Feed the function result back into the conversation.
                messages.append({
                    "type": "function_call_output",
                    "call_id": part.call_id,
                    "output": json.dumps(result)
                })

        # A turn with no function calls is the model's final answer.
        if not has_function_call:
            break

        steps += 1

    return messages, metadata
|
src/tools.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tool system for Data Analyzer Agent.
|
| 2 |
+
|
| 3 |
+
Provides execute_code function for running Python code in E2B sandbox,
|
| 4 |
+
along with tool schema and routing infrastructure.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import json
|
| 8 |
+
from typing import Callable
|
| 9 |
+
from e2b_code_interpreter import Sandbox
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def execute_code(code: str, sbx: Sandbox):
    """Run Python code in the E2B sandbox, splitting plots into metadata.

    Args:
        code: Python source string to execute.
        sbx: E2B Sandbox instance.

    Returns:
        Tuple of (execution.to_json(), metadata dict)
        - execution.to_json() contains results and errors
        - metadata may hold an "images" list of PNG payloads and/or an
          "error" string describing an execution failure
    """
    execution = sbx.run_code(code)
    metadata = {}

    # Move any rendered PNGs out of the results and into metadata; each
    # png field is then blanked so to_json() does not duplicate the
    # (potentially large) image data.
    result_items = getattr(execution, "results", None) or []
    for item in result_items:
        png_payload = getattr(item, "png", None)
        if png_payload:
            metadata.setdefault("images", []).append(png_payload)
            item.png = None

    if execution.error:
        metadata["error"] = str(execution.error)

    return execution.to_json(), metadata
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
# OpenAI function schema advertising execute_code to the model.
# "additionalProperties": False means no keys beyond "code" are accepted.
execute_code_schema = {
    "type": "function",
    "name": "execute_code",
    "description": "Execute Python code and return result",
    "parameters": {
        "type": "object",
        "properties": {
            "code": {"type": "string", "description": "Python code"}
        },
        "required": ["code"],
        "additionalProperties": False
    }
}


# Registry mapping tool names (as the model emits them) to implementations.
tools = {"execute_code": execute_code}
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def execute_tool(name: str, args: str, tools: dict, **kwargs):
    """Dispatch a tool call from the LLM to its implementation function.

    Args:
        name: Tool name to execute.
        args: JSON string with tool arguments.
        tools: Dict mapping tool names to functions.
        **kwargs: Extra parameters forwarded to the tool (e.g., sbx).

    Returns:
        Tuple of (result dict, metadata dict); on any failure the result
        is an {"error": ...} dict and the metadata dict is empty.
    """
    try:
        parsed_args = json.loads(args)
        tool_fn = tools.get(name)
        if tool_fn is None:
            return {"error": f"Tool {name} does not exist."}, {}
        return tool_fn(**parsed_args, **kwargs)
    except json.JSONDecodeError as e:
        return {"error": f"Failed to parse JSON arguments: {str(e)}"}, {}
    except KeyError as e:
        return {"error": f"Missing key in arguments: {str(e)}"}, {}
    except Exception as e:
        # Catch-all so a tool crash becomes a structured error for the LLM.
        return {"error": str(e)}, {}
|