davidleocadio94 committed on
Commit
c8a4550
·
0 Parent(s):

feat: data analyzer agent with gradio interface

Browse files

- Gradio chat interface with Data Analyzer Agent integration
- E2B Code Interpreter sandbox for secure code execution
- Image display for matplotlib visualizations
- Lazy sandbox initialization pattern

Files changed (6) hide show
  1. .gitignore +28 -0
  2. app.py +149 -0
  3. requirements.txt +10 -0
  4. src/__init__.py +8 -0
  5. src/agent.py +94 -0
  6. src/tools.py +82 -0
.gitignore ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Development files
2
+ project_untracked.ipynb
3
+ e2b_course*/
4
+ TEST/
5
+ .claude/
6
+
7
+ # Markdown files (except docs.md)
8
+ *.md
9
+ !docs.md
10
+
11
+ # Environment and secrets
12
+ .env
13
+
14
+ # Student-generated files (created during notebook usage)
15
+ agent_files/
16
+ visualizations/
17
+ charts/
18
+ webapp/
19
+ games/
20
+ outputs/
21
+
22
+ # Python cache
23
+ __pycache__/
24
+ *.pyc
25
+ .ipynb_checkpoints/
26
+
27
+ # E2B cache
28
+ sbx.cache
app.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gradio Chat Interface for Data Analyzer Agent.
2
+
3
+ Interactive chat UI for non-technical users to ask data analysis questions
4
+ and see visualizations generated by the agent.
5
+ """
6
+
7
+ import os
8
+ import base64
9
+ import io
10
+ from PIL import Image
11
+
12
+ import gradio as gr
13
+ from openai import OpenAI
14
+ from e2b_code_interpreter import Sandbox
15
+
16
+ from src import coding_agent, execute_code_schema, tools
17
+
18
+
19
+ # Load environment variables
20
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
21
+ E2B_API_KEY = os.getenv("E2B_API_KEY")
22
+
23
+ # Initialize OpenAI client
24
+ client = OpenAI() if OPENAI_API_KEY else None
25
+
26
+ # Global sandbox reference (created lazily on first use)
27
+ sbx = None
28
+
29
+ # System prompt for data analysis agent
30
+ SYSTEM_PROMPT = """You are a data analysis agent. Generate Python code to analyze data, perform statistical analysis, and create visualizations using matplotlib, pandas, numpy, and seaborn. Always use these libraries for professional data analysis."""
31
+
32
+
33
def chat_handler(message: str, history: list):
    """Handle a chat message and return the agent response plus plot images.

    Args:
        message: User's input message.
        history: Chat history (list of [user_msg, bot_msg] pairs).

    Returns:
        Tuple of (updated history, list of image file paths for the gallery).
    """
    global sbx
    import tempfile  # local import: only needed when images are produced

    try:
        # Fail fast with a readable message if credentials are missing.
        if not client or not E2B_API_KEY:
            error_msg = "Error: Environment variables not set. Please set OPENAI_API_KEY and E2B_API_KEY."
            history.append([message, error_msg])
            return history, []

        # Create sandbox on first use (lazy initialization).
        if sbx is None:
            sbx = Sandbox.create(timeout=3600)

        # Run the agent loop for the current message.
        messages, metadata = coding_agent(
            client=client,
            query=message,
            system=SYSTEM_PROMPT,
            tools=tools,
            tools_schemas=[execute_code_schema],
            sbx=sbx,
            messages=None,
            max_steps=5
        )

        # Extract the final assistant text. Responses API message items carry
        # `content` as a list of content-part dicts ({"type": "output_text",
        # "text": ...}), so joining the text parts is required -- treating it
        # as a plain string would render a raw Python list in the chat.
        response_text = ""
        for msg in reversed(messages):
            if isinstance(msg, dict) and msg.get("type") == "message":
                content = msg.get("content", "")
                if isinstance(content, list):
                    response_text = "".join(
                        part.get("text", "")
                        for part in content
                        if isinstance(part, dict)
                    )
                else:
                    response_text = content
                break

        # Fall back to a generic confirmation if no text message was produced.
        if not response_text:
            response_text = "Analysis complete."

        # Decode base64 PNGs and save each to a unique temp file. Unique paths
        # (rather than fixed /tmp/plot_{i}.png names) avoid collisions between
        # turns and between concurrent users.
        image_paths = []
        for png_data in metadata.get("images") or []:
            img_bytes = base64.b64decode(png_data)
            img = Image.open(io.BytesIO(img_bytes))

            fd, temp_path = tempfile.mkstemp(suffix=".png", prefix="plot_")
            os.close(fd)  # PIL reopens the path itself
            img.save(temp_path)
            image_paths.append(temp_path)

        history.append([message, response_text])
        return history, image_paths

    except Exception as e:
        # Surface any failure in the chat instead of crashing the UI.
        error_msg = f"Error: {str(e)}"
        history.append([message, error_msg])
        return history, []
100
+
101
+
102
# Assemble the Gradio UI (Blocks layout so the chat and image gallery coexist).
with gr.Blocks(title="Data Analyzer Agent") as demo:
    gr.Markdown("# Data Analyzer Agent")
    gr.Markdown("Ask me to analyze data and create visualizations!")

    chatbot = gr.Chatbot(label="Chat", height=400, type="tuples")
    gallery = gr.Gallery(label="Visualizations", columns=2, height=300)

    with gr.Row():
        msg = gr.Textbox(
            label="Message",
            placeholder="Ask me to analyze data...",
            scale=4
        )
        submit = gr.Button("Send", scale=1)

    clear = gr.Button("Clear Chat")

    def _handle_submit(message, history):
        """Run the agent on the message, then empty the input textbox."""
        updated_history, plots = chat_handler(message, history)
        return updated_history, plots, ""

    def _handle_clear():
        """Reset chat history, gallery, and input textbox."""
        return [], [], ""

    # The Send button and pressing Enter in the textbox share one handler.
    for trigger in (submit.click, msg.submit):
        trigger(
            _handle_submit,
            inputs=[msg, chatbot],
            outputs=[chatbot, gallery, msg]
        )

    clear.click(
        _handle_clear,
        inputs=[],
        outputs=[chatbot, gallery, msg]
    )
146
+
147
+
148
if __name__ == "__main__":
    # Local-only server; share=False keeps Gradio from creating a public link.
    demo.launch(share=False)
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ e2b-code-interpreter==2.2.0
2
+ openai==2.4.0
3
+ python-dotenv>=1.1.1
4
+ jupyter>=1.1.1
5
+ jupyter-ai>=3.0.0b9,<3.1
6
+ # Pin working versions to avoid aiosqlite/langgraph compatibility issues
7
+ aiosqlite==0.21.0
8
+ langgraph-checkpoint-sqlite==3.0.0
9
+ gradio>=4.15.0
10
+ pillow>=10.0.0
src/__init__.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ """Data Analyzer Agent - E2B Code Interpreter with OpenAI function calling."""
2
+
3
+ __version__ = "0.1.0"
4
+
5
+ from .tools import execute_code, execute_code_schema, execute_tool, tools
6
+ from .agent import coding_agent
7
+
8
+ __all__ = ["coding_agent", "execute_code", "execute_code_schema", "execute_tool", "tools"]
src/agent.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Coding agent implementation with iterative LLM loop.
2
+
3
+ Main agent loop that orchestrates LLM calls, tool execution,
4
+ and conversation memory management.
5
+ """
6
+
7
+ import json
8
+ from openai import OpenAI
9
+ from e2b_code_interpreter import Sandbox
10
+ from .tools import execute_tool
11
+
12
+
13
+ def coding_agent(
14
+ client: OpenAI,
15
+ query: str,
16
+ system: str,
17
+ tools: dict,
18
+ tools_schemas: list,
19
+ sbx: Sandbox,
20
+ messages: list = None,
21
+ max_steps: int = 5
22
+ ):
23
+ """Run coding agent with iterative tool calling loop.
24
+
25
+ Args:
26
+ client: OpenAI client instance
27
+ query: User query/prompt
28
+ system: System prompt defining agent behavior
29
+ tools: Dict mapping tool names to functions
30
+ tools_schemas: List of OpenAI function schemas
31
+ sbx: E2B Sandbox instance for code execution
32
+ messages: Optional existing message history
33
+ max_steps: Maximum iteration steps (default 5)
34
+
35
+ Returns:
36
+ Tuple of (messages list, metadata dict)
37
+ - messages: Full conversation history
38
+ - metadata: Accumulated metadata (especially images)
39
+ """
40
+ if messages is None:
41
+ messages = []
42
+ messages.append({"role": "user", "content": query})
43
+
44
+ metadata = {}
45
+ steps = 0
46
+
47
+ while steps < max_steps:
48
+ # Call LLM with current conversation state
49
+ response = client.responses.create(
50
+ model="gpt-4.1-mini",
51
+ input=[
52
+ {"role": "developer", "content": system},
53
+ *messages
54
+ ],
55
+ tools=tools_schemas
56
+ )
57
+
58
+ has_function_call = False
59
+
60
+ # Process all parts of the response
61
+ for part in response.output:
62
+ messages.append(part.to_dict())
63
+
64
+ if part.type == "message":
65
+ print(part.content)
66
+
67
+ elif part.type == "function_call":
68
+ has_function_call = True
69
+ name = part.name
70
+ args = part.arguments
71
+
72
+ # Execute the tool and get results
73
+ result, tool_metadata = execute_tool(name, args, tools, sbx=sbx)
74
+
75
+ # Accumulate metadata (especially images)
76
+ if "images" in tool_metadata:
77
+ metadata.setdefault("images", []).extend(tool_metadata["images"])
78
+ if "error" in tool_metadata:
79
+ metadata["error"] = tool_metadata["error"]
80
+
81
+ # Append function result to conversation
82
+ messages.append({
83
+ "type": "function_call_output",
84
+ "call_id": part.call_id,
85
+ "output": json.dumps(result)
86
+ })
87
+
88
+ # Stop if no more function calls
89
+ if not has_function_call:
90
+ break
91
+
92
+ steps += 1
93
+
94
+ return messages, metadata
src/tools.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tool system for Data Analyzer Agent.
2
+
3
+ Provides execute_code function for running Python code in E2B sandbox,
4
+ along with tool schema and routing infrastructure.
5
+ """
6
+
7
+ import json
8
+ from typing import Callable
9
+ from e2b_code_interpreter import Sandbox
10
+
11
+
12
def execute_code(code: str, sbx: Sandbox):
    """Run Python code in the E2B sandbox and split out image metadata.

    Args:
        code: Python code string to execute.
        sbx: E2B Sandbox instance.

    Returns:
        Tuple of (execution.to_json(), metadata dict):
        - the JSON string contains results and errors;
        - metadata holds an ``images`` list of base64 PNG payloads pulled out
          of the execution results, plus ``error`` when execution failed.
    """
    execution = sbx.run_code(code)
    metadata = {}

    # Pull PNG payloads out of the results and into metadata; the PNG field is
    # cleared on each result so the data is not duplicated in the JSON output.
    for result in (getattr(execution, "results", None) or []):
        png = getattr(result, "png", None)
        if png:
            metadata.setdefault("images", []).append(png)
            result.png = None

    if execution.error:
        metadata["error"] = str(execution.error)

    return execution.to_json(), metadata
38
+
39
+
40
# OpenAI function-calling schema that advertises execute_code to the model.
# Strict single-parameter contract: one required string, no extras allowed.
_CODE_PARAM = {"type": "string", "description": "Python code"}

execute_code_schema = {
    "type": "function",
    "name": "execute_code",
    "description": "Execute Python code and return result",
    "parameters": {
        "type": "object",
        "properties": {"code": _CODE_PARAM},
        "required": ["code"],
        "additionalProperties": False,
    },
}
53
+
54
+
55
# Tool registry: maps the name exposed to the LLM to its implementation.
tools = {"execute_code": execute_code}
56
+
57
+
58
def execute_tool(name: str, args: str, tools: dict, **kwargs):
    """Dispatch an LLM tool call to its Python implementation.

    Args:
        name: Name of the tool the model asked for.
        args: JSON-encoded argument string from the model.
        tools: Registry mapping tool names to callables.
        **kwargs: Extra keyword arguments forwarded to the tool (e.g. sbx).

    Returns:
        Tuple of (result, metadata). On any failure the result is a dict with
        an "error" key and the metadata dict is empty.
    """
    try:
        parsed = json.loads(args)

        # Unknown tool: report it instead of raising.
        if name not in tools:
            return {"error": f"Tool {name} does not exist."}, {}

        # Tools return (result, metadata) themselves; pass that through.
        return tools[name](**parsed, **kwargs)

    except json.JSONDecodeError as e:
        # Model produced malformed argument JSON.
        return {"error": f"Failed to parse JSON arguments: {str(e)}"}, {}
    except KeyError as e:
        return {"error": f"Missing key in arguments: {str(e)}"}, {}
    except Exception as e:
        # Last-resort guard: never let a tool failure crash the agent loop.
        return {"error": str(e)}, {}