davidleocadio94
committed on
Commit
·
c8a4550
0
Parent(s):
feat: data analyzer agent with gradio interface
Browse files- Gradio chat interface with Data Analyzer Agent integration
- E2B Code Interpreter sandbox for secure code execution
- Image display for matplotlib visualizations
- Lazy sandbox initialization pattern
- .gitignore +28 -0
- app.py +149 -0
- requirements.txt +10 -0
- src/__init__.py +8 -0
- src/agent.py +94 -0
- src/tools.py +82 -0
.gitignore
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Development files
|
| 2 |
+
project_untracked.ipynb
|
| 3 |
+
e2b_course*/
|
| 4 |
+
TEST/
|
| 5 |
+
.claude/
|
| 6 |
+
|
| 7 |
+
# Markdown files (except docs.md)
|
| 8 |
+
*.md
|
| 9 |
+
!docs.md
|
| 10 |
+
|
| 11 |
+
# Environment and secrets
|
| 12 |
+
.env
|
| 13 |
+
|
| 14 |
+
# Student-generated files (created during notebook usage)
|
| 15 |
+
agent_files/
|
| 16 |
+
visualizations/
|
| 17 |
+
charts/
|
| 18 |
+
webapp/
|
| 19 |
+
games/
|
| 20 |
+
outputs/
|
| 21 |
+
|
| 22 |
+
# Python cache
|
| 23 |
+
__pycache__/
|
| 24 |
+
*.pyc
|
| 25 |
+
.ipynb_checkpoints/
|
| 26 |
+
|
| 27 |
+
# E2B cache
|
| 28 |
+
sbx.cache
|
app.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Gradio Chat Interface for Data Analyzer Agent.
|
| 2 |
+
|
| 3 |
+
Interactive chat UI for non-technical users to ask data analysis questions
|
| 4 |
+
and see visualizations generated by the agent.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import base64
import io
import os
import tempfile

import gradio as gr
from openai import OpenAI
from e2b_code_interpreter import Sandbox
from PIL import Image

from src import coding_agent, execute_code_schema, tools
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
# Load environment variables
|
| 20 |
+
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 21 |
+
E2B_API_KEY = os.getenv("E2B_API_KEY")
|
| 22 |
+
|
| 23 |
+
# Initialize OpenAI client
|
| 24 |
+
client = OpenAI() if OPENAI_API_KEY else None
|
| 25 |
+
|
| 26 |
+
# Global sandbox reference (created lazily on first use)
|
| 27 |
+
sbx = None
|
| 28 |
+
|
| 29 |
+
# System prompt for data analysis agent
|
| 30 |
+
SYSTEM_PROMPT = """You are a data analysis agent. Generate Python code to analyze data, perform statistical analysis, and create visualizations using matplotlib, pandas, numpy, and seaborn. Always use these libraries for professional data analysis."""
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def chat_handler(message: str, history: list):
    """Handle a chat message and return the updated history plus image paths.

    Args:
        message: User's input message.
        history: Chat history (list of [user_msg, bot_msg] pairs); mutated
            in place and also returned.

    Returns:
        Tuple of (updated history, list of PNG file paths for the gallery).
    """
    global sbx

    try:
        # Both API keys are required: OpenAI for the LLM, E2B for the sandbox.
        if not client or not E2B_API_KEY:
            error_msg = "Error: Environment variables not set. Please set OPENAI_API_KEY and E2B_API_KEY."
            history.append([message, error_msg])
            return history, []

        # Create sandbox on first use (lazy initialization).
        if sbx is None:
            sbx = Sandbox.create(timeout=3600)

        # Run the agent loop on the current message.
        messages, metadata = coding_agent(
            client=client,
            query=message,
            system=SYSTEM_PROMPT,
            tools=tools,
            tools_schemas=[execute_code_schema],
            sbx=sbx,
            messages=None,
            max_steps=5
        )

        # Extract the final assistant text from the conversation.
        # NOTE(review): with the Responses API, "content" may be a list of
        # content parts rather than a plain string — handle both shapes.
        response_text = ""
        for msg in reversed(messages):
            if isinstance(msg, dict) and msg.get("type") == "message":
                content = msg.get("content", "")
                if isinstance(content, list):
                    response_text = "".join(
                        part.get("text", "")
                        for part in content
                        if isinstance(part, dict)
                    )
                else:
                    response_text = content
                break

        # If no message found, use a default.
        if not response_text:
            response_text = "Analysis complete."

        # Decode base64 PNGs into unique temp files. (The previous
        # hard-coded /tmp/plot_{i}.png overwrote plots from earlier turns
        # and was not portable off POSIX systems.)
        image_paths = []
        for png_data in metadata.get("images", []):
            img = Image.open(io.BytesIO(base64.b64decode(png_data)))
            fd, temp_path = tempfile.mkstemp(prefix="plot_", suffix=".png")
            os.close(fd)  # PIL reopens the path itself; release the raw fd
            img.save(temp_path)
            image_paths.append(temp_path)

        # Append to history and hand both outputs back to the UI.
        history.append([message, response_text])
        return history, image_paths

    except Exception as e:
        # Surface any failure in the chat rather than crashing the UI.
        error_msg = f"Error: {str(e)}"
        history.append([message, error_msg])
        return history, []
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
# Create Gradio interface using Blocks for image support
|
| 103 |
+
# Build the Gradio UI. Blocks (rather than ChatInterface) is used so a
# separate Gallery can display matplotlib figures alongside the chat.
with gr.Blocks(title="Data Analyzer Agent") as demo:
    gr.Markdown("# Data Analyzer Agent")
    gr.Markdown("Ask me to analyze data and create visualizations!")

    chatbot = gr.Chatbot(label="Chat", height=400, type="tuples")
    gallery = gr.Gallery(label="Visualizations", columns=2, height=300)

    with gr.Row():
        msg = gr.Textbox(
            label="Message",
            placeholder="Ask me to analyze data...",
            scale=4
        )
        submit = gr.Button("Send", scale=1)

    clear = gr.Button("Clear Chat")

    def _on_send(user_text, chat_state):
        """Run the agent on the new message and clear the input box."""
        updated_history, plot_paths = chat_handler(user_text, chat_state)
        return updated_history, plot_paths, ""

    def _on_clear():
        """Reset chat, gallery, and input box to their empty states."""
        return [], [], ""

    # Both the Send button and pressing Enter in the textbox submit.
    submit.click(
        _on_send,
        inputs=[msg, chatbot],
        outputs=[chatbot, gallery, msg]
    )
    msg.submit(
        _on_send,
        inputs=[msg, chatbot],
        outputs=[chatbot, gallery, msg]
    )
    clear.click(
        _on_clear,
        inputs=[],
        outputs=[chatbot, gallery, msg]
    )


if __name__ == "__main__":
    demo.launch(share=False)
|
requirements.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
e2b-code-interpreter==2.2.0
|
| 2 |
+
openai==2.4.0
|
| 3 |
+
python-dotenv>=1.1.1
|
| 4 |
+
jupyter>=1.1.1
|
| 5 |
+
jupyter-ai>=3.0.0b9,<3.1
|
| 6 |
+
# Pin working versions to avoid aiosqlite/langgraph compatibility issues
|
| 7 |
+
aiosqlite==0.21.0
|
| 8 |
+
langgraph-checkpoint-sqlite==3.0.0
|
| 9 |
+
gradio>=4.15.0
|
| 10 |
+
pillow>=10.0.0
|
src/__init__.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Data Analyzer Agent - E2B Code Interpreter with OpenAI function calling."""
|
| 2 |
+
|
| 3 |
+
__version__ = "0.1.0"
|
| 4 |
+
|
| 5 |
+
from .tools import execute_code, execute_code_schema, execute_tool, tools
|
| 6 |
+
from .agent import coding_agent
|
| 7 |
+
|
| 8 |
+
__all__ = ["coding_agent", "execute_code", "execute_code_schema", "execute_tool", "tools"]
|
src/agent.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Coding agent implementation with iterative LLM loop.
|
| 2 |
+
|
| 3 |
+
Main agent loop that orchestrates LLM calls, tool execution,
|
| 4 |
+
and conversation memory management.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import json
|
| 8 |
+
from openai import OpenAI
|
| 9 |
+
from e2b_code_interpreter import Sandbox
|
| 10 |
+
from .tools import execute_tool
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def coding_agent(
    client: OpenAI,
    query: str,
    system: str,
    tools: dict,
    tools_schemas: list,
    sbx: Sandbox,
    messages: list = None,
    max_steps: int = 5
):
    """Run coding agent with iterative tool calling loop.

    Args:
        client: OpenAI client instance
        query: User query/prompt
        system: System prompt defining agent behavior
        tools: Dict mapping tool names to functions
        tools_schemas: List of OpenAI function schemas
        sbx: E2B Sandbox instance for code execution
        messages: Optional existing message history (mutated in place)
        max_steps: Maximum iteration steps (default 5)

    Returns:
        Tuple of (messages list, metadata dict)
        - messages: Full conversation history
        - metadata: Accumulated metadata (especially images)
    """
    if messages is None:
        messages = []
    messages.append({"role": "user", "content": query})

    metadata = {}
    steps = 0

    while steps < max_steps:
        # Call LLM with current conversation state; the system prompt is
        # re-sent each turn as a "developer" message rather than stored
        # in the history.
        response = client.responses.create(
            model="gpt-4.1-mini",
            input=[
                {"role": "developer", "content": system},
                *messages
            ],
            tools=tools_schemas
        )

        has_function_call = False

        # Append every output part to the history so the model sees its
        # own prior messages and tool calls on the next turn.
        # (Removed a leftover debug print of message content — a library
        # function should not write to stdout.)
        for part in response.output:
            messages.append(part.to_dict())

            if part.type == "function_call":
                has_function_call = True

                # Execute the tool and get results.
                result, tool_metadata = execute_tool(
                    part.name, part.arguments, tools, sbx=sbx
                )

                # Accumulate metadata (especially images). A later error
                # overwrites an earlier one; images are appended.
                if "images" in tool_metadata:
                    metadata.setdefault("images", []).extend(tool_metadata["images"])
                if "error" in tool_metadata:
                    metadata["error"] = tool_metadata["error"]

                # Feed the function result back into the conversation.
                messages.append({
                    "type": "function_call_output",
                    "call_id": part.call_id,
                    "output": json.dumps(result)
                })

        # A turn with no function calls is the model's final answer.
        if not has_function_call:
            break

        steps += 1

    return messages, metadata
|
src/tools.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tool system for Data Analyzer Agent.
|
| 2 |
+
|
| 3 |
+
Provides execute_code function for running Python code in E2B sandbox,
|
| 4 |
+
along with tool schema and routing infrastructure.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import json
|
| 8 |
+
from typing import Callable
|
| 9 |
+
from e2b_code_interpreter import Sandbox
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def execute_code(code: str, sbx: Sandbox):
    """Run Python code in the E2B sandbox, splitting plots into metadata.

    Args:
        code: Python source string to execute.
        sbx: E2B Sandbox instance.

    Returns:
        Tuple of (execution.to_json(), metadata dict)
        - execution.to_json() contains results and errors
        - metadata may hold an "images" list of PNG payloads and/or an
          "error" string describing an execution failure
    """
    execution = sbx.run_code(code)
    metadata = {}

    # Move any rendered PNGs out of the results and into metadata; each
    # png field is then blanked so to_json() does not duplicate the
    # (potentially large) image data.
    result_items = getattr(execution, "results", None) or []
    for item in result_items:
        png_payload = getattr(item, "png", None)
        if png_payload:
            metadata.setdefault("images", []).append(png_payload)
            item.png = None

    if execution.error:
        metadata["error"] = str(execution.error)

    return execution.to_json(), metadata
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
# OpenAI function schema advertising execute_code to the model.
# "additionalProperties": False means no keys beyond "code" are accepted.
execute_code_schema = {
    "type": "function",
    "name": "execute_code",
    "description": "Execute Python code and return result",
    "parameters": {
        "type": "object",
        "properties": {
            "code": {"type": "string", "description": "Python code"}
        },
        "required": ["code"],
        "additionalProperties": False
    }
}


# Registry mapping tool names (as the model emits them) to implementations.
tools = {"execute_code": execute_code}
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def execute_tool(name: str, args: str, tools: dict, **kwargs):
    """Dispatch a tool call from the LLM to its implementation function.

    Args:
        name: Tool name to execute.
        args: JSON string with tool arguments.
        tools: Dict mapping tool names to functions.
        **kwargs: Extra parameters forwarded to the tool (e.g., sbx).

    Returns:
        Tuple of (result dict, metadata dict); on any failure the result
        is an {"error": ...} dict and the metadata dict is empty.
    """
    try:
        parsed_args = json.loads(args)
        tool_fn = tools.get(name)
        if tool_fn is None:
            return {"error": f"Tool {name} does not exist."}, {}
        return tool_fn(**parsed_args, **kwargs)
    except json.JSONDecodeError as e:
        return {"error": f"Failed to parse JSON arguments: {str(e)}"}, {}
    except KeyError as e:
        return {"error": f"Missing key in arguments: {str(e)}"}, {}
    except Exception as e:
        # Catch-all so a tool crash becomes a structured error for the LLM.
        return {"error": str(e)}, {}
|