v3.3.1: Strict Coder - no sandbox exploration commands
Browse files
- Dockerfile +1 -1
- chainlit_app.py +2 -2
- codepilot/agents/coder_agent.py +34 -52
- codepilot/agents/orchestrator.py +1 -1
Dockerfile
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
# HuggingFace Spaces Dockerfile for CodePilot
|
| 2 |
-
# BUILD_VERSION:
|
| 3 |
FROM python:3.11-slim
|
| 4 |
|
| 5 |
# Set working directory
|
|
|
|
| 1 |
# HuggingFace Spaces Dockerfile for CodePilot
|
| 2 |
+
# BUILD_VERSION: 10 (v3.3.1 coder strict - no sandbox exploration)
|
| 3 |
FROM python:3.11-slim
|
| 4 |
|
| 5 |
# Set working directory
|
chainlit_app.py
CHANGED
|
@@ -20,8 +20,8 @@ from concurrent.futures import ThreadPoolExecutor
|
|
| 20 |
# ============================================================
|
| 21 |
# STARTUP VERSION CHECK - Change this to detect if rebuild worked
|
| 22 |
# ============================================================
|
| 23 |
-
APP_VERSION = "3.3.
|
| 24 |
-
BUILD_ID = "2024-12-19-
|
| 25 |
print("=" * 60)
|
| 26 |
print(f"[STARTUP] CodePilot Chainlit App")
|
| 27 |
print(f"[STARTUP] APP_VERSION: {APP_VERSION}")
|
|
|
|
| 20 |
# ============================================================
|
| 21 |
# STARTUP VERSION CHECK - Change this to detect if rebuild worked
|
| 22 |
# ============================================================
|
| 23 |
+
APP_VERSION = "3.3.1-coder-strict"
|
| 24 |
+
BUILD_ID = "2024-12-19-v9"
|
| 25 |
print("=" * 60)
|
| 26 |
print(f"[STARTUP] CodePilot Chainlit App")
|
| 27 |
print(f"[STARTUP] APP_VERSION: {APP_VERSION}")
|
codepilot/agents/coder_agent.py
CHANGED
|
@@ -21,51 +21,34 @@ from typing import Dict, Any, Optional
|
|
| 21 |
import json
|
| 22 |
|
| 23 |
|
| 24 |
-
# Coder's specialized system prompt (v3.
|
| 25 |
-
CODER_SYSTEM_PROMPT = """You are an expert software engineer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
DO NOT:
|
| 33 |
-
- Navigate directories (no list_files)
|
| 34 |
-
- Search for files (no searching)
|
| 35 |
-
- Explore the codebase
|
| 36 |
-
|
| 37 |
-
DO:
|
| 38 |
-
- Use the exact file paths from exploration results
|
| 39 |
-
- Start writing code immediately
|
| 40 |
-
- Follow the plan step by step
|
| 41 |
|
| 42 |
-
===
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
4. Test in sandbox if needed
|
| 47 |
|
| 48 |
-
|
| 49 |
-
- get_file_outline: See file structure (use if unsure about a file)
|
| 50 |
-
- get_code_chunk: Read ONE specific function/class
|
| 51 |
-
- read_file: Read entire file (only when rewriting whole file)
|
| 52 |
-
- write_file: Create or modify files
|
| 53 |
-
- upload_to_sandbox: Upload files for testing
|
| 54 |
-
- run_command_in_sandbox: Run tests in sandbox
|
| 55 |
-
- execute_in_sandbox: Execute Python snippets
|
| 56 |
-
|
| 57 |
-
=== SANDBOX WORKFLOW ===
|
| 58 |
-
When testing in sandbox:
|
| 59 |
-
1. Upload with RELATIVE path: upload_to_sandbox(path="file.py", content=code)
|
| 60 |
-
2. Run with RELATIVE path: run_command_in_sandbox(command="python file.py")
|
| 61 |
-
3. The sandbox CANNOT access /tmp/codepilot_repos/ - use simple filenames!
|
| 62 |
-
|
| 63 |
-
Your code should be:
|
| 64 |
-
- Clean (follow existing code style)
|
| 65 |
-
- Minimal (only change what's necessary)
|
| 66 |
-
- Follow the plan exactly
|
| 67 |
-
|
| 68 |
-
START CODING IMMEDIATELY - do not explore!
|
| 69 |
"""
|
| 70 |
|
| 71 |
|
|
@@ -94,17 +77,16 @@ class CoderAgent:
|
|
| 94 |
|
| 95 |
self.conversation = ConversationManager()
|
| 96 |
|
| 97 |
-
# v3.
|
| 98 |
-
#
|
| 99 |
self.allowed_tools = [
|
| 100 |
-
"
|
| 101 |
-
"get_code_chunk", #
|
| 102 |
-
"read_file", #
|
| 103 |
-
"
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
"execute_in_sandbox" # Execute Python snippets
|
| 108 |
]
|
| 109 |
|
| 110 |
def run(
|
|
|
|
| 21 |
import json
|
| 22 |
|
| 23 |
|
| 24 |
+
# Coder's specialized system prompt (v3.3 - no exploration, write code immediately)
|
| 25 |
+
CODER_SYSTEM_PROMPT = """You are an expert software engineer. Your ONLY job is to WRITE CODE.
|
| 26 |
+
|
| 27 |
+
=== CRITICAL RULES ===
|
| 28 |
+
1. START WRITING CODE IMMEDIATELY with write_file
|
| 29 |
+
2. DO NOT run ls, find, pwd, or any exploration commands
|
| 30 |
+
3. DO NOT try to access /tmp/codepilot_repos/ from sandbox - IT WILL FAIL
|
| 31 |
+
4. Use file paths EXACTLY as shown in the exploration results
|
| 32 |
+
5. The sandbox is ISOLATED - it cannot see the cloned repo
|
| 33 |
+
|
| 34 |
+
=== YOUR WORKFLOW ===
|
| 35 |
+
1. Look at the EXPLORATION RESULTS - they have all the file paths you need
|
| 36 |
+
2. Look at the PLAN - it tells you exactly what to create
|
| 37 |
+
3. Use write_file to create/modify files in the repo
|
| 38 |
+
4. (Optional) Test your code by uploading to sandbox with relative paths
|
| 39 |
|
| 40 |
+
=== TOOLS ===
|
| 41 |
+
- write_file: CREATE OR MODIFY FILES - use this immediately!
|
| 42 |
+
- get_code_chunk: Read a specific function if you need to see existing code
|
| 43 |
+
- upload_to_sandbox: Upload YOUR code for testing (use relative paths like "app.py")
|
| 44 |
+
- run_command_in_sandbox: Run YOUR uploaded code (like "python app.py")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
+
=== EXAMPLE ===
|
| 47 |
+
If plan says "create examples/health_check/app.py":
|
| 48 |
+
✅ CORRECT: write_file(path="/tmp/codepilot_repos/flask_xxx/examples/health_check/app.py", content="...")
|
| 49 |
+
❌ WRONG: run_command_in_sandbox("ls /tmp/codepilot_repos/...") <-- sandbox can't access this!
|
|
|
|
| 50 |
|
| 51 |
+
DO NOT EXPLORE. WRITE CODE NOW.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
"""
|
| 53 |
|
| 54 |
|
|
|
|
| 77 |
|
| 78 |
self.conversation = ConversationManager()
|
| 79 |
|
| 80 |
+
# v3.3: Minimal tools - focus on writing code
|
| 81 |
+
# Removed run_command_in_sandbox, which was being misused for exploration (ls/find)
|
| 82 |
self.allowed_tools = [
|
| 83 |
+
"write_file", # PRIMARY TOOL - create or modify files
|
| 84 |
+
"get_code_chunk", # Read specific function/class if needed
|
| 85 |
+
"read_file", # Read entire file (only when rewriting)
|
| 86 |
+
"upload_to_sandbox", # Upload code for testing
|
| 87 |
+
"execute_in_sandbox" # Execute Python snippets for testing
|
| 88 |
+
# REMOVED: run_command_in_sandbox - was being misused for ls/find
|
| 89 |
+
# REMOVED: get_file_outline - Coder should use exploration context
|
|
|
|
| 90 |
]
|
| 91 |
|
| 92 |
def run(
|
codepilot/agents/orchestrator.py
CHANGED
|
@@ -9,7 +9,7 @@ The orchestrator is the "brain" that:
|
|
| 9 |
"""
|
| 10 |
|
| 11 |
# VERSION CHECK - If you see this, new code is running!
|
| 12 |
-
ORCHESTRATOR_VERSION = "3.3.
|
| 13 |
print(f"[ORCHESTRATOR] ========== LOADING VERSION {ORCHESTRATOR_VERSION} ==========")
|
| 14 |
|
| 15 |
from enum import Enum
|
|
|
|
| 9 |
"""
|
| 10 |
|
| 11 |
# VERSION CHECK - If you see this, new code is running!
|
| 12 |
+
ORCHESTRATOR_VERSION = "3.3.1-coder-strict"
|
| 13 |
print(f"[ORCHESTRATOR] ========== LOADING VERSION {ORCHESTRATOR_VERSION} ==========")
|
| 14 |
|
| 15 |
from enum import Enum
|