ayushm98 committed on
Commit
8746945
·
1 Parent(s): 02d5eaa

v3.3.1: Strict Coder - no sandbox exploration commands

Browse files
Dockerfile CHANGED
@@ -1,5 +1,5 @@
1
  # HuggingFace Spaces Dockerfile for CodePilot
2
- # BUILD_VERSION: 9 (v3.3.0 clarifying questions)
3
  FROM python:3.11-slim
4
 
5
  # Set working directory
 
1
  # HuggingFace Spaces Dockerfile for CodePilot
2
+ # BUILD_VERSION: 10 (v3.3.1 coder strict - no sandbox exploration)
3
  FROM python:3.11-slim
4
 
5
  # Set working directory
chainlit_app.py CHANGED
@@ -20,8 +20,8 @@ from concurrent.futures import ThreadPoolExecutor
20
  # ============================================================
21
  # STARTUP VERSION CHECK - Change this to detect if rebuild worked
22
  # ============================================================
23
- APP_VERSION = "3.3.0-clarify"
24
- BUILD_ID = "2024-12-19-v8"
25
  print("=" * 60)
26
  print(f"[STARTUP] CodePilot Chainlit App")
27
  print(f"[STARTUP] APP_VERSION: {APP_VERSION}")
 
20
  # ============================================================
21
  # STARTUP VERSION CHECK - Change this to detect if rebuild worked
22
  # ============================================================
23
+ APP_VERSION = "3.3.1-coder-strict"
24
+ BUILD_ID = "2024-12-19-v9"
25
  print("=" * 60)
26
  print(f"[STARTUP] CodePilot Chainlit App")
27
  print(f"[STARTUP] APP_VERSION: {APP_VERSION}")
codepilot/agents/coder_agent.py CHANGED
@@ -21,51 +21,34 @@ from typing import Dict, Any, Optional
21
  import json
22
 
23
 
24
- # Coder's specialized system prompt (v3.2 - no search, no list_files, uses exploration context)
25
- CODER_SYSTEM_PROMPT = """You are an expert software engineer and implementation specialist.
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- Your ONLY job is to write code that implements the given plan. You do NOT explore or search.
28
-
29
- === CRITICAL: USE THE PROVIDED CONTEXT ===
30
- The Explorer agent has ALREADY searched the codebase for you. All file paths and code patterns are in the EXPLORATION RESULTS below.
31
-
32
- DO NOT:
33
- - Navigate directories (no list_files)
34
- - Search for files (no searching)
35
- - Explore the codebase
36
-
37
- DO:
38
- - Use the exact file paths from exploration results
39
- - Start writing code immediately
40
- - Follow the plan step by step
41
 
42
- === WORKFLOW ===
43
- 1. Read the exploration results - they contain all file paths you need
44
- 2. If modifying existing code: use get_code_chunk to read the specific function
45
- 3. Write your changes with write_file using paths from exploration
46
- 4. Test in sandbox if needed
47
 
48
- === TOOLS ===
49
- - get_file_outline: See file structure (use if unsure about a file)
50
- - get_code_chunk: Read ONE specific function/class
51
- - read_file: Read entire file (only when rewriting whole file)
52
- - write_file: Create or modify files
53
- - upload_to_sandbox: Upload files for testing
54
- - run_command_in_sandbox: Run tests in sandbox
55
- - execute_in_sandbox: Execute Python snippets
56
-
57
- === SANDBOX WORKFLOW ===
58
- When testing in sandbox:
59
- 1. Upload with RELATIVE path: upload_to_sandbox(path="file.py", content=code)
60
- 2. Run with RELATIVE path: run_command_in_sandbox(command="python file.py")
61
- 3. The sandbox CANNOT access /tmp/codepilot_repos/ - use simple filenames!
62
-
63
- Your code should be:
64
- - Clean (follow existing code style)
65
- - Minimal (only change what's necessary)
66
- - Follow the plan exactly
67
-
68
- START CODING IMMEDIATELY - do not explore!
69
  """
70
 
71
 
@@ -94,17 +77,16 @@ class CoderAgent:
94
 
95
  self.conversation = ConversationManager()
96
 
97
- # v3.2: Removed list_files - Explorer provides all paths needed
98
- # Coder only needs: read, write, and sandbox tools
99
  self.allowed_tools = [
100
- "get_file_outline", # Get file structure without full code
101
- "get_code_chunk", # Extract specific function/class by name
102
- "read_file", # Full file contents (use sparingly)
103
- "write_file", # Create or modify files
104
- # "list_files" REMOVED - use exploration context instead
105
- "upload_to_sandbox", # Upload files for testing
106
- "run_command_in_sandbox", # Run tests in sandbox
107
- "execute_in_sandbox" # Execute Python snippets
108
  ]
109
 
110
  def run(
 
21
  import json
22
 
23
 
24
+ # Coder's specialized system prompt (v3.3 - no exploration, write code immediately)
25
+ CODER_SYSTEM_PROMPT = """You are an expert software engineer. Your ONLY job is to WRITE CODE.
26
+
27
+ === CRITICAL RULES ===
28
+ 1. START WRITING CODE IMMEDIATELY with write_file
29
+ 2. DO NOT run ls, find, pwd, or any exploration commands
30
+ 3. DO NOT try to access /tmp/codepilot_repos/ from sandbox - IT WILL FAIL
31
+ 4. Use file paths EXACTLY as shown in the exploration results
32
+ 5. The sandbox is ISOLATED - it cannot see the cloned repo
33
+
34
+ === YOUR WORKFLOW ===
35
+ 1. Look at the EXPLORATION RESULTS - they have all the file paths you need
36
+ 2. Look at the PLAN - it tells you exactly what to create
37
+ 3. Use write_file to create/modify files in the repo
38
+ 4. (Optional) Test your code by uploading to sandbox with relative paths
39
 
40
+ === TOOLS ===
41
+ - write_file: CREATE OR MODIFY FILES - use this immediately!
42
+ - get_code_chunk: Read a specific function if you need to see existing code
43
+ - upload_to_sandbox: Upload YOUR code for testing (use relative paths like "app.py")
44
+ - run_command_in_sandbox: Run YOUR uploaded code (like "python app.py")
 
 
 
 
 
 
 
 
 
45
 
46
+ === EXAMPLE ===
47
+ If plan says "create examples/health_check/app.py":
48
+ CORRECT: write_file(path="/tmp/codepilot_repos/flask_xxx/examples/health_check/app.py", content="...")
49
+ WRONG: run_command_in_sandbox("ls /tmp/codepilot_repos/...") <-- sandbox can't access this!
 
50
 
51
+ DO NOT EXPLORE. WRITE CODE NOW.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  """
53
 
54
 
 
77
 
78
  self.conversation = ConversationManager()
79
 
80
+ # v3.3: Minimal tools - focus on writing code
81
+ # Removed sandbox commands that were being misused for exploration
82
  self.allowed_tools = [
83
+ "write_file", # PRIMARY TOOL - create or modify files
84
+ "get_code_chunk", # Read specific function/class if needed
85
+ "read_file", # Read entire file (only when rewriting)
86
+ "upload_to_sandbox", # Upload code for testing
87
+ "execute_in_sandbox" # Execute Python snippets for testing
88
+ # REMOVED: run_command_in_sandbox - was being misused for ls/find
89
+ # REMOVED: get_file_outline - Coder should use exploration context
 
90
  ]
91
 
92
  def run(
codepilot/agents/orchestrator.py CHANGED
@@ -9,7 +9,7 @@ The orchestrator is the "brain" that:
9
  """
10
 
11
  # VERSION CHECK - If you see this, new code is running!
12
- ORCHESTRATOR_VERSION = "3.3.0-clarify"
13
  print(f"[ORCHESTRATOR] ========== LOADING VERSION {ORCHESTRATOR_VERSION} ==========")
14
 
15
  from enum import Enum
 
9
  """
10
 
11
  # VERSION CHECK - If you see this, new code is running!
12
+ ORCHESTRATOR_VERSION = "3.3.1-coder-strict"
13
  print(f"[ORCHESTRATOR] ========== LOADING VERSION {ORCHESTRATOR_VERSION} ==========")
14
 
15
  from enum import Enum