ChefAdorous committed on
Commit
7ccdb9f
·
1 Parent(s): 4013140

Add Phase 6 verification scripts and manual testing tools

Browse files
Files changed (2) hide show
  1. verify_enhanced_features.py +378 -0
  2. verify_phase6.ps1 +142 -0
verify_enhanced_features.py ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Manual verification script for Phase 6 testing.
3
+ Tests session management, file operations, and multi-language execution.
4
+ """
5
+
6
+ import requests
7
+ import time
8
+ import sys
9
+ from pathlib import Path
10
+
11
+ BASE_URL = "http://localhost:7860"
12
+
13
def print_test(name):
    """Print a three-line banner announcing the test called *name*.

    The banner is a blank line, a 60-character rule of '=', the line
    ``TEST: <name>``, and a closing rule.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("TEST: {}".format(name))
    print(rule)
17
+
18
def print_success(msg):
    """Print *msg* on one line, prefixed with a check-mark emoji.

    The prefix was previously the mis-decoded byte sequence of U+2705
    (rendered as Greek/Latin garbage); it is restored to the real emoji.
    """
    print(f"✅ {msg}")
20
+
21
def print_error(msg):
    """Print *msg* on one line, prefixed with a cross-mark emoji."""
    print("❌ {}".format(msg))
23
+
24
def print_info(msg):
    """Print *msg* on one line, prefixed with an information emoji."""
    print("ℹ️ {}".format(msg))
26
+
27
+
28
def test_1_session_lifecycle():
    """Create a session, fetch it back, and list all sessions.

    Returns:
        The new session's id, or None when the create request is not
        answered with HTTP 201. A failed fetch or listing is only reported
        on stdout; the id is still returned so later tests can use (and
        eventually destroy) the session.
    """
    print_test("Session Lifecycle")

    # Step 1: create the session.
    print_info("Creating session...")
    payload = {"metadata": {"test": "lifecycle"}, "timeout_minutes": 30}
    created = requests.post(f"{BASE_URL}/sessions", json=payload)
    if created.status_code != 201:
        print_error(f"Failed to create session: {created.status_code}")
        return None

    session_id = created.json()["session_id"]
    print_success(f"Session created: {session_id}")

    # Fixed pause intended to let the session's container become ready.
    time.sleep(3)

    # Step 2: fetch the session by id.
    print_info("Getting session details...")
    detail = requests.get(f"{BASE_URL}/sessions/{session_id}")
    if detail.status_code != 200:
        print_error(f"Failed to get session: {detail.status_code}")
    else:
        print_success("Session retrieved successfully")

    # Step 3: list every session known to the server.
    print_info("Listing all sessions...")
    listing = requests.get(f"{BASE_URL}/sessions")
    if listing.status_code != 200:
        print_error(f"Failed to list sessions: {listing.status_code}")
    else:
        print_success(f"Found {len(listing.json())} active session(s)")

    return session_id
68
+
69
+
70
def test_2_file_operations(session_id):
    """Upload a small script to the session, list its files, and download it.

    Args:
        session_id: Id of an existing session; a falsy value fails the test.

    Returns:
        True when the upload and listing answer HTTP 200 and the downloaded
        bytes equal the uploaded bytes; False otherwise.
    """
    print_test("File Operations")

    if not session_id:
        print_error("No session available")
        return False

    files_url = f"{BASE_URL}/sessions/{session_id}/files"

    # Upload: the same bytes are compared against the download further down.
    print_info("Uploading test file...")
    file_content = b"print('Hello from uploaded file!')\nprint(f'2 + 2 = {2+2}')"
    upload = requests.post(
        files_url,
        files={"file": ("test_script.py", file_content, "text/x-python")},
    )
    if upload.status_code != 200:
        print_error(f"Failed to upload file: {upload.status_code} - {upload.text}")
        return False

    uploaded = upload.json()
    print_success(f"File uploaded: {uploaded['filename']} ({uploaded['size']} bytes)")

    # Listing: print one line per workspace entry.
    print_info("Listing files...")
    listing = requests.get(files_url)
    if listing.status_code != 200:
        print_error(f"Failed to list files: {listing.status_code}")
        return False

    entries = listing.json()
    print_success(f"Found {len(entries)} file(s) in workspace")
    for entry in entries:
        print(f" - {entry['filename']} ({entry['size']} bytes)")

    # Download: must round-trip byte-for-byte.
    print_info("Downloading file...")
    download = requests.get(f"{files_url}/test_script.py")
    if download.status_code != 200:
        print_error(f"Failed to download file: {download.status_code}")
        return False
    if download.content != file_content:
        print_error("Downloaded file content doesn't match")
        return False

    print_success("File downloaded successfully and content matches")
    return True
119
+
120
+
121
def test_3_session_execution(session_id):
    """Run a short Python snippet inside the session and report the result.

    Args:
        session_id: Id of an existing session; a falsy value fails the test.

    Returns:
        True when the request answers HTTP 200 and the reported exit code
        is 0; False otherwise.
    """
    print_test("Session-Based Code Execution")

    if not session_id:
        print_error("No session available")
        return False

    # Execute Python code
    print_info("Executing Python code...")
    resp = requests.post(
        f"{BASE_URL}/sessions/{session_id}/execute",
        json={
            "code": "import sys\nprint(f'Python version: {sys.version}')\nprint('Execution successful!')",
            "language": "python",
            "working_dir": "/workspace",
        },
    )

    if resp.status_code != 200:
        print_error(f"Failed to execute code: {resp.status_code} - {resp.text}")
        return False

    result = resp.json()
    exit_code = result['exit_code']

    # HTTP 200 only shows the request was handled; the snippet itself may
    # still have failed. Assumes the API reports 0 for a clean exit.
    if exit_code != 0:
        print_error(f"Code exited with non-zero exit code: {exit_code}")
        print(f" Output: {result['stdout'].strip()}")
        return False

    print_success("Code executed successfully")
    print(f" Output: {result['stdout'].strip()}")
    print(f" Exit code: {exit_code}")
    print(f" Execution time: {result['execution_time']}s")

    return True
151
+
152
+
153
def test_4_file_execution(session_id):
    """Execute ``/workspace/test_script.py`` inside the session.

    The script is expected to have been uploaded to the session beforehand.

    Args:
        session_id: Id of an existing session; a falsy value fails the test.

    Returns:
        True when the request answers HTTP 200 and the reported exit code
        is 0; False otherwise.
    """
    print_test("Execute Uploaded File")

    if not session_id:
        print_error("No session available")
        return False

    # Execute the previously uploaded file
    print_info("Executing uploaded Python file...")
    resp = requests.post(
        f"{BASE_URL}/sessions/{session_id}/execute-file",
        json={
            "filepath": "/workspace/test_script.py",
            "language": "python",
            "args": [],
        },
    )

    if resp.status_code != 200:
        print_error(f"Failed to execute file: {resp.status_code} - {resp.text}")
        return False

    result = resp.json()
    exit_code = result['exit_code']

    # HTTP 200 only shows the request was handled; the script itself may
    # still have failed. Assumes the API reports 0 for a clean exit.
    if exit_code != 0:
        print_error(f"File exited with non-zero exit code: {exit_code}")
        print(f" Output:\n{result['stdout']}")
        return False

    print_success("File executed successfully")
    print(f" Output:\n{result['stdout']}")
    print(f" Exit code: {exit_code}")

    return True
182
+
183
+
184
def test_5_persistent_state(session_id):
    """Check that a file written by one execution is readable by the next.

    Args:
        session_id: Id of an existing session; a falsy value fails the test.

    Returns:
        True when both executions answer HTTP 200 and the second one's
        stdout contains the text written by the first; False otherwise.
    """
    print_test("Persistent State Verification")

    if not session_id:
        print_error("No session available")
        return False

    execute_url = f"{BASE_URL}/sessions/{session_id}/execute"

    # Execution 1 writes the marker file.
    print_info("Creating data file in first execution...")
    write_resp = requests.post(
        execute_url,
        json={
            "code": "with open('/workspace/persistent.txt', 'w') as f: f.write('Data persists!')",
            "language": "python",
        },
    )
    if write_resp.status_code != 200:
        print_error(f"Failed first execution: {write_resp.status_code}")
        return False

    print_success("File created in first execution")

    # Execution 2 reads the marker file back and prints it.
    print_info("Reading file in second execution...")
    read_resp = requests.post(
        execute_url,
        json={
            "code": "with open('/workspace/persistent.txt', 'r') as f: print(f'Read: {f.read()}')",
            "language": "python",
        },
    )
    if read_resp.status_code != 200:
        print_error(f"Failed second execution: {read_resp.status_code}")
        return False

    stdout = read_resp.json()['stdout']
    if "Data persists!" not in stdout:
        print_error("File did not persist")
        return False

    print_success("File persisted across executions!")
    print(f" Output: {stdout.strip()}")
    return True
231
+
232
+
233
def test_6_concurrent_sessions():
    """Create three sessions side by side, list them, then destroy them.

    Returns:
        True only when all three sessions were created (HTTP 201) and the
        session listing answered HTTP 200; False otherwise. Previously the
        function returned True regardless of those outcomes.
    """
    print_test("Concurrent Sessions")

    expected = 3
    session_ids = []
    listed_ok = False

    try:
        # Create 3 sessions
        for i in range(expected):
            print_info(f"Creating session {i+1}/3...")
            resp = requests.post(f"{BASE_URL}/sessions", json={
                "metadata": {"test": "concurrent", "index": i}
            })

            if resp.status_code == 201:
                session_id = resp.json()["session_id"]
                session_ids.append(session_id)
                print_success(f"Session {i+1} created: {session_id[:8]}...")
            else:
                print_error(f"Failed to create session {i+1}")

        # List all sessions
        resp = requests.get(f"{BASE_URL}/sessions")
        if resp.status_code == 200:
            listed_ok = True
            print_success(f"Total active sessions: {len(resp.json())}")
        else:
            print_error(f"Failed to list sessions: {resp.status_code}")
    finally:
        # Cleanup runs even when a request above raised, so sessions that
        # were already created are not left behind on the server.
        print_info("Cleaning up concurrent sessions...")
        for sid in session_ids:
            try:
                requests.delete(f"{BASE_URL}/sessions/{sid}")
            except requests.exceptions.RequestException as exc:
                # Keep going so one failed delete does not strand the rest.
                print_error(f"Failed to delete session {sid[:8]}: {exc}")

    if len(session_ids) != expected or not listed_ok:
        print_error("Concurrent session test failed")
        return False

    print_success("Concurrent session test completed")
    return True
266
+
267
+
268
def test_7_cleanup(session_id):
    """Destroy the session and confirm the server no longer knows it.

    Args:
        session_id: Id of the session to destroy. A falsy value means there
            is nothing to clean up, which counts as success.

    Returns:
        True when the delete answers HTTP 200 and a follow-up fetch answers
        HTTP 404 (or when there was no session); False otherwise.
    """
    print_test("Session Cleanup")

    if not session_id:
        print_info("No session to cleanup")
        return True

    session_url = f"{BASE_URL}/sessions/{session_id}"

    print_info(f"Destroying session {session_id[:8]}...")
    deleted = requests.delete(session_url)
    if deleted.status_code != 200:
        print_error(f"Failed to destroy session: {deleted.status_code}")
        return False

    print_success("Session destroyed successfully")

    # A destroyed session must no longer be retrievable.
    lookup = requests.get(session_url)
    if lookup.status_code != 404:
        print_error("Session still exists after destruction")
        return False

    print_success("Session verified as destroyed")
    return True
293
+
294
+
295
def test_8_stateless_execution():
    """Run a snippet through the session-less ``/execute`` endpoint.

    Returns:
        True when the request answers HTTP 200 and the reported exit code
        is 0; False otherwise.
    """
    print_test("Stateless Execution (Backward Compatibility)")

    print_info("Executing code without session...")
    resp = requests.post(f"{BASE_URL}/execute", json={
        "code": "print('Stateless execution works!')\nprint(f'Result: {10 * 10}')",
        "language": "python"
    })

    if resp.status_code != 200:
        print_error(f"Stateless execution failed: {resp.status_code}")
        return False

    result = resp.json()
    exit_code = result['exit_code']

    # HTTP 200 only shows the request was handled; the snippet itself may
    # still have failed. Assumes the API reports 0 for a clean exit.
    if exit_code != 0:
        print_error(f"Stateless execution exited with code: {exit_code}")
        print(f" Output: {result['stdout'].strip()}")
        return False

    print_success("Stateless execution successful")
    print(f" Output: {result['stdout'].strip()}")
    print(f" Exit code: {exit_code}")
    return True
314
+
315
+
316
def main():
    """Run all verification tests and exit with a status code.

    Exits with 0 only when every test ran and passed. Exits with 1 when the
    API server is unreachable or unhealthy, when any test fails, or when an
    exception interrupts the run (previously an interrupted run could exit
    0 because only the tests recorded so far were counted).
    """
    print("\n" + "="*60)
    print("ENHANCED SANDBOX - MANUAL VERIFICATION")
    print("="*60)

    # Check if API is running
    try:
        resp = requests.get(f"{BASE_URL}/health", timeout=2)
    except requests.exceptions.RequestException:
        print_error(f"Cannot connect to API server at {BASE_URL}")
        print_info("Please run 'python app.py' first")
        sys.exit(1)

    if resp.status_code != 200:
        print_error("API server is not healthy")
        sys.exit(1)
    print_success("API server is running")

    results = {}
    session_id = None
    aborted = False

    # Run all tests
    try:
        session_id = test_1_session_lifecycle()
        results['session_lifecycle'] = session_id is not None

        results['file_operations'] = test_2_file_operations(session_id)
        results['session_execution'] = test_3_session_execution(session_id)
        results['file_execution'] = test_4_file_execution(session_id)
        results['persistent_state'] = test_5_persistent_state(session_id)
        results['concurrent_sessions'] = test_6_concurrent_sessions()
        results['stateless_execution'] = test_8_stateless_execution()
        results['cleanup'] = test_7_cleanup(session_id)

    except Exception as e:
        # Top-level boundary: report the failure, then still try to destroy
        # the session so an interrupted run does not leave it on the server.
        aborted = True
        print_error(f"Test failed with exception: {e}")
        import traceback
        traceback.print_exc()

        if session_id and 'cleanup' not in results:
            try:
                results['cleanup'] = test_7_cleanup(session_id)
            except Exception as cleanup_error:
                results['cleanup'] = False
                print_error(f"Cleanup failed with exception: {cleanup_error}")

    # Summary
    print("\n" + "="*60)
    print("VERIFICATION SUMMARY")
    print("="*60)

    passed = sum(1 for v in results.values() if v)
    total = len(results)

    for test, result in results.items():
        status = "✅ PASS" if result else "❌ FAIL"
        print(f"{status} - {test.replace('_', ' ').title()}")

    print(f"\nResults: {passed}/{total} tests passed")

    if aborted:
        # Tests after the exception never ran, so the tally above is partial.
        print_error("Test run was interrupted by an exception; not all tests ran")
        sys.exit(1)

    if passed == total:
        print_success("All verification tests passed!")
        sys.exit(0)
    else:
        print_error(f"{total - passed} test(s) failed")
        sys.exit(1)


if __name__ == "__main__":
    main()
verify_phase6.ps1 ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Phase 6: Integration & Testing Verification Script
# Run this script after Docker is started to verify all enhanced sandbox features.
#
# Steps: check Docker, run executor tests, build and verify the devenv image,
# start the API server in the background, run the pytest suites and the manual
# verification script against it, then stop the server.
# Test failures are reported as warnings; only a missing Docker, a failed image
# build, or an API server that does not start abort the script with exit code 1.

Write-Host "========================================" -ForegroundColor Cyan
Write-Host "Enhanced Sandbox - Phase 6 Verification" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
Write-Host ""

# Check Docker
Write-Host "1. Checking Docker..." -ForegroundColor Yellow
try {
    docker ps *> $null
    if ($LASTEXITCODE -eq 0) {
        Write-Host " ✅ Docker is running" -ForegroundColor Green
    }
    else {
        Write-Host " ❌ Docker is not running. Please start Docker Desktop." -ForegroundColor Red
        exit 1
    }
}
catch {
    Write-Host " ❌ Docker is not available." -ForegroundColor Red
    exit 1
}
Write-Host ""

# Run unit tests
Write-Host "2. Running Unit Tests..." -ForegroundColor Yellow
pytest tests/test_executor.py -v
if ($LASTEXITCODE -eq 0) {
    Write-Host " ✅ Executor tests passed" -ForegroundColor Green
}
else {
    Write-Host " ⚠️ Some executor tests failed" -ForegroundColor Yellow
}
Write-Host ""

# Build devenv image
Write-Host "3. Building Development Environment Image..." -ForegroundColor Yellow
Write-Host " (This may take 10-15 minutes on first build)" -ForegroundColor Gray
docker build -f sandbox/images/devenv.Dockerfile -t sandbox-devenv:latest sandbox/images/
if ($LASTEXITCODE -eq 0) {
    Write-Host " ✅ Development environment image built successfully" -ForegroundColor Green
}
else {
    Write-Host " ❌ Failed to build image" -ForegroundColor Red
    exit 1
}
Write-Host ""

# Verify environment
Write-Host "4. Verifying Development Environment..." -ForegroundColor Yellow
docker run --rm sandbox-devenv:latest /opt/environment_check.sh
if ($LASTEXITCODE -eq 0) {
    Write-Host " ✅ Environment verified" -ForegroundColor Green
}
else {
    Write-Host " ⚠️ Some tools missing in environment" -ForegroundColor Yellow
}
Write-Host ""

# Start API server in background
Write-Host "5. Starting API Server..." -ForegroundColor Yellow
$apiProcess = Start-Process python -ArgumentList "app.py" -PassThru -NoNewWindow

# Everything that needs the server runs inside try/finally so the background
# process is stopped even when a step throws or the script exits early.
try {
    Start-Sleep -Seconds 5

    # Check if server started
    try {
        $response = Invoke-WebRequest -Uri "http://localhost:7860/health" -UseBasicParsing
        if ($response.StatusCode -eq 200) {
            Write-Host " ✅ API server started successfully" -ForegroundColor Green
        }
        else {
            Write-Host " ⚠️ API server responded with status $($response.StatusCode)" -ForegroundColor Yellow
        }
    }
    catch {
        Write-Host " ❌ API server failed to start" -ForegroundColor Red
        # The finally block below stops the server process before the script exits.
        exit 1
    }
    Write-Host ""

    # Run session management tests
    Write-Host "6. Testing Session Management..." -ForegroundColor Yellow
    pytest tests/test_session_manager.py -v
    if ($LASTEXITCODE -eq 0) {
        Write-Host " ✅ Session management tests passed" -ForegroundColor Green
    }
    else {
        Write-Host " ⚠️ Some session tests failed" -ForegroundColor Yellow
    }
    Write-Host ""

    # Run file operation tests
    Write-Host "7. Testing File Operations..." -ForegroundColor Yellow
    pytest tests/test_file_operations.py -v
    if ($LASTEXITCODE -eq 0) {
        Write-Host " ✅ File operation tests passed" -ForegroundColor Green
    }
    else {
        Write-Host " ⚠️ Some file operation tests failed" -ForegroundColor Yellow
    }
    Write-Host ""

    # Run API integration tests
    Write-Host "8. Testing API Integration..." -ForegroundColor Yellow
    pytest tests/test_api.py -v
    if ($LASTEXITCODE -eq 0) {
        Write-Host " ✅ API integration tests passed" -ForegroundColor Green
    }
    else {
        Write-Host " ⚠️ Some API tests failed" -ForegroundColor Yellow
    }
    Write-Host ""

    # Manual verification tests
    Write-Host "9. Running Manual Verification Tests..." -ForegroundColor Yellow
    python verify_enhanced_features.py
    if ($LASTEXITCODE -eq 0) {
        Write-Host " ✅ Manual verification passed" -ForegroundColor Green
    }
    else {
        Write-Host " ⚠️ Some manual tests failed" -ForegroundColor Yellow
    }
    Write-Host ""
}
finally {
    # Cleanup
    Write-Host "10. Cleanup..." -ForegroundColor Yellow
    if ($apiProcess -and -not $apiProcess.HasExited) {
        Stop-Process -Id $apiProcess.Id -Force -ErrorAction SilentlyContinue
    }
    Write-Host " ✅ API server stopped" -ForegroundColor Green
    Write-Host ""
}

Write-Host "========================================" -ForegroundColor Cyan
Write-Host "Verification Complete!" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
Write-Host ""
Write-Host "Summary:" -ForegroundColor White
Write-Host " - Docker: Running" -ForegroundColor Green
Write-Host " - DevEnv Image: Built" -ForegroundColor Green
Write-Host " - Unit Tests: Check output above" -ForegroundColor Yellow
Write-Host " - Integration Tests: Check output above" -ForegroundColor Yellow
Write-Host " - API Server: Tested" -ForegroundColor Green
Write-Host ""
Write-Host "Next steps:" -ForegroundColor White
Write-Host " 1. Review test results above" -ForegroundColor Gray
Write-Host " 2. Run 'python app.py' to start the server" -ForegroundColor Gray
Write-Host " 3. Test with example requests from README.md" -ForegroundColor Gray