File size: 7,593 Bytes
ec4aa90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
"""
Python test runner for Modal sandbox execution.
Handles pytest execution with proper path setup and result parsing.
"""

import logging
import re
import subprocess
import sys
import tempfile
import time
from pathlib import Path
from typing import Dict, List, Tuple

logger = logging.getLogger(__name__)


def _validate_python_tests(tests: str) -> tuple:
    """
    Validate Python test code before execution.
    
    Returns:
        (is_valid, error_message)
    """
    # Check for basic pytest structure
    if "def test_" not in tests and "class Test" not in tests:
        return False, "No test functions found (must start with 'test_' or be in 'Test' class)"
    
    # Check for imports
    if "import" not in tests:
        return False, "No import statements found"
    
    # Check for basic syntax issues
    try:
        compile(tests, '<string>', 'exec')
    except SyntaxError as e:
        return False, f"Syntax error in test code: {str(e)}"
    
    return True, ""


def _failure_result(error: str, execution_time=None) -> Dict:
    """Build the standard zero-count error dict returned on any failure path."""
    result = {
        "success": False,
        "error": error,
        "tests_run": 0,
        "tests_passed": 0,
        "tests_failed": 0,
        "execution_mode": "modal",
        "language": "python"
    }
    # Only the timeout path reports a duration; other failures omit the key,
    # matching the original result shapes.
    if execution_time is not None:
        result["execution_time"] = execution_time
    return result


def _parse_test_counts(stdout: str) -> Dict:
    """
    Parse pass/fail/error/skip counts from pytest terminal output.

    Prefers the summary line ("=== 3 passed, 1 failed in 0.5s ===") as it is
    the most reliable source; falls back to counting the per-test result
    markers emitted by ``-v`` when no summary line is found.
    """
    counts = {
        "tests_run": 0,
        "tests_passed": 0,
        "tests_failed": 0,
        "tests_errors": 0,
        "tests_skipped": 0,
    }

    # Summary line, e.g. "==== 3 passed, 1 failed, 1 skipped in 0.52s ===="
    summary_match = re.search(r'=+\s*(.*?)\s+in\s+[\d.]+s\s*=+', stdout)
    if summary_match:
        summary = summary_match.group(1)
        for label, key in (
            ("passed", "tests_passed"),
            ("failed", "tests_failed"),
            ("error", "tests_errors"),
            ("skipped", "tests_skipped"),
        ):
            m = re.search(r'(\d+)\s+' + label, summary)
            if m:
                counts[key] = int(m.group(1))
        counts["tests_run"] = (counts["tests_passed"] + counts["tests_failed"]
                               + counts["tests_errors"] + counts["tests_skipped"])

    if counts["tests_run"] == 0:
        # Fallback: count individual result markers.  Word boundaries are
        # used instead of end-of-line anchors because modern pytest -v
        # appends a progress percentage ("PASSED [ 50%]"), so a literal
        # " PASSED\n" match would always count zero.
        counts["tests_passed"] = len(re.findall(r' PASSED\b', stdout))
        counts["tests_failed"] = len(re.findall(r' FAILED\b', stdout))
        counts["tests_errors"] = len(re.findall(r' ERROR\b', stdout))
        counts["tests_skipped"] = len(re.findall(r' SKIPPED\b', stdout))
        # Include skipped for consistency with the summary-line path above.
        counts["tests_run"] = (counts["tests_passed"] + counts["tests_failed"]
                               + counts["tests_errors"] + counts["tests_skipped"])
    return counts


def _parse_coverage(stdout: str) -> float:
    """
    Extract the TOTAL coverage percentage from pytest-cov terminal output.

    Returns 0.0 when no coverage table is present.
    """
    # term-missing TOTAL row has three numeric columns (Stmts Miss Cover):
    # "TOTAL    100   20    80%".  Capture [\d.]+ so decimal percentages
    # ("79.5%") are read in full rather than truncated to the last digit.
    cov_match = re.search(r'TOTAL\s+\d+\s+\d+\s+([\d.]+)%', stdout)
    if cov_match:
        return float(cov_match.group(1))
    # Looser fallback for other report layouts (e.g. extra Branch columns).
    cov_match = re.search(r'TOTAL.*?\s([\d.]+)%', stdout)
    if cov_match:
        return float(cov_match.group(1))
    return 0.0


def run_python_tests(code: str, tests: str, requirements: List[str], module_name: str) -> Dict:
    """
    Run Python tests using pytest in Modal container.

    Writes ``code`` and ``tests`` side by side in a temporary directory,
    installs any extra pip requirements (best effort), runs pytest with
    coverage, and parses the textual output into structured counts.

    Args:
        code: Python source code under test.
        tests: Pytest test code.
        requirements: List of pip packages to install before running.
        module_name: Name of the module (used for file naming and --cov).

    Returns:
        Dictionary with test results: success flag, per-status counts,
        execution time, coverage percentage, truncated stdout/stderr,
        exit code, and execution metadata.
    """
    # Fail fast on unparseable or test-less input before touching the FS.
    is_valid, error_msg = _validate_python_tests(tests)
    if not is_valid:
        logger.error(f"Test validation failed: {error_msg}")
        return _failure_result(f"Test validation failed: {error_msg}")

    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir_path = Path(tmpdir)

        # Code and tests share a directory so `import <module_name>` resolves.
        code_file = tmpdir_path / f"{module_name}.py"
        test_file = tmpdir_path / f"test_{module_name}.py"

        # Prepend a sys.path shim unless the tests already deal with sys.
        # NOTE(review): tests that `import sys` for unrelated reasons also
        # skip the shim -- presumably intentional to avoid double-insertion;
        # confirm against callers.
        if "sys.path" not in tests and "import sys" not in tests:
            path_setup = """import sys
import os
# Ensure module can be imported
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

"""
            tests = path_setup + tests

        code_file.write_text(code, encoding='utf-8')
        test_file.write_text(tests, encoding='utf-8')

        # Install additional requirements (best effort -- a partial install
        # may still let the tests run, so failures only log warnings).
        if requirements:
            try:
                logger.info(f"Installing requirements: {requirements}")
                # Use this interpreter's pip so packages land in the
                # environment that will actually run pytest; a bare "pip"
                # on PATH may belong to a different interpreter.
                install_result = subprocess.run(
                    [sys.executable, "-m", "pip", "install", "-q", "--no-cache-dir"] + requirements,
                    capture_output=True,
                    text=True,
                    timeout=120
                )
                if install_result.returncode != 0:
                    logger.warning(f"Some requirements failed to install: {install_result.stderr}")
            except Exception as e:
                logger.warning(f"Failed to install requirements: {e}")

        start_time = time.time()

        try:
            # Run pytest with coverage and verbose output.
            # NOTE(review): --timeout requires the pytest-timeout plugin;
            # assumed present in the container image -- confirm.
            result = subprocess.run(
                [
                    "pytest",
                    str(test_file),
                    "-v",
                    "--tb=short",
                    "--timeout=30",
                    "-p", "no:warnings",
                    "--cov=" + module_name,
                    "--cov-report=term-missing"
                ],
                cwd=tmpdir,
                capture_output=True,
                text=True,
                timeout=120
            )
        except subprocess.TimeoutExpired:
            return _failure_result("Test execution timeout (>2 minutes)", execution_time=120.0)
        except FileNotFoundError:
            return _failure_result("pytest not found in container")

        execution_time = time.time() - start_time
        stdout = result.stdout[:10000]  # Truncate to prevent memory issues
        stderr = result.stderr[:10000]

        counts = _parse_test_counts(stdout)
        coverage_percent = _parse_coverage(stdout)

        return {
            "success": result.returncode == 0,
            "tests_run": counts["tests_run"],
            "tests_passed": counts["tests_passed"],
            "tests_failed": counts["tests_failed"],
            "tests_errors": counts["tests_errors"],
            "tests_skipped": counts["tests_skipped"],
            "execution_time": round(execution_time, 2),
            "coverage_percent": coverage_percent,
            "stdout": stdout,
            "stderr": stderr,
            "exit_code": result.returncode,
            "execution_mode": "modal",
            "language": "python"
        }