NullVoider committed on
Commit
3c9ed87
·
verified ·
1 Parent(s): f68b55d

Delete scripts

Browse files
scripts/entrypoint.sh DELETED
@@ -1,94 +0,0 @@
1
#!/bin/bash
# Container entrypoint: creates a throw-away disk overlay, starts the noVNC
# web viewer and a small helper that presses Enter in the boot menu, then
# replaces this shell with QEMU.
set -e

# --- Configuration ---
OPENCORE_ISO="/vm/OpenCore.iso"
SOURCE_IMG="/vm/source.qcow2"
EPISODE_DISK="/run/episode.qcow2"

# UEFI Firmware
OVMF_CODE="/vm/OVMF_CODE.fd"
OVMF_VARS_ORIG="/vm/OVMF_VARS.fd"
OVMF_VARS_RUN="/run/OVMF_VARS.fd"

# Apple OSK Key
OSK="ourhardworkbythesewordsguardedpleasedontsteal(c)AppleComputerInc"

echo "--- macOS 15 Standard Boot ---"

# 1. Create Ephemeral Overlay (Reset State)
# The overlay is backed by the source image, so guest writes land in
# $EPISODE_DISK and the source image is left untouched.
echo "Creating ephemeral overlay..."
qemu-img create -f qcow2 -b "$SOURCE_IMG" -F qcow2 "$EPISODE_DISK"

# 2. Prepare UEFI Variables
# Copy the pristine variable store only when no runtime copy exists yet.
if [ ! -f "$OVMF_VARS_RUN" ]; then
    cp "$OVMF_VARS_ORIG" "$OVMF_VARS_RUN"
fi

# 3. Start Web Viewer
# Serves noVNC on port 8006 and proxies it to the VNC server on localhost:5900.
echo "Starting web viewer..."
websockify -D --web=/usr/share/novnc/ 8006 localhost:5900 &

# 4. Automated Boot Menu Interaction
# The delimiter is quoted so the shell writes the Python source verbatim
# (no parameter or command substitution inside the heredoc).
cat <<'EOF' > /run/boot_robot.py
import socket
import time

MONITOR_ADDR = ("127.0.0.1", 4445)
KEY_PRESSES = 8        # number of Enter presses sent to the boot menu
PRESS_INTERVAL = 0.5   # seconds between presses (about 4 seconds in total)

print("ROBOT: Looking for QEMU Monitor on port 4445...")
while True:
    try:
        # The context manager closes the socket on every path, including
        # failed connection attempts while QEMU is still starting.
        with socket.socket() as s:
            s.connect(MONITOR_ADDR)
            print("ROBOT: Connected! Waiting 2 seconds for BIOS splash to clear...")
            time.sleep(2)

            # Press Enter repeatedly so the default boot entry gets selected.
            for i in range(KEY_PRESSES):
                print(f"ROBOT: Sending Boot Command {i+1}/{KEY_PRESSES}...")
                try:
                    s.send(b"sendkey ret\n")
                except OSError:
                    # Best effort: a dropped monitor connection must not
                    # abort the remaining attempts.
                    pass
                time.sleep(PRESS_INTERVAL)

            print("ROBOT: Boot sequence finished. Closing.")
        break
    except ConnectionRefusedError:
        # QEMU not ready yet, keep waiting
        time.sleep(1)
    except Exception as e:
        print(f"ROBOT: Error {e}")
        time.sleep(1)
EOF

python3 /run/boot_robot.py &

echo "Booting macOS..."

# NOTE(review): VNC, QMP and the human monitor all listen on 0.0.0.0 without
# authentication; the boot robot itself only needs the monitor on 127.0.0.1.
# Confirm that these ports are not reachable from untrusted networks.
exec qemu-system-x86_64 \
    -enable-kvm \
    -m 24G \
    -smp 4,cores=4,threads=1,sockets=1 \
    -machine q35,accel=kvm \
    -no-reboot \
    -boot menu=on,splash-time=0 \
    -cpu Skylake-Client-noTSX-IBRS,vendor=GenuineIntel,+invtsc,+pcid,+ssse3,+sse4.2,+popcnt,+avx,+avx2,+aes,+fma,+bmi1,+bmi2,+xsave,+xsaveopt,check \
    -smbios type=2 \
    -device isa-applesmc,osk="$OSK" \
    -drive if=pflash,format=raw,readonly=on,file="$OVMF_CODE" \
    -drive if=pflash,format=raw,file="$OVMF_VARS_RUN" \
    -vga vmware \
    -device ide-hd,bus=ide.0,drive=OpenCoreDisk,bootindex=0 \
    -drive id=OpenCoreDisk,if=none,format=raw,file="$OPENCORE_ISO" \
    -device virtio-blk-pci,drive=SystemDisk,bootindex=1,serial=bootdisk \
    -drive id=SystemDisk,if=none,format=qcow2,file="$EPISODE_DISK",cache=writeback,aio=threads \
    -netdev user,id=net0,hostfwd=tcp::2222-:22,hostfwd=tcp::4000-:4000,hostfwd=tcp::9090-:9090 \
    -device vmxnet3,netdev=net0,id=net0,mac=52:54:00:c9:18:27 \
    -audiodev id=snd0,driver=none \
    -device ich9-intel-hda \
    -device hda-duplex,audiodev=snd0 \
    -vnc 0.0.0.0:0 \
    -device qemu-xhci,id=usb \
    -device usb-kbd,bus=usb.0 \
    -device usb-tablet,bus=usb.0 \
    -qmp tcp:0.0.0.0:4444,server,nowait \
    -monitor tcp:0.0.0.0:4445,server,nowait
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/mac15.yaml DELETED
@@ -1,31 +0,0 @@
1
# Docker Compose definition for a macOS 15 guest based on the dockurr/macos image.
services:
  macos:
    image: dockurr/macos
    container_name: macos
    environment:
      VERSION: "15"
      DHCP: "N"
      DISK_SIZE: "4T"
      RAM_SIZE: "8G"
      CPU_CORES: "4"
      # Extra QEMU arguments: absolute-pointer tablet and a QMP socket on 4444.
      ARGUMENTS: "-device usb-tablet -qmp tcp:0.0.0.0:4444,server,nowait"
    devices:
      - /dev/kvm
      - /dev/net/tun
      - /dev/vhost-net
    device_cgroup_rules:
      - 'c *:* rwm'
    cap_add:
      - NET_ADMIN
    # Port mappings are quoted: an unquoted HOST:CONTAINER pair whose container
    # port is below 60 (such as 2222:22) can be read by YAML as a base-60 number.
    ports:
      - "8006:8006"
      - "5900:5900/tcp"
      - "5900:5900/udp"
      - "4444:4444"
      - "4000:4000"
      - "9090:9090"
      - "2222:22"
    volumes:
      - ./macos:/storage
    restart: "no"
    stop_grace_period: 2m
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/setup-mac15.ps1 DELETED
@@ -1,84 +0,0 @@
1
# =============================================================================
# setup-mac15.ps1
# One-command clone + automatic download of mac15 files into mac15-image/
# =============================================================================

$ErrorActionPreference = 'Stop'  # Stop on the first failing PowerShell command

# Native executables (gh, uv, hf) signal failure through their exit code, which
# $ErrorActionPreference does not act on by default, so each critical native
# call below is followed by an explicit check.
function Assert-NativeSuccess {
    param([Parameter(Mandatory = $true)][string]$Step)
    if ($LASTEXITCODE -ne 0) {
        throw "❌ $Step failed with exit code $LASTEXITCODE."
    }
}

$GithubRepo = "https://github.com/nullvoider07/mac15-base"
$RepoName = Split-Path $GithubRepo -Leaf

Write-Host "🚀 Cloning GitHub repo: $GithubRepo"

# ----------------------------- Clone with GitHub CLI -----------------------
Write-Host "🔧 Checking GitHub CLI..."

if (-not (Get-Command gh -ErrorAction SilentlyContinue)) {
    Write-Host " GitHub CLI not found. Installing via winget..."

    # Install gh silently, accepting agreements automatically
    winget install --id GitHub.cli --exact --accept-source-agreements --accept-package-agreements

    # Refresh the PATH variables in the current session so 'gh' is recognized immediately
    $env:Path = [System.Environment]::GetEnvironmentVariable("Path", "Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path", "User")

    # Verify installation succeeded
    if (-not (Get-Command gh -ErrorAction SilentlyContinue)) {
        throw "❌ Failed to install GitHub CLI automatically. Please install it manually from https://cli.github.com"
    }
    Write-Host " ✅ GitHub CLI installed successfully."
} else {
    Write-Host " ✅ GitHub CLI already available."
}

# Skip the clone when a previous run already created the checkout, so the
# script can be re-run to resume an interrupted download.
if (Test-Path (Join-Path $RepoName ".git")) {
    Write-Host " ✅ $RepoName already cloned — skipping clone."
} else {
    gh repo clone $GithubRepo $RepoName -- --depth=1
    Assert-NativeSuccess "gh repo clone"
}

# ----------------------------- Create folder -------------------------------
Write-Host "📁 Creating folder: mac15-image/"
$ImagePath = Join-Path $RepoName "mac15-image"
New-Item -ItemType Directory -Force -Path $ImagePath | Out-Null

# ----------------------------- Ensure uv is available ---------------------
Write-Host "🔧 Checking uv..."

if (Get-Command uv -ErrorAction SilentlyContinue) {
    Write-Host " ✅ uv already available — skipping installation."
} else {
    Write-Host " Installing uv package manager..."
    Invoke-RestMethod -Uri https://astral.sh/uv/install.ps1 | Invoke-Expression
    $env:Path += ";$HOME\.cargo\bin;$HOME\.local\bin"
}

# ----------------------------- Ephemeral venv for hf -----------------------
Write-Host "🔧 Creating ephemeral venv for hf..."

$TempDir = [System.IO.Path]::GetTempPath()
$HfVenv = Join-Path $TempDir "hf-venv-$(New-Guid)"

try {
    # uv venv picks the correct current Python automatically
    uv venv $HfVenv --quiet
    Assert-NativeSuccess "uv venv"

    # Install huggingface_hub (lighter than transformers) directly into the venv
    uv pip install --python "$HfVenv\Scripts\python.exe" huggingface_hub --quiet
    Assert-NativeSuccess "uv pip install huggingface_hub"

    Write-Host " ✅ hf installed in ephemeral venv."

    # ----------------------------- Download Files ------------------------------
    Write-Host "📥 Downloading base.dmg and mac15.qcow2 (large files) into $RepoName\mac15-image\ ..."
    Write-Host " (This may take a while — progress bar will show)"

    # Call the 'hf' command directly by its venv path for both files
    & "$HfVenv\Scripts\hf.exe" download NullVoider/mac15-base base.dmg --local-dir $ImagePath
    Assert-NativeSuccess "hf download base.dmg"
    & "$HfVenv\Scripts\hf.exe" download NullVoider/mac15-base mac15.qcow2 --local-dir $ImagePath
    Assert-NativeSuccess "hf download mac15.qcow2"
}
finally {
    # ----------------------------- Cleanup venv --------------------------------
    # Runs on success and on failure so the temporary venv never lingers.
    Write-Host "🧹 Cleaning up ephemeral venv..."
    if (Test-Path $HfVenv) {
        Remove-Item -Recurse -Force $HfVenv
    }
}

# ----------------------------- Final message -------------------------------
Write-Host ""
Write-Host "✅ SUCCESS!" -ForegroundColor Green
Write-Host " Repository cloned → $RepoName\"
Write-Host " Images ready at: $RepoName\mac15-image\"
Write-Host ""
Write-Host " Next time just run: cd $RepoName ; git pull"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/setup-mac15.sh DELETED
@@ -1,93 +0,0 @@
1
#!/bin/bash
# =============================================================================
# setup-mac15.sh
# One-command clone + automatic download of base.dmg and mac15.qcow2 into
# mac15-image/
# =============================================================================

set -e  # Exit immediately if any command fails

GITHUB_REPO="https://github.com/nullvoider07/mac15-base"
REPO_NAME=$(basename "$GITHUB_REPO")

echo "🚀 Cloning GitHub repo: $GITHUB_REPO"

# ----------------------------- Clone with GitHub CLI -----------------------
echo "🔧 Checking GitHub CLI..."

if ! command -v gh >/dev/null 2>&1; then
    echo " GitHub CLI not found. Attempting to install..."

    # Try Homebrew (macOS/Linux)
    if command -v brew >/dev/null 2>&1; then
        brew install gh
    # Try APT (Debian/Ubuntu)
    elif command -v apt-get >/dev/null 2>&1; then
        curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg && \
        sudo chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg && \
        echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null && \
        sudo apt-get update && sudo apt-get install gh -y
    else
        echo "❌ Unsupported package manager. Please install GitHub CLI manually:"
        echo " https://cli.github.com"
        exit 1
    fi
else
    echo " ✅ GitHub CLI already available."
fi

# Skip the clone when a previous run already created the checkout, so the
# script can be re-run to resume an interrupted download.
if [ -d "$REPO_NAME/.git" ]; then
    echo " ✅ $REPO_NAME already cloned — skipping clone."
else
    gh repo clone "$GITHUB_REPO" "$REPO_NAME" -- --depth=1
fi

# ----------------------------- Create folder -------------------------------
echo "📁 Creating folder: mac15-image/"
mkdir -p "$REPO_NAME/mac15-image"

# ----------------------------- Ensure uv is available ---------------------
echo "🔧 Checking uv..."

if command -v uv >/dev/null 2>&1; then
    echo " uv already available — skipping installation."
else
    echo " Installing uv package manager..."
    curl -LsSf https://astral.sh/uv/install.sh | sh
    export PATH="$HOME/.cargo/bin:$HOME/.local/bin:$PATH"
    hash -r
fi

# ----------------------------- Ephemeral venv for the hf CLI ---------------
# Create a temporary venv, install huggingface_hub into it, run the download,
# then delete the venv. This avoids all PATH/hash-cache/interpreter-mismatch
# issues caused by stale system-wide or tool-level installs.
echo "🔧 Creating ephemeral venv for the hf CLI..."

HF_TMP="$(mktemp -d)"
HF_VENV="$HF_TMP/hf-venv"

# Remove the whole temporary directory on any exit path, including failures
# that abort the script through `set -e`.
trap 'rm -rf "$HF_TMP"' EXIT

# uv venv picks the correct current Python automatically
uv venv "$HF_VENV" --quiet

# Install directly into the venv — no --system, no activation needed.
# huggingface_hub is the package that provides the `hf` command used below.
uv pip install --python "$HF_VENV/bin/python" huggingface_hub --quiet

echo " ✅ huggingface-hub installed in ephemeral venv."

# ----------------------------- Download images -----------------------------
echo "📥 Downloading base.dmg and mac15.qcow2 (large files) into $REPO_NAME/mac15-image/ ..."
echo " (This may take a while — progress bar will show)"

# Call hf directly by its venv path — no PATH lookup, no cache
"$HF_VENV/bin/hf" download NullVoider/mac15-base base.dmg \
    --local-dir "$REPO_NAME/mac15-image"

"$HF_VENV/bin/hf" download NullVoider/mac15-base mac15.qcow2 \
    --local-dir "$REPO_NAME/mac15-image"

# ----------------------------- Cleanup venv --------------------------------
echo "🧹 Cleaning up ephemeral venv..."
rm -rf "$HF_TMP"

# ----------------------------- Final message -------------------------------
echo ""
echo "✅ SUCCESS!"
echo " Repository cloned → $REPO_NAME/"
echo " QCOW2 image ready at: $REPO_NAME/mac15-image/mac15.qcow2"
echo ""
echo " Next time just run: cd $REPO_NAME && git pull"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/task_executor.py DELETED
@@ -1,716 +0,0 @@
1
- """
2
- task_executor.py — REST API task executor for the macOS AI Agent environment.
3
- """
4
-
5
- import difflib
6
- import logging
7
- import os
8
- import re
9
- import shutil
10
- import signal
11
- import subprocess
12
- import threading
13
- import time
14
- import uuid
15
- from http import HTTPStatus
16
-
17
- from flask import Flask, jsonify, request
18
- from waitress import serve
19
-
20
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# All three settings are read from the environment once, at import time.
TASK_BASE_DIR = os.environ.get("TASK_BASE_DIR", "/Users/AgentUser/tasks")
API_PORT = int(os.environ.get("API_PORT", "9090"))
API_TOKEN = os.environ.get("API_TOKEN", "")

# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
# The log file lives inside TASK_BASE_DIR, so the directory must exist before
# the FileHandler is created.
os.makedirs(TASK_BASE_DIR, exist_ok=True)
LOG_FILE = os.path.join(TASK_BASE_DIR, "task_executor.log")

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    handlers=[
        logging.FileHandler(LOG_FILE, encoding="utf-8"),
        logging.StreamHandler(),
    ],
)
log = logging.getLogger("task_executor")

# ---------------------------------------------------------------------------
# In-memory task store
# ---------------------------------------------------------------------------
# Maps task id -> task record; every access goes through _tasks_lock.
_tasks: dict[str, dict] = {}
_tasks_lock = threading.Lock()
_TASK_MAX_AGE = int(os.environ.get("TASK_MAX_AGE", "3600"))  # 1 hour default
50
-
51
-
52
def _evict_old_tasks() -> None:
    """Drop completed/failed tasks older than TASK_MAX_AGE seconds.

    Tasks whose status is "pending" or "running" are never evicted. Age is
    measured against ``time.monotonic()`` using each record's ``_created``
    field.
    """
    # NOTE(review): assumes "_created" is stored as a time.monotonic() value by
    # whoever creates the record — confirm against the submit handler.
    cutoff = time.monotonic() - _TASK_MAX_AGE
    with _tasks_lock:
        # Collect ids first: the dict must not change size while iterating it.
        stale = [
            tid for tid, t in _tasks.items()
            if t["status"] not in ("pending", "running")
            and t.get("_created", 0) < cutoff
        ]
        for tid in stale:
            _tasks.pop(tid)
63
-
64
# Flask application object that the REST endpoints are registered on.
app = Flask(__name__)
65
-
66
def _check_auth() -> bool:
    """Return True if the request is authorised (or auth is disabled).

    Authentication is disabled when ``API_TOKEN`` is empty. Otherwise the
    request's ``Authorization`` header must equal ``Bearer <API_TOKEN>``.
    """
    import hmac  # local import: only this function needs it

    if not API_TOKEN:
        return True  # auth disabled if no token configured
    supplied = request.headers.get("Authorization", "")
    expected = f"Bearer {API_TOKEN}"
    # Constant-time comparison so response timing does not reveal how much of
    # the token matched. Both sides are encoded because compare_digest only
    # accepts str arguments when they are pure ASCII.
    return hmac.compare_digest(supplied.encode("utf-8"), expected.encode("utf-8"))
72
-
73
- # ===========================================================================
74
- # Custom exceptions
75
- # ===========================================================================
76
-
77
class _TaskTimeoutError(RuntimeError):
    """Raised by _run() when a subprocess exceeds its allotted time."""
79
-
80
-
81
- # ===========================================================================
82
- # Subprocess helper
83
- # ===========================================================================
84
-
85
- def _run(
86
- command: list[str] | str,
87
- cwd: str | None = None,
88
- timeout: int = 120,
89
- shell: bool = False,
90
- ) -> tuple[int, str, str]:
91
- """
92
- Run a command in a new process group so the entire child tree can be
93
- killed on timeout via os.killpg + SIGKILL (POSIX).
94
-
95
- • list[str] → all internal commands (git clone/checkout/apply/diff).
96
- argv passed directly to execvp; no shell, no injection.
97
- • shell=True → user-supplied test_command and lint_command only.
98
-
99
- Raises _TaskTimeoutError on timeout.
100
- Returns (exit_code, stdout, stderr).
101
- """
102
- proc = subprocess.Popen(
103
- command,
104
- cwd=cwd,
105
- shell=shell,
106
- stdout=subprocess.PIPE,
107
- stderr=subprocess.PIPE,
108
- text=True,
109
- env=os.environ.copy(),
110
- start_new_session=True,
111
- )
112
- try:
113
- out, err = proc.communicate(timeout=timeout)
114
- return proc.returncode, out, err
115
- except subprocess.TimeoutExpired:
116
- try:
117
- os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
118
- except ProcessLookupError:
119
- pass
120
- proc.wait()
121
- raise _TaskTimeoutError(f"Command timed out after {timeout}s")
122
-
123
-
124
- # ===========================================================================
125
- # Test result parsers
126
- # ===========================================================================
127
-
128
def _parse_pytest(text: str) -> tuple[int, int]:
    """pytest: '5 passed, 2 failed, 1 error in 3.14s'

    Returns (passed, failed); errors are added to the failed count.

    Counts are read from the last line that looks like a pytest summary (an
    outcome count followed by 'in <seconds>s'), so phrases such as
    '9 failed attempts' printed by the tests themselves are not mistaken for
    results. When no such line exists the whole text is searched.
    """
    summary_lines = re.findall(
        r"^.*\b\d+\s+(?:passed|failed|errors?)\b.*\bin\s+[\d.]+\s*s.*$",
        text,
        re.MULTILINE,
    )
    scope = summary_lines[-1] if summary_lines else text

    def _count(word: str) -> int:
        m = re.search(rf"(\d+)\s+{word}", scope)
        return int(m.group(1)) if m else 0

    passed = _count("passed")
    failed = _count("failed") + _count("error")
    return passed, failed
141
-
142
-
143
def _parse_cargo(text: str) -> tuple[int, int]:
    """cargo test: 'test result: ok. 5 passed; 0 failed; ...' — sums across binaries.

    Returns (passed, failed) accumulated over every 'test result:' line.
    """
    passed, failed = 0, 0
    for m in re.finditer(r"test result:.*?(\d+)\s+passed;\s*(\d+)\s+failed", text):
        passed += int(m.group(1))
        failed += int(m.group(2))
    return passed, failed
150
-
151
-
152
def _parse_go(text: str) -> tuple[int, int]:
    """go test: '--- PASS/FAIL:' lines; falls back to package-level ok/FAIL.

    Returns (passed, failed). Only '--- PASS:' / '--- FAIL:' markers at the
    start of a line are counted. When there are none, one result is counted
    per 'ok <package>' / 'FAIL <package>' line.
    """
    passed = len(re.findall(r"^--- PASS:", text, re.MULTILINE))
    failed = len(re.findall(r"^--- FAIL:", text, re.MULTILINE))
    if passed == 0 and failed == 0:
        # [ \t]+ rather than \s+: the separator must stay on the same line,
        # otherwise a bare "FAIL" line followed by any other line would be
        # counted as a failed package.
        passed = len(re.findall(r"^ok[ \t]+\S+", text, re.MULTILINE))
        failed = len(re.findall(r"^FAIL[ \t]+\S+", text, re.MULTILINE))
    return passed, failed
160
-
161
-
162
def _parse_jest(text: str) -> tuple[int, int]:
    """Jest: 'Tests: 2 failed, 5 passed, 7 total'

    Returns (passed, failed) read from the first line starting with 'Tests:';
    (0, 0) when there is no such line.
    """
    passed, failed = 0, 0
    m = re.search(r"^Tests:\s+(.+)$", text, re.MULTILINE)
    if m:
        summary = m.group(1)
        p = re.search(r"(\d+)\s+passed", summary)
        f = re.search(r"(\d+)\s+failed", summary)
        if p:
            passed = int(p.group(1))
        if f:
            failed = int(f.group(1))
    return passed, failed
175
-
176
-
177
def _parse_dotnet(text: str) -> tuple[int, int]:
    """dotnet test: 'Failed: 2, Passed: 3, Skipped: 0, Total: 5'

    Returns (passed, failed) summed over every such summary in the text, so a
    run that prints one summary per test project is counted in full (the same
    approach the cargo and JUnit parsers take).
    """
    passed, failed = 0, 0
    for m in re.finditer(r"Failed:\s*(\d+),\s*Passed:\s*(\d+)", text):
        failed += int(m.group(1))
        passed += int(m.group(2))
    return passed, failed
185
-
186
-
187
def _parse_junit(text: str) -> tuple[int, int]:
    """Maven / Gradle / sbt: 'Tests run: 7, Failures: 2, Errors: 0, Skipped: 0'

    Returns (passed, failed) summed over every matching line. Failures and
    errors both count as failed; skipped tests (when the line reports them)
    are excluded from the passed count.
    """
    # NOTE(review): tools that print per-class lines plus a grand-total line in
    # this same format would be counted twice here — confirm against real output.
    passed_total = failed_total = 0
    for m in re.finditer(
        r"Tests run:\s*(\d+),\s*Failures:\s*(\d+),\s*Errors:\s*(\d+)"
        r"(?:,\s*Skipped:\s*(\d+))?",
        text,
    ):
        run = int(m.group(1))
        failures = int(m.group(2))
        errors = int(m.group(3))
        skipped = int(m.group(4)) if m.group(4) else 0
        failed_total += failures + errors
        passed_total += max(run - failures - errors - skipped, 0)
    return passed_total, failed_total
199
-
200
-
201
def _dispatch_test_parser(test_command: str, text: str) -> tuple[int, int]:
    """Route to the correct test parser; falls back to trying all.

    The parser is chosen from keywords in ``test_command`` (case-insensitive).
    When no keyword matches, every parser is tried in a fixed order and the
    first one that reports a non-zero count wins; otherwise (0, 0).
    """
    cmd = test_command.lower()
    if "pytest" in cmd or "py.test" in cmd:
        return _parse_pytest(text)
    if "cargo" in cmd:
        return _parse_cargo(text)
    if "go test" in cmd:
        return _parse_go(text)
    if (
        "jest" in cmd
        or ("npm" in cmd and "test" in cmd)
        or ("yarn" in cmd and "test" in cmd)
        or ("pnpm" in cmd and "test" in cmd)
    ):
        return _parse_jest(text)
    if "dotnet" in cmd:
        return _parse_dotnet(text)
    if "mvn" in cmd or "gradle" in cmd or "sbt" in cmd or "junit" in cmd:
        return _parse_junit(text)
    # Unknown runner: probe each known output format.
    for parser in (
        _parse_pytest, _parse_cargo, _parse_go,
        _parse_jest, _parse_dotnet, _parse_junit,
    ):
        p, f = parser(text)
        if p or f:
            return p, f
    return 0, 0
229
-
230
-
231
- # ===========================================================================
232
- # Lint error parser
233
- # ===========================================================================
234
-
235
def _parse_lint_errors(lint_command: str, text: str, exit_code: int) -> int:
    """
    Extract an error count from linter output.

    Each branch targets the output format of one common CLI linter, selected
    by a keyword in ``lint_command``. The fallback counts occurrences of the
    word 'error' (case-insensitive) when the linter exited non-zero, which
    covers the long tail of less common tools.

    Args:
        lint_command: the command line that produced ``text``.
        text: combined linter output.
        exit_code: the linter's exit status (used only by the fallback).

    Returns:
        The number of errors found (0 when nothing is recognised).
    """
    import json  # local import: only the JSON output modes need it

    cmd = lint_command.lower()

    def _has_json_flag(*flags: str) -> bool:
        # Accept both "--flag json" and "--flag=json" spellings.
        return any(
            f"{flag} json" in cmd or f"{flag}=json" in cmd for flag in flags
        )

    # ── ruff ────────────────────────────────────────────────────────────────
    # "Found 3 errors." on the final summary line.
    if "ruff" in cmd:
        m = re.search(r"Found\s+(\d+)\s+error", text)
        if m:
            return int(m.group(1))
        # JSON output → one array element per diagnostic.
        if _has_json_flag("--output-format", "-o"):
            try:
                return len(json.loads(text))
            except Exception:
                # Output was not valid JSON; fall through to the generic rules.
                pass

    # ── flake8 ──────────────────────────────────────────────────────────────
    # One error per output line: "path/to/file.py:10:1: E302 ..."
    if "flake8" in cmd:
        return sum(
            1 for line in text.splitlines()
            if re.match(r".+:\d+:\d+:\s+[EWF]", line)
        )

    # ── mypy ────────────────────────────────────────────────────────────────
    # "Found 4 errors in 2 files" on the summary line.
    if "mypy" in cmd:
        m = re.search(r"Found\s+(\d+)\s+error", text)
        if m:
            return int(m.group(1))
        return text.count(": error:")

    # ── pylint ──────────────────────────────────────────────────────────────
    # Count E/F severity messages ("file.py:1:0: E0001: ...").
    if "pylint" in cmd:
        return len(re.findall(r"^\S+:\d+:\d+:\s+[EF]\d{4}:", text, re.MULTILINE))

    # ── cargo clippy ────────────────────────────────────────────────────────
    # "error[E...]" lines; distinguish from "error: aborting" summary lines.
    if "clippy" in cmd or ("cargo" in cmd and "check" in cmd):
        return len(re.findall(r"^error\[", text, re.MULTILINE))

    # ── eslint ──────────────────────────────────────────────────────────────
    # Summary line: "3 errors, 1 warning"
    # JSON mode: array of objects each with messages array filtered by severity=2.
    if "eslint" in cmd:
        if _has_json_flag("--format", "-f"):
            try:
                data = json.loads(text)
                return sum(
                    sum(1 for msg in f.get("messages", []) if msg.get("severity") == 2)
                    for f in data
                )
            except Exception:
                # Not valid JSON; fall back to the text summary below.
                pass
        m = re.search(r"(\d+)\s+error", text)
        return int(m.group(1)) if m else 0

    # ── go vet / staticcheck ─────────────────────────────────────────────────
    # Each non-empty output line is a diagnostic.
    if "go vet" in cmd or "staticcheck" in cmd:
        return sum(1 for line in text.splitlines() if line.strip())

    # ── clang-tidy / cppcheck ────────────────────────────────────────────────
    if "clang-tidy" in cmd or "cppcheck" in cmd:
        return len(re.findall(r"\berror\b", text, re.IGNORECASE))

    # ── dotnet build (lint-as-build) ─────────────────────────────────────────
    if "dotnet" in cmd and "build" in cmd:
        m = re.search(r"(\d+)\s+Error\(s\)", text)
        return int(m.group(1)) if m else 0

    # ── generic fallback ────────────────────────────────────────────────────
    # Non-zero exit + no recognised pattern → count 'error' occurrences.
    if exit_code != 0:
        return len(re.findall(r"\berror\b", text, re.IGNORECASE))
    return 0
320
-
321
-
322
- # ===========================================================================
323
- # Patch normaliser + similarity scorer
324
- # ===========================================================================
325
-
326
# Hunk header "@@ -start[,count] +start[,count] @@"; an omitted count means 1.
_HUNK_HEADER_RE = re.compile(r"^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@")


def _normalise_patch(patch: str) -> list[str]:
    """
    Strip unified-diff metadata lines and return the remaining lines in order,
    with the leading +/-/space of content lines preserved.

    Lines stripped:
      diff --git ...
      index ...
      --- a/...
      +++ b/...
      @@ ... @@ (hunk headers)

    This lets difflib compare the actual code changes regardless of line
    numbers, file paths, or git object hashes — so minor reformatting of
    the patch header doesn't penalise an otherwise identical solution.

    The line counts in each hunk header are tracked so that lines inside a
    hunk body are never mistaken for metadata. Without that, removing a
    source line such as "-- old" (which appears in the diff as "--- old")
    would be silently dropped as if it were a file header.
    """
    kept: list[str] = []
    old_left = new_left = 0  # body lines still expected in the current hunk
    for line in patch.splitlines():
        if old_left > 0 or new_left > 0:
            if line.startswith("\\"):
                # "\ No newline at end of file": kept, but not counted.
                kept.append(line)
                continue
            if line.startswith("-") and old_left > 0:
                old_left -= 1
                kept.append(line)
                continue
            if line.startswith("+") and new_left > 0:
                new_left -= 1
                kept.append(line)
                continue
            if (line == "" or line.startswith(" ")) and old_left > 0 and new_left > 0:
                old_left -= 1
                new_left -= 1
                kept.append(line)
                continue
            # Not a valid body line: the header counts were wrong. Leave the
            # hunk and treat this line with the header rules below.
            old_left = new_left = 0

        header = _HUNK_HEADER_RE.match(line)
        if header:
            old_left = int(header.group(1)) if header.group(1) is not None else 1
            new_left = int(header.group(2)) if header.group(2) is not None else 1
            continue
        if line.startswith(("diff ", "index ", "--- ", "+++ ", "@@ ")):
            continue
        kept.append(line)
    return kept
354
-
355
-
356
def _patch_similarity(agent_patch: str, reference_patch: str) -> float:
    """
    Return a similarity ratio in [0.0, 1.0] between two unified diffs after
    normalisation. Uses difflib.SequenceMatcher over the normalised lines.

    1.0 = identical changes after stripping metadata (this includes the case
          where both patches normalise to nothing).
    0.0 = exactly one of the patches normalises to nothing, or the two share
          no matching lines.

    This is an informational signal — the caller decides what threshold
    constitutes an acceptable match.
    """
    a = _normalise_patch(agent_patch)
    b = _normalise_patch(reference_patch)
    if not a and not b:
        return 1.0
    if not a or not b:
        return 0.0
    return difflib.SequenceMatcher(None, a, b).ratio()
374
-
375
-
376
- # ===========================================================================
377
- # Core task executor
378
- # ===========================================================================
379
-
380
def _execute(
    task_id: str,
    repo_url: str,
    base_commit: str,
    patch: str,
    test_command: str,
    timeout: int,
    lint_command: str,
    capture_diff: bool,
    reference_patch: str,
) -> None:
    """
    Full task lifecycle:

      1. Create isolated workspace
      2. git clone
      3. git checkout <base_commit>
      4. git apply <agent patch>        (if patch provided)
      5. Run test_command → pass/fail counts
      6. Run lint_command               (if provided; soft score)
      7. git diff <base_commit>         (if capture_diff or reference_patch)
      8. Compute patch_similarity       (if reference_patch provided)
      9. Single atomic _update          (status + all signals)
     10. shutil.rmtree cleanup

    The outcome is written into the shared task record for ``task_id``; the
    function itself returns nothing and does not raise for task failures.
    A failing clone/checkout/apply or a timeout of a non-lint step yields
    status "failed"; a test run that completes (whatever its exit code)
    yields status "completed".
    """
    task_dir = os.path.join(TASK_BASE_DIR, task_id)
    repo_dir = os.path.join(task_dir, "repo")
    patch_file = os.path.join(task_dir, "task.patch")

    stdout_parts: list[str] = []
    stderr_parts: list[str] = []
    start = time.monotonic()

    # Failure sentinel — overwritten on success.
    final_update: dict = {
        "status": "failed",
        "exit_code": -1,
        "stdout": "",
        "stderr": "",
        "tests_passed": 0,
        "tests_failed": 0,
        "lint_errors": None,
        "lint_output": None,
        "patch_diff": None,
        "patch_similarity": None,
        "execution_time": 0.0,
    }

    def _update(**kw: object) -> None:
        with _tasks_lock:
            _tasks[task_id].update(kw)

    def _record_failure(exc: BaseException) -> None:
        # Fold the captured output and the error text into the failure record.
        stderr_parts.append(str(exc))
        final_update.update({
            "stdout": "\n".join(filter(None, stdout_parts)),
            "stderr": "\n".join(filter(None, stderr_parts)),
            "execution_time": round(time.monotonic() - start, 3),
        })

    _update(status="running")

    try:
        os.makedirs(task_dir, exist_ok=True)

        # A revision starting with "-" would be parsed by git as an option.
        if base_commit.startswith("-"):
            raise RuntimeError(f"invalid base_commit: {base_commit!r}")

        # ── 1. Clone ───────────────────────────────────────────────────────
        # "--" ends option parsing so repo_url is always treated as a URL.
        rc, out, err = _run(
            ["git", "clone", "--", repo_url, repo_dir], timeout=120
        )
        stdout_parts.append(out)
        stderr_parts.append(err)
        if rc != 0:
            raise RuntimeError(f"git clone failed (rc={rc}): {err.strip()}")

        # ── 2. Checkout ────────────────────────────────────────────────────
        rc, out, err = _run(["git", "checkout", base_commit], cwd=repo_dir, timeout=60)
        stdout_parts.append(out)
        stderr_parts.append(err)
        if rc != 0:
            raise RuntimeError(f"git checkout failed (rc={rc}): {err.strip()}")

        # ── 3. Apply agent patch ───────────────────────────────────────────
        if patch and patch.strip():
            with open(patch_file, "w", encoding="utf-8") as fh:
                fh.write(patch)
            rc, out, err = _run(["git", "apply", patch_file], cwd=repo_dir, timeout=30)
            stdout_parts.append(out)
            stderr_parts.append(err)
            if rc != 0:
                raise RuntimeError(f"git apply failed (rc={rc}): {err.strip()}")

        # ── 4. Run tests ───────────────────────────────────────────────────
        rc, out, err = _run(test_command, cwd=repo_dir, timeout=timeout, shell=True)
        stdout_parts.append(out)
        stderr_parts.append(err)
        test_exit_code = rc

        combined_stdout = "\n".join(filter(None, stdout_parts))
        combined_stderr = "\n".join(filter(None, stderr_parts))
        passed, failed = _dispatch_test_parser(
            test_command, combined_stdout + "\n" + combined_stderr
        )

        # ── 5. Lint (soft score — never mutates status) ────────────────────
        lint_errors_count: int | None = None
        lint_out: str | None = None

        if lint_command and lint_command.strip():
            try:
                lint_rc, l_out, l_err = _run(
                    lint_command, cwd=repo_dir, timeout=120, shell=True
                )
                lint_out = (l_out + "\n" + l_err).strip() or None
                lint_errors_count = _parse_lint_errors(
                    lint_command, lint_out or "", lint_rc
                )
                log.info(
                    "Task %s lint finished — rc=%d errors=%s",
                    task_id, lint_rc, lint_errors_count,
                )
            except _TaskTimeoutError:
                lint_out = "Lint timed out after 120s"
                lint_errors_count = None
                log.warning("Task %s lint timed out", task_id)
            except Exception as exc:
                lint_out = f"Lint error: {exc}"
                lint_errors_count = None
                log.warning("Task %s lint exception: %s", task_id, exc)

        # ── 6. Capture git diff ────────────────────────────────────────────
        # Always run when reference_patch is provided (needed for similarity).
        # Also run when capture_diff=True (agent-produced code path).
        patch_diff_text: str | None = None

        if capture_diff or (reference_patch and reference_patch.strip()):
            try:
                _, diff_out, _ = _run(
                    ["git", "diff", base_commit], cwd=repo_dir, timeout=30
                )
                patch_diff_text = diff_out.strip() or None
            except Exception as exc:
                log.warning("Task %s git diff failed: %s", task_id, exc)

        # ── 7. Reference patch similarity ─────────────────────────────────
        similarity: float | None = None

        if reference_patch and reference_patch.strip():
            try:
                # Prefer the diff read back from the working tree; fall back
                # to the submitted patch text when no diff was captured.
                agent_diff = patch_diff_text or (patch if patch and patch.strip() else "")
                if agent_diff:
                    similarity = round(
                        _patch_similarity(agent_diff, reference_patch), 4
                    )
                    log.info("Task %s patch_similarity=%.4f", task_id, similarity)
            except Exception as exc:
                log.warning("Task %s similarity computation failed: %s", task_id, exc)

        # ── 8. Build success record ────────────────────────────────────────
        final_update = {
            "status": "completed",
            "exit_code": test_exit_code,
            "stdout": combined_stdout,
            "stderr": combined_stderr,
            "tests_passed": passed,
            "tests_failed": failed,
            "lint_errors": lint_errors_count,
            "lint_output": lint_out,
            "patch_diff": patch_diff_text,
            "patch_similarity": similarity,
            "execution_time": round(time.monotonic() - start, 3),
        }

    except _TaskTimeoutError as exc:
        # The exception text carries the timeout of the step that actually
        # expired (clone, checkout, apply or tests), so log that rather than
        # the test timeout.
        log.error("Task %s timed out: %s", task_id, exc)
        _record_failure(exc)

    except Exception as exc:
        log.exception("Task %s failed: %s", task_id, exc)
        _record_failure(exc)

    finally:
        try:
            _update(**final_update)
        finally:
            # Remove the workspace even if the store update raised.
            shutil.rmtree(task_dir, ignore_errors=True)
562
-
563
-
564
- # ===========================================================================
565
- # REST endpoints
566
- # ===========================================================================
567
-
568
@app.route("/task/submit", methods=["POST"])
def submit():
    """
    POST /task/submit

    Register a new task and start executing it on a background daemon thread.

    Body (JSON object):
        repo_url         str   required
        base_commit      str   optional  (default: HEAD)
        patch            str   optional  — agent's unified diff
        test_command     str   required  — e.g. "python3 -m pytest tests/ -x"
        timeout          int   optional  (default: 300)
        lint_command     str   optional  — e.g. "ruff check . --output-format json"
        capture_diff     bool  optional  (default: false)
        reference_patch  str   optional  — ground-truth unified diff

    Returns 202: { "task_id": "<uuid>", "status": "pending" }
    Returns 400 when the body is not a JSON object, a required field is
    missing, or ``timeout`` cannot be converted to an integer.
    Returns 401 when the request is not authorized.
    """
    if not _check_auth():
        return jsonify(error="Unauthorized"), HTTPStatus.UNAUTHORIZED

    _evict_old_tasks()

    body = request.get_json(force=True, silent=True)
    if not body:
        return jsonify(error="Request body must be valid JSON"), HTTPStatus.BAD_REQUEST
    # Valid JSON that is not an object (list, string, number) has no .get();
    # reject it here instead of crashing with AttributeError below.
    if not isinstance(body, dict):
        return jsonify(error="Request body must be a JSON object"), HTTPStatus.BAD_REQUEST

    missing = [f for f in ("repo_url", "test_command") if not body.get(f)]
    if missing:
        return jsonify(error=f"Missing required fields: {missing}"), HTTPStatus.BAD_REQUEST

    # Convert once, and turn a malformed value into a client error rather
    # than an unhandled exception (HTTP 500).
    try:
        timeout = int(body.get("timeout", 300))
    except (TypeError, ValueError, OverflowError):
        return jsonify(error="Field 'timeout' must be an integer"), HTTPStatus.BAD_REQUEST

    repo_url = body["repo_url"]
    test_command = body["test_command"]
    # An explicit null / empty base_commit falls back to the documented default.
    base_commit = body.get("base_commit") or "HEAD"

    task_id = str(uuid.uuid4())
    record: dict = {
        "task_id": task_id,
        "status": "pending",
        # Creation timestamp stored with the record (internal bookkeeping).
        "_created": time.monotonic(),
        "repo_url": repo_url,
        "base_commit": base_commit,
        "test_command": test_command,
        "timeout": timeout,
        "exit_code": None,
        "stdout": None,
        "stderr": None,
        "tests_passed": None,
        "tests_failed": None,
        "lint_errors": None,
        "lint_output": None,
        "patch_diff": None,
        "patch_similarity": None,
        "execution_time": None,
    }

    with _tasks_lock:
        _tasks[task_id] = record

    # The worker receives exactly the values stored in the record, so the
    # two can never disagree.
    threading.Thread(
        target=_execute,
        args=(
            task_id,
            repo_url,
            base_commit,
            body.get("patch", ""),
            test_command,
            timeout,
            body.get("lint_command", ""),
            bool(body.get("capture_diff", False)),
            body.get("reference_patch", ""),
        ),
        daemon=True,
    ).start()

    log.info("Task %s submitted — repo=%s", task_id, repo_url)
    return jsonify(task_id=task_id, status="pending"), HTTPStatus.ACCEPTED
640
-
641
-
642
@app.route("/task/<task_id>", methods=["GET"])
def status(task_id: str):
    """
    GET /task/<task_id>

    Lightweight status probe for a single task.

    Returns: { "task_id": "...", "status": "pending|running|completed|failed" }
    Returns 401 when the request is not authorized, 404 for an unknown id.
    """
    if not _check_auth():
        return jsonify(error="Unauthorized"), HTTPStatus.UNAUTHORIZED

    # Only the registry lookup happens under the lock.
    with _tasks_lock:
        record = _tasks.get(task_id)

    if record is not None:
        return jsonify(task_id=record["task_id"], status=record["status"])
    return jsonify(error="Task not found"), HTTPStatus.NOT_FOUND
655
-
656
-
657
@app.route("/task/<task_id>/result", methods=["GET"])
def result(task_id: str):
    """
    GET /task/<task_id>/result

    Returns 202 while the task is still pending or running.
    Returns 200 with the task record once it has finished, e.g.:
        {
          "task_id":          "...",
          "status":           "completed|failed",
          "exit_code":        0,
          "stdout":           "...",
          "stderr":           "...",
          "tests_passed":     5,
          "tests_failed":     1,
          "lint_errors":      3,
          "lint_output":      "...",
          "patch_diff":       "...",
          "patch_similarity": 0.9412,
          "execution_time":   14.2
        }
    Keys starting with an underscore (internal bookkeeping such as
    ``_created``) are not part of the response.
    Returns 401 when the request is not authorized, 404 for an unknown id.
    """
    if not _check_auth():
        return jsonify(error="Unauthorized"), HTTPStatus.UNAUTHORIZED

    with _tasks_lock:
        t = _tasks.get(task_id)
        # Copy while holding the lock so the status check and the serialized
        # payload come from one snapshot instead of the live shared dict.
        # NOTE(review): presumably the worker's _update() writes under this
        # same lock — confirm.
        snapshot = dict(t) if t is not None else None

    if snapshot is None:
        return jsonify(error="Task not found"), HTTPStatus.NOT_FOUND

    if snapshot["status"] in ("pending", "running"):
        return jsonify(
            task_id=task_id,
            status=snapshot["status"],
            message="Task not yet complete — poll again shortly",
        ), HTTPStatus.ACCEPTED

    # Strip private fields; "_created" is a time.monotonic() value that is
    # meaningless outside this process.
    public = {k: v for k, v in snapshot.items() if not k.startswith("_")}
    return jsonify(public)
692
-
693
-
694
@app.route("/task/<task_id>", methods=["DELETE"])
def delete(task_id: str):
    """
    DELETE /task/<task_id>

    Drop the task's record from the in-memory registry.

    Returns: { "task_id": "...", "deleted": true }
    Returns 401 when the request is not authorized, 404 for an unknown id.
    """
    if not _check_auth():
        return jsonify(error="Unauthorized"), HTTPStatus.UNAUTHORIZED

    # A single pop with a default replaces the membership test + pop pair;
    # stored records are dicts, so None unambiguously means "absent".
    with _tasks_lock:
        removed = _tasks.pop(task_id, None)

    if removed is None:
        return jsonify(error="Task not found"), HTTPStatus.NOT_FOUND

    log.info("Task %s deleted", task_id)
    return jsonify(task_id=task_id, deleted=True)
708
-
709
-
710
- # ===========================================================================
711
- # Entry point
712
- # ===========================================================================
713
-
714
if __name__ == "__main__":
    # Script entry point: announce the port, then hand the app to serve()
    # bound on all interfaces with 16 worker threads.
    bind_host = "0.0.0.0"
    log.info("Task executor starting on 0.0.0.0:%d", API_PORT)
    serve(app, host=bind_host, port=API_PORT, threads=16)