Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

scripts/entrypoint.sh +66 -0
scripts/setup-win11.ps1 +83 -0
scripts/setup-win11.sh +90 -0
scripts/task_executor.py +604 -0
scripts/win11.yaml +30 -0

scripts/entrypoint.sh ADDED Viewed

	@@ -0,0 +1,66 @@

+#!/bin/bash
+set -e
+# --- Configuration ---
+SOURCE_IMG="/vm/source.qcow2"
+EPISODE_DISK="/run/episode.qcow2"
+VARS_TEMPLATE="/usr/share/OVMF/OVMF_VARS_4M.fd"
+OVMF_CODE="/usr/share/OVMF/OVMF_CODE_4M.fd"
+DATA_DIR="/run/storage"
+mkdir -p "$DATA_DIR"
+VARS_FILE="$DATA_DIR/OVMF_VARS.fd"
+export QEMU_AUDIO_DRV=none
+echo "--- Windows 11 Standard Boot ---"
+# 1. Create Ephemeral Overlay
+echo "Creating ephemeral overlay..."
+qemu-img create -f qcow2 -b "$SOURCE_IMG" -F qcow2 "$EPISODE_DISK"
+# 2. Prepare UEFI Variables
+if [ ! -f "$VARS_FILE" ]; then
+    echo "Initializing UEFI variables..."
+    cp "$VARS_TEMPLATE" "$VARS_FILE"
+fi
+# 2. Start TPM Emulator
+mkdir -p /run/tpm
+swtpm socket --tpmstate dir=/run/tpm --ctrl type=unixio,path=/run/tpm/swtpm-sock --tpm2 -d
+sleep 1
+# 3. Start Web Viewer
+echo "Starting web viewer..."
+websockify -D --web=/usr/share/novnc/ 8006 localhost:5900
+echo "Booting Windows 11..."
+# 5. Launch QEMU
+exec qemu-system-x86_64 \
+  -enable-kvm \
+  -m 8G \
+  -smp 4,cores=4,threads=1,sockets=1 \
+  -machine q35,accel=kvm \
+  -boot menu=on,splash-time=0 \
+  -cpu host,hv_relaxed,hv_spinlocks=0x1fff,hv_vapic,hv_time,+invtsc \
+  -device intel-hda -device hda-output,audiodev=nomix \
+  -audiodev id=nomix,driver=none \
+  -drive if=pflash,format=raw,readonly=on,file="$OVMF_CODE" \
+  -drive if=pflash,format=raw,file="$VARS_FILE" \
+  -chardev socket,id=chrtpm,path=/run/tpm/swtpm-sock \
+  -tpmdev emulator,id=tpm0,chardev=chrtpm \
+  -device tpm-tis,tpmdev=tpm0 \
+  -device virtio-balloon-pci,free-page-reporting=on,deflate-on-oom=on \
+  -vga std \
+  -object iothread,id=iothread0 \
+  -device virtio-scsi-pci,id=scsi0,iothread=iothread0,num_queues=4 \
+  -drive file="$EPISODE_DISK",format=qcow2,if=none,id=disk0,cache=writeback,aio=threads,discard=unmap,l2-cache-size=4M \
+  -device scsi-hd,drive=disk0,bootindex=1,rotation_rate=1 \
+  -netdev user,id=net0,hostfwd=tcp::3389-:3389,hostfwd=tcp::2222-:22,hostfwd=tcp::9090-:9090 \
+  -device virtio-net-pci,netdev=net0,id=net0,romfile="" \
+  -vnc 0.0.0.0:0 \
+  -usb \
+  -device usb-kbd \
+  -device usb-tablet \
+  -monitor tcp:0.0.0.0:4444,server,nowait \
+  -qmp tcp:0.0.0.0:4445,server,nowait

scripts/setup-win11.ps1 ADDED Viewed

	@@ -0,0 +1,83 @@

+# =============================================================================
+# setup-win11.ps1
+# One-command clone + automatic download of win11.qcow2 into win11-image/
+# =============================================================================
+$ErrorActionPreference = 'Stop' # Exit immediately if any command fails
+$GithubRepo = "https://github.com/nullvoider07/windows11-base"
+$RepoName = Split-Path $GithubRepo -Leaf
+Write-Host "🚀 Cloning GitHub repo: $GithubRepo"
+# ----------------------------- Clone with GitHub CLI -----------------------
+Write-Host "🔧 Checking GitHub CLI..."
+if (-not (Get-Command gh -ErrorAction SilentlyContinue)) {
+    Write-Host "   GitHub CLI not found. Installing via winget..."
+    # Install gh silently, accepting agreements automatically
+    winget install --id GitHub.cli --exact --accept-source-agreements --accept-package-agreements
+    # Refresh the PATH variables in the current session so 'gh' is recognized immediately
+    $env:Path = [System.Environment]::GetEnvironmentVariable("Path", "Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path", "User")
+    # Verify installation succeeded
+    if (-not (Get-Command gh -ErrorAction SilentlyContinue)) {
+        throw "❌ Failed to install GitHub CLI automatically. Please install it manually from https://cli.github.com"
+    }
+    Write-Host "   ✅ GitHub CLI installed successfully."
+} else {
+    Write-Host "   ✅ GitHub CLI already available."
+}
+gh repo clone $GithubRepo $RepoName -- --depth=1
+# ----------------------------- Create folder -------------------------------
+Write-Host "📁 Creating folder: win11-image/"
+$ImagePath = Join-Path $RepoName "win11-image"
+New-Item -ItemType Directory -Force -Path $ImagePath | Out-Null
+# ----------------------------- Ensure uv is available ---------------------
+Write-Host "🔧 Checking uv..."
+if (Get-Command uv -ErrorAction SilentlyContinue) {
+    Write-Host "   uv already available — skipping installation."
+} else {
+    Write-Host "   Installing uv package manager..."
+    Invoke-RestMethod -Uri https://astral.sh/uv/install.ps1 | Invoke-Expression
+    $env:Path += ";$HOME\.cargo\bin;$HOME\.local\bin"
+}
+# ----------------------------- Ephemeral venv for huggingface-cli ----------
+Write-Host "🔧 Creating ephemeral venv for huggingface-cli..."
+$TempDir = [System.IO.Path]::GetTempPath()
+$HfVenv = Join-Path $TempDir "hf-venv-$(New-Guid)"
+# uv venv picks the correct current Python automatically
+uv venv $HfVenv --quiet
+# Install directly into the venv — no --system, no activation needed
+uv pip install --python "$HfVenv\Scripts\python.exe" transformers --quiet
+Write-Host "   ✅ huggingface-hub installed in ephemeral venv."
+# ----------------------------- Download QCOW2 ------------------------------
+Write-Host "📥 Downloading win11.qcow2 (large file) into $RepoName\win11-image\ ..."
+Write-Host "    (This may take a while — progress bar will show)"
+# Call huggingface-cli directly by its venv path — no PATH lookup, no cache
+& "$HfVenv\Scripts\hf.exe" download NullVoider/windows11-base win11.qcow2 --local-dir $ImagePath
+# ----------------------------- Cleanup venv --------------------------------
+Write-Host "🧹 Cleaning up ephemeral venv..."
+Remove-Item -Recurse -Force $HfVenv
+# ----------------------------- Final message -------------------------------
+Write-Host ""
+Write-Host "✅ SUCCESS!" -ForegroundColor Green
+Write-Host "   Repository cloned → $RepoName\"
+Write-Host "   QCOW2 image ready at: $RepoName\win11-image\win11.qcow2"
+Write-Host ""
+Write-Host "   Next time just run: cd $RepoName ; git pull"

scripts/setup-win11.sh ADDED Viewed

	@@ -0,0 +1,90 @@

+#!/bin/bash
+# =============================================================================
+# setup-win11.sh
+# One-command clone + automatic download of win11.qcow2 into win11-image/
+# =============================================================================
+set -e  # Exit immediately if any command fails
+GITHUB_REPO="https://github.com/nullvoider07/windows11-base"
+REPO_NAME=$(basename "$GITHUB_REPO")
+echo "🚀 Cloning GitHub repo: $GITHUB_REPO"
+# ----------------------------- Clone with GitHub CLI -----------------------
+echo "🔧 Checking GitHub CLI..."
+if ! command -v gh >/dev/null 2>&1; then
+    echo "   GitHub CLI not found. Attempting to install..."
+    # Try Homebrew (macOS/Linux)
+    if command -v brew >/dev/null 2>&1; then
+        brew install gh
+    # Try APT (Debian/Ubuntu)
+    elif command -v apt-get >/dev/null 2>&1; then
+        curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg && \
+        sudo chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg && \
+        echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null && \
+        sudo apt-get update && sudo apt-get install gh -y
+    else
+        echo "❌ Unsupported package manager. Please install GitHub CLI manually:"
+        echo "   https://cli.github.com"
+        exit 1
+    fi
+else
+    echo "   ✅ GitHub CLI already available."
+fi
+gh repo clone "$GITHUB_REPO" "$REPO_NAME" -- --depth=1
+# ----------------------------- Create folder -------------------------------
+echo "📁 Creating folder: win11-image/"
+mkdir -p "$REPO_NAME/win11-image"
+# ----------------------------- Ensure uv is available ---------------------
+echo "🔧 Checking uv..."
+if command -v uv >/dev/null 2>&1; then
+    echo "   uv already available — skipping installation."
+else
+    echo "   Installing uv package manager..."
+    curl -LsSf https://astral.sh/uv/install.sh | sh
+    export PATH="$HOME/.cargo/bin:$HOME/.local/bin:$PATH"
+    hash -r
+fi
+# ----------------------------- Ephemeral venv for huggingface-cli ----------
+# Create a temporary venv, install huggingface-hub into it, run the download,
+# then delete the venv. This avoids all PATH/hash-cache/interpreter-mismatch
+# issues caused by stale system-wide or tool-level installs.
+echo "🔧 Creating ephemeral venv for huggingface-cli..."
+HF_VENV="$(mktemp -d)/hf-venv"
+# uv venv picks the correct current Python automatically
+uv venv "$HF_VENV" --quiet
+# Install directly into the venv — no --system, no activation needed
+uv pip install --python "$HF_VENV/bin/python" transformers --quiet
+echo "   ✅ huggingface-hub installed in ephemeral venv."
+# ----------------------------- Download QCOW2 ------------------------------
+echo "📥 Downloading win11.qcow2 (large file) into $REPO_NAME/win11-image/ ..."
+echo "    (This may take a while — progress bar will show)"
+# Call huggingface-cli directly by its venv path — no PATH lookup, no cache
+"$HF_VENV/bin/hf" download NullVoider/windows11-base win11.qcow2 \
+    --local-dir "$REPO_NAME/win11-image"
+# ----------------------------- Cleanup venv --------------------------------
+echo "🧹 Cleaning up ephemeral venv..."
+rm -rf "$HF_VENV"
+# ----------------------------- Final message -------------------------------
+echo ""
+echo "✅ SUCCESS!"
+echo "   Repository cloned → $REPO_NAME/"
+echo "   QCOW2 image ready at: $REPO_NAME/win11-image/win11.qcow2"
+echo ""
+echo "   Next time just run: cd $REPO_NAME && git pull"

scripts/task_executor.py ADDED Viewed

	@@ -0,0 +1,604 @@

+"""
+task_executor_windows.py — REST API task executor for the Windows AI Agent environment.
+"""
+import difflib
+import logging
+import os
+import re
+import shutil
+import subprocess
+import threading
+import time
+import uuid
+from http import HTTPStatus
+from flask import Flask, jsonify, request
+from waitress import serve
+# ---------------------------------------------------------------------------
+# Configuration
+# ---------------------------------------------------------------------------
+TASK_BASE_DIR = os.environ.get("TASK_BASE_DIR", r"C:\Users\AgentUser\tasks")
+API_PORT      = int(os.environ.get("API_PORT", "9090"))
+API_TOKEN     = os.environ.get("API_TOKEN", "")
+# ---------------------------------------------------------------------------
+# Logging  →  C:\Users\AgentUser\tasks\task_executor.log
+# ---------------------------------------------------------------------------
+os.makedirs(TASK_BASE_DIR, exist_ok=True)
+LOG_FILE = os.path.join(TASK_BASE_DIR, "task_executor.log")
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+    handlers=[
+        logging.FileHandler(LOG_FILE, encoding="utf-8"),
+        logging.StreamHandler(),
+    ],
+)
+log = logging.getLogger("task_executor")
+# ---------------------------------------------------------------------------
+# In-memory task store
+# ---------------------------------------------------------------------------
+_tasks: dict[str, dict] = {}
+_tasks_lock = threading.Lock()
+_TASK_MAX_AGE = int(os.environ.get("TASK_MAX_AGE", "3600"))  # 1 hour default
+def _evict_old_tasks() -> None:
+    """Drop completed/failed tasks older than TASK_MAX_AGE seconds."""
+    cutoff = time.monotonic() - _TASK_MAX_AGE
+    with _tasks_lock:
+        stale = [
+            tid for tid, t in _tasks.items()
+            if t["status"] not in ("pending", "running")
+            and t.get("_created", 0) < cutoff
+        ]
+        for tid in stale:
+            _tasks.pop(tid)
+app = Flask(__name__)
+def _check_auth() -> bool:
+    """Return True if the request is authorised (or auth is disabled)."""
+    if not API_TOKEN:
+        return True  # auth disabled if no token configured
+    auth = request.headers.get("Authorization", "")
+    return auth == f"Bearer {API_TOKEN}"
+# ===========================================================================
+# Custom exceptions
+# ===========================================================================
+class _TaskTimeoutError(RuntimeError):
+    """Raised by _run() when a subprocess exceeds its allotted time."""
+# ===========================================================================
+# Subprocess helper
+# ===========================================================================
+def _run(
+    command: list[str] | str,
+    cwd: str | None = None,
+    timeout: int = 120,
+    shell: bool = False,
+) -> tuple[int, str, str]:
+    """
+    Run a command on Windows with a new process group so the entire child
+    tree can be killed on timeout via taskkill /F /T /PID.
+    • list[str]  →  all internal commands (git clone/checkout/apply/diff).
+                    argv passed directly; no shell, no injection.
+    • shell=True →  user-supplied test_command and lint_command only.
+    Windows-specific notes:
+      • CREATE_NEW_PROCESS_GROUP isolates the child in its own process group.
+        start_new_session and creationflags are mutually exclusive on Windows;
+        we use creationflags exclusively here.
+      • taskkill /F /T /PID forcefully terminates the process tree — the
+        Windows equivalent of POSIX os.killpg(SIGKILL).
+    Raises _TaskTimeoutError on timeout.
+    Returns (exit_code, stdout, stderr).
+    """
+    proc = subprocess.Popen(
+        command,
+        cwd=cwd,
+        shell=shell,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+        env=os.environ.copy(),
+        creationflags=subprocess.CREATE_NEW_PROCESS_GROUP,
+    )
+    try:
+        out, err = proc.communicate(timeout=timeout)
+        return proc.returncode, out, err
+    except subprocess.TimeoutExpired:
+        subprocess.run(
+            ["taskkill", "/F", "/T", "/PID", str(proc.pid)],
+            capture_output=True,
+        )
+        proc.wait()
+        raise _TaskTimeoutError(f"Command timed out after {timeout}s")
+# ===========================================================================
+# Test result parsers
+# ===========================================================================
+def _parse_pytest(text: str) -> tuple[int, int]:
+    passed, failed = 0, 0
+    m = re.search(r"(\d+)\s+passed", text)
+    if m:
+        passed = int(m.group(1))
+    m = re.search(r"(\d+)\s+failed", text)
+    if m:
+        failed = int(m.group(1))
+    m = re.search(r"(\d+)\s+error", text)
+    if m:
+        failed += int(m.group(1))
+    return passed, failed
+def _parse_cargo(text: str) -> tuple[int, int]:
+    passed, failed = 0, 0
+    for m in re.finditer(r"test result:.*?(\d+)\s+passed;\s*(\d+)\s+failed", text):
+        passed += int(m.group(1))
+        failed += int(m.group(2))
+    return passed, failed
+def _parse_go(text: str) -> tuple[int, int]:
+    passed = len(re.findall(r"^--- PASS:", text, re.MULTILINE))
+    failed = len(re.findall(r"^--- FAIL:", text, re.MULTILINE))
+    if passed == 0 and failed == 0:
+        passed = len(re.findall(r"^ok\s+\S+",   text, re.MULTILINE))
+        failed = len(re.findall(r"^FAIL\s+\S+", text, re.MULTILINE))
+    return passed, failed
+def _parse_jest(text: str) -> tuple[int, int]:
+    passed, failed = 0, 0
+    m = re.search(r"^Tests:\s+(.+)$", text, re.MULTILINE)
+    if m:
+        summary = m.group(1)
+        p = re.search(r"(\d+)\s+passed", summary)
+        f = re.search(r"(\d+)\s+failed", summary)
+        if p:
+            passed = int(p.group(1))
+        if f:
+            failed = int(f.group(1))
+    return passed, failed
+def _parse_dotnet(text: str) -> tuple[int, int]:
+    passed, failed = 0, 0
+    m = re.search(r"Failed:\s*(\d+),\s*Passed:\s*(\d+)", text)
+    if m:
+        failed = int(m.group(1))
+        passed = int(m.group(2))
+    return passed, failed
+def _parse_junit(text: str) -> tuple[int, int]:
+    passed_total = failed_total = 0
+    for m in re.finditer(
+        r"Tests run:\s*(\d+),\s*Failures:\s*(\d+),\s*Errors:\s*(\d+)", text
+    ):
+        run      = int(m.group(1))
+        failures = int(m.group(2))
+        errors   = int(m.group(3))
+        failed_total += failures + errors
+        passed_total += max(run - failures - errors, 0)
+    return passed_total, failed_total
+def _dispatch_test_parser(test_command: str, text: str) -> tuple[int, int]:
+    cmd = test_command.lower()
+    if "pytest" in cmd or "py.test" in cmd:
+        return _parse_pytest(text)
+    if "cargo" in cmd:
+        return _parse_cargo(text)
+    if "go test" in cmd:
+        return _parse_go(text)
+    if (
+        "jest" in cmd
+        or ("npm" in cmd and "test" in cmd)
+        or ("yarn" in cmd and "test" in cmd)
+        or ("pnpm" in cmd and "test" in cmd)
+    ):
+        return _parse_jest(text)
+    if "dotnet" in cmd:
+        return _parse_dotnet(text)
+    if "mvn" in cmd or "gradle" in cmd or "sbt" in cmd or "junit" in cmd:
+        return _parse_junit(text)
+    for parser in (
+        _parse_pytest, _parse_cargo, _parse_go,
+        _parse_jest, _parse_dotnet, _parse_junit,
+    ):
+        p, f = parser(text)
+        if p or f:
+            return p, f
+    return 0, 0
+# ===========================================================================
+# Lint error parser
+# ===========================================================================
+def _parse_lint_errors(lint_command: str, text: str, exit_code: int) -> int:
+    """
+    Extract an error count from linter output.
+    Soft scoring only — never changes task status.
+    """
+    cmd = lint_command.lower()
+    if "ruff" in cmd:
+        m = re.search(r"Found\s+(\d+)\s+error", text)
+        if m:
+            return int(m.group(1))
+        if "--output-format json" in cmd or "-o json" in cmd:
+            try:
+                import json
+                return len(json.loads(text))
+            except Exception:
+                pass
+    if "flake8" in cmd:
+        return len([l for l in text.splitlines() if re.match(r".+:\d+:\d+:\s+[EWF]", l)])
+    if "mypy" in cmd:
+        m = re.search(r"Found\s+(\d+)\s+error", text)
+        if m:
+            return int(m.group(1))
+        return text.count(": error:")
+    if "pylint" in cmd:
+        return len(re.findall(r"^\S+:\d+:\d+:\s+[EF]\d{4}:", text, re.MULTILINE))
+    if "clippy" in cmd or ("cargo" in cmd and "check" in cmd):
+        return len(re.findall(r"^error\[", text, re.MULTILINE))
+    if "eslint" in cmd:
+        if "--format json" in cmd or "-f json" in cmd:
+            try:
+                import json
+                data = json.loads(text)
+                return sum(
+                    sum(1 for msg in f.get("messages", []) if msg.get("severity") == 2)
+                    for f in data
+                )
+            except Exception:
+                pass
+        m = re.search(r"(\d+)\s+error", text)
+        return int(m.group(1)) if m else 0
+    if "go vet" in cmd or "staticcheck" in cmd:
+        return len([l for l in text.splitlines() if l.strip()])
+    if "clang-tidy" in cmd or "cppcheck" in cmd:
+        return len(re.findall(r"\berror\b", text, re.IGNORECASE))
+    if "dotnet" in cmd and "build" in cmd:
+        m = re.search(r"(\d+)\s+Error\(s\)", text)
+        return int(m.group(1)) if m else 0
+    if exit_code != 0:
+        return len(re.findall(r"\berror\b", text, re.IGNORECASE))
+    return 0
+# ===========================================================================
+# Patch normaliser + similarity scorer
+# ===========================================================================
+def _normalise_patch(patch: str) -> list[str]:
+    kept: list[str] = []
+    for line in patch.splitlines():
+        if (
+            line.startswith("diff ")
+            or line.startswith("index ")
+            or line.startswith("--- ")
+            or line.startswith("+++ ")
+            or line.startswith("@@ ")
+        ):
+            continue
+        kept.append(line)
+    return kept
+def _patch_similarity(agent_patch: str, reference_patch: str) -> float:
+    a = _normalise_patch(agent_patch)
+    b = _normalise_patch(reference_patch)
+    if not a and not b:
+        return 1.0
+    if not a or not b:
+        return 0.0
+    return difflib.SequenceMatcher(None, a, b).ratio()
+# ===========================================================================
+# Core task executor
+# ===========================================================================
+def _execute(
+    task_id:          str,
+    repo_url:         str,
+    base_commit:      str,
+    patch:            str,
+    test_command:     str,
+    timeout:          int,
+    lint_command:     str,
+    capture_diff:     bool,
+    reference_patch:  str,
+) -> None:
+    task_dir   = os.path.join(TASK_BASE_DIR, task_id)
+    repo_dir   = os.path.join(task_dir, "repo")
+    patch_file = os.path.join(task_dir, "task.patch")
+    stdout_parts: list[str] = []
+    stderr_parts: list[str] = []
+    start = time.monotonic()
+    final_update: dict = {
+        "status":           "failed",
+        "exit_code":        -1,
+        "stdout":           "",
+        "stderr":           "",
+        "tests_passed":     0,
+        "tests_failed":     0,
+        "lint_errors":      None,
+        "lint_output":      None,
+        "patch_diff":       None,
+        "patch_similarity": None,
+        "execution_time":   0.0,
+    }
+    def _update(**kw: object) -> None:
+        with _tasks_lock:
+            _tasks[task_id].update(kw)
+    _update(status="running")
+    try:
+        os.makedirs(task_dir, exist_ok=True)
+        rc, out, err = _run(["git", "clone", repo_url, repo_dir], timeout=120)
+        stdout_parts.append(out); stderr_parts.append(err)
+        if rc != 0:
+            raise RuntimeError(f"git clone failed (rc={rc}): {err.strip()}")
+        rc, out, err = _run(["git", "checkout", base_commit], cwd=repo_dir, timeout=60)
+        stdout_parts.append(out); stderr_parts.append(err)
+        if rc != 0:
+            raise RuntimeError(f"git checkout failed (rc={rc}): {err.strip()}")
+        if patch and patch.strip():
+            with open(patch_file, "w", encoding="utf-8") as fh:
+                fh.write(patch)
+            rc, out, err = _run(["git", "apply", patch_file], cwd=repo_dir, timeout=30)
+            stdout_parts.append(out); stderr_parts.append(err)
+            if rc != 0:
+                raise RuntimeError(f"git apply failed (rc={rc}): {err.strip()}")
+        rc, out, err = _run(test_command, cwd=repo_dir, timeout=timeout, shell=True)
+        stdout_parts.append(out); stderr_parts.append(err)
+        test_exit_code = rc
+        combined_stdout = "\n".join(filter(None, stdout_parts))
+        combined_stderr = "\n".join(filter(None, stderr_parts))
+        passed, failed  = _dispatch_test_parser(
+            test_command, combined_stdout + "\n" + combined_stderr
+        )
+        lint_errors_count: int | None = None
+        lint_out:          str | None = None
+        if lint_command and lint_command.strip():
+            try:
+                lint_rc, l_out, l_err = _run(
+                    lint_command, cwd=repo_dir, timeout=120, shell=True
+                )
+                lint_out          = (l_out + "\n" + l_err).strip() or None
+                lint_errors_count = _parse_lint_errors(
+                    lint_command, lint_out or "", lint_rc
+                )
+                log.info("Task %s lint finished — rc=%d errors=%s",
+                         task_id, lint_rc, lint_errors_count)
+            except _TaskTimeoutError:
+                lint_out          = "Lint timed out after 120s"
+                lint_errors_count = None
+                log.warning("Task %s lint timed out", task_id)
+            except Exception as exc:
+                lint_out          = f"Lint error: {exc}"
+                lint_errors_count = None
+                log.warning("Task %s lint exception: %s", task_id, exc)
+        patch_diff_text: str | None = None
+        if capture_diff or (reference_patch and reference_patch.strip()):
+            try:
+                _, diff_out, _ = _run(
+                    ["git", "diff", base_commit], cwd=repo_dir, timeout=30
+                )
+                patch_diff_text = diff_out.strip() or None
+            except Exception as exc:
+                log.warning("Task %s git diff failed: %s", task_id, exc)
+        similarity: float | None = None
+        if reference_patch and reference_patch.strip():
+            try:
+                agent_diff = patch_diff_text or (patch if patch and patch.strip() else "")
+                if agent_diff:
+                    similarity = round(_patch_similarity(agent_diff, reference_patch), 4)
+                    log.info("Task %s patch_similarity=%.4f", task_id, similarity)
+            except Exception as exc:
+                log.warning("Task %s similarity computation failed: %s", task_id, exc)
+        final_update = {
+            "status":           "completed",
+            "exit_code":        test_exit_code,
+            "stdout":           combined_stdout,
+            "stderr":           combined_stderr,
+            "tests_passed":     passed,
+            "tests_failed":     failed,
+            "lint_errors":      lint_errors_count,
+            "lint_output":      lint_out,
+            "patch_diff":       patch_diff_text,
+            "patch_similarity": similarity,
+            "execution_time":   round(time.monotonic() - start, 3),
+        }
+    except _TaskTimeoutError as exc:
+        stderr_parts.append(str(exc))
+        log.error("Task %s timed out after %ds", task_id, timeout)
+        final_update.update({
+            "stdout":         "\n".join(filter(None, stdout_parts)),
+            "stderr":         "\n".join(filter(None, stderr_parts)),
+            "execution_time": round(time.monotonic() - start, 3),
+        })
+    except Exception as exc:
+        stderr_parts.append(str(exc))
+        log.exception("Task %s failed: %s", task_id, exc)
+        final_update.update({
+            "stdout":         "\n".join(filter(None, stdout_parts)),
+            "stderr":         "\n".join(filter(None, stderr_parts)),
+            "execution_time": round(time.monotonic() - start, 3),
+        })
+    finally:
+        _update(**final_update)
+        try:
+            shutil.rmtree(task_dir, ignore_errors=True)
+        except Exception:
+            pass
+# ===========================================================================
+# REST endpoints
+# ===========================================================================
+@app.route("/task/submit", methods=["POST"])
+def submit():
+    """
+    POST /task/submit
+    Body (JSON):
+        repo_url          str   required
+        base_commit       str   optional  (default: HEAD)
+        patch             str   optional
+        test_command      str   required
+        timeout           int   optional  (default: 300)
+        lint_command      str   optional
+        capture_diff      bool  optional  (default: false)
+        reference_patch   str   optional
+    Returns 202: { "task_id": "<uuid>", "status": "pending" }
+    """
+    if not _check_auth():
+        return jsonify(error="Unauthorized"), HTTPStatus.UNAUTHORIZED
+    _evict_old_tasks()
+    body = request.get_json(force=True, silent=True)
+    if not body:
+        return jsonify(error="Request body must be valid JSON"), HTTPStatus.BAD_REQUEST
+    missing = [f for f in ("repo_url", "test_command") if not body.get(f)]
+    if missing:
+        return jsonify(error=f"Missing required fields: {missing}"), HTTPStatus.BAD_REQUEST
+    task_id = str(uuid.uuid4())
+    record: dict = {
+        "task_id":          task_id,
+        "status":           "pending",
+        "_created":         time.monotonic(),
+        "repo_url":         body["repo_url"],
+        "base_commit":      body.get("base_commit", "HEAD"),
+        "test_command":     body["test_command"],
+        "timeout":          int(body.get("timeout", 300)),
+        "exit_code":        None,
+        "stdout":           None,
+        "stderr":           None,
+        "tests_passed":     None,
+        "tests_failed":     None,
+        "lint_errors":      None,
+        "lint_output":      None,
+        "patch_diff":       None,
+        "patch_similarity": None,
+        "execution_time":   None,
+    }
+    with _tasks_lock:
+        _tasks[task_id] = record
+    threading.Thread(
+        target=_execute,
+        args=(
+            task_id,
+            body["repo_url"],
+            body.get("base_commit", "HEAD"),
+            body.get("patch", ""),
+            body["test_command"],
+            int(body.get("timeout", 300)),
+            body.get("lint_command", ""),
+            bool(body.get("capture_diff", False)),
+            body.get("reference_patch", ""),
+        ),
+        daemon=True,
+    ).start()
+    log.info("Task %s submitted — repo=%s", task_id, body["repo_url"])
+    return jsonify(task_id=task_id, status="pending"), HTTPStatus.ACCEPTED
+@app.route("/task/<task_id>", methods=["GET"])
+def status(task_id: str):
+    if not _check_auth():
+        return jsonify(error="Unauthorized"), HTTPStatus.UNAUTHORIZED
+    with _tasks_lock:
+        t = _tasks.get(task_id)
+    if t is None:
+        return jsonify(error="Task not found"), HTTPStatus.NOT_FOUND
+    return jsonify(task_id=t["task_id"], status=t["status"])
+@app.route("/task/<task_id>/result", methods=["GET"])
+def result(task_id: str):
+    if not _check_auth():
+        return jsonify(error="Unauthorized"), HTTPStatus.UNAUTHORIZED
+    with _tasks_lock:
+        t = _tasks.get(task_id)
+    if t is None:
+        return jsonify(error="Task not found"), HTTPStatus.NOT_FOUND
+    if t["status"] in ("pending", "running"):
+        return jsonify(
+            task_id=task_id,
+            status=t["status"],
+            message="Task not yet complete — poll again shortly",
+        ), HTTPStatus.ACCEPTED
+    return jsonify(t)
+@app.route("/task/<task_id>", methods=["DELETE"])
+def delete(task_id: str):
+    if not _check_auth():
+        return jsonify(error="Unauthorized"), HTTPStatus.UNAUTHORIZED
+    with _tasks_lock:
+        if task_id not in _tasks:
+            return jsonify(error="Task not found"), HTTPStatus.NOT_FOUND
+        _tasks.pop(task_id)
+    log.info("Task %s deleted", task_id)
+    return jsonify(task_id=task_id, deleted=True)
+if __name__ == "__main__":
+    log.info("Task executor starting on 0.0.0.0:%d", API_PORT)
+    serve(app, host="0.0.0.0", port=API_PORT, threads=16)

scripts/win11.yaml ADDED Viewed

	@@ -0,0 +1,30 @@

+services:
+  windows:
+    image: dockurr/windows
+    container_name: windows
+    environment:
+      VERSION: "11"
+      DISK_SIZE: "2T"
+      RAM_SIZE: "8G"
+      CPU_CORES: "4"
+      USERNAME: "AgentUser"
+      PASSWORD: "AgentPassword1"
+      TIMEZONE: "UTC"
+      REGION: "en-US"
+      KEYBOARD: "en-US"
+      HOSTNAME: "Workspace"
+    devices:
+      - /dev/kvm
+      - /dev/net/tun
+    cap_add:
+      - NET_ADMIN
+    ports:
+      - 8006:8006
+      - 3389:3389/tcp
+      - 3389:3389/udp
+      - 4444:4444
+      - 2222:22
+    volumes:
+      - ./windows11-storage:/storage
+    restart: "no"
+    stop_grace_period: 2m