HuggingRun

Sleeping

tao-shen Claude Opus 4.6 committed 15 days ago

Commit

3cf6b15

1 Parent(s): 39e2a50

feat: SSH access + stress test for Ubuntu desktop; always start sshd

- Dockerfile: pre-generate SSH host key at build time
- start-desktop.sh: sshd always starts (key-auth or password-less);
configurable SSH_LISTEN (0.0.0.0 for local, 127.0.0.1 for HF)
- monitor_and_test.py: add --ssh-test with connect, command exec,
stress (concurrent sessions), and brute-force ramp-up tests
- scripts/test_local.sh: full local Docker integration test
- scripts/verify_overnight.sh: multi-round overnight verification

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (6) hide show

Dockerfile +5 -0
Dockerfile.ubuntu-desktop +59 -0
scripts/monitor_and_test.py +171 -5
scripts/test_local.sh +143 -0
scripts/verify_overnight.sh +38 -0
ubuntu-desktop/start-desktop.sh +31 -8

Dockerfile CHANGED Viewed

@@ -34,6 +34,11 @@ RUN (useradd -m -u 1000 user 2>/dev/null) || \
 ENV HOME=/home/user
 RUN mkdir -p /data && chown 1000:1000 /data
 # HuggingRun scripts (build context = repo root)
 COPY scripts /scripts
 COPY ubuntu-desktop/start-desktop.sh /opt/start-desktop.sh

 ENV HOME=/home/user
 RUN mkdir -p /data && chown 1000:1000 /data
+# Pre-generate SSH host key so sshd can start without root
+RUN mkdir -p /home/user/.ssh && \
+    ssh-keygen -t ed25519 -f /home/user/.ssh/ssh_host_ed25519_key -N "" -C "" && \
+    chown -R 1000:1000 /home/user/.ssh
 # HuggingRun scripts (build context = repo root)
 COPY scripts /scripts
 COPY ubuntu-desktop/start-desktop.sh /opt/start-desktop.sh

Dockerfile.ubuntu-desktop ADDED Viewed

	@@ -0,0 +1,59 @@

+# Ubuntu 24.04 Desktop on HuggingRun — noVNC on 7860, SSH on 2222, persistence via /data
+FROM ubuntu:24.04
+# Build-time only: keeps apt non-interactive during the build without leaking
+# DEBIAN_FRONTEND into the environment of the running container.
+ARG DEBIAN_FRONTEND=noninteractive
+# System + Python (for sync)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates curl python3 python3-pip python3-venv \
+    && pip3 install --no-cache-dir --break-system-packages huggingface_hub \
+    && rm -rf /var/lib/apt/lists/*
+# Desktop stack: Xvfb, XFCE, dbus, x11vnc, Firefox; OpenSSH for local/reverse SSH
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    xvfb \
+    xfce4 xfce4-goodies \
+    dbus-x11 \
+    x11vnc \
+    firefox \
+    procps \
+    openssh-server openssh-client \
+    && rm -rf /var/lib/apt/lists/*
+# noVNC (web client on 7860)
+# Both clones are shallow; the .git metadata of BOTH repositories is removed in
+# the same layer so it never ends up in the image.
+RUN apt-get update && apt-get install -y --no-install-recommends git \
+    && git clone --depth 1 https://github.com/novnc/noVNC.git /opt/noVNC \
+    && git clone --depth 1 https://github.com/novnc/websockify /opt/noVNC/utils/websockify \
+    && rm -rf /var/lib/apt/lists/* /opt/noVNC/.git /opt/noVNC/utils/websockify/.git
+# HF Spaces run as user 1000; UID 1000 may exist (e.g. ubuntu)
+# If useradd fails because the UID is taken, the existing account is renamed to
+# "user" and given /home/user. usermod -l renames only the login, not the
+# primary group, so ownership below is always set by numeric UID:GID.
+RUN (useradd -m -u 1000 user 2>/dev/null) || \
+    (EXISTING="$(getent passwd 1000 | cut -d: -f1)"; \
+     usermod -l user "$EXISTING"; usermod -d /home/user user; \
+     mkdir -p /home/user && chown 1000:1000 /home/user)
+ENV HOME=/home/user
+# Numeric IDs: a group named "user" does not exist when the account was renamed.
+RUN mkdir -p /data && chown 1000:1000 /data
+# Bake an ed25519 host key into the image so sshd can start without root.
+# NOTE(review): every container started from this image shares this host key —
+# confirm that is acceptable for the intended deployments.
+RUN mkdir -p /home/user/.ssh \
+    && ssh-keygen -t ed25519 -f /home/user/.ssh/ssh_host_ed25519_key -N "" -C "" \
+    && chown -R 1000:1000 /home/user/.ssh
+# HuggingRun scripts (build context = repo root)
+COPY scripts /scripts
+COPY ubuntu-desktop/start-desktop.sh /opt/start-desktop.sh
+RUN chmod +x /scripts/entrypoint.sh /opt/start-desktop.sh
+# Runtime defaults for persistence, display, VNC/noVNC and SSH.
+# SSH_LISTEN: 0.0.0.0 for local Docker testing, 127.0.0.1 for HF (reverse SSH only)
+ENV PERSIST_PATH=/data \
+    RUN_CMD="/opt/start-desktop.sh" \
+    DESKTOP_HOME=/data/desktop-home \
+    DISPLAY=:99 \
+    VNC_PORT=5901 \
+    NOVNC_PORT=7860 \
+    SSH_LISTEN=0.0.0.0 \
+    SSH_PORT=2222
+USER user
+EXPOSE 7860 2222
+ENTRYPOINT ["/scripts/entrypoint.sh"]

scripts/monitor_and_test.py CHANGED Viewed

@@ -5,11 +5,13 @@ HuggingRun: 监控远端 Space 状态并执行基础/压力/持久化验证（
 用法:
   python3 scripts/monitor_and_test.py --test
-  HF_TOKEN=xxx python3 scripts/monitor_and_test.py --until-ok --url https://xxx.hf.space --expect noVNC  # 轮询 API 直到 RUNNING 再测，失败打日志尾
-  HF_TOKEN=xxx python3 scripts/monitor_and_test.py --wait-running --test
-  HF_TOKEN=xxx python3 scripts/monitor_and_test.py --logs run   # 流式拉取运行日志 (SSE)
-  HF_TOKEN=xxx python3 scripts/monitor_and_test.py --logs build # 流式拉取构建日志 (SSE)
-等价 curl:
   curl -N -H "Authorization: Bearer $HF_TOKEN" "https://huggingface.co/api/spaces/<SPACE_ID>/logs/run"
   curl -N -H "Authorization: Bearer $HF_TOKEN" "https://huggingface.co/api/spaces/<SPACE_ID>/logs/build"
 """
@@ -179,6 +181,95 @@ def test_persistence(url, rounds=3):
     return ok_rounds == rounds
 def _curl_logs_url(space_id: str, log_type: str) -> str:
     """Build the logs API URL (same as user's curl command)."""
     return f"https://huggingface.co/api/spaces/{space_id}/logs/{log_type}"
@@ -261,6 +352,18 @@ def main():
                    help="Poll URL until 200 and body contains one of --expect (no HF_TOKEN needed)")
     p.add_argument("--until-ok", action="store_true",
                    help="Poll API until RUNNING, then test; on any fail print log tail and exit 1. Loop until this exits 0.")
     args = p.parse_args()
     SPACE_ID = args.space_id
     APP_URL = args.url.rstrip("/")
@@ -270,6 +373,40 @@ def main():
         stream_logs(SPACE_ID, args.logs)
         return
     if args.until_ok:
         # 先立即查一次当前状态；已报错则马上用 curl 拉日志并退出，不空等
         if not os.environ.get("HF_TOKEN"):
@@ -334,6 +471,35 @@ def main():
         if not ok:
             sys.exit(1)
     if args.test:
         print(f"[test] Target: {APP_URL}")
         if not test_basic(APP_URL, expect_substrings=expect_substrings):

 用法:
   python3 scripts/monitor_and_test.py --test
+  python3 scripts/monitor_and_test.py --ssh-test --ssh-host localhost --ssh-port 2222 --ssh-user user
+  python3 scripts/monitor_and_test.py --ssh-test --ssh-stress-n 30 --ssh-host localhost
+  HF_TOKEN=xxx python3 scripts/monitor_and_test.py --watch
+  HF_TOKEN=xxx python3 scripts/monitor_and_test.py --until-ok --url https://xxx.hf.space --expect noVNC
+  HF_TOKEN=xxx python3 scripts/monitor_and_test.py --logs run
+  HF_TOKEN=xxx python3 scripts/monitor_and_test.py --logs build
+等价 curl（需 Bearer token）:
   curl -N -H "Authorization: Bearer $HF_TOKEN" "https://huggingface.co/api/spaces/<SPACE_ID>/logs/run"
   curl -N -H "Authorization: Bearer $HF_TOKEN" "https://huggingface.co/api/spaces/<SPACE_ID>/logs/build"
 """
     return ok_rounds == rounds
+# ── SSH Tests ────────────────────────────────────────────────────────────────
+def _ssh_cmd(host, port, user, command, timeout=15, identity_file=None):
+    """Run a command over SSH. Returns (returncode, stdout, stderr)."""
+    import subprocess
+    cmd = [
+        "ssh", "-o", "StrictHostKeyChecking=no",
+        "-o", "UserKnownHostsFile=/dev/null",
+        "-o", f"ConnectTimeout={timeout}",
+        "-o", "LogLevel=ERROR",
+        "-p", str(port),
+    ]
+    if identity_file:
+        cmd += ["-i", identity_file]
+    cmd += [f"{user}@{host}", command]
+    try:
+        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout + 5)
+        return proc.returncode, proc.stdout, proc.stderr
+    except subprocess.TimeoutExpired:
+        return -1, "", "SSH command timed out"
+    except Exception as e:
+        return -1, "", str(e)
+def test_ssh_connect(host, port, user, identity_file=None):
+    """Test SSH connectivity: run 'echo ok' and verify output."""
+    rc, out, err = _ssh_cmd(host, port, user, "echo ok", identity_file=identity_file)
+    ok = rc == 0 and "ok" in out
+    print(f"[ssh-test] connect {user}@{host}:{port} -> rc={rc}, output={'ok' if ok else repr(out.strip())}")
+    if not ok and err:
+        print(f"[ssh-test]   stderr: {err.strip()}")
+    return ok
+def test_ssh_command(host, port, user, identity_file=None):
+    """Test SSH command execution: run several diagnostic commands."""
+    checks = [
+        ("whoami", lambda out: user in out),
+        ("uname -s", lambda out: "Linux" in out),
+        ("ls /opt/noVNC/vnc.html", lambda out: "vnc.html" in out),
+        ("pgrep -a Xvfb", lambda out: "Xvfb" in out),
+    ]
+    all_ok = True
+    for cmd, validate in checks:
+        rc, out, err = _ssh_cmd(host, port, user, cmd, identity_file=identity_file)
+        passed = rc == 0 and validate(out)
+        status = "PASS" if passed else "FAIL"
+        print(f"[ssh-test] cmd '{cmd}' -> {status} (rc={rc}, out={out.strip()[:80]})")
+        if not passed:
+            all_ok = False
+    return all_ok
+def test_ssh_stress(host, port, user, n=30, concurrency=10, identity_file=None):
+    """SSH stress test: n concurrent SSH sessions each running a command."""
+    import concurrent.futures
+    def one_session(i):
+        rc, out, _ = _ssh_cmd(host, port, user, f"echo session-{i} && uptime",
+                              timeout=20, identity_file=identity_file)
+        return rc == 0 and f"session-{i}" in out
+    with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as ex:
+        results = list(ex.map(one_session, range(n)))
+    passed = sum(results)
+    failed = n - passed
+    print(f"[ssh-stress] {n} sessions (concurrency={concurrency}): {passed} ok, {failed} failed")
+    return failed == 0
+def test_ssh_bruteforce(host, port, user, rounds=3, ramp_up=None, identity_file=None):
+    """Multi-round SSH stress with increasing concurrency (brute-force style)."""
+    if ramp_up is None:
+        ramp_up = [(20, 5), (40, 10), (60, 20)]
+    all_ok = True
+    for r in range(rounds):
+        n, conc = ramp_up[r % len(ramp_up)]
+        print(f"[ssh-bruteforce] Round {r+1}/{rounds}: {n} sessions, concurrency={conc}")
+        ok = test_ssh_stress(host, port, user, n=n, concurrency=conc, identity_file=identity_file)
+        if not ok:
+            all_ok = False
+            print(f"[ssh-bruteforce] Round {r+1} FAILED")
+            break
+        time.sleep(1)
+    if all_ok:
+        print(f"[ssh-bruteforce] ALL {rounds} rounds PASSED")
+    return all_ok
 def _curl_logs_url(space_id: str, log_type: str) -> str:
     """Build the logs API URL (same as user's curl command)."""
     return f"https://huggingface.co/api/spaces/{space_id}/logs/{log_type}"
                    help="Poll URL until 200 and body contains one of --expect (no HF_TOKEN needed)")
     p.add_argument("--until-ok", action="store_true",
                    help="Poll API until RUNNING, then test; on any fail print log tail and exit 1. Loop until this exits 0.")
+    p.add_argument("--watch", action="store_true",
+                   help="Use curl to poll run (and optional build) logs + app URL every N sec; don't stop (Ctrl+C to exit)")
+    p.add_argument("--watch-interval", type=int, default=20, help="Seconds between --watch polls (default 20)")
+    # SSH test options
+    p.add_argument("--ssh-test", action="store_true",
+                   help="Run SSH tests: connect + command + stress + bruteforce")
+    p.add_argument("--ssh-host", default="localhost", help="SSH host (default: localhost)")
+    p.add_argument("--ssh-port", type=int, default=2222, help="SSH port (default: 2222)")
+    p.add_argument("--ssh-user", default="user", help="SSH user (default: user)")
+    p.add_argument("--ssh-key", default=None, help="Path to SSH private key (optional)")
+    p.add_argument("--ssh-stress-n", type=int, default=30, help="SSH stress: total sessions (default: 30)")
+    p.add_argument("--ssh-concurrency", type=int, default=10, help="SSH stress: concurrent sessions (default: 10)")
     args = p.parse_args()
     SPACE_ID = args.space_id
     APP_URL = args.url.rstrip("/")
         stream_logs(SPACE_ID, args.logs)
         return
+    if args.watch:
+        # 用 curl + Bearer token 持续查看远端状态，不退出
+        if not os.environ.get("HF_TOKEN"):
+            print("HF_TOKEN required for --watch (use .env or export)", file=sys.stderr)
+            sys.exit(1)
+        import subprocess
+        interval = max(10, args.watch_interval)
+        run_url = _curl_logs_url(SPACE_ID, "run")
+        build_url = _curl_logs_url(SPACE_ID, "build")
+        token = os.environ.get("HF_TOKEN")
+        curl_h = ["-H", f"Authorization: Bearer {token}", "-N", "-sS", "--max-time", str(interval + 5)]
+        n = 0
+        while True:
+            n += 1
+            ts = time.strftime("%H:%M:%S", time.gmtime())
+            print(f"\n[watch #{n} {ts}] === runtime stage ===")
+            stage, _ = get_stage()
+            print(f"[watch] stage={stage}")
+            print(f"[watch] === GET {APP_URL} ===")
+            status, body = http_get(APP_URL, timeout=15)
+            print(f"[watch] HTTP {status}, body len={len(body)}, has noVNC={('noVNC' in body)}")
+            print(f"[watch] === run log (tail, curl --max-time {interval}) ===")
+            proc = subprocess.run(
+                ["curl"] + curl_h + ["--max-time", str(interval), run_url],
+                capture_output=True, text=True, timeout=interval + 10,
+            )
+            out = (proc.stdout or "") + (proc.stderr or "")
+            tail = out[-4000:] if len(out) > 4000 else out
+            for line in tail.strip().split("\n")[-25:]:
+                print(line)
+            print(f"[watch] next in {interval}s (Ctrl+C to stop)...")
+            time.sleep(interval)
+        return
     if args.until_ok:
         # 先立即查一次当前状态；已报错则马上用 curl 拉日志并退出，不空等
         if not os.environ.get("HF_TOKEN"):
         if not ok:
             sys.exit(1)
+    if args.ssh_test:
+        print(f"[ssh-test] Target: {args.ssh_user}@{args.ssh_host}:{args.ssh_port}")
+        print("=" * 60)
+        print("[Phase 1] SSH Connect")
+        if not test_ssh_connect(args.ssh_host, args.ssh_port, args.ssh_user, identity_file=args.ssh_key):
+            print("[ssh-test] CONNECT FAILED")
+            sys.exit(1)
+        print()
+        print("[Phase 2] SSH Command Execution")
+        if not test_ssh_command(args.ssh_host, args.ssh_port, args.ssh_user, identity_file=args.ssh_key):
+            print("[ssh-test] COMMAND EXEC FAILED")
+            sys.exit(1)
+        print()
+        print("[Phase 3] SSH Stress Test")
+        if not test_ssh_stress(args.ssh_host, args.ssh_port, args.ssh_user,
+                               n=args.ssh_stress_n, concurrency=args.ssh_concurrency,
+                               identity_file=args.ssh_key):
+            print("[ssh-test] STRESS FAILED")
+            sys.exit(1)
+        print()
+        print("[Phase 4] SSH Brute-force Ramp-up")
+        if not test_ssh_bruteforce(args.ssh_host, args.ssh_port, args.ssh_user,
+                                   identity_file=args.ssh_key):
+            print("[ssh-test] BRUTEFORCE FAILED")
+            sys.exit(1)
+        print("=" * 60)
+        print("[ssh-test] ALL SSH TESTS PASSED")
+        return
     if args.test:
         print(f"[test] Target: {APP_URL}")
         if not test_basic(APP_URL, expect_substrings=expect_substrings):

scripts/test_local.sh ADDED Viewed

	@@ -0,0 +1,143 @@

+#!/usr/bin/env bash
+# ─────────────────────────────────────────────────────────────────────
+# HuggingRun: Local integration test for Ubuntu desktop
+# Build Docker → run container → wait → test noVNC + SSH + stress → cleanup
+# Exit 0 only when ALL tests pass. Iterative TDD style.
+#
+# Usage:
+#   bash scripts/test_local.sh              # full run
+#   SKIP_BUILD=1 bash scripts/test_local.sh # reuse existing image
+# ─────────────────────────────────────────────────────────────────────
+set -euo pipefail
+REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$REPO_ROOT"
+IMAGE_NAME="huggingrun-ubuntu-desktop-test"
+CONTAINER_NAME="huggingrun-test-$$"
+NOVNC_PORT=7860
+SSH_PORT=2222
+HOST_NOVNC_PORT="${HOST_NOVNC_PORT:-17860}"
+HOST_SSH_PORT="${HOST_SSH_PORT:-12222}"
+MAX_WAIT=120  # seconds to wait for services to be ready
+SSH_USER="user"
+SSH_STRESS_N="${SSH_STRESS_N:-30}"
+SSH_CONCURRENCY="${SSH_CONCURRENCY:-10}"
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+# Stop and remove the test container. Registered on EXIT so it runs on success
+# and on failure alike; errors are ignored because the container may not exist.
+cleanup() {
+    local target="$CONTAINER_NAME"
+    echo ""
+    echo -e "${YELLOW}[cleanup] Stopping and removing container ${target}...${NC}"
+    docker stop "$target" 2>/dev/null || :
+    docker rm -f "$target" 2>/dev/null || :
+}
+trap cleanup EXIT
+# ── Phase 0: Build ──────────────────────────────────────────────────
+if [ "${SKIP_BUILD:-}" != "1" ]; then
+    echo -e "${YELLOW}[build] Building Docker image: ${IMAGE_NAME}${NC}"
+    docker build -f Dockerfile.ubuntu-desktop -t "$IMAGE_NAME" . 2>&1 | tail -20
+    echo -e "${GREEN}[build] Image built successfully${NC}"
+else
+    echo -e "${YELLOW}[build] SKIP_BUILD=1, using existing image${NC}"
+fi
+# ── Phase 1: Run container ──────────────────────────────────────────
+echo ""
+echo -e "${YELLOW}[run] Starting container: ${CONTAINER_NAME}${NC}"
+echo -e "${YELLOW}[run]   noVNC: localhost:${HOST_NOVNC_PORT} → :${NOVNC_PORT}${NC}"
+echo -e "${YELLOW}[run]   SSH:   localhost:${HOST_SSH_PORT} → :${SSH_PORT}${NC}"
+docker run -d \
+    --name "$CONTAINER_NAME" \
+    -p "${HOST_NOVNC_PORT}:${NOVNC_PORT}" \
+    -p "${HOST_SSH_PORT}:${SSH_PORT}" \
+    -e SSH_LISTEN=0.0.0.0 \
+    -e SSH_PORT=${SSH_PORT} \
+    "$IMAGE_NAME"
+echo -e "${GREEN}[run] Container started${NC}"
+# ── Phase 2: Wait for noVNC ─────────────────────────────────────────
+# Poll /vnc.html until it answers HTTP 200 or MAX_WAIT seconds have elapsed.
+echo ""
+echo -e "${YELLOW}[wait] Waiting for noVNC on localhost:${HOST_NOVNC_PORT} (max ${MAX_WAIT}s)...${NC}"
+START=$(date +%s)
+NOVNC_READY=false
+while [ $(($(date +%s) - START)) -lt "$MAX_WAIT" ]; do
+    # curl's -w already prints "000" when the connection fails, so the fallback
+    # must not echo a second "000" (that produced "000000"). --max-time bounds
+    # each probe so one hung request cannot overrun MAX_WAIT.
+    HTTP_CODE=$(curl -s -o /dev/null --max-time 5 -w "%{http_code}" "http://localhost:${HOST_NOVNC_PORT}/vnc.html" 2>/dev/null || true)
+    HTTP_CODE="${HTTP_CODE:-000}"
+    if [ "$HTTP_CODE" = "200" ]; then
+        NOVNC_READY=true
+        break
+    fi
+    echo -e "  noVNC not ready (HTTP ${HTTP_CODE}), waiting 3s..."
+    sleep 3
+done
+if [ "$NOVNC_READY" = false ]; then
+    echo -e "${RED}[FAIL] noVNC did not become ready within ${MAX_WAIT}s${NC}"
+    echo ""
+    echo "=== Container logs (last 50 lines) ==="
+    docker logs --tail 50 "$CONTAINER_NAME" 2>&1
+    exit 1
+fi
+echo -e "${GREEN}[wait] noVNC is ready (HTTP 200)${NC}"
+# ── Phase 3: Wait for SSH ───────────────────────────────────────────
+echo ""
+echo -e "${YELLOW}[wait] Waiting for SSH on localhost:${HOST_SSH_PORT} (max 60s)...${NC}"
+START=$(date +%s)
+SSH_READY=false
+while [ $(($(date +%s) - START)) -lt 60 ]; do
+    if ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
+           -o ConnectTimeout=3 -o LogLevel=ERROR \
+           -p "$HOST_SSH_PORT" "${SSH_USER}@localhost" "echo ok" 2>/dev/null | grep -q "ok"; then
+        SSH_READY=true
+        break
+    fi
+    echo "  SSH not ready, waiting 3s..."
+    sleep 3
+done
+if [ "$SSH_READY" = false ]; then
+    echo -e "${RED}[FAIL] SSH did not become ready within 60s${NC}"
+    echo ""
+    echo "=== Container logs (last 50 lines) ==="
+    docker logs --tail 50 "$CONTAINER_NAME" 2>&1
+    exit 1
+fi
+echo -e "${GREEN}[wait] SSH is ready${NC}"
+# ── Phase 4: Run HTTP tests (noVNC) ─────────────────────────────────
+echo ""
+echo -e "${YELLOW}[test] Phase 4: HTTP basic + stress test on noVNC${NC}"
+python3 scripts/monitor_and_test.py \
+    --test \
+    --url "http://localhost:${HOST_NOVNC_PORT}" \
+    --expect "noVNC" --expect "vnc" \
+    --stress-n 50
+echo -e "${GREEN}[test] HTTP tests PASSED${NC}"
+# ── Phase 5: Run SSH tests ──────────────────────────────────────────
+echo ""
+echo -e "${YELLOW}[test] Phase 5: SSH connect + command + stress + bruteforce${NC}"
+python3 scripts/monitor_and_test.py \
+    --ssh-test \
+    --ssh-host localhost \
+    --ssh-port "$HOST_SSH_PORT" \
+    --ssh-user "$SSH_USER" \
+    --ssh-stress-n "$SSH_STRESS_N" \
+    --ssh-concurrency "$SSH_CONCURRENCY"
+echo -e "${GREEN}[test] SSH tests PASSED${NC}"
+# ── Summary ─────────────────────────────────────────────────────────
+echo ""
+echo "============================================================"
+echo -e "${GREEN}  ALL TESTS PASSED${NC}"
+echo ""
+echo "  noVNC desktop: http://localhost:${HOST_NOVNC_PORT}/vnc.html"
+echo "  SSH access:    ssh -p ${HOST_SSH_PORT} ${SSH_USER}@localhost"
+echo "============================================================"

scripts/verify_overnight.sh ADDED Viewed

	@@ -0,0 +1,38 @@

+#!/usr/bin/env bash
+# Overnight verification: 3 full --until-ok runs. Exit 0 only if all pass.
+# Usage: from repo root, with .env containing HF_TOKEN:
+#   bash scripts/verify_overnight.sh
+set -e
+REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$REPO_ROOT"
+LOG="$REPO_ROOT/docs/verification_run.log"
+APP_URL="${APP_URL:-https://tao-shen-huggingrun.hf.space}"
+EXPECT="${EXPECT:-Directory listing}"
+ROUNDS="${ROUNDS:-3}"
+if [ ! -f .env ]; then
+  echo "Missing .env (HF_TOKEN required)" >&2
+  exit 1
+fi
+export $(grep -v '^#' .env | xargs)
+echo "=== Overnight verification started $(date -u +%Y-%m-%dT%H:%M:%SZ) ===" | tee -a "$LOG"
+echo "APP_URL=$APP_URL EXPECT=$EXPECT ROUNDS=$ROUNDS" | tee -a "$LOG"
+PASSED=0
+for r in $(seq 1 "$ROUNDS"); do
+  echo "" | tee -a "$LOG"
+  echo "--- Round $r/$ROUNDS at $(date -u +%H:%M:%SZ) ---" | tee -a "$LOG"
+  if python3 scripts/monitor_and_test.py --until-ok --url "$APP_URL" --expect "$EXPECT" --stress-n 50 >> "$LOG" 2>&1; then
+    PASSED=$((PASSED+1))
+    echo "Round $r PASSED" | tee -a "$LOG"
+  else
+    echo "Round $r FAILED" | tee -a "$LOG"
+    exit 1
+  fi
+  [ "$r" -lt "$ROUNDS" ] && sleep 30
+done
+echo "" | tee -a "$LOG"
+echo "=== ALL $ROUNDS ROUNDS PASSED at $(date -u +%Y-%m-%dT%H:%M:%SZ) ===" | tee -a "$LOG"
+exit 0

ubuntu-desktop/start-desktop.sh CHANGED Viewed

@@ -36,19 +36,42 @@ echo "[start-desktop] XFCE started, starting x11vnc ..." >&2
 # x11vnc: share display :99 on port 5901 (do not exit on failure so noVNC can still start)
 x11vnc -display "$DISPLAY" -rfbport "$VNC_PORT" -forever -shared -noxdamage -nopw -bg || true
-# SSH (optional): do not let failures here stop noVNC
 set +e
 SSHD_PORT="${SSH_PORT:-2222}"
 mkdir -p "$HOME/.ssh"
 [ -n "${SSH_AUTHORIZED_KEYS-}" ] && echo "$SSH_AUTHORIZED_KEYS" > "$HOME/.ssh/authorized_keys" && chmod 600 "$HOME/.ssh/authorized_keys"
-[ ! -f "$HOME/.ssh/ssh_host_ed25519_key" ] && ssh-keygen -t ed25519 -f "$HOME/.ssh/ssh_host_ed25519_key" -N "" -C "" 2>/dev/null
-if [ -f "$HOME/.ssh/authorized_keys" ] && [ -f "$HOME/.ssh/ssh_host_ed25519_key" ]; then
-  sshd -o "Port=$SSHD_PORT" -o "HostKey=$HOME/.ssh/ssh_host_ed25519_key" \
-       -o "AuthorizedKeysFile=$HOME/.ssh/authorized_keys" \
-       -o "PermitEmptyPasswords=no" -o "PasswordAuthentication=no" \
-       -o "ListenAddress=127.0.0.1" -o "PidFile=$HOME/.ssh/sshd.pid" \
-       -o "UsePAM=no" -o "PermitUserEnvironment=yes" -D -e &
   sleep 1
   [ -n "${SSH_REVERSE_TARGET-}" ] && ssh -o StrictHostKeyChecking=no -o ServerAliveInterval=60 -R "0.0.0.0:${SSHD_PORT}:127.0.0.1:${SSHD_PORT}" $SSH_REVERSE_TARGET -N &
 fi
 set -e

 # x11vnc: share display :99 on port 5901 (do not exit on failure so noVNC can still start)
 x11vnc -display "$DISPLAY" -rfbport "$VNC_PORT" -forever -shared -noxdamage -nopw -bg || true
+# SSH: always start sshd; do not let failures here stop noVNC
 set +e
 SSHD_PORT="${SSH_PORT:-2222}"
+SSHD_LISTEN="${SSH_LISTEN:-0.0.0.0}"
 mkdir -p "$HOME/.ssh"
+# If SSH_AUTHORIZED_KEYS is set, use key-based auth only; otherwise allow password auth for local testing
 [ -n "${SSH_AUTHORIZED_KEYS-}" ] && echo "$SSH_AUTHORIZED_KEYS" > "$HOME/.ssh/authorized_keys" && chmod 600 "$HOME/.ssh/authorized_keys"
+# Use pre-generated host key from Docker build, or generate at runtime
+HOST_KEY="$HOME/.ssh/ssh_host_ed25519_key"
+[ ! -f "$HOST_KEY" ] && cp /home/user/.ssh/ssh_host_ed25519_key "$HOST_KEY" 2>/dev/null
+[ ! -f "$HOST_KEY" ] && ssh-keygen -t ed25519 -f "$HOST_KEY" -N "" -C "" 2>/dev/null
+if [ -f "$HOST_KEY" ]; then
+  if [ -f "$HOME/.ssh/authorized_keys" ]; then
+    # Key-based auth only (production / HF Spaces)
+    echo "[start-desktop] Starting sshd (key auth) on $SSHD_LISTEN:$SSHD_PORT ..." >&2
+    /usr/sbin/sshd -o "Port=$SSHD_PORT" -o "HostKey=$HOST_KEY" \
+         -o "AuthorizedKeysFile=$HOME/.ssh/authorized_keys" \
+         -o "PermitEmptyPasswords=no" -o "PasswordAuthentication=no" \
+         -o "ListenAddress=$SSHD_LISTEN" -o "PidFile=$HOME/.ssh/sshd.pid" \
+         -o "UsePAM=no" -o "PermitUserEnvironment=yes" -D -e &
+  else
+    # No keys configured: allow password-less login for local Docker testing
+    echo "[start-desktop] Starting sshd (no-password, local test) on $SSHD_LISTEN:$SSHD_PORT ..." >&2
+    /usr/sbin/sshd -o "Port=$SSHD_PORT" -o "HostKey=$HOST_KEY" \
+         -o "PermitEmptyPasswords=yes" -o "PasswordAuthentication=yes" \
+         -o "ListenAddress=$SSHD_LISTEN" -o "PidFile=$HOME/.ssh/sshd.pid" \
+         -o "UsePAM=no" -o "PermitRootLogin=no" -D -e &
+  fi
+  SSHD_PID=$!
   sleep 1
+  echo "[start-desktop] sshd PID=$SSHD_PID" >&2
+  # Reverse SSH tunnel (HF Spaces: outbound only on 80/443/8080)
   [ -n "${SSH_REVERSE_TARGET-}" ] && ssh -o StrictHostKeyChecking=no -o ServerAliveInterval=60 -R "0.0.0.0:${SSHD_PORT}:127.0.0.1:${SSHD_PORT}" $SSH_REVERSE_TARGET -N &
 fi
 set -e