tao-shen Claude Opus 4.6 committed on
Commit
aa42ffe
·
1 Parent(s): 1cf5e3c

feat: add /logs endpoint for reliable log viewing

Browse files

HF's runtime log API doesn't capture Docker SDK container output.
Added /var/log/huggingrun.log file that all scripts write to,
served via nginx at /logs endpoint. Logs are now viewable with:
curl https://<space>.hf.space/logs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

scripts/entrypoint.sh CHANGED
@@ -6,25 +6,32 @@
6
  # 3. Start sync-loop daemon in BACKGROUND
7
  # ─────────────────────────────────────────────────────────────────────
8
  set -e
9
- # HF Spaces log API captures stderr — redirect ALL stdout → stderr
10
- exec 1>&2
11
- echo "========================================"
12
- echo "[entrypoint] HuggingRun starting ..."
13
- echo "[entrypoint] Date: $(date -u)"
14
- echo "[entrypoint] SPACE_ID=${SPACE_ID:-not set}"
15
- echo "[entrypoint] HF_TOKEN=${HF_TOKEN:+set (${#HF_TOKEN} chars)}${HF_TOKEN:-NOT SET}"
16
- echo "[entrypoint] HF_DATASET_REPO=${HF_DATASET_REPO:-not set}"
17
- echo "[entrypoint] PERSIST_PATH=${PERSIST_PATH:-/data}"
18
- echo "[entrypoint] RUN_CMD=${RUN_CMD:-default}"
19
- echo "========================================"
 
 
 
 
 
 
 
20
 
21
  # Determine dataset repo
22
  if [ -z "$HF_DATASET_REPO" ]; then
23
  if [ -n "$SPACE_ID" ]; then
24
  export HF_DATASET_REPO="${SPACE_ID}-data"
25
- echo "[entrypoint] Auto-set HF_DATASET_REPO=${HF_DATASET_REPO}"
26
  elif [ -n "$HF_TOKEN" ]; then
27
- echo "[entrypoint] Resolving HF_DATASET_REPO from token ..."
28
  export HF_DATASET_REPO=$(python3 -c "
29
  from huggingface_hub import HfApi
30
  import os
@@ -34,13 +41,13 @@ try:
34
  except:
35
  print('')
36
  " 2>/dev/null)
37
- echo "[entrypoint] Resolved: ${HF_DATASET_REPO}"
38
  fi
39
  fi
40
 
41
  # Ensure dataset repo exists
42
  if [ -n "$HF_TOKEN" ] && [ -n "$HF_DATASET_REPO" ]; then
43
- echo "[entrypoint] Verifying dataset: ${HF_DATASET_REPO} ..."
44
  python3 -c "
45
  from huggingface_hub import HfApi
46
  import os
@@ -52,9 +59,9 @@ try:
52
  except:
53
  api.create_repo(repo_id=repo, repo_type='dataset', private=True)
54
  print(f'[entrypoint] Created dataset: {repo}', flush=True)
55
- " 2>&1 || echo "[entrypoint] WARNING: Could not verify dataset"
56
  else
57
- echo "[entrypoint] WARNING: persistence disabled (no token/repo)"
58
  fi
59
 
60
  # Write env for other processes
@@ -63,21 +70,21 @@ export HF_TOKEN="${HF_TOKEN}"
63
  export HF_DATASET_REPO="${HF_DATASET_REPO}"
64
  export PERSIST_PATH="${PERSIST_PATH:-/data}"
65
  ENVEOF
66
- echo "[entrypoint] Wrote /etc/huggingrun.env"
67
 
68
  # Background: init (download + restore) then start sync-loop
69
  # This runs AFTER services start so HF doesn't timeout
70
  (
71
- echo "[entrypoint:bg] Starting init in background ..."
72
- python3 -u /opt/git_sync_daemon.py init
73
- echo "[entrypoint:bg] Init done, starting sync-loop ..."
74
- exec python3 -u /opt/git_sync_daemon.py sync-loop
75
  ) &
76
  BG_PID=$!
77
- echo "[entrypoint] Background init+sync PID=${BG_PID}"
78
 
79
  # Start services immediately (so HF sees port 7860)
80
  CMD="${RUN_CMD:-python3 /app/demo_app.py}"
81
- echo "[entrypoint] Starting services: ${CMD}"
82
- echo "========================================"
83
  exec $CMD
 
6
  # 3. Start sync-loop daemon in BACKGROUND
7
  # ─────────────────────────────────────────────────────────────────────
8
  set -e
9
+
10
# Log file that the HuggingRun scripts append to.
LOGFILE="/var/log/huggingrun.log"

# Create the file up front. This script runs under `set -e`, so a failure
# here must not abort the container; warn on stderr and carry on instead.
touch "$LOGFILE" 2>/dev/null \
  || echo "[entrypoint] WARNING: cannot write ${LOGFILE}; logging to stderr only" >&2

# log MESSAGE...
# Append MESSAGE to $LOGFILE and mirror it on stderr.
# The file write is best-effort: if $LOGFILE cannot be opened the message
# still reaches stderr and the function returns 0, so `set -e` does not kill
# the caller over a logging failure.
log() {
  # The group redirect also hides bash's own "cannot open" diagnostic.
  { echo "$@" >> "$LOGFILE"; } 2>/dev/null || true
  echo "$@" >&2
}
17
+
18
+ log "========================================"
19
+ log "[entrypoint] HuggingRun starting ..."
20
+ log "[entrypoint] Date: $(date -u)"
21
+ log "[entrypoint] SPACE_ID=${SPACE_ID:-not set}"
22
# hf_token_status
# Print a log-safe description of $HF_TOKEN: "set (N chars)" when it is
# non-empty, "NOT SET" otherwise. The token value itself is never printed.
hf_token_status() {
  if [ -n "${HF_TOKEN:-}" ]; then
    echo "set (${#HF_TOKEN} chars)"
  else
    echo "NOT SET"
  fi
}

# Do not use ${HF_TOKEN:-NOT SET} here: that expansion yields the token
# itself whenever it is set, which would write the secret into the log.
log "[entrypoint] HF_TOKEN=$(hf_token_status)"
23
+ log "[entrypoint] HF_DATASET_REPO=${HF_DATASET_REPO:-not set}"
24
+ log "[entrypoint] PERSIST_PATH=${PERSIST_PATH:-/data}"
25
+ log "[entrypoint] RUN_CMD=${RUN_CMD:-default}"
26
+ log "========================================"
27
 
28
  # Determine dataset repo
29
  if [ -z "$HF_DATASET_REPO" ]; then
30
  if [ -n "$SPACE_ID" ]; then
31
  export HF_DATASET_REPO="${SPACE_ID}-data"
32
+ log "[entrypoint] Auto-set HF_DATASET_REPO=${HF_DATASET_REPO}"
33
  elif [ -n "$HF_TOKEN" ]; then
34
+ log "[entrypoint] Resolving HF_DATASET_REPO from token ..."
35
  export HF_DATASET_REPO=$(python3 -c "
36
  from huggingface_hub import HfApi
37
  import os
 
41
  except:
42
  print('')
43
  " 2>/dev/null)
44
+ log "[entrypoint] Resolved: ${HF_DATASET_REPO}"
45
  fi
46
  fi
47
 
48
  # Ensure dataset repo exists
49
  if [ -n "$HF_TOKEN" ] && [ -n "$HF_DATASET_REPO" ]; then
50
+ log "[entrypoint] Verifying dataset: ${HF_DATASET_REPO} ..."
51
  python3 -c "
52
  from huggingface_hub import HfApi
53
  import os
 
59
  except:
60
  api.create_repo(repo_id=repo, repo_type='dataset', private=True)
61
  print(f'[entrypoint] Created dataset: {repo}', flush=True)
62
+ " >> "$LOGFILE" 2>&1 || log "[entrypoint] WARNING: Could not verify dataset"
63
  else
64
+ log "[entrypoint] WARNING: persistence disabled (no token/repo)"
65
  fi
66
 
67
  # Write env for other processes
 
70
  export HF_DATASET_REPO="${HF_DATASET_REPO}"
71
  export PERSIST_PATH="${PERSIST_PATH:-/data}"
72
  ENVEOF
73
+ log "[entrypoint] Wrote /etc/huggingrun.env"
74
 
75
  # Background: init (download + restore) then start sync-loop
76
  # This runs AFTER services start so HF doesn't timeout
77
  (
78
+ log "[entrypoint:bg] Starting init in background ..."
79
+ python3 -u /opt/git_sync_daemon.py init >> "$LOGFILE" 2>&1
80
+ log "[entrypoint:bg] Init done, starting sync-loop ..."
81
+ exec python3 -u /opt/git_sync_daemon.py sync-loop >> "$LOGFILE" 2>&1
82
  ) &
83
  BG_PID=$!
84
+ log "[entrypoint] Background init+sync PID=${BG_PID}"
85
 
86
  # Start services immediately (so HF sees port 7860)
87
  CMD="${RUN_CMD:-python3 /app/demo_app.py}"
88
+ log "[entrypoint] Starting services: ${CMD}"
89
+ log "========================================"
90
  exec $CMD
ubuntu-server/nginx.conf CHANGED
@@ -33,6 +33,13 @@ http {
33
  proxy_send_timeout 86400;
34
  }
35
 
 
 
 
 
 
 
 
36
  # Everything else → ttyd web terminal (on 7681)
37
  location / {
38
  proxy_pass http://127.0.0.1:7681;
 
33
  proxy_send_timeout 86400;
34
  }
35
 
36
# /logs → contents of /var/log/huggingrun.log, served as plain text and
# marked non-cacheable so each request returns the current file.
# Exact match (=): with a plain prefix location, alias would also map
# /logs<suffix> onto /var/log/huggingrun.log<suffix>.
# NOTE(review): this location has no access restriction in view, so whatever
# the scripts write to the log is readable by anyone who can reach this
# server; confirm that nothing secret is ever logged.
location = /logs {
    default_type text/plain;
    add_header Cache-Control "no-cache, no-store";
    alias /var/log/huggingrun.log;
}
42
+
43
  # Everything else → ttyd web terminal (on 7681)
44
  location / {
45
  proxy_pass http://127.0.0.1:7681;
ubuntu-server/start-server.sh CHANGED
@@ -4,29 +4,27 @@
4
  # Port 7860 (nginx): web terminal + SSH
5
  # ─────────────────────────────────────────────────────────────────────
6
 
 
 
 
7
  export SSH_PORT="${SSH_PORT:-2222}"
8
  export TTYD_PORT="${TTYD_PORT:-7681}"
9
 
10
- # Helper: write to BOTH stdout and stderr, and also /proc/1/fd/1 + /proc/1/fd/2
11
- # This ensures HF captures the output regardless of which stream they monitor
12
  log() {
13
- echo "$@"
14
- echo "$@" >&2
15
- echo "$@" > /proc/1/fd/1 2>/dev/null || true
16
- echo "$@" > /proc/1/fd/2 2>/dev/null || true
17
  }
18
 
19
- # ── Start nginx FIRST so HF sees port 7860 and starts capturing logs ──
20
  nginx -g 'daemon off;' &
21
  NGINX_PID=$!
22
-
23
- # Forward signals to nginx for clean shutdown
24
  trap "kill $NGINX_PID 2>/dev/null; wait $NGINX_PID 2>/dev/null; exit" SIGTERM SIGINT SIGQUIT
25
 
26
- # Small delay to let nginx bind and HF detect the port
27
- sleep 3
28
 
29
- # ── Now print boot info (HF should be streaming logs at this point) ──
30
  log "========================================"
31
  log "[ubuntu] HuggingRun Ubuntu Server"
32
  log "[ubuntu] $(date -u)"
@@ -39,9 +37,9 @@ log "[ubuntu] Disk: $(df -h / 2>/dev/null | awk 'NR==2{print $2, "total,", $4, "
39
  log "[ubuntu] User: $(whoami) (uid=$(id -u))"
40
  log "========================================"
41
 
42
- # ── Network info ───────────────────────────────────────────────────
43
  log "[ubuntu] Network interfaces:"
44
- ip -4 addr show 2>/dev/null | grep -E 'inet |^[0-9]' | while read line; do log " $line"; done || true
45
 
46
  # ── sshd ──────────────────────────────────────────────────────────
47
  mkdir -p /run/sshd
@@ -61,7 +59,7 @@ else
61
  log "[ubuntu] [FAILED] sshd failed to start"
62
  fi
63
 
64
- # ── WebSocket-to-SSH bridge ──────────────────────────────────────
65
  log "[ubuntu] Starting WS-SSH bridge on 127.0.0.1:7862 ..."
66
  python3 /opt/ws-ssh-bridge.py &
67
  BRIDGE_PID=$!
@@ -72,7 +70,7 @@ else
72
  log "[ubuntu] [FAILED] WS-SSH bridge failed to start"
73
  fi
74
 
75
- # ── ttyd (web terminal) ─────────────────────────────────────────
76
  log "[ubuntu] Starting ttyd on 127.0.0.1:${TTYD_PORT} ..."
77
  ttyd --port "$TTYD_PORT" --writable --base-path / bash --login &
78
  TTYD_PID=$!
@@ -83,7 +81,7 @@ else
83
  log "[ubuntu] [FAILED] ttyd failed to start"
84
  fi
85
 
86
- # ── Process summary ──────────────────────────────────────────────
87
  log "========================================"
88
  log "[ubuntu] Services:"
89
  log "[ubuntu] nginx PID=${NGINX_PID} 0.0.0.0:7860"
@@ -91,26 +89,20 @@ log "[ubuntu] sshd PID=${SSHD_PID} 127.0.0.1:${SSH_PORT}"
91
  log "[ubuntu] ws-ssh-bridge PID=${BRIDGE_PID} 127.0.0.1:7862"
92
  log "[ubuntu] ttyd PID=${TTYD_PID} 127.0.0.1:${TTYD_PORT}"
93
  log "========================================"
94
-
95
- # ── Installed packages ───────────────────────────────────────────
96
  log "[ubuntu] Base packages: $(wc -l < /etc/base-packages.list 2>/dev/null || echo '?')"
97
  log "[ubuntu] Current packages: $(dpkg-query -W -f='\n' 2>/dev/null | wc -l)"
98
 
99
- # ── Running processes ────────────────────────────────────────────
100
  log "[ubuntu] All processes:"
101
- ps aux --no-headers 2>/dev/null | awk '{printf "[ubuntu] %-8s PID=%-6s %s\n", $1, $2, $11}' | while read line; do log "$line"; done || true
102
 
103
  log "[ubuntu] ══ System ready ══"
 
104
 
105
- # ── Heartbeat (every 10s for debugging, will increase later) ─────
106
  (while true; do
107
- sleep 10
108
- MSG="[ubuntu] heartbeat: $(date -u) | load=$(cat /proc/loadavg 2>/dev/null | cut -d' ' -f1-3)"
109
- echo "$MSG"
110
- echo "$MSG" >&2
111
- echo "$MSG" > /proc/1/fd/1 2>/dev/null || true
112
- echo "$MSG" > /proc/1/fd/2 2>/dev/null || true
113
  done) &
114
 
115
- # ── Wait for nginx (keep shell as PID 1 for signal handling) ─────
116
  wait $NGINX_PID
 
4
  # Port 7860 (nginx): web terminal + SSH
5
  # ─────────────────────────────────────────────────────────────────────
6
 
7
+ LOGFILE="/var/log/huggingrun.log"
8
+ > "$LOGFILE" # truncate on start
9
+
10
  export SSH_PORT="${SSH_PORT:-2222}"
11
  export TTYD_PORT="${TTYD_PORT:-7681}"
12
 
13
+ # Log to file + stderr (HF may or may not capture stderr)
 
14
  log() {
15
+ local msg="$*"
16
+ echo "$msg" >> "$LOGFILE"
17
+ echo "$msg" >&2
 
18
  }
19
 
20
+ # ── Start nginx FIRST so HF sees port 7860 ──────────────────────
21
  nginx -g 'daemon off;' &
22
  NGINX_PID=$!
 
 
23
  trap "kill $NGINX_PID 2>/dev/null; wait $NGINX_PID 2>/dev/null; exit" SIGTERM SIGINT SIGQUIT
24
 
25
+ sleep 2
 
26
 
27
+ # ── Boot info ─────────────────────────────────────────────────────
28
  log "========================================"
29
  log "[ubuntu] HuggingRun Ubuntu Server"
30
  log "[ubuntu] $(date -u)"
 
37
  log "[ubuntu] User: $(whoami) (uid=$(id -u))"
38
  log "========================================"
39
 
40
+ # ── Network info ──────────────────────────────────────────────────
41
  log "[ubuntu] Network interfaces:"
42
+ ip -4 addr show 2>/dev/null | grep -E 'inet |^[0-9]' | while IFS= read -r line; do log " $line"; done
43
 
44
  # ── sshd ──────────────────────────────────────────────────────────
45
  mkdir -p /run/sshd
 
59
  log "[ubuntu] [FAILED] sshd failed to start"
60
  fi
61
 
62
+ # ── WebSocket-to-SSH bridge ───────────────────────────────────────
63
  log "[ubuntu] Starting WS-SSH bridge on 127.0.0.1:7862 ..."
64
  python3 /opt/ws-ssh-bridge.py &
65
  BRIDGE_PID=$!
 
70
  log "[ubuntu] [FAILED] WS-SSH bridge failed to start"
71
  fi
72
 
73
+ # ── ttyd (web terminal) ──────────────────────────────────────────
74
  log "[ubuntu] Starting ttyd on 127.0.0.1:${TTYD_PORT} ..."
75
  ttyd --port "$TTYD_PORT" --writable --base-path / bash --login &
76
  TTYD_PID=$!
 
81
  log "[ubuntu] [FAILED] ttyd failed to start"
82
  fi
83
 
84
+ # ── Process summary ───────────────────────────────────────────────
85
  log "========================================"
86
  log "[ubuntu] Services:"
87
  log "[ubuntu] nginx PID=${NGINX_PID} 0.0.0.0:7860"
 
89
  log "[ubuntu] ws-ssh-bridge PID=${BRIDGE_PID} 127.0.0.1:7862"
90
  log "[ubuntu] ttyd PID=${TTYD_PID} 127.0.0.1:${TTYD_PORT}"
91
  log "========================================"
 
 
92
  log "[ubuntu] Base packages: $(wc -l < /etc/base-packages.list 2>/dev/null || echo '?')"
93
  log "[ubuntu] Current packages: $(dpkg-query -W -f='\n' 2>/dev/null | wc -l)"
94
 
 
95
  log "[ubuntu] All processes:"
96
+ ps aux --no-headers 2>/dev/null | awk '{printf "[ubuntu] %-8s PID=%-6s %s\n", $1, $2, $11}' | while IFS= read -r line; do log "$line"; done
97
 
98
  log "[ubuntu] ══ System ready ══"
99
+ log "[ubuntu] View logs: curl https://<space>.hf.space/logs"
100
 
101
# ── Heartbeat ─────────────────────────────────────────────────────
# Background loop: every 60 seconds log a UTC timestamp, the first three
# fields of /proc/loadavg and used/total memory from `free -h`.
# The "?" fallback lives in awk's END block: awk exits 0 even when `free`
# produced no output, so a shell-level `|| echo '?'` after it never runs.
(while true; do
  sleep 60
  log "[ubuntu] heartbeat: $(date -u) | load=$(cut -d' ' -f1-3 /proc/loadavg 2>/dev/null) | mem=$(free -h 2>/dev/null | awk '/Mem:/{print $3"/"$2; found=1} END{if (!found) print "?"}')"
done) &
106
 
107
+ # ── Wait for nginx ────────────────────────────────────────────────
108
  wait $NGINX_PID