Somrat Sorkar committed on
Commit
5365372
·
1 Parent(s): 0bfb89f

v1.1.0: Pre-built Docker image, huggingface_hub sync, password auth, trusted proxies

Browse files
Files changed (6) hide show
  1. .env.example +13 -0
  2. CHANGELOG.md +16 -0
  3. Dockerfile +21 -7
  4. README.md +21 -9
  5. start.sh +31 -1
  6. workspace-sync.py +149 -0
.env.example CHANGED
@@ -128,6 +128,10 @@ LLM_MODEL=anthropic/claude-sonnet-4-5
128
  # Generate: openssl rand -hex 32
129
  GATEWAY_TOKEN=your_gateway_token_here
130
 
 
 
 
 
131
  # ── OPTIONAL: Telegram Integration ──
132
  # Get bot token from: https://t.me/BotFather
133
  TELEGRAM_BOT_TOKEN=your_bot_token_here
@@ -164,6 +168,15 @@ OPENCLAW_VERSION=latest
164
  # Health endpoint port. Default: 7861
165
  HEALTH_PORT=7861
166
 
 
 
 
 
 
 
 
 
 
167
  # ════════════════════════════════════════════════════════════════
168
  # QUICK START: Only 3 secrets required!
169
  # 1. LLM_API_KEY → From your LLM provider
 
128
  # Generate: openssl rand -hex 32
129
  GATEWAY_TOKEN=your_gateway_token_here
130
 
131
+ # (Optional) Password auth — simpler alternative to token for casual users
132
+ # If set, users can log in with this password instead of the token
133
+ # OPENCLAW_PASSWORD=your_password_here
134
+
135
  # ── OPTIONAL: Telegram Integration ──
136
  # Get bot token from: https://t.me/BotFather
137
  TELEGRAM_BOT_TOKEN=your_bot_token_here
 
168
  # Health endpoint port. Default: 7861
169
  HEALTH_PORT=7861
170
 
171
+ # Trusted proxies (comma-separated IPs)
172
+ # Fixes "Proxy headers detected from untrusted address" behind reverse proxies
173
+ # Only set if you see pairing/auth errors. Find IPs in Space logs (remote=x.x.x.x)
174
+ # TRUSTED_PROXIES=10.20.31.87,10.20.26.157
175
+
176
+ # Allowed origins for Control UI (comma-separated URLs)
177
+ # Locks down the web UI to only these origins
178
+ # ALLOWED_ORIGINS=https://your-space.hf.space
179
+
180
  # ════════════════════════════════════════════════════════════════
181
  # QUICK START: Only 3 secrets required!
182
  # 1. LLM_API_KEY → From your LLM provider
CHANGELOG.md CHANGED
@@ -2,6 +2,22 @@
2
 
3
  All notable changes to this project will be documented in this file.
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  ## [1.0.0] - 2026-03-30
6
 
7
  ### 🎉 Initial Release
 
2
 
3
  All notable changes to this project will be documented in this file.
4
 
5
+ ## [1.1.0] - 2026-03-31
6
+
7
+ ### Added
8
+ - **Pre-built Docker image** — uses `ghcr.io/openclaw/openclaw:latest` multi-stage build for much faster builds (minutes instead of 30+)
9
+ - **Python huggingface_hub sync** — `workspace-sync.py` uses the `huggingface_hub` library for more reliable HF Dataset sync (handles auth, LFS, retries). Falls back to git-based sync automatically
10
+ - **Password auth** — `OPENCLAW_PASSWORD` for simpler login (optional alternative to token)
11
+ - **Trusted proxies** — `TRUSTED_PROXIES` env var fixes "Proxy headers detected from untrusted address" errors on HF Spaces
12
+ - **Allowed origins** — `ALLOWED_ORIGINS` env var to lock down Control UI access
13
+ - **40+ LLM providers** — Added support for OpenCode, OpenRouter, DeepSeek, Qwen, Z.ai, Moonshot, Mistral, xAI, NVIDIA, Volcengine, BytePlus, Cohere, Groq, HuggingFace Inference, and more
14
+ - **OpenCode Zen/Go** — support for OpenCode's tested model service
15
+
16
+ ### Changed
17
+ - Provider detection now uses `case` statement (cleaner, faster) with correct OpenClaw provider IDs
18
+ - Model IDs now sourced from OpenClaw docs (not OpenRouter) for accuracy
19
+ - Google API key env var corrected to `GEMINI_API_KEY`
20
+
21
  ## [1.0.0] - 2026-03-30
22
 
23
  ### 🎉 Initial Release
Dockerfile CHANGED
@@ -1,30 +1,44 @@
1
- FROM node:22-slim
 
 
 
 
 
 
2
 
3
- # Version pinning (default: latest)
4
- ARG OPENCLAW_VERSION=latest
5
 
6
- # Install git, ca-certificates, jq, and curl
7
  RUN apt-get update && apt-get install -y \
8
  git \
9
  ca-certificates \
10
  jq \
11
  curl \
 
 
12
  --no-install-recommends && \
 
13
  rm -rf /var/lib/apt/lists/*
14
 
15
  # Reuse existing node user (UID 1000)
16
  RUN mkdir -p /home/node/app /home/node/.openclaw && \
17
  chown -R 1000:1000 /home/node
18
 
19
- # Install OpenClaw (version configurable via build arg)
20
- RUN npm install -g openclaw@${OPENCLAW_VERSION}
 
 
 
 
21
 
22
- # Copy files
23
  COPY --chown=1000:1000 dns-fix.js /opt/dns-fix.js
24
  COPY --chown=1000:1000 health-server.js /home/node/app/health-server.js
25
  COPY --chown=1000:1000 start.sh /home/node/app/start.sh
26
  COPY --chown=1000:1000 keep-alive.sh /home/node/app/keep-alive.sh
27
  COPY --chown=1000:1000 workspace-sync.sh /home/node/app/workspace-sync.sh
 
28
  RUN chmod +x /home/node/app/start.sh /home/node/app/keep-alive.sh /home/node/app/workspace-sync.sh
29
 
30
  USER node
 
1
+ # ════════════════════════════════════════════════════════════════
2
+ # 🦞 HuggingClaw — OpenClaw Gateway for HuggingFace Spaces
3
+ # ════════════════════════════════════════════════════════════════
4
+ # Multi-stage build: uses pre-built OpenClaw image for fast builds
5
+
6
+ # ── Stage 1: Pull pre-built OpenClaw ──
7
+ FROM ghcr.io/openclaw/openclaw:latest AS openclaw
8
 
9
+ # ── Stage 2: Runtime ──
10
+ FROM node:22-slim
11
 
12
+ # Install system dependencies
13
  RUN apt-get update && apt-get install -y \
14
  git \
15
  ca-certificates \
16
  jq \
17
  curl \
18
+ python3 \
19
+ python3-pip \
20
  --no-install-recommends && \
21
+ pip3 install --no-cache-dir --break-system-packages huggingface_hub && \
22
  rm -rf /var/lib/apt/lists/*
23
 
24
  # Reuse existing node user (UID 1000)
25
  RUN mkdir -p /home/node/app /home/node/.openclaw && \
26
  chown -R 1000:1000 /home/node
27
 
28
+ # Copy pre-built OpenClaw (skips npm install entirely — much faster!)
29
+ COPY --from=openclaw --chown=1000:1000 /app /home/node/.openclaw/openclaw-app
30
+
31
+ # Symlink openclaw CLI so it's available globally
32
+ RUN ln -s /home/node/.openclaw/openclaw-app/openclaw.mjs /usr/local/bin/openclaw 2>/dev/null || \
33
+ npm install -g openclaw@latest
34
 
35
+ # Copy HuggingClaw files
36
  COPY --chown=1000:1000 dns-fix.js /opt/dns-fix.js
37
  COPY --chown=1000:1000 health-server.js /home/node/app/health-server.js
38
  COPY --chown=1000:1000 start.sh /home/node/app/start.sh
39
  COPY --chown=1000:1000 keep-alive.sh /home/node/app/keep-alive.sh
40
  COPY --chown=1000:1000 workspace-sync.sh /home/node/app/workspace-sync.sh
41
+ COPY --chown=1000:1000 workspace-sync.py /home/node/app/workspace-sync.py
42
  RUN chmod +x /home/node/app/start.sh /home/node/app/keep-alive.sh /home/node/app/workspace-sync.sh
43
 
44
  USER node
README.md CHANGED
@@ -23,14 +23,15 @@ Works with **any LLM** (Anthropic, OpenAI, Google), connects via **Telegram**, a
23
  ### ✨ Features
24
 
25
  - **Zero-config** — just add 3 secrets and deploy
26
- - **Any LLM provider** — Claude, GPT-4, Gemini, etc.
 
 
27
  - **Built-in keep-alive** — self-pings to prevent HF sleep (no external cron needed)
28
- - **Auto-sync workspace** — commits + pushes changes every 10 min
29
  - **Auto-create backup** — creates the HF Dataset for you if it doesn't exist
30
  - **Graceful shutdown** — saves workspace before container dies
31
  - **Multi-user Telegram** — supports comma-separated user IDs for teams
32
  - **Health endpoint** — `/health` for monitoring
33
- - **Version pinning** — lock OpenClaw to a specific version
34
  - **100% HF-native** — runs entirely on HuggingFace infrastructure
35
 
36
  ---
@@ -107,6 +108,14 @@ See **`.env.example`** for the complete reference with examples.
107
  | `KEEP_ALIVE_INTERVAL` | `300` (5 min) | Self-ping interval. `0` = disable |
108
  | `SYNC_INTERVAL` | `600` (10 min) | Auto-sync interval |
109
 
 
 
 
 
 
 
 
 
110
  #### Advanced
111
 
112
  | Variable | Default | Purpose |
@@ -299,8 +308,9 @@ Set `HF_USERNAME` + `HF_TOKEN` and HuggingClaw handles everything:
299
 
300
  1. **Auto-creates** the dataset if it doesn't exist
301
  2. **Restores** workspace on every startup
302
- 3. **Auto-syncs** changes every 10 minutes (configurable)
303
- 4. **Saves** on shutdown (graceful SIGTERM handling)
 
304
 
305
  Custom dataset name: `BACKUP_DATASET_NAME=my-custom-backup`
306
 
@@ -356,14 +366,14 @@ openclaw channels login --gateway https://YOUR-SPACE-URL.hf.space
356
 
357
  ```
358
  HuggingClaw/
359
- ├── Dockerfile # Runtime: Node.js + OpenClaw + curl + jq
360
  ├── start.sh # Config generator + validation + orchestrator
361
  ├── keep-alive.sh # Self-ping to prevent HF sleep
362
- ├── workspace-sync.sh # Periodic workspace commit + push
 
363
  ├── health-server.js # Health endpoint (/health)
364
  ├── dns-fix.js # DNS override for HF network restrictions
365
  ├── .env.example # Complete configuration reference
366
- ├── .gitignore # Keeps secrets out of version control
367
  └── README.md # You are here
368
  ```
369
 
@@ -390,7 +400,9 @@ HuggingClaw/
390
 
391
  **Space sleeping** → Check logs for `💓 Keep-alive started`. If missing, `SPACE_HOST` might not be set
392
 
393
- **Control UI blocked** → The Space URL is auto-allowlisted. Check logs for origin errors
 
 
394
 
395
  **Version issues** → Pin with `OPENCLAW_VERSION=2026.3.24` in secrets
396
 
 
23
  ### ✨ Features
24
 
25
  - **Zero-config** — just add 3 secrets and deploy
26
+ - **Any LLM provider** — Claude, GPT-4, Gemini, DeepSeek, Qwen, Grok, and [40+ more](#-llm-provider-setup)
27
+ - **Fast builds** — uses pre-built OpenClaw Docker image (minutes, not 30+)
28
+ - **Smart workspace sync** — uses `huggingface_hub` Python library (more reliable than git for HF)
29
  - **Built-in keep-alive** — self-pings to prevent HF sleep (no external cron needed)
 
30
  - **Auto-create backup** — creates the HF Dataset for you if it doesn't exist
31
  - **Graceful shutdown** — saves workspace before container dies
32
  - **Multi-user Telegram** — supports comma-separated user IDs for teams
33
  - **Health endpoint** — `/health` for monitoring
34
+ - **Password or token auth** — choose what works for you
35
  - **100% HF-native** — runs entirely on HuggingFace infrastructure
36
 
37
  ---
 
108
  | `KEEP_ALIVE_INTERVAL` | `300` (5 min) | Self-ping interval. `0` = disable |
109
  | `SYNC_INTERVAL` | `600` (10 min) | Auto-sync interval |
110
 
111
+ #### Security (Optional)
112
+
113
+ | Variable | Default | Purpose |
114
+ |----------|---------|---------|
115
+ | `OPENCLAW_PASSWORD` | — | Password auth (simpler alternative to token) |
116
+ | `TRUSTED_PROXIES` | — | Comma-separated proxy IPs (fixes auth issues behind reverse proxies) |
117
+ | `ALLOWED_ORIGINS` | — | Comma-separated URLs to lock down Control UI |
118
+
119
  #### Advanced
120
 
121
  | Variable | Default | Purpose |
 
308
 
309
  1. **Auto-creates** the dataset if it doesn't exist
310
  2. **Restores** workspace on every startup
311
+ 3. **Smart sync** — uses `huggingface_hub` Python library (handles auth, LFS, retries automatically; falls back to git if unavailable)
312
+ 4. **Auto-syncs** changes every 10 minutes (configurable via `SYNC_INTERVAL`)
313
+ 5. **Saves** on shutdown (graceful SIGTERM handling)
314
 
315
  Custom dataset name: `BACKUP_DATASET_NAME=my-custom-backup`
316
 
 
366
 
367
  ```
368
  HuggingClaw/
369
+ ├── Dockerfile # Multi-stage build with pre-built OpenClaw image
370
  ├── start.sh # Config generator + validation + orchestrator
371
  ├── keep-alive.sh # Self-ping to prevent HF sleep
372
+ ├── workspace-sync.py # Smart sync via huggingface_hub (with git fallback)
373
+ ├── workspace-sync.sh # Legacy git-based sync (fallback)
374
  ├── health-server.js # Health endpoint (/health)
375
  ├── dns-fix.js # DNS override for HF network restrictions
376
  ├── .env.example # Complete configuration reference
 
377
  └── README.md # You are here
378
  ```
379
 
 
400
 
401
  **Space sleeping** → Check logs for `💓 Keep-alive started`. If missing, `SPACE_HOST` might not be set
402
 
403
+ **"Proxy headers detected" or auth errors** → Set `TRUSTED_PROXIES` with the IPs from your Space logs (`remote=x.x.x.x`)
404
+
405
+ **Control UI blocked** → Set `ALLOWED_ORIGINS=https://your-space.hf.space` or check logs for origin errors
406
 
407
  **Version issues** → Pin with `OPENCLAW_VERSION=2026.3.24` in secrets
408
 
start.sh CHANGED
@@ -199,6 +199,25 @@ fi
199
  # Disable device auth (pairing) for headless Docker — token-only auth
200
  CONFIG_JSON=$(echo "$CONFIG_JSON" | jq ".gateway.controlUi.dangerouslyDisableDeviceAuth = true")
201
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  # Telegram (supports multiple user IDs, comma-separated)
203
  if [ -n "$TELEGRAM_BOT_TOKEN" ]; then
204
  CONFIG_JSON=$(echo "$CONFIG_JSON" | jq '.plugins.entries.telegram = {"enabled": true}')
@@ -233,6 +252,11 @@ printf " │ %-40s │\n" "Backup: ✅ ${HF_USERNAME}/${BACKUP_DATASET:-huggin
233
  else
234
  printf " │ %-40s │\n" "Backup: ❌ not configured"
235
  fi
 
 
 
 
 
236
  if [ -n "$SPACE_HOST" ]; then
237
  printf " │ %-40s │\n" "Keep-alive: ✅ every ${KEEP_ALIVE_INTERVAL:-300}s"
238
  printf " │ %-40s │\n" "Control UI: https://${SPACE_HOST}"
@@ -276,7 +300,13 @@ trap graceful_shutdown SIGTERM SIGINT
276
  # ── Start background services ──
277
  node /home/node/app/health-server.js &
278
  /home/node/app/keep-alive.sh &
279
- /home/node/app/workspace-sync.sh &
 
 
 
 
 
 
280
 
281
  # ── Launch gateway ──
282
  echo "🚀 Launching OpenClaw gateway on port 7860..."
 
199
  # Disable device auth (pairing) for headless Docker — token-only auth
200
  CONFIG_JSON=$(echo "$CONFIG_JSON" | jq ".gateway.controlUi.dangerouslyDisableDeviceAuth = true")
201
 
202
+ # Password auth (optional — simpler alternative to token for casual users)
203
+ if [ -n "$OPENCLAW_PASSWORD" ]; then
204
+ CONFIG_JSON=$(echo "$CONFIG_JSON" | jq ".gateway.auth.mode = \"password\" | .gateway.auth.password = \"$OPENCLAW_PASSWORD\"")
205
+ fi
206
+
207
+ # Trusted proxies (optional — fixes "Proxy headers detected from untrusted address" on HF Spaces)
208
+ # Set TRUSTED_PROXIES as comma-separated IPs, e.g. "10.20.31.87,10.20.26.157"
209
+ if [ -n "$TRUSTED_PROXIES" ]; then
210
+ PROXIES_JSON=$(echo "$TRUSTED_PROXIES" | tr ',' '\n' | sed 's/^ *//;s/ *$//' | jq -R . | jq -s .)
211
+ CONFIG_JSON=$(echo "$CONFIG_JSON" | jq ".gateway.trustedProxies = $PROXIES_JSON")
212
+ fi
213
+
214
+ # Allowed origins (optional — lock down Control UI to specific URLs)
215
+ # Set ALLOWED_ORIGINS as comma-separated URLs, e.g. "https://your-space.hf.space"
216
+ if [ -n "$ALLOWED_ORIGINS" ]; then
217
+ ORIGINS_JSON=$(echo "$ALLOWED_ORIGINS" | tr ',' '\n' | sed 's/^ *//;s/ *$//' | jq -R . | jq -s .)
218
+ CONFIG_JSON=$(echo "$CONFIG_JSON" | jq ".gateway.controlUi.allowedOrigins = $ORIGINS_JSON")
219
+ fi
220
+
221
  # Telegram (supports multiple user IDs, comma-separated)
222
  if [ -n "$TELEGRAM_BOT_TOKEN" ]; then
223
  CONFIG_JSON=$(echo "$CONFIG_JSON" | jq '.plugins.entries.telegram = {"enabled": true}')
 
252
  else
253
  printf " │ %-40s │\n" "Backup: ❌ not configured"
254
  fi
255
+ if [ -n "$OPENCLAW_PASSWORD" ]; then
256
+ printf " │ %-40s │\n" "Auth: 🔑 password"
257
+ else
258
+ printf " │ %-40s │\n" "Auth: 🔐 token"
259
+ fi
260
  if [ -n "$SPACE_HOST" ]; then
261
  printf " │ %-40s │\n" "Keep-alive: ✅ every ${KEEP_ALIVE_INTERVAL:-300}s"
262
  printf " │ %-40s │\n" "Control UI: https://${SPACE_HOST}"
 
300
  # ── Start background services ──
301
  node /home/node/app/health-server.js &
302
  /home/node/app/keep-alive.sh &
303
+
304
+ # Use Python huggingface_hub sync if available, fallback to git-based sync
305
+ if python3 -c "import huggingface_hub" 2>/dev/null; then
306
+ python3 /home/node/app/workspace-sync.py &
307
+ else
308
+ /home/node/app/workspace-sync.sh &
309
+ fi
310
 
311
  # ── Launch gateway ──
312
  echo "🚀 Launching OpenClaw gateway on port 7860..."
workspace-sync.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ HuggingClaw Workspace Sync — HuggingFace Hub based backup
4
+ Uses huggingface_hub Python library instead of git for more reliable
5
+ HF Dataset operations (handles auth, LFS, retries automatically).
6
+
7
+ Falls back to git-based sync if HF_USERNAME or HF_TOKEN are not set.
8
+ """
9
+
10
+ import os
11
+ import sys
12
+ import time
13
+ import signal
14
+ import subprocess
15
+ from pathlib import Path
16
+
17
+ WORKSPACE = Path("/home/node/.openclaw/workspace")
18
+ INTERVAL = int(os.environ.get("SYNC_INTERVAL", "600"))
19
+ HF_TOKEN = os.environ.get("HF_TOKEN", "")
20
+ HF_USERNAME = os.environ.get("HF_USERNAME", "")
21
+ BACKUP_DATASET = os.environ.get("BACKUP_DATASET_NAME", "huggingclaw-backup")
22
+
23
+ running = True
24
+
25
+ def signal_handler(sig, frame):
26
+ global running
27
+ running = False
28
+
29
+ signal.signal(signal.SIGTERM, signal_handler)
30
+ signal.signal(signal.SIGINT, signal_handler)
31
+
32
+
33
+ def has_changes():
34
+ """Check if workspace has uncommitted changes (git-based check)."""
35
+ try:
36
+ subprocess.run(["git", "add", "-A"], cwd=WORKSPACE, capture_output=True)
37
+ result = subprocess.run(
38
+ ["git", "diff", "--cached", "--quiet"],
39
+ cwd=WORKSPACE, capture_output=True
40
+ )
41
+ return result.returncode != 0
42
+ except Exception:
43
+ return False
44
+
45
+
46
+ def sync_with_hf_hub():
47
+ """Sync workspace using huggingface_hub library."""
48
+ try:
49
+ from huggingface_hub import HfApi, upload_folder
50
+
51
+ api = HfApi(token=HF_TOKEN)
52
+ repo_id = f"{HF_USERNAME}/{BACKUP_DATASET}"
53
+
54
+ # Ensure dataset exists
55
+ try:
56
+ api.repo_info(repo_id=repo_id, repo_type="dataset")
57
+ except Exception:
58
+ print(f" πŸ“ Creating dataset {repo_id}...")
59
+ try:
60
+ api.create_repo(repo_id=repo_id, repo_type="dataset", private=True)
61
+ print(f" ✅ Dataset created: {repo_id}")
62
+ except Exception as e:
63
+ print(f" ⚠️ Could not create dataset: {e}")
64
+ return False
65
+
66
+ # Upload workspace
67
+ upload_folder(
68
+ folder_path=str(WORKSPACE),
69
+ repo_id=repo_id,
70
+ repo_type="dataset",
71
+ token=HF_TOKEN,
72
+ commit_message=f"Auto-sync {time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())}",
73
+ ignore_patterns=[".git/*", ".git"],
74
+ )
75
+ return True
76
+
77
+ except ImportError:
78
+ print(" ⚠️ huggingface_hub not installed, falling back to git")
79
+ return False
80
+ except Exception as e:
81
+ print(f" ⚠️ HF Hub sync failed: {e}")
82
+ return False
83
+
84
+
85
+ def sync_with_git():
86
+ """Fallback: sync workspace using git."""
87
+ try:
88
+ ts = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
89
+ subprocess.run(["git", "add", "-A"], cwd=WORKSPACE, capture_output=True)
90
+ subprocess.run(
91
+ ["git", "commit", "-m", f"Auto-sync {ts}"],
92
+ cwd=WORKSPACE, capture_output=True
93
+ )
94
+ result = subprocess.run(
95
+ ["git", "push", "origin", "main"],
96
+ cwd=WORKSPACE, capture_output=True
97
+ )
98
+ return result.returncode == 0
99
+ except Exception:
100
+ return False
101
+
102
+
103
+ def main():
104
+ # Wait for workspace to initialize
105
+ time.sleep(30)
106
+
107
+ if not WORKSPACE.exists():
108
+ print("πŸ“ Workspace sync: workspace not found, exiting.")
109
+ return
110
+
111
+ use_hf_hub = bool(HF_TOKEN and HF_USERNAME)
112
+
113
+ if use_hf_hub:
114
+ print(f"🔄 Workspace sync started (huggingface_hub): every {INTERVAL}s → {HF_USERNAME}/{BACKUP_DATASET}")
115
+ else:
116
+ git_dir = WORKSPACE / ".git"
117
+ if not git_dir.exists():
118
+ print("πŸ“ Workspace sync: no git repo and no HF credentials, skipping.")
119
+ return
120
+ print(f"🔄 Workspace sync started (git): every {INTERVAL}s")
121
+
122
+ while running:
123
+ time.sleep(INTERVAL)
124
+ if not running:
125
+ break
126
+
127
+ if not has_changes():
128
+ continue
129
+
130
+ ts = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
131
+
132
+ if use_hf_hub:
133
+ if sync_with_hf_hub():
134
+ print(f"🔄 Workspace sync (hf_hub): pushed changes ({ts})")
135
+ else:
136
+ # Fallback to git
137
+ if sync_with_git():
138
+ print(f"🔄 Workspace sync (git fallback): pushed changes ({ts})")
139
+ else:
140
+ print(f"🔄 Workspace sync: failed ({ts}), will retry")
141
+ else:
142
+ if sync_with_git():
143
+ print(f"🔄 Workspace sync (git): pushed changes ({ts})")
144
+ else:
145
+ print(f"🔄 Workspace sync: push failed ({ts}), will retry")
146
+
147
+
148
+ if __name__ == "__main__":
149
+ main()