tao-shen Claude Opus 4.6 committed on
Commit
0b594d1
·
1 Parent(s): f5ea12d

feat: add AUTO_CREATE_DATASET env var to control dataset auto-creation

Browse files

When set to false, HuggingClaw will not auto-create the dataset repo
and will prompt the user to create it manually on HuggingFace.
Default: true (existing behavior preserved).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (3) hide show
  1. .env.example +50 -12
  2. README.md +27 -3
  3. scripts/sync_hf.py +41 -14
.env.example CHANGED
@@ -49,6 +49,13 @@ HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
49
  #
50
  OPENCLAW_DATASET_REPO=your-username/openclaw-data
51
 
 
 
 
 
 
 
 
52
  # How often (in seconds) to back up data to the Dataset repo.
53
  # Lower values = safer but more API calls to HuggingFace.
54
  #
@@ -57,26 +64,40 @@ OPENCLAW_DATASET_REPO=your-username/openclaw-data
57
  # SYNC_INTERVAL=120
58
 
59
 
60
- # ─── LLM PROVIDER ────────────────────────────────────────────────────────
61
  #
62
- # OpenClaw needs at least one LLM provider to power AI conversations.
63
- # OpenRouter is recommended — one API key, 200+ models, free tier included.
64
  #
65
- # OpenRouter API Key
66
- # Get one at: https://openrouter.ai/keys
67
- # Free-tier models: stepfun/step-3.5-flash:free, deepseek/deepseek-chat:free
68
  #
69
- # [RECOMMENDED]
 
 
 
 
 
 
 
 
 
70
  #
71
  OPENROUTER_API_KEY=sk-or-v1-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
72
 
73
- # Default model for new conversations.
74
- # Must be a model available from your configured provider.
75
- # Format: provider/model-id (e.g., openrouter/deepseek/deepseek-chat:free)
 
 
 
 
 
76
  #
77
- # [OPTIONAL] Default: openrouter/stepfun/step-3.5-flash:free
78
  #
79
- # OPENCLAW_DEFAULT_MODEL=openrouter/deepseek/deepseek-chat:free
80
 
81
 
82
  # ─── PERFORMANCE ──────────────────────────────────────────────────────────
@@ -97,3 +118,20 @@ OPENROUTER_API_KEY=sk-or-v1-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
97
  # [OPTIONAL] Default: UTC
98
  #
99
  # TZ=Asia/Shanghai
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  #
50
  OPENCLAW_DATASET_REPO=your-username/openclaw-data
51
 
52
+ # Whether to auto-create the Dataset repo if it doesn't exist.
53
+ # Set to false if you prefer to create the repo manually on HuggingFace.
54
+ #
55
+ # [OPTIONAL] Default: true
56
+ #
57
+ # AUTO_CREATE_DATASET=true
58
+
59
  # How often (in seconds) to back up data to the Dataset repo.
60
  # Lower values = safer but more API calls to HuggingFace.
61
  #
 
64
  # SYNC_INTERVAL=120
65
 
66
 
67
+ # ─── LLM / OPENAI-COMPATIBLE API ───────────────────────────────────────────
68
  #
69
+ # OpenClaw supports any OpenAI-compatible API. Set the API key for the
70
+ # provider(s) you use. See OpenClaw docs: https://openclawdoc.com/docs/reference/environment-variables
71
  #
72
+ # OpenAI (or any OpenAI-compatible endpoint)
73
+ # Use OPENAI_API_KEY alone for api.openai.com, or set OPENAI_BASE_URL for
74
+ # compatible endpoints (e.g. OpenRouter, local LLM servers, Azure OpenAI).
75
  #
76
+ # [RECOMMENDED] At least one of the following for AI conversations
77
+ #
78
+ OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
79
+
80
+ # Optional: base URL for OpenAI-compatible API (default: https://api.openai.com/v1)
81
+ # Examples: https://openrouter.ai/api/v1, http://localhost:11434/v1 (Ollama), etc.
82
+ #
83
+ # OPENAI_BASE_URL=https://api.openai.com/v1
84
+
85
+ # OpenRouter — one key, 200+ models, free tier: https://openrouter.ai/keys
86
  #
87
  OPENROUTER_API_KEY=sk-or-v1-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
88
 
89
+ # Other providers (OpenClaw reads these from the environment)
90
+ #
91
+ # ANTHROPIC_API_KEY=sk-ant-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
92
+ # GOOGLE_API_KEY=AIza...
93
+ # MISTRAL_API_KEY=mis-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
94
+ # COHERE_API_KEY=co-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
95
+
96
+ # Default model for new conversations (must exist in your configured provider).
97
  #
98
+ # [OPTIONAL] Examples: openai/gpt-4o-mini, openrouter/deepseek/deepseek-chat:free
99
  #
100
+ # OPENCLAW_DEFAULT_MODEL=openai/gpt-4o-mini
101
 
102
 
103
  # ─── PERFORMANCE ──────────────────────────────────────────────────────────
 
118
  # [OPTIONAL] Default: UTC
119
  #
120
  # TZ=Asia/Shanghai
121
+
122
+
123
+ # ─── OPENCLAW (from official docs) ─────────────────────────────────────────
124
+ #
125
+ # Optional overrides. See https://openclawdoc.com/docs/reference/environment-variables
126
+ #
127
+ # OPENCLAW_HOME=~/.openclaw
128
+ # OPENCLAW_STATE_DIR=~/.openclaw
129
+ # OPENCLAW_CONFIG_PATH=~/.openclaw/openclaw.json
130
+ # OPENCLAW_LOG_LEVEL=info
131
+ # OPENCLAW_API_PORT=8080
132
+ # OPENCLAW_WS_PORT=8081
133
+ # OPENCLAW_HOST=0.0.0.0
134
+ # OLLAMA_HOST=http://localhost:11434
135
+ # OPENCLAW_HTTP_PROXY=
136
+ # OPENCLAW_HTTPS_PROXY=
137
+ # OPENCLAW_NO_PROXY=
README.md CHANGED
@@ -8,6 +8,26 @@ pinned: false
8
  license: mit
9
  short_description: Deploy OpenClaw on HuggingFace Spaces
10
  app_port: 7860
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
  <div align="center">
@@ -63,7 +83,11 @@ Go to **Settings → Repository secrets** and configure:
63
  | `OPENCLAW_PASSWORD` | Recommended | Password for the Control UI (default: `huggingclaw`) |
64
  | `HF_TOKEN` | **Required** | HF Access Token with write permission ([create one](https://huggingface.co/settings/tokens)) |
65
  | `OPENCLAW_DATASET_REPO` | **Required** | Dataset repo for backup, e.g. `your-name/openclaw-data` |
66
- | `OPENROUTER_API_KEY` | Recommended | [OpenRouter](https://openrouter.ai) API key for LLM access |
 
 
 
 
67
 
68
  > For the full list of environment variables, see [`.env.example`](.env.example).
69
 
@@ -80,8 +104,8 @@ HuggingClaw is configured entirely through **environment variables**. A fully do
80
  | Category | Variables | Purpose |
81
  |----------|-----------|---------|
82
  | **Security** | `OPENCLAW_PASSWORD` | Protect the Control UI with a password |
83
- | **Persistence** | `HF_TOKEN`, `OPENCLAW_DATASET_REPO`, `SYNC_INTERVAL` | Auto-backup to HF Dataset |
84
- | **LLM** | `OPENROUTER_API_KEY`, `OPENCLAW_DEFAULT_MODEL` | Power AI conversations |
85
  | **Performance** | `NODE_MEMORY_LIMIT` | Tune Node.js memory usage |
86
  | **Locale** | `TZ` | Set timezone for logs |
87
 
 
8
  license: mit
9
  short_description: Deploy OpenClaw on HuggingFace Spaces
10
  app_port: 7860
11
+ tags:
12
+ - chatbot
13
+ - llm
14
+ - openclaw
15
+ - ai-assistant
16
+ - whatsapp
17
+ - telegram
18
+ - text-generation
19
+ - openai-api
20
+ - huggingface-spaces
21
+ - docker
22
+ - deployment
23
+ - persistent-storage
24
+ - agents
25
+ - multi-channel
26
+ - openai-compatible
27
+ - free-tier
28
+ - one-click-deploy
29
+ - self-hosted
30
+ - messaging-bot
31
  ---
32
 
33
  <div align="center">
 
83
  | `OPENCLAW_PASSWORD` | Recommended | Password for the Control UI (default: `huggingclaw`) |
84
  | `HF_TOKEN` | **Required** | HF Access Token with write permission ([create one](https://huggingface.co/settings/tokens)) |
85
  | `OPENCLAW_DATASET_REPO` | **Required** | Dataset repo for backup, e.g. `your-name/openclaw-data` |
86
+ | `OPENAI_API_KEY` | Recommended | OpenAI (or any [OpenAI-compatible](https://openclawdoc.com/docs/reference/environment-variables)) API key for LLM |
87
+ | `OPENROUTER_API_KEY` | Optional | [OpenRouter](https://openrouter.ai) API key (200+ models, free tier) |
88
+ | `ANTHROPIC_API_KEY` | Optional | Anthropic Claude API key |
89
+ | `GOOGLE_API_KEY` | Optional | Google / Gemini API key |
90
+ | `OPENCLAW_DEFAULT_MODEL` | Optional | Default model, e.g. `openai/gpt-4o-mini` or `openrouter/deepseek/deepseek-chat:free` |
91
 
92
  > For the full list of environment variables, see [`.env.example`](.env.example).
93
 
 
104
  | Category | Variables | Purpose |
105
  |----------|-----------|---------|
106
  | **Security** | `OPENCLAW_PASSWORD` | Protect the Control UI with a password |
107
+ | **Persistence** | `HF_TOKEN`, `OPENCLAW_DATASET_REPO`, `AUTO_CREATE_DATASET`, `SYNC_INTERVAL` | Auto-backup to HF Dataset |
108
+ | **LLM (OpenAI-compatible)** | `OPENAI_API_KEY`, `OPENAI_BASE_URL`, `OPENROUTER_API_KEY`, `ANTHROPIC_API_KEY`, `GOOGLE_API_KEY`, `MISTRAL_API_KEY`, `COHERE_API_KEY`, `OPENCLAW_DEFAULT_MODEL` | Power AI conversations ([OpenClaw env reference](https://openclawdoc.com/docs/reference/environment-variables)) |
109
  | **Performance** | `NODE_MEMORY_LIMIT` | Tune Node.js memory usage |
110
  | **Locale** | `TZ` | Set timezone for logs |
111
 
scripts/sync_hf.py CHANGED
@@ -62,20 +62,27 @@ TELEGRAM_BOT_TOKEN = os.environ.get("TELEGRAM_BOT_TOKEN", "")
62
  TELEGRAM_BOT_NAME = os.environ.get("TELEGRAM_BOT_NAME", "")
63
  TELEGRAM_ALLOW_USER = os.environ.get("TELEGRAM_ALLOW_USER", "")
64
 
65
- # OpenRouter API key for LLM access
 
 
 
 
66
  OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
67
 
68
  # Gateway password (override via HF Secret OPENCLAW_PASSWORD)
69
  OPENCLAW_PASSWORD = os.environ.get("OPENCLAW_PASSWORD", "huggingclaw")
70
 
71
- # Default model for new conversations
72
- OPENCLAW_DEFAULT_MODEL = os.environ.get("OPENCLAW_DEFAULT_MODEL", "openrouter/stepfun/step-3.5-flash:free")
 
 
73
 
74
  # HF Spaces built-in env vars (auto-set by HF runtime)
75
  SPACE_HOST = os.environ.get("SPACE_HOST", "") # e.g. "tao-shen-huggingclaw.hf.space"
76
  SPACE_ID = os.environ.get("SPACE_ID", "") # e.g. "tao-shen/HuggingClaw"
77
 
78
  SYNC_INTERVAL = int(os.environ.get("SYNC_INTERVAL", "120"))
 
79
 
80
  # Setup logging
81
  log_dir = OPENCLAW_HOME / "workspace"
@@ -107,12 +114,17 @@ class OpenClawFullSync:
107
  # ── Repo management ────────────────────────────────────────────────
108
 
109
  def _ensure_repo_exists(self):
110
- """Check if dataset repo exists; auto-create if not."""
111
  try:
112
  self.api.repo_info(repo_id=HF_REPO_ID, repo_type="dataset")
113
  print(f"[SYNC] Dataset repo found: {HF_REPO_ID}")
114
  return True
115
  except Exception:
 
 
 
 
 
116
  print(f"[SYNC] Dataset repo NOT found: {HF_REPO_ID} - creating...")
117
  try:
118
  self.api.create_repo(
@@ -273,12 +285,17 @@ class OpenClawFullSync:
273
  if "gateway" in cfg and "auth" in cfg["gateway"]:
274
  if cfg["gateway"]["auth"].get("password") == "__OPENCLAW_PASSWORD__":
275
  cfg["gateway"]["auth"]["password"] = OPENCLAW_PASSWORD
 
 
 
 
 
 
 
276
  if OPENROUTER_API_KEY:
277
- # Replace placeholder with actual key
278
  if "models" in cfg and "providers" in cfg["models"] and "openrouter" in cfg["models"]["providers"]:
279
  cfg["models"]["providers"]["openrouter"]["apiKey"] = OPENROUTER_API_KEY
280
  else:
281
- # No API key: remove provider entirely to avoid config validation error
282
  if "models" in cfg and "providers" in cfg["models"]:
283
  cfg["models"]["providers"].pop("openrouter", None)
284
  print("[SYNC] No OPENROUTER_API_KEY — removed openrouter provider from config")
@@ -368,8 +385,16 @@ class OpenClawFullSync:
368
  data.setdefault("agents", {}).setdefault("defaults", {}).setdefault("model", {})
369
  data.setdefault("session", {})["scope"] = "global"
370
 
371
- # Force OpenRouter provider
372
  data.setdefault("models", {}).setdefault("providers", {})
 
 
 
 
 
 
 
 
373
  if OPENROUTER_API_KEY:
374
  data["models"]["providers"]["openrouter"] = {
375
  "baseUrl": "https://openrouter.ai/api/v1",
@@ -380,9 +405,9 @@ class OpenClawFullSync:
380
  {"id": "deepseek/deepseek-chat:free", "name": "DeepSeek V3 (Free)"}
381
  ]
382
  }
383
- else:
384
- print("[SYNC] WARNING: OPENROUTER_API_KEY not set, skipping provider config")
385
- # Remove old gemini provider if present
386
  data["models"]["providers"].pop("gemini", None)
387
  data["agents"]["defaults"]["model"]["primary"] = OPENCLAW_DEFAULT_MODEL
388
 
@@ -492,13 +517,15 @@ class OpenClawFullSync:
492
  # Open log file
493
  log_fh = open(log_file, "a")
494
 
495
- # Prepare environment with required variables
496
  env = os.environ.copy()
 
 
 
497
  if OPENROUTER_API_KEY:
498
  env["OPENROUTER_API_KEY"] = OPENROUTER_API_KEY
499
- print(f"[SYNC] Setting OPENROUTER_API_KEY environment variable")
500
- else:
501
- print(f"[SYNC] WARNING: OPENROUTER_API_KEY not set, LLM features may not work")
502
  try:
503
  # Use Popen without shell to avoid pipe issues
504
  # auth disabled in config — no token needed
 
62
  TELEGRAM_BOT_NAME = os.environ.get("TELEGRAM_BOT_NAME", "")
63
  TELEGRAM_ALLOW_USER = os.environ.get("TELEGRAM_ALLOW_USER", "")
64
 
65
+ # OpenAI-compatible API (OpenAI, OpenRouter, or any compatible endpoint)
66
+ OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
67
+ OPENAI_BASE_URL = os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1").rstrip("/")
68
+
69
+ # OpenRouter API key (optional; alternative to OPENAI_API_KEY + OPENAI_BASE_URL)
70
  OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
71
 
72
  # Gateway password (override via HF Secret OPENCLAW_PASSWORD)
73
  OPENCLAW_PASSWORD = os.environ.get("OPENCLAW_PASSWORD", "huggingclaw")
74
 
75
+ # Default model for new conversations (infer from provider if not set)
76
+ OPENCLAW_DEFAULT_MODEL = os.environ.get("OPENCLAW_DEFAULT_MODEL") or (
77
+ "openai/gpt-4o-mini" if OPENAI_API_KEY else "openrouter/stepfun/step-3.5-flash:free"
78
+ )
79
 
80
  # HF Spaces built-in env vars (auto-set by HF runtime)
81
  SPACE_HOST = os.environ.get("SPACE_HOST", "") # e.g. "tao-shen-huggingclaw.hf.space"
82
  SPACE_ID = os.environ.get("SPACE_ID", "") # e.g. "tao-shen/HuggingClaw"
83
 
84
  SYNC_INTERVAL = int(os.environ.get("SYNC_INTERVAL", "120"))
85
+ AUTO_CREATE_DATASET = os.environ.get("AUTO_CREATE_DATASET", "true").lower() in ("true", "1", "yes")
86
 
87
  # Setup logging
88
  log_dir = OPENCLAW_HOME / "workspace"
 
114
  # ── Repo management ────────────────────────────────────────────────
115
 
116
  def _ensure_repo_exists(self):
117
+ """Check if dataset repo exists; auto-create if AUTO_CREATE_DATASET is enabled."""
118
  try:
119
  self.api.repo_info(repo_id=HF_REPO_ID, repo_type="dataset")
120
  print(f"[SYNC] Dataset repo found: {HF_REPO_ID}")
121
  return True
122
  except Exception:
123
+ if not AUTO_CREATE_DATASET:
124
+ print(f"[SYNC] Dataset repo NOT found: {HF_REPO_ID}")
125
+ print(f"[SYNC] AUTO_CREATE_DATASET is disabled. Please create the dataset repo manually.")
126
+ print(f"[SYNC] → https://huggingface.co/new-dataset")
127
+ return False
128
  print(f"[SYNC] Dataset repo NOT found: {HF_REPO_ID} - creating...")
129
  try:
130
  self.api.create_repo(
 
285
  if "gateway" in cfg and "auth" in cfg["gateway"]:
286
  if cfg["gateway"]["auth"].get("password") == "__OPENCLAW_PASSWORD__":
287
  cfg["gateway"]["auth"]["password"] = OPENCLAW_PASSWORD
288
+ if OPENAI_API_KEY and "models" in cfg and "providers" in cfg["models"] and "openai" in cfg["models"]["providers"]:
289
+ cfg["models"]["providers"]["openai"]["apiKey"] = OPENAI_API_KEY
290
+ if OPENAI_BASE_URL:
291
+ cfg["models"]["providers"]["openai"]["baseUrl"] = OPENAI_BASE_URL
292
+ elif "models" in cfg and "providers" in cfg["models"]:
293
+ if not OPENAI_API_KEY:
294
+ cfg["models"]["providers"].pop("openai", None)
295
  if OPENROUTER_API_KEY:
 
296
  if "models" in cfg and "providers" in cfg["models"] and "openrouter" in cfg["models"]["providers"]:
297
  cfg["models"]["providers"]["openrouter"]["apiKey"] = OPENROUTER_API_KEY
298
  else:
 
299
  if "models" in cfg and "providers" in cfg["models"]:
300
  cfg["models"]["providers"].pop("openrouter", None)
301
  print("[SYNC] No OPENROUTER_API_KEY — removed openrouter provider from config")
 
385
  data.setdefault("agents", {}).setdefault("defaults", {}).setdefault("model", {})
386
  data.setdefault("session", {})["scope"] = "global"
387
 
388
+ # OpenAI-compatible provider (OPENAI_API_KEY + optional OPENAI_BASE_URL)
389
  data.setdefault("models", {}).setdefault("providers", {})
390
+ if OPENAI_API_KEY:
391
+ data["models"]["providers"]["openai"] = {
392
+ "baseUrl": OPENAI_BASE_URL,
393
+ "apiKey": OPENAI_API_KEY,
394
+ "api": "openai-completions",
395
+ }
396
+ print(f"[SYNC] Set OpenAI-compatible provider (baseUrl={OPENAI_BASE_URL})")
397
+ # OpenRouter provider (optional)
398
  if OPENROUTER_API_KEY:
399
  data["models"]["providers"]["openrouter"] = {
400
  "baseUrl": "https://openrouter.ai/api/v1",
 
405
  {"id": "deepseek/deepseek-chat:free", "name": "DeepSeek V3 (Free)"}
406
  ]
407
  }
408
+ print("[SYNC] Set OpenRouter provider")
409
+ if not OPENAI_API_KEY and not OPENROUTER_API_KEY:
410
+ print("[SYNC] WARNING: No OPENAI_API_KEY or OPENROUTER_API_KEY set, LLM features may not work")
411
  data["models"]["providers"].pop("gemini", None)
412
  data["agents"]["defaults"]["model"]["primary"] = OPENCLAW_DEFAULT_MODEL
413
 
 
517
  # Open log file
518
  log_fh = open(log_file, "a")
519
 
520
+ # Prepare environment (all API keys passed through for OpenClaw)
521
  env = os.environ.copy()
522
+ if OPENAI_API_KEY:
523
+ env["OPENAI_API_KEY"] = OPENAI_API_KEY
524
+ env["OPENAI_BASE_URL"] = OPENAI_BASE_URL
525
  if OPENROUTER_API_KEY:
526
  env["OPENROUTER_API_KEY"] = OPENROUTER_API_KEY
527
+ if not OPENAI_API_KEY and not OPENROUTER_API_KEY:
528
+ print(f"[SYNC] WARNING: No OPENAI_API_KEY or OPENROUTER_API_KEY set, LLM features may not work")
 
529
  try:
530
  # Use Popen without shell to avoid pipe issues
531
  # auth disabled in config — no token needed