# OpenClaw on Hugging Face Spaces - multi-model configuration build
FROM node:22-slim

# 1. Base system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    git openssh-client build-essential python3 python3-pip \
    g++ make ca-certificates curl jq \
    && rm -rf /var/lib/apt/lists/*

# 2. Install the Hugging Face Hub client
RUN pip3 install --no-cache-dir huggingface_hub --break-system-packages

# 3. Git configuration
# TLS verification stays enabled: the CA bundle is installed above and refreshed
# here, so turning http.sslVerify off would only expose git fetches to
# man-in-the-middle tampering. SSH GitHub URLs are rewritten to HTTPS because
# no SSH key is provisioned in this image.
RUN update-ca-certificates && \
    git config --global url."https://github.com/".insteadOf ssh://git@github.com/

# 4. Install OpenClaw
RUN npm install -g openclaw@latest --unsafe-perm

# 5. Environment variable presets - only non-model settings get their own variables
ENV \
    # Basic settings
    PORT=${PORT:-7860} \
    NODE_ENV=${NODE_ENV:-production} \
    HOME=${HOME:-/root} \
    \
    # OpenClaw core settings
    OPENCLAW_GATEWAY_MODE=${OPENCLAW_GATEWAY_MODE:-local} \
    OPENCLAW_GATEWAY_TOKEN=${OPENCLAW_GATEWAY_TOKEN:-} \
    \
    # Model configuration - one JSON variable holding the mode, all providers and the primary model
    MODELS_CONFIG=${MODELS_CONFIG:-'{"mode":"merge","providers":{},"primary":""}'} \
    \
    # Gateway settings
    GATEWAY_AUTH_MODE=${GATEWAY_AUTH_MODE:-token} \
    GATEWAY_BIND=${GATEWAY_BIND:-lan} \
    GATEWAY_TRUSTED_PROXIES=${GATEWAY_TRUSTED_PROXIES:-0.0.0.0/0,10.0.0.0/8,172.16.0.0/12,192.168.0.0/16} \
    GATEWAY_ALLOWED_ORIGINS=${GATEWAY_ALLOWED_ORIGINS:-https://control.example.com} \
    \
    # Control UI settings
    CONTROLUI_ALLOW_INSECURE_AUTH=${CONTROLUI_ALLOW_INSECURE_AUTH:-true} \
    CONTROLUI_DANGEROUS_HOST_HEADER=${CONTROLUI_DANGEROUS_HOST_HEADER:-true} \
    CONTROLUI_DANGEROUS_DISABLE_DEVICE_AUTH=${CONTROLUI_DANGEROUS_DISABLE_DEVICE_AUTH:-true} \
    \
    # Telegram settings
    TELEGRAM_ENABLED=${TELEGRAM_ENABLED:-false} \
    TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN:-} \
    TELEGRAM_DM_POLICY=${TELEGRAM_DM_POLICY:-allowlist} \
    TELEGRAM_ALLOW_FROM=${TELEGRAM_ALLOW_FROM:-} \
    TELEGRAM_PROXY_HOST=${TELEGRAM_PROXY_HOST:-} \
    \
    # Backup settings
    BACKUP_ENABLED=${BACKUP_ENABLED:-true} \
    BACKUP_INTERVAL=${BACKUP_INTERVAL:-21600} \
    BACKUP_RETENTION_DAYS=${BACKUP_RETENTION_DAYS:-5} \
    HF_DATASET=${HF_DATASET:-} \
    HF_TOKEN=${HF_TOKEN:-} \
    # Backup exclude list - JSON array; excludes the completions and npm directories by default
BACKUP_EXCLUDE=${BACKUP_EXCLUDE:-'["/root/.openclaw/completions","/root/.openclaw/npm"]'}

# 6. Model configuration parser script
RUN cat > /usr/local/bin/parse-models-config.py << 'PARSE_EOF'
#!/usr/bin/env python3
"""Translate the MODELS_CONFIG environment variable into OpenClaw config JSON.

Usage:
    parse-models-config.py             print the parsed intermediate structure as JSON
    parse-models-config.py --full      print the ``models`` / ``agents`` config sections as JSON
    parse-models-config.py --validate  print a summary; exit status 1 if no provider is usable

Diagnostics are written to stderr so that stdout of the JSON modes is always a
single JSON document and can be piped straight into ``jq``.
"""
import json
import os
import sys

DEFAULT_MODE = "merge"
DEFAULT_API = "openai-completions"


def log(message):
    """Write a diagnostic line to stderr, keeping stdout clean for JSON output."""
    print(message, file=sys.stderr)


def empty_result():
    """Return the parsed structure used when no usable configuration exists."""
    return {
        "mode": DEFAULT_MODE,
        "providers": {},
        "primary": "",
        "agents_models": {},
    }


def parse_models_config():
    """Parse MODELS_CONFIG into providers, a primary model and per-model aliases.

    Returns:
        A dict with the keys ``mode``, ``providers``, ``primary`` and
        ``agents_models``. Providers lacking ``baseUrl`` or ``apiKey`` are
        skipped. When no primary model is configured, the first discovered
        ``provider/model`` key is used. Invalid JSON, or JSON whose top level
        is not an object, yields the empty default result.
    """
    models_config_json = os.getenv("MODELS_CONFIG", "{}")

    try:
        config = json.loads(models_config_json) if models_config_json else {}
    except json.JSONDecodeError as e:
        log(f"❌ MODELS_CONFIG JSON 解析失败: {e}")
        log(f"📄 收到的配置: {models_config_json[:200]}...")
        return empty_result()

    # Valid JSON can still have the wrong shape (e.g. a list or a string).
    if not isinstance(config, dict):
        log("❌ MODELS_CONFIG 必须是 JSON 对象,使用默认配置")
        return empty_result()

    providers = config.get("providers", {})
    if not isinstance(providers, dict):
        log("⚠️ MODELS_CONFIG.providers 不是对象,已忽略")
        providers = {}

    result = empty_result()
    result["mode"] = config.get("mode", DEFAULT_MODE)
    result["primary"] = config.get("primary", "")

    for provider_name, provider_config in providers.items():
        if not isinstance(provider_config, dict):
            log(f"⚠️ Provider {provider_name} 配置不是对象,跳过")
            continue

        base_url = provider_config.get("baseUrl", "")
        api_key = provider_config.get("apiKey", "")
        if not base_url or not api_key:
            log(f"⚠️ Provider {provider_name} 缺少 baseUrl 或 apiKey,跳过")
            continue

        models = provider_config.get("models", [])
        if not isinstance(models, list):
            log(f"⚠️ Provider {provider_name} 的 models 不是数组,已忽略")
            models = []

        result["providers"][provider_name] = {
            "baseUrl": base_url,
            "apiKey": api_key,
            "api": provider_config.get("api", DEFAULT_API),
            "models": models,
        }

        # One agents.models entry per model, keyed "<provider>/<model id>".
        for model in models:
            if not isinstance(model, dict):
                continue
            model_id = model.get("id")
            if model_id:
                result["agents_models"][f"{provider_name}/{model_id}"] = {
                    "alias": model.get("name", model_id)
                }

    # No explicit primary: fall back to the first model that was registered.
    if not result["primary"] and result["agents_models"]:
        first_model = next(iter(result["agents_models"]))
        result["primary"] = first_model
        log(f"ℹ️ 未设置主模型,自动使用: {first_model}")

    return result


def generate_full_config():
    """Build the ``models`` and ``agents`` sections of the OpenClaw config."""
    parsed = parse_models_config()
    return {
        "models": {
            "mode": parsed["mode"],
            "providers": parsed["providers"],
        },
        "agents": {
            "defaults": {
                "model": {"primary": parsed["primary"]},
                "models": parsed["agents_models"],
                "workspace": "/root/.openclaw/workspace",
            }
        },
    }


def validate():
    """Print a human-readable summary and return the process exit status.

    Returns 0 when at least one provider is usable, otherwise 1.
    """
    parsed = parse_models_config()
    provider_count = len(parsed["providers"])
    model_count = len(parsed["agents_models"])

    if provider_count == 0:
        print("⚠️ 没有配置任何 provider,将使用空配置")
        return 1

    print("✅ 配置验证成功:")
    print(f" • 模式: {parsed['mode']}")
    print(f" • Providers: {provider_count}")
    print(f" • 模型总数: {model_count}")
    if parsed["primary"]:
        print(f" • 主模型: {parsed['primary']}")
    else:
        print(" ⚠️ 未设置主模型")

    if model_count > 0:
        print("\n📋 已配置的模型:")
        for model_key in parsed["agents_models"]:
            print(f" • {model_key}")
    return 0


if __name__ == "__main__":
    if len(sys.argv) > 1:
        if sys.argv[1] == "--full":
            print(json.dumps(generate_full_config(), indent=2))
        elif sys.argv[1] == "--validate":
            sys.exit(validate())
        else:
            # Unknown options remain a no-op with exit status 0; only a hint is logged.
            log(f"⚠️ 未知参数: {sys.argv[1]} (可用: --full, --validate)")
    else:
        print(json.dumps(parse_models_config(), indent=2))
PARSE_EOF

RUN chmod +x /usr/local/bin/parse-models-config.py

# 7.
# Sync script - backs up and restores the whole .openclaw directory (supports an exclude list)
RUN cat > /usr/local/bin/sync.py << 'SYNC_EOF'
#!/usr/bin/env python3
"""Back up and restore the OpenClaw state directory via a Hugging Face dataset.

Usage: sync.py [backup|restore|list]   (no argument: restore)

Configuration is read from the environment: HF_DATASET, HF_TOKEN,
BACKUP_RETENTION_DAYS and BACKUP_EXCLUDE.
"""
import os
import sys
import tarfile
import shutil
import re
import json
from huggingface_hub import HfApi, hf_hub_download
from datetime import datetime, timedelta

OPENCLAW_DIR = "/root/.openclaw"
BACKUP_DIR = "/tmp/openclaw_backups"
DEFAULT_EXCLUDES = ("/root/.openclaw/completions", "/root/.openclaw/npm")
DEFAULT_RETENTION_DAYS = 5

# Current naming scheme: backup_2026-03-23_1774234484.tar.gz
_STAMPED_NAME = re.compile(r'backup_(\d{4}-\d{2}-\d{2})_(\d+)\.tar\.gz')
# Legacy naming scheme (date only): backup_2026-03-22.tar.gz
_DATED_NAME = re.compile(r'backup_(\d{4}-\d{2}-\d{2})\.tar\.gz')


def _read_retention_days():
    """Read BACKUP_RETENTION_DAYS, falling back to the default on bad input."""
    raw = os.getenv("BACKUP_RETENTION_DAYS", "").strip()
    if not raw:
        return DEFAULT_RETENTION_DAYS
    try:
        days = int(raw)
    except ValueError:
        print(f"⚠️ BACKUP_RETENTION_DAYS 无效 ({raw!r}),使用默认值 {DEFAULT_RETENTION_DAYS}")
        return DEFAULT_RETENTION_DAYS
    # Zero or a negative value would make pruning delete the backup just uploaded.
    return max(days, 1)


api = HfApi()
repo_id = os.getenv("HF_DATASET", "")
token = os.getenv("HF_TOKEN", "")
retention_days = _read_retention_days()


def get_exclude_patterns():
    """Return the paths (files or directories) to leave out of a backup.

    BACKUP_EXCLUDE is expected to hold a JSON array of path strings. Invalid
    JSON or a non-array value falls back to the default list; entries that are
    not strings are ignored. Trailing slashes are stripped.
    """
    exclude_json = os.getenv("BACKUP_EXCLUDE", json.dumps(list(DEFAULT_EXCLUDES)))
    try:
        exclude_list = json.loads(exclude_json)
    except json.JSONDecodeError as e:
        print(f"⚠️ BACKUP_EXCLUDE JSON 解析失败: {e}")
        print(" 使用默认排除列表")
        return list(DEFAULT_EXCLUDES)

    if not isinstance(exclude_list, list):
        print("⚠️ BACKUP_EXCLUDE 不是数组格式,使用默认排除列表")
        exclude_list = list(DEFAULT_EXCLUDES)

    cleaned = []
    for entry in exclude_list:
        if not isinstance(entry, str):
            print(f"⚠️ BACKUP_EXCLUDE 忽略非字符串项: {entry!r}")
            continue
        stripped = entry.rstrip('/')
        if stripped:
            cleaned.append(stripped)

    print(f"📋 备份排除列表: {cleaned if cleaned else '无'}")
    return cleaned


def should_exclude(path, exclude_patterns):
    """Return True when path equals an exclude pattern or lies beneath one."""
    if not exclude_patterns:
        return False

    normalized_path = os.path.normpath(path)
    for pattern in exclude_patterns:
        normalized_pattern = os.path.normpath(pattern)
        if normalized_path == normalized_pattern:
            return True
        # Compare against "<pattern>/" so "/a/npm" does not match "/a/npm-cache".
        prefix = normalized_pattern
        if not prefix.endswith(os.sep):
            prefix += os.sep
        if normalized_path.startswith(prefix):
            return True
    return False


def ensure_backup_dir():
    """Create the temporary backup working directory if it is missing."""
    os.makedirs(BACKUP_DIR, exist_ok=True)


def get_backup_timestamp():
    """Return a function mapping a backup file name to a sortable Unix timestamp.

    The returned function yields 0 for names it cannot interpret.
    """
    def extract_timestamp(filename):
        match = _STAMPED_NAME.search(filename)
        if match:
            return int(match.group(2))

        # Legacy names carry no timestamp; midnight of their date is used instead.
        match_old = _DATED_NAME.search(filename)
        if match_old:
            try:
                parsed = datetime.strptime(match_old.group(1), "%Y-%m-%d")
                return int(parsed.timestamp())
            except (ValueError, OverflowError, OSError):
                return 0
        return 0

    return extract_timestamp


def get_latest_backup(files):
    """Return the file name with the highest timestamp, or None for no files."""
    if not files:
        return None
    # max() keeps the first of equally ranked names, like a stable descending sort.
    return max(files, key=get_backup_timestamp())


def _is_backup_name(name):
    """Return True for repository paths that look like backup archives."""
    return name.startswith("backup_") and name.endswith(".tar.gz")


def _list_remote_backups():
    """Return the backup archive names currently stored in the dataset repo."""
    files = api.list_repo_files(repo_id=repo_id, repo_type="dataset", token=token)
    return [f for f in files if _is_backup_name(f)]


def _remove_quietly(path):
    """Delete a file, ignoring the case where it is already gone or locked."""
    try:
        os.remove(path)
    except OSError:
        pass


def _extract_archive(archive_path, dest_dir):
    """Extract a .tar.gz archive into dest_dir, rejecting entries that escape it."""
    with tarfile.open(archive_path, "r:gz") as tar:
        if hasattr(tarfile, "data_filter"):
            # Extraction filters exist on this interpreter: let tarfile enforce safety.
            tar.extractall(path=dest_dir, filter="data")
            return

        # Older interpreter: check every member path by hand before extracting.
        dest_root = os.path.realpath(dest_dir)
        for member in tar.getmembers():
            target = os.path.realpath(os.path.join(dest_root, member.name))
            if target != dest_root and not target.startswith(dest_root + os.sep):
                raise ValueError(f"备份包含不安全的路径: {member.name}")
        tar.extractall(path=dest_dir)


def _clear_directory(directory):
    """Remove everything inside directory while keeping the directory itself."""
    for name in os.listdir(directory):
        path = os.path.join(directory, name)
        # A symlink to a directory must be unlinked; rmtree refuses symlinks.
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path)
        else:
            os.remove(path)


def _copy_contents(source_dir, target_dir):
    """Copy every entry of source_dir into target_dir, preserving symlinks."""
    for name in os.listdir(source_dir):
        src = os.path.join(source_dir, name)
        dst = os.path.join(target_dir, name)
        if os.path.isdir(src) and not os.path.islink(src):
            shutil.copytree(src, dst, symlinks=True)
        else:
            shutil.copy2(src, dst, follow_symlinks=False)


def restore():
    """Restore OPENCLAW_DIR from the newest backup in the dataset repo.

    Returns:
        True when a backup was downloaded and applied, otherwise False
        (missing credentials, no backup found, unknown layout or any error).
    """
    if not repo_id or not token:
        print("⚠️ HF_DATASET 或 HF_TOKEN 未设置,跳过恢复")
        return False

    os.makedirs(OPENCLAW_DIR, mode=0o755, exist_ok=True)

    restore_temp = os.path.join(BACKUP_DIR, "restore_temp")
    downloaded_path = None
    try:
        backup_files = _list_remote_backups()
        if not backup_files:
            print("ℹ️ 没有找到备份文件")
            return False

        latest_backup = get_latest_backup(backup_files)
        if not latest_backup:
            print("⚠️ 无法识别任何备份文件")
            return False

        print(f"📥 发现最新备份: {latest_backup}")

        ensure_backup_dir()
        print("⬇️ 下载备份文件...")
        # NOTE(review): local_dir_use_symlinks is no longer passed; recent
        # huggingface_hub releases deprecate and ignore it. Confirm against the
        # version installed in the image.
        downloaded_path = hf_hub_download(
            repo_id=repo_id,
            filename=latest_backup,
            repo_type="dataset",
            token=token,
            local_dir=BACKUP_DIR,
        )

        if os.path.exists(restore_temp):
            shutil.rmtree(restore_temp)
        os.makedirs(restore_temp)

        print("📦 解压备份文件...")
        _extract_archive(downloaded_path, restore_temp)

        extracted_items = os.listdir(restore_temp)
        print(f"解压内容: {extracted_items}")

        if ".openclaw" in extracted_items:
            # Layout 1: the archive contains the .openclaw directory itself.
            source_dir = os.path.join(restore_temp, ".openclaw")
        elif set(extracted_items) & {"sessions", "openclaw.json", "workspace"}:
            # Layout 2: the archive contains the directory's contents directly.
            source_dir = restore_temp
        else:
            print(f"❌ 无法识别的备份格式: {extracted_items}")
            return False

        # Keep a local copy of the current state before it is overwritten.
        if os.listdir(OPENCLAW_DIR):
            backup_current = os.path.join(BACKUP_DIR, "current_before_restore")
            if os.path.exists(backup_current):
                shutil.rmtree(backup_current)
            shutil.copytree(OPENCLAW_DIR, backup_current, symlinks=True)
            print(f"💾 已备份当前目录到: {backup_current}")

        print(f"🔄 恢复数据到 {OPENCLAW_DIR}...")
        _clear_directory(OPENCLAW_DIR)
        _copy_contents(source_dir, OPENCLAW_DIR)

        print("✅ 恢复完成!")
        return True
    except Exception as e:
        print(f"❌ 恢复失败: {e}")
        return False
    finally:
        # Temporary files are removed on every exit path, not just on success.
        shutil.rmtree(restore_temp, ignore_errors=True)
        if downloaded_path:
            _remove_quietly(downloaded_path)


def _write_archive(archive_path, exclude_patterns):
    """Write OPENCLAW_DIR to a .tar.gz file, honouring the exclude patterns.

    Returns:
        (included, excluded): number of files archived and number of
        files/directories skipped because of an exclude pattern.
    """
    included = 0
    excluded = 0
    archive_base = os.path.dirname(OPENCLAW_DIR)

    with tarfile.open(archive_path, "w:gz") as tar:
        for root, dirs, files in os.walk(OPENCLAW_DIR):
            if should_exclude(root, exclude_patterns):
                print(f" ⏭️ 排除目录: {root}")
                excluded += 1
                dirs[:] = []  # do not descend into the excluded tree
                continue

            # Store the directory entry itself so empty directories survive a restore.
            tar.add(root, arcname=os.path.relpath(root, start=archive_base), recursive=False)

            for file in files:
                file_path = os.path.join(root, file)
                if should_exclude(file_path, exclude_patterns):
                    print(f" ⏭️ 排除文件: {file_path}")
                    excluded += 1
                    continue
                try:
                    tar.add(file_path, arcname=os.path.relpath(file_path, start=archive_base))
                except FileNotFoundError:
                    # The directory is live; a file may vanish between walk and add.
                    print(f" ⚠️ 文件已消失,跳过: {file_path}")
                    continue
                included += 1

    return included, excluded


def _prune_old_backups():
    """Delete remote backups beyond the retention count that are also too old.

    At least ``retention_days`` backups are always kept; older surplus ones are
    removed only once their timestamp is more than ``retention_days`` days old.
    Names without a recognisable timestamp are never deleted.
    """
    extract_timestamp = get_backup_timestamp()
    dated = sorted((extract_timestamp(f), f) for f in _list_remote_backups())
    dated = [(ts, name) for ts, name in dated if ts > 0]

    cutoff = datetime.now().timestamp() - retention_days * 86400
    surplus = max(len(dated) - retention_days, 0)
    for ts, name in dated[:surplus]:
        if ts >= cutoff:
            break
        print(f"🗑️ 删除旧备份: {name}")
        api.delete_file(
            path_in_repo=name,
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
        )


def backup():
    """Archive OPENCLAW_DIR and upload it to the dataset repo, then prune old backups."""
    if not repo_id or not token:
        print("⚠️ HF_DATASET 或 HF_TOKEN 未设置,跳过备份")
        return

    if not os.path.exists(OPENCLAW_DIR) or not os.listdir(OPENCLAW_DIR):
        print("ℹ️ .openclaw 目录为空,跳过备份")
        return

    backup_path = None
    try:
        exclude_patterns = get_exclude_patterns()

        # File name format: backup_<date>_<unix timestamp>.tar.gz
        now = datetime.now()
        backup_filename = f"backup_{now.strftime('%Y-%m-%d')}_{int(now.timestamp())}.tar.gz"
        backup_path = os.path.join(BACKUP_DIR, backup_filename)
        ensure_backup_dir()

        print(f"📦 创建备份: {backup_filename}")
        included_count, excluded_count = _write_archive(backup_path, exclude_patterns)

        print("📊 备份统计:")
        print(f" • 已包含: {included_count} 个文件/目录")
        print(f" • 已排除: {excluded_count} 个文件/目录")

        print(f"⬆️ 上传到 Hugging Face Dataset: {repo_id}")
        api.upload_file(
            path_or_fileobj=backup_path,
            path_in_repo=backup_filename,
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
        )
        print(f"✅ 备份完成: {backup_filename}")

        # Pruning is best-effort: a failure here must not mark the backup as failed.
        try:
            _prune_old_backups()
        except Exception as e:
            print(f"⚠️ 清理旧备份失败: {e}")
    except Exception as e:
        print(f"❌ 备份失败: {e}")
    finally:
        # Remove the local archive even when archiving or uploading failed.
        if backup_path:
            _remove_quietly(backup_path)


def list_backups():
    """Print every backup in the dataset repo, newest first."""
    if not repo_id or not token:
        print("⚠️ HF_DATASET 或 HF_TOKEN 未设置")
        return

    try:
        extract_timestamp = get_backup_timestamp()
        entries = [(f, extract_timestamp(f)) for f in _list_remote_backups()]
        entries.sort(key=lambda item: item[1], reverse=True)

        if not entries:
            print("ℹ️ 没有找到备份文件")
            return

        print("📋 可用的备份:")
        for i, (name, ts) in enumerate(entries, 1):
            label = ""
            if ts:
                try:
                    label = f" ({datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')})"
                except (ValueError, OverflowError, OSError):
                    label = ""  # timestamp outside the representable range
            print(f" {i}. {name}{label}")
    except Exception as e:
        print(f"❌ 列出备份失败: {e}")


if __name__ == "__main__":
    commands = {"backup": backup, "restore": restore, "list": list_backups}
    if len(sys.argv) > 1:
        handler = commands.get(sys.argv[1])
        if handler is None:
            print(f"未知命令: {sys.argv[1]}")
            print("可用命令: backup, restore, list")
        else:
            handler()
    else:
        # Restore is the default action.
        restore()
SYNC_EOF

RUN chmod +x /usr/local/bin/sync.py

# 8. Telegram API host replacement script
RUN cat > /usr/local/bin/patch-telegram-api << 'PATCH_EOF'
#!/bin/bash
# When TELEGRAM_PROXY_HOST is set, rewrite every api.telegram.org reference
# inside the installed OpenClaw package so that it points at that host.
if [ -n "$TELEGRAM_PROXY_HOST" ]; then
  echo "🔧 检测到 TELEGRAM_PROXY_HOST 设置: $TELEGRAM_PROXY_HOST"
  echo "🔄 开始替换 OpenClaw 中的 Telegram API 地址..."

  OPENCLAW_DIR="/usr/local/lib/node_modules/openclaw"

  if [ -d "$OPENCLAW_DIR" ]; then
    # Escape the characters that are special in a sed replacement: backslash,
    # ampersand and the "|" used as the delimiter below.
    REPLACEMENT=$(printf '%s' "$TELEGRAM_PROXY_HOST" | sed 's/[\\&|]/\\&/g')

    # Count matching lines before replacing (-F: match the host name literally)
    MATCH_COUNT=$(grep -rF "api.telegram.org" "$OPENCLAW_DIR" 2>/dev/null | wc -l)
    echo "📊 找到 $MATCH_COUNT 处需要替换的地址"

    # First pass: JavaScript files only
    find "$OPENCLAW_DIR" -type f -name "*.js" -exec sed -i "s|api\\.telegram\\.org|$REPLACEMENT|g" {} +

    # Check whether anything is left
    REMAINING=$(grep -rF "api.telegram.org" "$OPENCLAW_DIR" 2>/dev/null | wc -l)

    if [ "$REMAINING" -eq 0 ]; then
      echo "✅ Telegram API 地址替换完成!"
      echo " 原始地址: api.telegram.org"
      echo " 新地址: $TELEGRAM_PROXY_HOST"
    else
      echo "⚠️ 仍有 $REMAINING 处未替换,尝试二次替换..."
      # Second pass: also cover JSON and TypeScript files
      find "$OPENCLAW_DIR" -type f \( -name "*.js" -o -name "*.json" -o -name "*.ts" \) -exec sed -i "s|api\\.telegram\\.org|$REPLACEMENT|g" {} +
      FINAL_REMAINING=$(grep -rF "api.telegram.org" "$OPENCLAW_DIR" 2>/dev/null | wc -l)
      echo "📊 最终剩余未替换: $FINAL_REMAINING 处"
    fi

    # Show the first three replaced occurrences as a sanity check
    echo "🔍 验证替换结果(前3处):"
    grep -rF -- "$TELEGRAM_PROXY_HOST" "$OPENCLAW_DIR" 2>/dev/null | head -3 | sed 's|.*| &|'
  else
    echo "❌ OpenClaw 目录不存在: $OPENCLAW_DIR"
  fi
else
  echo "ℹ️ 未设置 TELEGRAM_PROXY_HOST,跳过 Telegram API 替换"
fi
PATCH_EOF

RUN chmod +x /usr/local/bin/patch-telegram-api

# 9. Startup script
RUN cat > /usr/local/bin/start-openclaw << 'START_EOF'
#!/bin/bash
set -e

# Create the required directories
mkdir -p /root/.openclaw
mkdir -p /root/.openclaw/sessions
mkdir -p /root/.openclaw/workspace

echo "========================================"
echo "OpenClaw Gateway Starting..."
echo "========================================"

# Try to restore data (only when backups are configured)
if [ -n "$HF_DATASET" ] && [ -n "$HF_TOKEN" ]; then
  echo "🔄 尝试从 Hugging Face 恢复数据..."
  python3 /usr/local/bin/sync.py restore || echo "⚠️ 恢复失败,继续启动..."
fi # 执行 Telegram API 地址替换(如果设置了代理) /usr/local/bin/patch-telegram-api # 生成令牌(如果没设置) if [ -z "$OPENCLAW_GATEWAY_TOKEN" ]; then OPENCLAW_GATEWAY_TOKEN=$(openssl rand -hex 16) echo "🔑 生成的网关令牌: $OPENCLAW_GATEWAY_TOKEN" fi # 转换可信代理列表为JSON数组 TRUSTED_PROXIES_JSON=$(echo "$GATEWAY_TRUSTED_PROXIES" | tr ',' '\n' | awk '{ printf "\"%s\",", $0 }' | sed 's/,$//' | sed 's/^/[/' | sed 's/$/]/') # 转换允许的源列表为JSON数组 ALLOWED_ORIGINS_JSON=$(echo "$GATEWAY_ALLOWED_ORIGINS" | tr ',' '\n' | awk '{ printf "\"%s\",", $0 }' | sed 's/,$//' | sed 's/^/[/' | sed 's/$/]/') # 转换Telegram允许列表为JSON数组 if [ -n "$TELEGRAM_ALLOW_FROM" ]; then TELEGRAM_ALLOW_JSON=$(echo "$TELEGRAM_ALLOW_FROM" | tr ',' '\n' | while read id; do id=$(echo "$id" | xargs) if [[ "$id" =~ ^[0-9]+$ ]]; then echo "\"tg:$id\"" elif [[ "$id" =~ ^tg: ]]; then echo "\"$id\"" elif [[ "$id" =~ ^@ ]]; then echo "\"$id\"" else echo "\"$id\"" fi done | paste -sd ',' | sed 's/^/[/' | sed 's/$/]/') else TELEGRAM_ALLOW_JSON="[]" fi # 创建 OpenClaw 配置(如果配置文件不存在) if [ ! -f "/root/.openclaw/openclaw.json" ]; then echo "📝 创建 OpenClaw 配置文件..." # 验证模型配置 echo "🔍 验证模型配置..." if python3 /usr/local/bin/parse-models-config.py --validate; then # 生成完整的模型配置 echo "✅ 模型配置验证成功,正在生成配置..." MODELS_FULL_CONFIG=$(python3 /usr/local/bin/parse-models-config.py --full) # 提取 models 和 agents 部分 MODELS_JSON=$(echo "$MODELS_FULL_CONFIG" | jq '.models') AGENTS_JSON=$(echo "$MODELS_FULL_CONFIG" | jq '.agents') # 创建完整配置 cat > /root/.openclaw/openclaw.json < /root/.openclaw/openclaw.json <