#!/usr/bin/env python3
import os
import sys
import datetime
import json
import sqlite3
import tarfile
from huggingface_hub import HfApi, login
# Force line-buffered stdout/stderr so output shows up in real time
# (important for cron logs).
sys.stdout.reconfigure(line_buffering=True)
sys.stderr.reconfigure(line_buffering=True)
# ===== Configuration =====
DB_PATH = "/app/server/data/freeapi.db" # Path of the SQLite database file to back up
DATASET_REPO = "lydgs/freellm-backup" # Hugging Face dataset repo that receives the uploads
BACKUP_PREFIX = "freeapi_backup" # File-name prefix of the binary database backups
CONFIG_PREFIX = "config_export" # File-name prefix of the JSON config export archives
RETENTION_DAYS = 30 # Uploaded files older than this many days are deleted from the repo
# =========================
def export_db_to_json(db_path, temp_dir):
    """Export every user table of a SQLite database to its own JSON file.

    Each table is written to ``<temp_dir>/<table_name>.json`` as a list of
    objects keyed by column name (UTF-8, non-ASCII characters kept as-is).

    Args:
        db_path: Path to the SQLite database file.
        temp_dir: Existing directory that receives the JSON files.

    Returns:
        The paths of the JSON files written, one per table. An empty list
        if ``db_path`` does not exist.

    Raises:
        sqlite3.Error: If the database cannot be queried.
        TypeError: If a value is not JSON-serialisable (for example BLOB
            columns, which ``sqlite3`` returns as ``bytes``).
    """
    if not os.path.exists(db_path):
        print(f"⚠️ 数据库文件不存在: {db_path}")
        return []

    exported_files = []
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        # All user tables; SQLite's internal tables are excluded.
        cursor.execute(
            "SELECT name FROM sqlite_master "
            "WHERE type='table' AND name NOT LIKE 'sqlite_%'"
        )
        table_names = [name for (name,) in cursor.fetchall()]

        for table_name in table_names:
            # Identifiers cannot be bound as parameters, so quote the name
            # (doubling embedded quotes). Without this, tables named after
            # reserved words or containing spaces make the query fail.
            quoted_name = '"' + table_name.replace('"', '""') + '"'
            cursor.execute(f"SELECT * FROM {quoted_name}")
            rows = cursor.fetchall()
            col_names = [desc[0] for desc in cursor.description]
            data = [dict(zip(col_names, row)) for row in rows]

            json_path = os.path.join(temp_dir, f"{table_name}.json")
            with open(json_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2, ensure_ascii=False)
            exported_files.append(json_path)
            print(f"📄 导出表 {table_name} 到 {json_path} ({len(data)} 行)")
    finally:
        # Release the database handle even when a query or write fails.
        conn.close()
    return exported_files
def backup_database():
    """Back up the database to the Hugging Face dataset repo.

    Uploads the raw SQLite file, uploads a tar.gz of per-table JSON exports,
    then deletes uploaded files older than ``RETENTION_DAYS``.

    Returns:
        True only if both the binary backup and the config export were
        uploaded. False if the database file is missing, ``HF_TOKEN`` is not
        set, login fails, or either upload fails. Problems while pruning old
        backups are reported but do not affect the result.
    """
    # Debug output: report whether the expected environment is in place.
    print(f"[DEBUG] HF_TOKEN present: {bool(os.getenv('HF_TOKEN'))}")
    print(f"[DEBUG] BAILIAN_API_KEY present: {bool(os.getenv('BAILIAN_API_KEY'))}")
    db_exists = os.path.exists(DB_PATH)
    print(f"[DEBUG] Database file exists: {db_exists}")
    print(f"[DEBUG] Database file size: {os.path.getsize(DB_PATH) if db_exists else 'N/A'}")

    if not db_exists:
        print(f"❌ 数据库文件不存在: {DB_PATH}")
        return False

    token = os.getenv("HF_TOKEN")
    if not token:
        print("❌ 环境变量 HF_TOKEN 未设置")
        return False

    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_name = f"{BACKUP_PREFIX}_{timestamp}.db"
    config_tar_name = f"{CONFIG_PREFIX}_{timestamp}.tar.gz"

    try:
        login(token=token)
        print("[DEBUG] Hugging Face login successful")
    except Exception as e:
        print(f"❌ 登录 Hugging Face 失败: {e}")
        return False

    api = HfApi()

    # The two uploads are independent: a failure of one must not prevent the
    # other, but either failure makes the whole run unsuccessful.
    backup_ok = _upload_binary_backup(api, backup_name)
    config_ok = _upload_config_export(api, config_tar_name)

    # Only prune when a fresh binary backup exists; otherwise a job that
    # keeps failing would eventually delete every remaining backup.
    if backup_ok:
        _prune_old_backups(api)
    else:
        print("⚠️ 二进制备份未成功,跳过清理旧备份")

    return backup_ok and config_ok


def _upload_binary_backup(api, backup_name):
    """Upload the raw database file as ``backup_name``; return True on success."""
    try:
        api.upload_file(
            path_or_fileobj=DB_PATH,
            path_in_repo=backup_name,
            repo_id=DATASET_REPO,
            repo_type="dataset"
        )
    except Exception as e:
        print(f"❌ 二进制上传失败: {e}")
        return False
    print(f"✅ 二进制备份成功: {backup_name}")
    return True


def _upload_config_export(api, config_tar_name):
    """Export all tables to JSON, pack them into a tar.gz and upload it.

    Returns True on success. All intermediate files live in a private
    temporary directory that is removed whether or not the upload succeeds.
    """
    import tempfile

    try:
        with tempfile.TemporaryDirectory(prefix="config_export_") as work_dir:
            json_dir = os.path.join(work_dir, "json")
            os.makedirs(json_dir)
            exported = export_db_to_json(DB_PATH, json_dir)

            tar_path = os.path.join(work_dir, config_tar_name)
            with tarfile.open(tar_path, "w:gz") as tar:
                for path in exported:
                    # Store files flat, without the temporary directory path.
                    tar.add(path, arcname=os.path.basename(path))

            api.upload_file(
                path_or_fileobj=tar_path,
                path_in_repo=config_tar_name,
                repo_id=DATASET_REPO,
                repo_type="dataset"
            )
    except Exception as e:
        print(f"❌ 配置导出失败: {e}")
        return False
    print(f"✅ 配置导出包上传成功: {config_tar_name}")
    return True


def _parse_backup_timestamp(filename):
    """Return the datetime encoded in a backup file name, or None.

    Recognises ``<BACKUP_PREFIX>_<stamp>.db`` and
    ``<CONFIG_PREFIX>_<stamp>.tar.gz`` where ``<stamp>`` is ``%Y%m%d_%H%M%S``.
    """
    for prefix, suffix in ((BACKUP_PREFIX, ".db"), (CONFIG_PREFIX, ".tar.gz")):
        head = prefix + "_"
        if filename.startswith(head) and filename.endswith(suffix):
            stamp = filename[len(head):-len(suffix)]
            try:
                return datetime.datetime.strptime(stamp, "%Y%m%d_%H%M%S")
            except ValueError:
                # Matches the naming scheme but carries no valid timestamp.
                return None
    return None


def _prune_old_backups(api):
    """Delete uploaded backups older than ``RETENTION_DAYS`` (best effort)."""
    try:
        files = api.list_repo_files(repo_id=DATASET_REPO, repo_type="dataset")
    except Exception as e:
        print(f"⚠️ 清理旧备份时出错: {e}")
        return

    now = datetime.datetime.now()
    deleted = 0
    for name in files:
        file_time = _parse_backup_timestamp(name)
        if file_time is None or (now - file_time).days <= RETENTION_DAYS:
            continue
        try:
            api.delete_file(path_in_repo=name, repo_id=DATASET_REPO, repo_type="dataset")
        except Exception as e:
            # Report the failure instead of hiding it, then keep going so one
            # bad file does not block the rest of the cleanup.
            print(f"⚠️ 删除旧备份失败 {name}: {e}")
            continue
        print(f"🗑️ 已删除旧备份: {name}")
        deleted += 1

    if deleted:
        print(f"✅ 清理完成,共删除 {deleted} 个旧备份")
    else:
        print("✅ 没有需要清理的旧备份")
if __name__ == "__main__":
    # Script entry point: run one backup cycle and report the outcome through
    # the process exit code (0 = success, 1 = failure).
    print(f"[{datetime.datetime.now()}] 开始备份及配置导出...")
    success = backup_database()
    print(f"[DEBUG] backup_database returned: {success}")
    print("备份任务完成" if success else "备份任务失败")
    sys.exit(0 if success else 1)