lydgs committed on
Commit
0b1ce6f
·
verified ·
1 Parent(s): fbd4b19

Update scripts/backup_to_dataset.py

Browse files
Files changed (1) hide show
  1. scripts/backup_to_dataset.py +28 -34
scripts/backup_to_dataset.py CHANGED
@@ -4,34 +4,30 @@ import sys
4
  import datetime
5
  from huggingface_hub import HfApi, login
6
 
7
- # ===== 配置区域 =====
8
- DB_PATH = "/app/server/data/freeapi.db" # 数据库文件路径
9
- DATASET_REPO = "lydgs/freellm-backup" # 替换你的私有数据集
10
- BACKUP_PREFIX = "freellm_backup" # 备份文件前缀
11
- RETENTION_DAYS = 30 # 保留天数(超过则删除)
12
- # ===================
13
 
14
  def backup_database():
15
- # 检查数据库文件
16
  if not os.path.exists(DB_PATH):
17
  print(f"❌ 数据库文件不存在: {DB_PATH}")
18
  return False
19
 
20
- # 生成备份文件名
21
- timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
22
- backup_name = f"{BACKUP_PREFIX}_{timestamp}.sqlite"
23
-
24
- # 获取 Hugging Face Token
25
  token = os.getenv("HF_TOKEN")
26
  if not token:
27
  print("❌ 环境变量 HF_TOKEN 未设置")
28
  return False
29
 
30
- # 登录
 
 
31
  login(token=token)
32
  api = HfApi()
33
 
34
- # 1. 上传当前备份
35
  try:
36
  api.upload_file(
37
  path_or_fileobj=DB_PATH,
@@ -39,33 +35,31 @@ def backup_database():
39
  repo_id=DATASET_REPO,
40
  repo_type="dataset"
41
  )
42
- print(f"✅ 备份成功: https://huggingface.co/datasets/{DATASET_REPO}/blob/main/{backup_name}")
43
  except Exception as e:
44
  print(f"❌ 上传失败: {e}")
45
  return False
46
 
47
- # 2. 清理旧备份(保留最近 RETENTION_DAYS 天的文件
48
  try:
49
  files = api.list_repo_files(repo_id=DATASET_REPO, repo_type="dataset")
50
  now = datetime.datetime.now()
51
- deleted_count = 0
52
  for f in files:
53
- if not f.startswith(BACKUP_PREFIX) or not f.endswith(".sqlite"):
54
  continue
55
- # 解析时间戳部分,文件名格式: prefix_YYYYMMDD_HHMMSS.sqlite
56
  try:
57
- parts = f.replace(".sqlite", "").split("_")
58
- if len(parts) >= 3:
59
- ts_str = f"{parts[1]}_{parts[2]}" # YYYYMMDD_HHMMSS
60
- file_time = datetime.datetime.strptime(ts_str, "%Y%m%d_%H%M%S")
61
- if (now - file_time).days > RETENTION_DAYS:
62
- api.delete_file(path_in_repo=f, repo_id=DATASET_REPO, repo_type="dataset")
63
- print(f"🗑️ 已删除旧备份: {f}")
64
- deleted_count += 1
65
- except Exception as parse_err:
66
- print(f"⚠️ 跳过无法解析的文件: {f}, 原因: {parse_err}")
67
- if deleted_count:
68
- print(f"✅ 清理完成,共删除 {deleted_count} 个旧备份")
69
  else:
70
  print("✅ 没有需要清理的旧备份")
71
  except Exception as e:
@@ -74,10 +68,10 @@ def backup_database():
74
  return True
75
 
76
  if __name__ == "__main__":
77
- print(f"[{datetime.datetime.now()}] 开始备份并清理...")
78
  if backup_database():
79
- print("任务完成")
80
  sys.exit(0)
81
  else:
82
- print("任务失败")
83
  sys.exit(1)
 
4
  import datetime
5
  from huggingface_hub import HfApi, login
6
 
7
+ # ===== 配置 =====
8
+ DB_PATH = "/data/freeapi.db" # 数据库路径(与 DATABASE_URL 一致)
9
+ DATASET_REPO = "lydgs/freellm-backup" # 替换你的数据集名称
10
+ BACKUP_PREFIX = "freeapi_backup" # 备份文件前缀
11
+ RETENTION_DAYS = 30 # 保留天数
12
+ # ================
13
 
14
  def backup_database():
 
15
  if not os.path.exists(DB_PATH):
16
  print(f"❌ 数据库文件不存在: {DB_PATH}")
17
  return False
18
 
 
 
 
 
 
19
  token = os.getenv("HF_TOKEN")
20
  if not token:
21
  print("❌ 环境变量 HF_TOKEN 未设置")
22
  return False
23
 
24
+ timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
25
+ backup_name = f"{BACKUP_PREFIX}_{timestamp}.db"
26
+
27
  login(token=token)
28
  api = HfApi()
29
 
30
+ # 上传当前备份
31
  try:
32
  api.upload_file(
33
  path_or_fileobj=DB_PATH,
 
35
  repo_id=DATASET_REPO,
36
  repo_type="dataset"
37
  )
38
+ print(f"✅ 备份成功: {backup_name}")
39
  except Exception as e:
40
  print(f"❌ 上传失败: {e}")
41
  return False
42
 
43
+ # 清理旧备份(保留最近 RETENTION_DAYS 天)
44
  try:
45
  files = api.list_repo_files(repo_id=DATASET_REPO, repo_type="dataset")
46
  now = datetime.datetime.now()
47
+ deleted = 0
48
  for f in files:
49
+ if not f.startswith(BACKUP_PREFIX) or not f.endswith(".db"):
50
  continue
51
+ # 解析时间戳,文件名格式: freeapi_backup_YYYYMMDD_HHMMSS.db
52
  try:
53
+ ts_str = f.replace(BACKUP_PREFIX + "_", "").replace(".db", "")
54
+ file_time = datetime.datetime.strptime(ts_str, "%Y%m%d_%H%M%S")
55
+ if (now - file_time).days > RETENTION_DAYS:
56
+ api.delete_file(path_in_repo=f, repo_id=DATASET_REPO, repo_type="dataset")
57
+ print(f"🗑️ 已删除旧备份: {f}")
58
+ deleted += 1
59
+ except Exception:
60
+ pass
61
+ if deleted:
62
+ print(f" 清理完成,共删除 {deleted} 个旧备份")
 
 
63
  else:
64
  print("✅ 没有需要清理的旧备份")
65
  except Exception as e:
 
68
  return True
69
 
70
  if __name__ == "__main__":
71
+ print(f"[{datetime.datetime.now()}] 开始备份...")
72
  if backup_database():
73
+ print("备份任务完成")
74
  sys.exit(0)
75
  else:
76
+ print("备份任务失败")
77
  sys.exit(1)