lydgs committed on
Commit
4cb8f19
·
verified ·
1 Parent(s): afab6af

Update scripts/backup_to_dataset.py

Browse files
Files changed (1) hide show
  1. scripts/backup_to_dataset.py +97 -22
scripts/backup_to_dataset.py CHANGED
@@ -2,15 +2,49 @@
2
  import os
3
  import sys
4
  import datetime
 
 
 
5
  from huggingface_hub import HfApi, login
6
 
7
  # ===== 配置 =====
8
- DB_PATH = "/app/server/data/freeapi.db" # 数据库路径(与 DATABASE_URL 一致)
9
- DATASET_REPO = "lydgs/freellm-backup" # 替换为你的数据集名称
10
- BACKUP_PREFIX = "freeapi_backup" # 备份文件前缀
11
- RETENTION_DAYS = 30 # 保留天数
 
12
  # ================
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  def backup_database():
15
  if not os.path.exists(DB_PATH):
16
  print(f"❌ 数据库文件不存在: {DB_PATH}")
@@ -23,11 +57,12 @@ def backup_database():
23
 
24
  timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
25
  backup_name = f"{BACKUP_PREFIX}_{timestamp}.db"
 
26
 
27
  login(token=token)
28
  api = HfApi()
29
 
30
- # 上传当前备份
31
  try:
32
  api.upload_file(
33
  path_or_fileobj=DB_PATH,
@@ -35,29 +70,69 @@ def backup_database():
35
  repo_id=DATASET_REPO,
36
  repo_type="dataset"
37
  )
38
- print(f"✅ 备份成功: {backup_name}")
39
  except Exception as e:
40
- print(f"❌ 上传失败: {e}")
41
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
- # 清理旧备份(保留最近 RETENTION_DAYS 天
44
  try:
45
  files = api.list_repo_files(repo_id=DATASET_REPO, repo_type="dataset")
46
  now = datetime.datetime.now()
47
  deleted = 0
48
  for f in files:
49
- if not f.startswith(BACKUP_PREFIX) or not f.endswith(".db"):
50
- continue
51
- # 解析时间戳,文件名格式: freeapi_backup_YYYYMMDD_HHMMSS.db
52
- try:
53
- ts_str = f.replace(BACKUP_PREFIX + "_", "").replace(".db", "")
54
- file_time = datetime.datetime.strptime(ts_str, "%Y%m%d_%H%M%S")
55
- if (now - file_time).days > RETENTION_DAYS:
56
- api.delete_file(path_in_repo=f, repo_id=DATASET_REPO, repo_type="dataset")
57
- print(f"🗑️ 已删除旧备份: {f}")
58
- deleted += 1
59
- except Exception:
60
- pass
 
 
 
 
 
 
 
 
 
61
  if deleted:
62
  print(f"✅ 清理完成,共删除 {deleted} 个旧备份")
63
  else:
@@ -68,7 +143,7 @@ def backup_database():
68
  return True
69
 
70
  if __name__ == "__main__":
71
- print(f"[{datetime.datetime.now()}] 开始备份...")
72
  if backup_database():
73
  print("备份任务完成")
74
  sys.exit(0)
 
2
  import os
3
  import sys
4
  import datetime
5
+ import json
6
+ import sqlite3
7
+ import tarfile
8
  from huggingface_hub import HfApi, login
9
 
10
  # ===== 配置 =====
11
+ DB_PATH = "/app/server/data/freeapi.db" # 数据库路径
12
+ DATASET_REPO = "lydgs/freellm-backup" # 数据集名称
13
+ BACKUP_PREFIX = "freeapi_backup" # 二进制备份前缀
14
+ CONFIG_PREFIX = "config_export" # 配置导出前缀
15
+ RETENTION_DAYS = 30
16
  # ================
17
 
18
def _json_fallback(value):
    """Convert values that ``json`` cannot serialize natively.

    SQLite BLOB columns are returned as ``bytes``; they are written out as a
    lowercase hex string so that one binary column does not abort the export.
    Any other unsupported type still raises ``TypeError``, as ``json`` would.
    """
    if isinstance(value, (bytes, bytearray, memoryview)):
        return bytes(value).hex()
    raise TypeError(
        f"Object of type {type(value).__name__} is not JSON serializable"
    )


def export_db_to_json(db_path, temp_dir):
    """Export every table of a SQLite database to one JSON file per table.

    Each table listed in ``sqlite_master`` is dumped to
    ``<temp_dir>/<table_name>.json`` as a list of ``{column: value}`` dicts.

    Args:
        db_path: Path of the SQLite database file to read.
        temp_dir: Existing directory that receives the JSON files.

    Returns:
        The list of JSON file paths that were written, in table order.
        An empty list is returned when ``db_path`` does not exist.
    """
    if not os.path.exists(db_path):
        print(f"⚠️ 数据库文件不存在: {db_path}")
        return []

    exported_files = []
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # Collect all table names up front so the cursor can be reused below.
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        table_names = [name for (name,) in cursor.fetchall()]

        for table_name in table_names:
            # Quote the identifier: table names may be SQL keywords or contain
            # spaces/quotes, which would break an unquoted "SELECT * FROM x".
            quoted_name = '"' + table_name.replace('"', '""') + '"'
            cursor.execute(f"SELECT * FROM {quoted_name}")
            col_names = [desc[0] for desc in cursor.description]
            data = [dict(zip(col_names, row)) for row in cursor.fetchall()]

            json_path = os.path.join(temp_dir, f"{table_name}.json")
            with open(json_path, 'w', encoding='utf-8') as f:
                json.dump(
                    data,
                    f,
                    indent=2,
                    ensure_ascii=False,
                    default=_json_fallback,
                )
            exported_files.append(json_path)
            print(f"📄 导出表 {table_name} 到 {json_path} ({len(data)} 行)")
    finally:
        # Always release the database handle, even if a query or write failed.
        conn.close()

    return exported_files
47
+
48
  def backup_database():
49
  if not os.path.exists(DB_PATH):
50
  print(f"❌ 数据库文件不存在: {DB_PATH}")
 
57
 
58
  timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
59
  backup_name = f"{BACKUP_PREFIX}_{timestamp}.db"
60
+ config_tar_name = f"{CONFIG_PREFIX}_{timestamp}.tar.gz"
61
 
62
  login(token=token)
63
  api = HfApi()
64
 
65
+ # 1. 上传二进制备份
66
  try:
67
  api.upload_file(
68
  path_or_fileobj=DB_PATH,
 
70
  repo_id=DATASET_REPO,
71
  repo_type="dataset"
72
  )
73
+ print(f"✅ 二进制备份成功: {backup_name}")
74
  except Exception as e:
75
+ print(f"❌ 二进制上传失败: {e}")
76
+ # 继续尝试导出配置,不中断
77
+
78
+ # 2. 导出 JSON 配置并打包上传
79
+ try:
80
+ # 创建临时目录
81
+ temp_export_dir = f"/tmp/config_export_{timestamp}"
82
+ os.makedirs(temp_export_dir, exist_ok=True)
83
+
84
+ export_db_to_json(DB_PATH, temp_export_dir)
85
+
86
+ # 打包成 tar.gz
87
+ tar_path = f"/tmp/{config_tar_name}"
88
+ with tarfile.open(tar_path, "w:gz") as tar:
89
+ for file in os.listdir(temp_export_dir):
90
+ full_path = os.path.join(temp_export_dir, file)
91
+ tar.add(full_path, arcname=file)
92
+
93
+ # 上传压缩包
94
+ api.upload_file(
95
+ path_or_fileobj=tar_path,
96
+ path_in_repo=config_tar_name,
97
+ repo_id=DATASET_REPO,
98
+ repo_type="dataset"
99
+ )
100
+ print(f"✅ 配置导出包上传成功: {config_tar_name}")
101
+
102
+ # 清理临时文件
103
+ import shutil
104
+ shutil.rmtree(temp_export_dir)
105
+ os.remove(tar_path)
106
+ except Exception as e:
107
+ print(f"❌ 配置导出失败: {e}")
108
 
109
+ # 3. 清理旧备份(二进制和配置包
110
  try:
111
  files = api.list_repo_files(repo_id=DATASET_REPO, repo_type="dataset")
112
  now = datetime.datetime.now()
113
  deleted = 0
114
  for f in files:
115
+ # 删除超过 RETENTION_DAYS 天的二进制备份和配置包
116
+ if f.startswith(BACKUP_PREFIX) or f.startswith(CONFIG_PREFIX):
117
+ # 提取时间戳
118
+ parts = f.replace(".db", "").replace(".tar.gz", "").split("_")
119
+ if len(parts) >= 3:
120
+ ts_str = f"{parts[2]}_{parts[3]}" if len(parts) > 3 else parts[2]
121
+ # 文件名格式: freeapi_backup_20260602_061721.db config_export_20260602_061721.tar.gz
122
+ # 简化:取最后两个部分(日期_时间)
123
+ try:
124
+ # 尝试解析时间戳
125
+ if f.startswith(BACKUP_PREFIX):
126
+ ts_part = f.replace(BACKUP_PREFIX + "_", "").replace(".db", "")
127
+ else:
128
+ ts_part = f.replace(CONFIG_PREFIX + "_", "").replace(".tar.gz", "")
129
+ file_time = datetime.datetime.strptime(ts_part, "%Y%m%d_%H%M%S")
130
+ if (now - file_time).days > RETENTION_DAYS:
131
+ api.delete_file(path_in_repo=f, repo_id=DATASET_REPO, repo_type="dataset")
132
+ print(f"🗑️ 已删除旧备份: {f}")
133
+ deleted += 1
134
+ except Exception as parse_err:
135
+ print(f"⚠️ 跳过无法解析的文件: {f} ({parse_err})")
136
  if deleted:
137
  print(f"✅ 清理完成,共删除 {deleted} 个旧备份")
138
  else:
 
143
  return True
144
 
145
  if __name__ == "__main__":
146
+ print(f"[{datetime.datetime.now()}] 开始备份及配置导出...")
147
  if backup_database():
148
  print("备份任务完成")
149
  sys.exit(0)