lydgs committed on
Commit
d26f1f4
·
verified ·
1 Parent(s): 1c86b45

Update scripts/restore_from_dataset.py

Browse files
Files changed (1) hide show
  1. scripts/restore_from_dataset.py +100 -29
scripts/restore_from_dataset.py CHANGED
@@ -2,60 +2,101 @@
2
  import os
3
  import sys
4
  import shutil
 
 
 
 
 
5
  import datetime
6
  from huggingface_hub import HfApi, login, hf_hub_download
7
 
8
  DB_TARGET = "/app/server/data/freeapi.db"
9
  DATASET_REPO = "lydgs/freellm-backup"
 
10
  BACKUP_PREFIX = "freeapi_backup"
11
 
12
def upload_backup(db_path, dataset_repo, backup_prefix):
    """Upload the local database file to a dataset repo as a timestamped backup.

    Args:
        db_path: Path of the local database file to upload.
        dataset_repo: Repo id of the target dataset.
        backup_prefix: Prefix of the remote file name; the final name is
            ``<backup_prefix>_<YYYYmmdd_HHMMSS>.db``.

    Returns:
        True when the upload succeeded; False when the database file is
        missing, ``HF_TOKEN`` is not set, or the upload raised.
    """
    # Guard clauses: nothing to upload, or no credentials to upload with.
    if not os.path.exists(db_path):
        print(f"⚠️ 本地数据库不存在: {db_path}")
        return False

    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        print("❌ 环境变量 HF_TOKEN 未设置")
        return False

    # The remote name embeds the local wall-clock time of the upload.
    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    remote_name = f"{backup_prefix}_{stamp}.db"

    login(token=hf_token)
    client = HfApi()
    try:
        client.upload_file(
            path_or_fileobj=db_path,
            path_in_repo=remote_name,
            repo_id=dataset_repo,
            repo_type="dataset",
        )
        print(f" 初始备份创建成功: {remote_name}")
        return True
    except Exception as err:
        print(f" 创建初始备份失败: {err}")
        return False
36
 
37
- def restore_latest_backup():
 
38
  token = os.getenv("HF_TOKEN")
39
  if not token:
40
- print("⚠️ 未设置 HF_TOKEN,跳过恢复")
41
- return
42
  login(token=token)
43
  api = HfApi()
44
  try:
45
  files = api.list_repo_files(repo_id=DATASET_REPO, repo_type="dataset")
46
  db_files = [f for f in files if f.endswith(".db") and f.startswith(BACKUP_PREFIX)]
47
  if not db_files:
48
- print("ℹ️ 数据集中没有找到备份文件")
49
- # 如果没有备份,但本地数据库存在,则自动创建一份
50
- if os.path.exists(DB_TARGET):
51
- print("🔄 检测到本地数据库存在,将创建初始备份...")
52
- upload_backup(DB_TARGET, DATASET_REPO, BACKUP_PREFIX)
53
- else:
54
- print("ℹ️ 本地数据库也不存在,跳过恢复")
55
- return
56
  db_files.sort()
57
  latest = db_files[-1]
58
- print(f"🔄 发现最新备份: {latest},开始下载...")
59
  downloaded = hf_hub_download(
60
  repo_id=DATASET_REPO,
61
  filename=latest,
@@ -64,10 +105,40 @@ def restore_latest_backup():
64
  )
65
  os.makedirs(os.path.dirname(DB_TARGET), exist_ok=True)
66
  shutil.copy(downloaded, DB_TARGET)
67
- print(f"✅ 数据库恢复成功: {DB_TARGET}")
68
  os.remove(downloaded)
 
69
  except Exception as e:
70
- print(f"❌ 数据库恢复失败: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  if __name__ == "__main__":
73
  restore_latest_backup()
 
2
  import os
3
  import sys
4
  import shutil
5
+ import json
6
+ import sqlite3
7
+ import tarfile
8
+ import tempfile
9
+ import subprocess
10
  import datetime
11
  from huggingface_hub import HfApi, login, hf_hub_download
12
 
13
  DB_TARGET = "/app/server/data/freeapi.db"
14
  DATASET_REPO = "lydgs/freellm-backup"
15
+ CONFIG_PREFIX = "config_export"
16
  BACKUP_PREFIX = "freeapi_backup"
17
 
18
def import_json_to_db(json_dir, db_path):
    """Rebuild the SQLite database at ``db_path`` from per-table JSON files.

    Every ``<table>.json`` file directly inside ``json_dir`` is expected to
    hold a JSON array of objects. It becomes a table named ``<table>`` whose
    columns are the keys found in those objects; columns are created without
    declared types or constraints. Files whose payload is empty are skipped.

    The new database is assembled in a sibling temporary file and only moved
    over ``db_path`` once at least one table was imported, so a failed or
    empty import never destroys an existing database.

    Args:
        json_dir: Directory containing the ``*.json`` table dumps.
        db_path: Destination path of the SQLite database file.

    Returns:
        True if at least one table was imported and ``db_path`` was replaced;
        False if ``json_dir`` is not a directory or contained no usable data.

    Raises:
        ValueError: A JSON file holds something other than an array of objects.
        json.JSONDecodeError, sqlite3.Error, OSError: Propagated unchanged;
            ``db_path`` is left untouched in that case.
    """
    if not os.path.isdir(json_dir):
        return False

    def quote(identifier):
        # Table/column names come from file names and JSON keys; quoting keeps
        # reserved words and unusual characters from breaking the statements.
        return '"' + str(identifier).replace('"', '""') + '"'

    def to_sql_value(value):
        # sqlite3 cannot bind dicts or lists; store nested data as JSON text.
        if isinstance(value, (dict, list)):
            return json.dumps(value, ensure_ascii=False)
        return value

    target_dir = os.path.dirname(db_path)
    if target_dir:  # os.makedirs("") raises, so skip it for bare file names
        os.makedirs(target_dir, exist_ok=True)

    tmp_path = db_path + ".tmp"
    if os.path.exists(tmp_path):  # leftover from an interrupted earlier run
        os.remove(tmp_path)

    imported_tables = 0
    try:
        conn = sqlite3.connect(tmp_path)
        try:
            for json_file in sorted(os.listdir(json_dir)):
                if not json_file.endswith(".json"):
                    continue
                table = quote(json_file[:-5])
                with open(os.path.join(json_dir, json_file), "r", encoding="utf-8") as f:
                    data = json.load(f)
                if not data:
                    continue
                if not isinstance(data, list) or not all(isinstance(row, dict) for row in data):
                    raise ValueError(f"{json_file}: expected a JSON array of objects")

                # Union of keys in first-seen order, so a row lacking a key
                # of another row yields NULL instead of a KeyError.
                columns = list(dict.fromkeys(key for row in data for key in row))
                if not columns:
                    continue
                column_list = ", ".join(quote(col) for col in columns)
                placeholders = ", ".join("?" for _ in columns)

                conn.execute(f"DROP TABLE IF EXISTS {table}")
                conn.execute(f"CREATE TABLE {table} ({column_list})")
                conn.executemany(
                    f"INSERT INTO {table} ({column_list}) VALUES ({placeholders})",
                    ([to_sql_value(row.get(col)) for col in columns] for row in data),
                )
                imported_tables += 1
            conn.commit()
        finally:
            conn.close()

        if imported_tables == 0:
            return False
        # Swap the finished file into place only after a successful import.
        os.replace(tmp_path, db_path)
        return True
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
48
+
49
def _extract_config_archive(archive_path, dest_dir):
    """Extract a ``.tar.gz`` archive into ``dest_dir`` without escaping it."""
    with tarfile.open(archive_path, "r:gz") as tar:
        if hasattr(tarfile, "data_filter"):
            # Extraction filters reject absolute paths, ``..`` traversal,
            # links pointing outside the destination and special files.
            tar.extractall(dest_dir, filter="data")
            return
        # Interpreters without extraction filters: validate members by hand.
        root = os.path.realpath(dest_dir)
        for member in tar.getmembers():
            target = os.path.realpath(os.path.join(root, member.name))
            inside = os.path.commonpath([root, target]) == root
            if not inside or not (member.isfile() or member.isdir()):
                raise ValueError(f"unsafe member in archive: {member.name}")
        tar.extractall(dest_dir)


def _find_json_dir(root):
    """Return the first directory under ``root`` holding ``*.json`` files, or None."""
    for current, dirnames, filenames in os.walk(root):
        dirnames.sort()  # deterministic traversal order
        if any(name.endswith(".json") for name in filenames):
            return current
    return None


def restore_from_json():
    """Try to rebuild the database from the newest JSON config archive.

    Lists the dataset repo, picks the lexicographically greatest file named
    ``<CONFIG_PREFIX>*.tar.gz``, downloads and extracts it into a temporary
    directory, and imports the contained JSON files into ``DB_TARGET``.
    JSON files are looked up at the archive's top level first and then in
    sub-directories, so an archive that wraps its files in a folder is
    handled as well.

    Returns:
        True if the database was rebuilt; False if ``HF_TOKEN`` is unset, no
        config archive exists, the archive holds no JSON files, the import
        reported failure, or any step raised (the error is printed).
    """
    token = os.getenv("HF_TOKEN")
    if not token:
        return False
    login(token=token)
    api = HfApi()
    try:
        files = api.list_repo_files(repo_id=DATASET_REPO, repo_type="dataset")
        config_files = [
            f for f in files
            if f.startswith(CONFIG_PREFIX) and f.endswith(".tar.gz")
        ]
        if not config_files:
            return False
        # Newest archive = greatest name, same pick as a descending sort.
        latest_config = max(config_files)
        print(f"🔄 发现 JSON 配置包: {latest_config},重建数据库...")
        with tempfile.TemporaryDirectory() as tmpdir:
            downloaded = hf_hub_download(
                repo_id=DATASET_REPO,
                filename=latest_config,
                repo_type="dataset",
                local_dir=tmpdir,
            )
            extract_dir = os.path.join(tmpdir, "extract")
            os.makedirs(extract_dir, exist_ok=True)
            _extract_config_archive(downloaded, extract_dir)
            json_dir = _find_json_dir(extract_dir)
            # Without any JSON file there is nothing to import; do not let
            # the importer replace the database with an empty one.
            if json_dir is not None and import_json_to_db(json_dir, DB_TARGET):
                print(f"✅ 数据库从 JSON 配置重建成功: {DB_TARGET}")
                return True
            print("⚠️ JSON 导入失败")
            return False
    except Exception as e:
        print(f"⚠️ JSON 恢复出错: {e}")
        return False
84
 
85
+ def restore_from_binary():
86
+ """回退:从二进制 .db 文件恢复"""
87
  token = os.getenv("HF_TOKEN")
88
  if not token:
89
+ return False
 
90
  login(token=token)
91
  api = HfApi()
92
  try:
93
  files = api.list_repo_files(repo_id=DATASET_REPO, repo_type="dataset")
94
  db_files = [f for f in files if f.endswith(".db") and f.startswith(BACKUP_PREFIX)]
95
  if not db_files:
96
+ return False
 
 
 
 
 
 
 
97
  db_files.sort()
98
  latest = db_files[-1]
99
+ print(f"🔄 使用二进制备份恢复: {latest}")
100
  downloaded = hf_hub_download(
101
  repo_id=DATASET_REPO,
102
  filename=latest,
 
105
  )
106
  os.makedirs(os.path.dirname(DB_TARGET), exist_ok=True)
107
  shutil.copy(downloaded, DB_TARGET)
108
+ print(f"✅ 数据库从二进制备份恢复成功: {DB_TARGET}")
109
  os.remove(downloaded)
110
+ return True
111
  except Exception as e:
112
+ print(f"❌ 二进制恢复失败: {e}")
113
+ return False
114
+
115
def create_full_backup():
    """Run the backup script to produce a full backup (.db file and JSON archive).

    The script at ``/app/scripts/backup_to_dataset.py`` is executed with the
    interpreter running this process, so it sees the same installed packages.

    Returns:
        True if the script ran and exited with status 0; False if the script
        file does not exist, could not be started, or exited non-zero.
    """
    backup_script = "/app/scripts/backup_to_dataset.py"
    if not os.path.exists(backup_script):
        print("⚠️ 备份脚本不存在,无法创建初始备份")
        return False
    # Fall back to a PATH lookup only if the interpreter path is unknown.
    interpreter = sys.executable or "python3"
    try:
        subprocess.run([interpreter, backup_script], check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers the interpreter/script not being launchable at all.
        print(f"❌ 执行备份脚本失败: {e}")
        return False
    print("✅ 已执行完整备份(包括 JSON 包)")
    return True
128
+
129
def restore_latest_backup():
    """Restore the database, preferring the JSON archive over the binary copy.

    Order of attempts:
      1. rebuild from the JSON config archive;
      2. fall back to the binary ``.db`` backup;
      3. if neither restored anything and a local database exists, create a
         full backup from it; otherwise just report that there is nothing
         to restore.
    """
    # ``or`` short-circuits, so the binary restore only runs when the JSON
    # restore did not succeed.
    restored = restore_from_json() or restore_from_binary()
    if restored:
        return

    if not os.path.exists(DB_TARGET):
        print("ℹ️ 本地数据库也不存在,跳过恢复")
        return

    print("🔄 无任何备份,检测到本地数据库,将创建完整备份(包括 JSON 包)...")
    create_full_backup()


# Script entry point: run the restore when executed directly.
if __name__ == "__main__":
    restore_latest_backup()