FreeLLMAPI / scripts /restore_from_dataset.py
lydgs's picture
Update scripts/restore_from_dataset.py
d26f1f4 verified
Raw
History Blame Contribute Delete
5.09 kB
#!/usr/bin/env python3
import os
import sys
import shutil
import json
import sqlite3
import tarfile
import tempfile
import subprocess
import datetime
from huggingface_hub import HfApi, login, hf_hub_download
# Path of the SQLite database file that the restore functions write to.
DB_TARGET = "/app/server/data/freeapi.db"
# Hugging Face repo id queried with repo_type="dataset" for backup files.
DATASET_REPO = "lydgs/freellm-backup"
# Filename prefix of the ".tar.gz" JSON config archives in the dataset repo.
CONFIG_PREFIX = "config_export"
# Filename prefix of the binary ".db" backups in the dataset repo.
BACKUP_PREFIX = "freeapi_backup"
def _quote_sql_identifier(name):
    """Return ``name`` as a double-quoted SQLite identifier (embedded quotes doubled)."""
    return '"' + str(name).replace('"', '""') + '"'


def import_json_to_db(json_dir, db_path):
    """Rebuild the SQLite database at ``db_path`` from a directory of JSON dumps.

    Every ``<table>.json`` file directly inside ``json_dir`` is expected to
    hold a list of row objects. It becomes a table named ``<table>`` whose
    columns are the union of the row keys in first-seen order; a key missing
    from a row is stored as NULL. Columns are created without types or
    constraints. Files holding an empty list are skipped.

    The database is built in a staging file next to ``db_path`` and swapped in
    only after every file was imported, so a failed import leaves any existing
    database untouched.

    Args:
        json_dir: Directory containing the ``*.json`` table dumps.
        db_path: Destination path of the SQLite database file.

    Returns:
        True when the database was rebuilt; False when ``json_dir`` is not a
        directory or contains no ``.json`` file.

    Raises:
        ValueError: A file is not valid JSON or is not a list of objects.
        sqlite3.Error: SQLite rejected a statement.
        OSError: A file could not be read, written or replaced.
    """
    if not os.path.isdir(json_dir):
        return False

    # Sorted for a deterministic import order regardless of the filesystem.
    json_files = sorted(
        name for name in os.listdir(json_dir) if name.endswith('.json')
    )
    if not json_files:
        # Nothing to import: report failure rather than producing an empty
        # database that would look like a successful restore.
        return False

    target_dir = os.path.dirname(db_path)
    if target_dir:  # dirname is '' for a bare filename; makedirs('') would fail
        os.makedirs(target_dir, exist_ok=True)

    # Let SQLite create the staging file so it gets the default permissions.
    staging_path = db_path + ".restoring"
    if os.path.exists(staging_path):
        os.remove(staging_path)

    try:
        conn = sqlite3.connect(staging_path)
        try:
            cursor = conn.cursor()
            for json_file in json_files:
                with open(os.path.join(json_dir, json_file), 'r', encoding='utf-8') as f:
                    data = json.load(f)
                if not data:
                    continue
                if not isinstance(data, list) or not all(
                    isinstance(row, dict) for row in data
                ):
                    raise ValueError(
                        f"{json_file}: expected a JSON list of objects"
                    )

                # Ordered union of keys so no row can hit a missing column.
                columns = list(dict.fromkeys(key for row in data for key in row))
                if not columns:
                    continue

                # Identifiers come from file names and JSON keys; quote them so
                # reserved words and punctuation cannot break or alter the SQL.
                table = _quote_sql_identifier(json_file[:-5])
                column_list = ', '.join(_quote_sql_identifier(c) for c in columns)
                placeholders = ', '.join('?' for _ in columns)

                cursor.execute(f"DROP TABLE IF EXISTS {table}")
                cursor.execute(f"CREATE TABLE {table} ({column_list})")
                cursor.executemany(
                    f"INSERT INTO {table} ({column_list}) VALUES ({placeholders})",
                    ([row.get(col) for col in columns] for row in data),
                )
            conn.commit()
        finally:
            conn.close()
        # Atomic swap: readers see either the old or the fully built database.
        os.replace(staging_path, db_path)
    finally:
        # Only present here if the import failed before the swap.
        if os.path.exists(staging_path):
            os.remove(staging_path)
    return True
def _extract_tar_safely(tar, dest_dir):
    """Extract ``tar`` into ``dest_dir`` without letting any member escape it."""
    if hasattr(tarfile, "data_filter"):
        # Interpreter ships extraction filters: let the stdlib reject absolute
        # paths, ".." traversal, unsafe links and special files.
        tar.extractall(dest_dir, filter="data")
        return

    # Older interpreter: keep only regular files and directories whose
    # resolved path stays inside the destination.
    dest_root = os.path.realpath(dest_dir)
    safe_members = []
    for member in tar.getmembers():
        if not (member.isfile() or member.isdir()):
            continue
        member_path = os.path.realpath(os.path.join(dest_root, member.name))
        if os.path.commonpath([dest_root, member_path]) != dest_root:
            raise ValueError(f"unsafe path in archive: {member.name}")
        safe_members.append(member)
    tar.extractall(dest_dir, members=safe_members)


def restore_from_json():
    """Try to rebuild the database from the newest JSON config archive.

    Lists the files of ``DATASET_REPO``, picks the lexicographically greatest
    name that starts with ``CONFIG_PREFIX`` and ends with ``.tar.gz``,
    downloads and extracts it into a temporary directory, and imports the
    extracted JSON files into ``DB_TARGET``.

    Returns:
        True when the database was rebuilt. False when ``HF_TOKEN`` is unset,
        no matching archive exists, the import reports failure, or any step
        raises (the error is printed, not propagated).
    """
    token = os.getenv("HF_TOKEN")
    if not token:
        return False

    try:
        # login() is inside the try so an auth/network failure returns False
        # and lets the caller fall back, instead of aborting the whole script.
        login(token=token)
        api = HfApi()
        files = api.list_repo_files(repo_id=DATASET_REPO, repo_type="dataset")
        config_files = [
            f for f in files
            if f.startswith(CONFIG_PREFIX) and f.endswith('.tar.gz')
        ]
        if not config_files:
            return False

        latest_config = max(config_files)
        print(f"🔄 发现 JSON 配置包: {latest_config},重建数据库...")

        with tempfile.TemporaryDirectory() as tmpdir:
            downloaded = hf_hub_download(
                repo_id=DATASET_REPO,
                filename=latest_config,
                repo_type="dataset",
                local_dir=tmpdir,
            )
            extract_dir = os.path.join(tmpdir, "extract")
            os.makedirs(extract_dir, exist_ok=True)
            with tarfile.open(downloaded, "r:gz") as tar:
                _extract_tar_safely(tar, extract_dir)

            if import_json_to_db(extract_dir, DB_TARGET):
                print(f"✅ 数据库从 JSON 配置重建成功: {DB_TARGET}")
                return True
            print("⚠️ JSON 导入失败")
            return False
    except Exception as e:
        # Best-effort restore: report and let the caller try the next source.
        print(f"⚠️ JSON 恢复出错: {e}")
        return False
def restore_from_binary():
    """Fallback: restore the database from the newest binary ``.db`` backup.

    Lists the files of ``DATASET_REPO``, picks the lexicographically greatest
    name that starts with ``BACKUP_PREFIX`` and ends with ``.db``, downloads
    it into a temporary directory and installs it as ``DB_TARGET``.

    Returns:
        True when the database file was restored. False when ``HF_TOKEN`` is
        unset, no matching backup exists, or any step raises (the error is
        printed, not propagated).
    """
    token = os.getenv("HF_TOKEN")
    if not token:
        return False

    try:
        # login() is inside the try so an auth/network failure returns False
        # instead of aborting the whole script.
        login(token=token)
        api = HfApi()
        files = api.list_repo_files(repo_id=DATASET_REPO, repo_type="dataset")
        db_files = [
            f for f in files
            if f.startswith(BACKUP_PREFIX) and f.endswith(".db")
        ]
        if not db_files:
            return False

        latest = max(db_files)
        print(f"🔄 使用二进制备份恢复: {latest}")

        # A temporary directory guarantees the download is cleaned up even
        # when a later step fails.
        with tempfile.TemporaryDirectory() as tmpdir:
            downloaded = hf_hub_download(
                repo_id=DATASET_REPO,
                filename=latest,
                repo_type="dataset",
                local_dir=tmpdir,
            )
            os.makedirs(os.path.dirname(DB_TARGET), exist_ok=True)
            # Copy next to the target first, then swap atomically, so a failed
            # copy cannot leave a truncated database in place.
            staging_path = DB_TARGET + ".restoring"
            try:
                shutil.copy(downloaded, staging_path)
                os.replace(staging_path, DB_TARGET)
            finally:
                if os.path.exists(staging_path):
                    os.remove(staging_path)

        print(f"✅ 数据库从二进制备份恢复成功: {DB_TARGET}")
        return True
    except Exception as e:
        # Best-effort restore: report and let the caller continue.
        print(f"❌ 二进制恢复失败: {e}")
        return False
def create_full_backup():
    """Run the backup script to produce a full backup (``.db`` and JSON archive).

    Returns:
        True when the script exited with status 0. False when the script file
        does not exist, could not be launched, or exited with a non-zero
        status (the reason is printed).
    """
    backup_script = "/app/scripts/backup_to_dataset.py"
    if not os.path.exists(backup_script):
        print("⚠️ 备份脚本不存在,无法创建初始备份")
        return False

    # Use the interpreter running this script so the child sees the same
    # installed packages; fall back to PATH lookup if it is unknown.
    interpreter = sys.executable or "python3"
    try:
        subprocess.run([interpreter, backup_script], check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable interpreter.
        print(f"❌ 执行备份脚本失败: {e}")
        return False

    print("✅ 已执行完整备份(包括 JSON 包)")
    return True
def restore_latest_backup():
    """Restore the database from the best available source.

    Order of attempts: the JSON archive first, then the binary backup. When
    neither succeeds and a local database file exists, a full backup is
    created instead; otherwise nothing is done.
    """
    # Short-circuit keeps the original order: the binary restore is only
    # attempted when the JSON restore did not succeed.
    restored = restore_from_json() or restore_from_binary()
    if restored:
        return

    if not os.path.exists(DB_TARGET):
        print("ℹ️ 本地数据库也不存在,跳过恢复")
        return

    # No remote backup at all, but there is local data: seed the backup.
    print("🔄 无任何备份,检测到本地数据库,将创建完整备份(包括 JSON 包)...")
    create_full_backup()
# Run the restore chain only when this file is executed as a script.
if __name__ == "__main__":
    restore_latest_backup()