Spaces:
Paused
Paused
| #!/usr/bin/env python3 | |
| from huggingface_hub import snapshot_download, HfApi | |
| import os | |
| import sys | |
| import time | |
| import json | |
| import hashlib | |
| from datetime import datetime | |
def get_dataset_info(repo_id, token):
    """Fetch the remote dataset's revision metadata, used to detect updates.

    Args:
        repo_id: Dataset repository id passed to ``HfApi.repo_info``.
        token: Access token handed to ``HfApi``.

    Returns:
        A dict with the keys ``"sha"`` and ``"last_modified"`` (an ISO-8601
        string, or ``None`` when the API reports no modification time), or
        ``None`` when the lookup failed for any reason.
    """
    # Deliberately broad catch: this is a best-effort probe called from a
    # polling loop, so any failure is reported and converted to None instead
    # of terminating the caller.
    try:
        info = HfApi(token=token).repo_info(repo_id=repo_id, repo_type="dataset")
        modified = info.last_modified
        return {
            "sha": info.sha,
            "last_modified": modified.isoformat() if modified else None,
        }
    except Exception as e:
        print(f"获取数据集信息出错: {e}")
        return None
def save_dataset_info(info, music_dir):
    """Persist dataset metadata to ``.dataset_info.json`` inside *music_dir*.

    Saving is best-effort: failures are printed and swallowed so that a
    problem writing the record never aborts the caller.

    Args:
        info: JSON-serializable metadata to store.
        music_dir: Directory that receives the ``.dataset_info.json`` file;
            it is created when it does not exist yet.

    Returns:
        None.
    """
    info_file = os.path.join(music_dir, ".dataset_info.json")
    try:
        # Serialize before opening the file: opening with "w" truncates, so a
        # serialization error must not destroy a previously saved record.
        payload = json.dumps(info)
        if music_dir:
            os.makedirs(music_dir, exist_ok=True)
        with open(info_file, "w", encoding="utf-8") as f:
            f.write(payload)
    except (OSError, TypeError, ValueError) as e:
        # TypeError/ValueError: *info* is not JSON-serializable.
        print(f"保存数据集信息出错: {e}")
def load_dataset_info(music_dir):
    """Load the dataset metadata stored in *music_dir*.

    Args:
        music_dir: Directory expected to contain ``.dataset_info.json``.

    Returns:
        The parsed JSON content of the file, or ``None`` when the file does
        not exist, cannot be read, or does not contain valid JSON.
    """
    info_file = os.path.join(music_dir, ".dataset_info.json")
    try:
        with open(info_file, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        # No record has been saved yet; this is not an error, so stay quiet.
        return None
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"加载数据集信息出错: {e}")
        return None
def update_music(dataset_name, token, music_dir, force=False):
    """Download the music dataset into *music_dir* when the remote changed.

    The remote revision sha is compared with the one recorded in
    ``.dataset_info.json``; a download happens when they differ, when no
    usable local record exists, or when *force* is true.

    Args:
        dataset_name: Dataset repository id.
        token: Access token used for both the metadata lookup and the download.
        music_dir: Local directory that receives the dataset files.
        force: When true, download even if the recorded sha matches.

    Returns:
        True when a download completed, False when the update was skipped
        or failed.
    """
    print(f"[{datetime.now()}] 检查音乐数据集更新...")

    # Remote state.
    remote_info = get_dataset_info(dataset_name, token)
    if not remote_info:
        print("无法获取远程数据集信息,跳过更新")
        return False
    remote_sha = remote_info.get("sha")

    # Local state. A record that is not a JSON object (e.g. a hand-edited or
    # damaged file) is treated as "no record" instead of crashing on .get().
    local_info = load_dataset_info(music_dir)
    local_sha = local_info.get("sha") if isinstance(local_info, dict) else None

    # Two missing shas must not count as "unchanged".
    if not force and remote_sha is not None and local_sha == remote_sha:
        print("音乐数据集没有变化,无需更新")
        return False

    print("检测到音乐数据集有更新,开始下载...")
    try:
        # Pin the download to the revision that was just inspected so the
        # files on disk always correspond to the sha recorded afterwards,
        # even if the repository receives a new commit in between.
        snapshot_download(
            repo_id=dataset_name,
            repo_type="dataset",
            revision=remote_sha,
            local_dir=music_dir,
            token=token,
        )
        # Record the new state only after a successful download.
        save_dataset_info(remote_info, music_dir)
        print(f"[{datetime.now()}] 音乐数据集更新成功!")
        return True
    except Exception as e:
        # Best-effort: the polling loop must survive a failed download.
        print(f"更新音乐数据集出错: {e}")
        return False
if __name__ == "__main__":
    # Command-line arguments: dataset_name token music_dir [interval] [force]
    if len(sys.argv) < 4:
        print(
            f"usage: {sys.argv[0]} dataset_name token music_dir [interval] [force]",
            file=sys.stderr,
        )
        sys.exit(2)

    dataset_name, token, music_dir = sys.argv[1:4]

    # Optional arguments.
    try:
        interval = int(sys.argv[4]) if len(sys.argv) > 4 else 3600  # default: 1 hour
    except ValueError:
        sys.exit(f"interval must be an integer number of seconds, got {sys.argv[4]!r}")
    if interval < 0:
        # time.sleep() rejects negative values; fail before the first download.
        sys.exit(f"interval must not be negative, got {interval}")
    force = sys.argv[5].lower() == "true" if len(sys.argv) > 5 else False

    try:
        # Always force an update on the first run.
        update_music(dataset_name, token, music_dir, force=True)

        # Then poll for updates periodically.
        while True:
            time.sleep(interval)
            update_music(dataset_name, token, music_dir, force=force)
    except KeyboardInterrupt:
        # Allow Ctrl-C to stop the polling loop without a traceback.
        sys.exit(0)