jamesw853 committed on
Commit
00c1284
·
verified ·
1 Parent(s): ec550ee

Update sync_to_dataset.py

Browse files
Files changed (1) hide show
  1. sync_to_dataset.py +36 -18
sync_to_dataset.py CHANGED
@@ -4,43 +4,61 @@ import time
4
  import logging
5
  from huggingface_hub import HfApi, upload_folder
6
 
 
7
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
8
  logger = logging.getLogger(__name__)
9
 
 
10
  HF_TOKEN = os.environ.get("HF_TOKEN")
11
- DATASET_REPO = os.environ.get("OPENCODE_DATASET_REPO")
12
- SYNC_INTERVAL = 120 # 秒
 
 
 
 
13
 
14
  if not HF_TOKEN:
15
- logger.error("HF_TOKEN environment variable not set. Sync will not work.")
16
  exit(1)
17
  if not DATASET_REPO:
18
- logger.error("OPENCODE_DATASET_REPO environment variable not set. Sync will not work.")
19
  exit(1)
20
 
21
  api = HfApi(token=HF_TOKEN)
22
 
23
- def sync_data():
 
24
  try:
25
- if not os.path.isdir("/data"):
26
- logger.warning("/data does not exist or is not a directory")
27
  return
 
 
 
 
 
 
 
 
 
28
  upload_folder(
29
  repo_id=DATASET_REPO,
30
- folder_path="/data",
31
- path_in_repo="",
32
  repo_type="dataset",
33
- commit_message=f"Auto sync at {time.strftime('%Y-%m-%d %H:%M:%S')}"
34
  )
35
- logger.info(f"Successfully synced /data to {DATASET_REPO}")
36
  except Exception as e:
37
- logger.error(f"Sync failed: {e}")
38
 
39
- if __name__ == "__main__":
40
- logger.info(f"Starting sync daemon, interval={SYNC_INTERVAL}s, dataset={DATASET_REPO}")
 
 
41
  while True:
42
- try:
43
- sync_data()
44
- except Exception as e:
45
- logger.error(f"Unexpected error in sync loop: {e}")
46
  time.sleep(SYNC_INTERVAL)
 
 
 
 
 
4
  import logging
5
  from huggingface_hub import HfApi, upload_folder
6
 
7
+ # 配置日志
8
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
9
  logger = logging.getLogger(__name__)
10
 
11
+ # 从环境变量读取配置
12
  HF_TOKEN = os.environ.get("HF_TOKEN")
13
+ DATASET_REPO = os.environ.get("OPENCODE_DATASET_REPO") # 应为 "jamesw853/opencode-data"
14
+ LOCAL_DIR = "/data"
15
+ SYNC_INTERVAL = 180 # 秒
16
+
17
+ # 目标子目录(注意:目录名包含空格,Hugging Face Hub 支持,但建议用引号)
18
+ TARGET_SUBDIR = "opencode backup" # 上传到数据集的这个子目录下
19
 
20
  if not HF_TOKEN:
21
+ logger.error("HF_TOKEN environment variable not set. Cannot upload.")
22
  exit(1)
23
  if not DATASET_REPO:
24
+ logger.error("OPENCODE_DATASET_REPO environment variable not set.")
25
  exit(1)
26
 
27
  api = HfApi(token=HF_TOKEN)
28
 
29
+ def upload_data():
30
+ """将 /data 目录上传到数据集的指定子目录"""
31
  try:
32
+ if not os.path.isdir(LOCAL_DIR):
33
+ logger.warning(f"{LOCAL_DIR} does not exist, skipping upload.")
34
  return
35
+
36
+ # 列出要上传的文件(可选,用于日志)
37
+ files = []
38
+ for root, _, filenames in os.walk(LOCAL_DIR):
39
+ for f in filenames:
40
+ files.append(os.path.relpath(os.path.join(root, f), LOCAL_DIR))
41
+ logger.info(f"Found {len(files)} file(s) to upload: {files[:5]}{'...' if len(files)>5 else ''}")
42
+
43
+ # 上传整个文件夹到数据集内的 TARGET_SUBDIR 子目录
44
  upload_folder(
45
  repo_id=DATASET_REPO,
46
+ folder_path=LOCAL_DIR,
47
+ path_in_repo=TARGET_SUBDIR, # 所有文件会出现在数据集 /opencode backup/ 下
48
  repo_type="dataset",
49
+ commit_message=f"Auto backup to {TARGET_SUBDIR} at {time.strftime('%Y-%m-%d %H:%M:%S')}"
50
  )
51
+ logger.info(f"Successfully uploaded {LOCAL_DIR} to {DATASET_REPO}/{TARGET_SUBDIR}")
52
  except Exception as e:
53
+ logger.error(f"Upload failed: {e}")
54
 
55
+ def main():
56
+ logger.info(f"Starting backup daemon: interval={SYNC_INTERVAL}s, dataset={DATASET_REPO}, target_dir={TARGET_SUBDIR}")
57
+ # 启动后先立即执行一次
58
+ upload_data()
59
  while True:
 
 
 
 
60
  time.sleep(SYNC_INTERVAL)
61
+ upload_data()
62
+
63
+ if __name__ == "__main__":
64
+ main()