Anuj-Panthri committed on
Commit
b2d3370
·
1 Parent(s): d4b1229
Files changed (4) hide show
  1. backup.py +42 -23
  2. on_startup.sh +2 -7
  3. requirements.txt +2 -1
  4. restore.py +10 -5
backup.py CHANGED
@@ -10,30 +10,52 @@ dataset_name = "Anuj-Panthri/JupyterLab_Storage"
10
  work_dir = os.environ['NOTEBOOK_DIR']
11
  dataset_save_path = "."
12
 
13
- def push_to_hub():
14
- # backup "~/../../data/" to hugging face dataset "."
15
- api = HfApi()
16
 
17
- api.upload_folder(
18
- repo_id=dataset_name,
19
- repo_type="dataset",
20
- folder_path=work_dir,
21
- path_in_repo=dataset_save_path,
22
- )
23
 
24
- def commit_scheduler(minutes=10):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
- scheduler = CommitScheduler(
27
- repo_id=dataset_name,
28
- repo_type="dataset",
29
- folder_path=work_dir,
30
- path_in_repo=dataset_save_path,
31
- every=minutes,
32
- )
33
 
34
- print("scheduler running")
35
- while True:
36
- pass
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  if __name__=="__main__":
39
  # Initialize parser
@@ -46,6 +68,3 @@ if __name__=="__main__":
46
  push_to_hub()
47
  else:
48
  commit_scheduler(args.schedule)
49
-
50
- # print(args)
51
- # commit_scheduler()
 
10
  work_dir = os.environ['NOTEBOOK_DIR']
11
  dataset_save_path = "."
12
 
13
+ # def push_to_hub():
14
+ # # backup "~/../../data/" to hugging face dataset "."
15
+ # api = HfApi()
16
 
17
+ # api.upload_folder(
18
+ # repo_id=dataset_name,
19
+ # repo_type="dataset",
20
+ # folder_path=work_dir,
21
+ # path_in_repo=dataset_save_path,
22
+ # )
23
 
24
+ # def commit_scheduler(minutes=10):
25
+
26
+ # scheduler = CommitScheduler(
27
+ # repo_id=dataset_name,
28
+ # repo_type="dataset",
29
+ # folder_path=work_dir,
30
+ # path_in_repo=dataset_save_path,
31
+ # every=minutes,
32
+ # )
33
+
34
+ # print("scheduler running")
35
+ # while True:
36
+ # pass
37
+
38
+
39
+ from git import Repo
40
+ import time
41
+ from glob import glob
42
 
 
 
 
 
 
 
 
43
 
44
+ def push_to_hub():
45
+ local_repo = Repo(work_dir)
46
+ local_repo.index.add(glob(os.path.join(work_dir,"*")))
47
+ print('Files Added Successfully')
48
+ local_repo.index.commit('Automatic backup commit')
49
+ print('Commited successfully')
50
+ origin = local_repo.remote(name='origin')
51
+ origin.push()
52
+ print('Pushed successfully')
53
+
54
+ def commit_scheduler(minutes=10):
55
+ while(True):
56
+ push_to_hub()
57
+ time.sleep(minutes*60)
58
+
59
 
60
  if __name__=="__main__":
61
  # Initialize parser
 
68
  push_to_hub()
69
  else:
70
  commit_scheduler(args.schedule)
 
 
 
on_startup.sh CHANGED
@@ -1,10 +1,5 @@
1
- #!/bin/bash
2
- # Write some commands here that will run on root user before startup.
3
- # For example, to clone transformers and install it in dev mode:
4
- # git clone https://github.com/huggingface/transformers.git
5
- # cd transformers && pip install -e ".[dev]"
6
-
7
- huggingface-cli login --token ${HF_TOKEN}
8
  python3 restore.py
9
  nohup python3 backup.py -s 2 &
10
  exec ./start_server.sh
 
1
+ git lfs install
2
+ huggingface-cli login --token ${HF_TOKEN} --add-to-git-credential
 
 
 
 
 
3
  python3 restore.py
4
  nohup python3 backup.py -s 2 &
5
  exec ./start_server.sh
requirements.txt CHANGED
@@ -4,4 +4,5 @@ tornado==6.2
4
  ipywidgets
5
  huggingface_hub
6
  datasets
7
- python-dotenv
 
 
4
  ipywidgets
5
  huggingface_hub
6
  datasets
7
+ python-dotenv
8
+ GitPython
restore.py CHANGED
@@ -6,8 +6,13 @@ dataset_name = "Anuj-Panthri/JupyterLab_Storage"
6
  work_dir = os.environ['NOTEBOOK_DIR']
7
  dataset_save_path = "."
8
 
9
- snapshot_download(
10
- repo_id=dataset_name,
11
- repo_type="dataset",
12
- local_dir=work_dir,
13
- )
 
 
 
 
 
 
6
  work_dir = os.environ['NOTEBOOK_DIR']
7
  dataset_save_path = "."
8
 
9
+ # snapshot_download(
10
+ # repo_id=dataset_name,
11
+ # repo_type="dataset",
12
+ # local_dir=work_dir,
13
+ # )
14
+
15
+
16
+ import subprocess
17
+
18
+ subprocess.run([f"git clone https://huggingface.co/datasets/Anuj-Panthri/JupyterLab_Storage {work_dir}"])