# backup.py — back up and restore JupyterLab storage via a git remote
# (originally by Anuj-Panthri, commit 96e4358: "backup and restore")
from dotenv import load_dotenv
# Load .env before anything reads os.environ (NOTEBOOK_DIR below, and any
# Hugging Face token the libraries pick up). Must stay ahead of the imports
# and env reads that follow.
load_dotenv()
from datasets import load_dataset
from huggingface_hub import CommitScheduler
from huggingface_hub import HfApi
import os
import argparse
# Hugging Face dataset repo used by the (commented-out) hub-upload path below.
dataset_name = "Anuj-Panthri/JupyterLab_Storage"
# Local directory to back up; KeyError here is intentional if NOTEBOOK_DIR
# is missing from the environment / .env file.
work_dir = os.environ['NOTEBOOK_DIR']
# Destination path inside the dataset repo (repo root).
dataset_save_path = "."
# def push_to_hub():
# # backup "~/../../data/" to hugging face dataset "."
# api = HfApi()
# api.upload_folder(
# repo_id=dataset_name,
# repo_type="dataset",
# folder_path=work_dir,
# path_in_repo=dataset_save_path,
# )
# def commit_scheduler(minutes=10):
# scheduler = CommitScheduler(
# repo_id=dataset_name,
# repo_type="dataset",
# folder_path=work_dir,
# path_in_repo=dataset_save_path,
# every=minutes,
# )
# print("scheduler running")
# while True:
# pass
from git import Repo
import time
def push_to_hub():
    """Back up ``work_dir`` by committing all changes and pushing to ``origin``.

    Stages every change (including deletions) in the git repository rooted at
    the module-level ``work_dir``, records an automatic backup commit, and
    pushes it to the ``origin`` remote. Progress is reported via ``print``.

    Raises whatever GitPython raises if ``work_dir`` is not a git repository,
    the commit fails, or the push is rejected.
    """
    local_repo = Repo(work_dir)
    # add(all=True) == `git add -A`: stages new, modified, and deleted files.
    local_repo.git.add(all=True)
    print('Files Added Successfully')
    local_repo.index.commit('Automatic backup commit')
    # Fixed typo in the status message ("Commited" -> "Committed").
    print('Committed successfully')
    origin = local_repo.remote(name='origin')
    origin.push()
    print('Pushed successfully')
def commit_scheduler(minutes=10):
    """Run forever, backing up to the hub once every *minutes* minutes.

    Performs an immediate backup via ``push_to_hub()``, then repeats after
    sleeping for the given interval. Never returns.
    """
    interval_seconds = minutes * 60
    while True:
        push_to_hub()
        time.sleep(interval_seconds)
if __name__=="__main__":
# Initialize parser
parser = argparse.ArgumentParser()
parser.add_argument("-p","--push", help="push to hub",action='store_true',default=True)
parser.add_argument("-s","--schedule", help="push to hub scheduler every n minutes",type=int)
args = parser.parse_args()
if not args.schedule:
push_to_hub()
else:
commit_scheduler(args.schedule)