roshbeed committed on
Commit
36ec7fa
·
verified ·
1 Parent(s): 362c0ce

Upload src/upload_to_hf.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. src/upload_to_hf.py +108 -0
src/upload_to_hf.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import glob
4
+ from huggingface_hub import HfApi, create_repo
5
+ from datetime import datetime
6
+
7
def _upload_files(api, repo_id, uploads):
    """Upload each ``(local_path, path_in_repo)`` pair to the model repo ``repo_id``."""
    for local_path, path_in_repo in uploads:
        print(f"Uploading {local_path}...")
        api.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=path_in_repo,
            repo_id=repo_id,
            repo_type="model",
        )


def _glob_uploads(pattern, repo_dir):
    """Yield ``(local_path, repo_path)`` for files matching ``pattern``, placed under ``repo_dir``."""
    # glob.glob returns matches in arbitrary order; sort so runs are reproducible.
    for local_path in sorted(glob.glob(pattern)):
        yield local_path, f"{repo_dir}/{os.path.basename(local_path)}"


def _existing_uploads(file_names, repo_dir):
    """Yield ``(local_path, repo_path)`` for the listed files that exist on disk."""
    for file_name in file_names:
        # Missing optional files are skipped rather than treated as errors.
        if os.path.exists(file_name):
            yield file_name, f"{repo_dir}/{file_name}"


def upload_to_huggingface(repo_name, token):
    """
    Upload model checkpoints, embeddings, and all intermediary files to Hugging Face Hub.

    All paths are resolved relative to the current working directory. Files are
    uploaded one at a time, in this order:

    * ``cbow/checkpoints/*.pth``  -> ``cbow/checkpoints/``
    * ``checkpoints/*.pth``       -> ``checkpoints/``
    * known data files (if present) -> ``data/``
    * ``cbow/*.pkl``              -> ``vocabulary/``
    * ``sweep.yaml`` / ``requirements.txt`` (if present) -> ``config/``
    * ``*.py``                    -> ``src/``

    Args:
        repo_name (str): Name of the repository to create/use on Hugging Face
        token (str): Hugging Face API token

    Returns:
        None. If the repository cannot be created, the error is printed and the
        function returns without uploading anything. Errors raised by an
        individual upload are not caught and propagate to the caller.
    """
    api = HfApi(token=token)

    # Create repository if it doesn't exist
    try:
        repo_url = create_repo(repo_name, token=token, repo_type="model", exist_ok=True)
    except Exception as e:
        print(f"Error creating repository: {e}")
        return

    # Use the id reported by the Hub: when ``repo_name`` has no namespace the
    # repo is created as "<user>/<repo_name>", and uploads / the final URL must
    # point there. Fall back to ``repo_name`` if the return value carries no
    # ``repo_id`` attribute (older huggingface_hub versions returned a plain str).
    repo_id = getattr(repo_url, "repo_id", None) or repo_name

    # Upload CBOW checkpoints
    _upload_files(api, repo_id, _glob_uploads('cbow/checkpoints/*.pth', 'cbow/checkpoints'))

    # Upload any model checkpoints from the main checkpoints directory
    _upload_files(api, repo_id, _glob_uploads('checkpoints/*.pth', 'checkpoints'))

    # Upload raw and intermediary data files
    data_files = [
        'tokenized_triples.json',
        'triples_small.json',
        'extracted_data.json',
        'corpus.pkl',
        'text8',
    ]
    _upload_files(api, repo_id, _existing_uploads(data_files, 'data'))

    # Upload vocabulary and tokenizer files
    _upload_files(api, repo_id, _glob_uploads('cbow/*.pkl', 'vocabulary'))

    # Upload configuration files
    config_files = ['sweep.yaml', 'requirements.txt']
    _upload_files(api, repo_id, _existing_uploads(config_files, 'config'))

    # Upload source code files
    _upload_files(api, repo_id, _glob_uploads('*.py', 'src'))

    print(f"\nUpload complete! Files are available at: https://huggingface.co/{repo_id}")
100
+
101
if __name__ == "__main__":
    import argparse

    # Command-line entry point: parse the target repo and credentials, then upload.
    parser = argparse.ArgumentParser(description='Upload model files to Hugging Face Hub')
    parser.add_argument('--repo_name', type=str, required=True, help='Name of the repository on Hugging Face')
    # The token is a secret; requiring it on the command line leaks it into shell
    # history and process listings. It stays accepted via --token for backward
    # compatibility, but now defaults to the HF_TOKEN environment variable.
    parser.add_argument(
        '--token',
        type=str,
        default=os.environ.get('HF_TOKEN'),
        help='Hugging Face API token (defaults to the HF_TOKEN environment variable)',
    )
    args = parser.parse_args()

    upload_to_huggingface(args.repo_name, args.token)