File size: 3,636 Bytes
36ec7fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dae3897
 
 
 
36ec7fa
dae3897
 
36ec7fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import os
import torch
import glob
from huggingface_hub import HfApi, create_repo
from datetime import datetime

def _upload_group(api, repo_name, local_paths, repo_dir, *, skip_missing=False):
    """Upload each local file into ``repo_dir`` of the model repository.

    Args:
        api: Client object whose ``upload_file`` method performs the upload.
        repo_name (str): Repository id passed to ``upload_file`` as ``repo_id``.
        local_paths: Iterable of local file paths to upload.
        repo_dir (str): Destination directory inside the repository; every
            file keeps its base name underneath it.
        skip_missing (bool): When true, paths that do not exist locally are
            skipped instead of being handed to ``upload_file``.
    """
    for local_path in local_paths:
        if skip_missing and not os.path.exists(local_path):
            continue
        print(f"Uploading {local_path}...")
        api.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=f"{repo_dir}/{os.path.basename(local_path)}",
            repo_id=repo_name,
            repo_type="model",
        )


def upload_to_huggingface(repo_name, token):
    """
    Upload model checkpoints, embeddings, and all intermediary files to Hugging Face Hub.

    Files are gathered relative to the current working directory and uploaded
    one at a time, in this order: CBOW checkpoints, main checkpoints, data
    files, CBOW pickle files, configuration files, and top-level Python
    sources. If the repository cannot be created, the error is printed and
    the function returns without uploading anything. Errors raised by an
    individual upload are not caught here.

    Args:
        repo_name (str): Name of the repository to create/use on Hugging Face
        token (str): Hugging Face API token
    """
    api = HfApi(token=token)

    # Create repository if it doesn't exist
    try:
        create_repo(repo_name, token=token, repo_type="model", exist_ok=True)
    except Exception as e:
        print(f"Error creating repository: {e}")
        return

    # glob gives no ordering guarantee, so results are sorted to make the
    # upload order reproducible from run to run.

    # Upload CBOW checkpoints
    _upload_group(
        api, repo_name, sorted(glob.glob('cbow/checkpoints/*.pth')), "cbow/checkpoints"
    )

    # Upload any model checkpoints from the main checkpoints directory
    _upload_group(api, repo_name, sorted(glob.glob('checkpoints/*.pth')), "checkpoints")

    # Upload raw and intermediary data files (each one is optional)
    data_files = [
        'tokenized_triples.json',
        'triples_small.json',
        'extracted_data.json',
        'corpus.pkl',
        'text8',
    ]
    _upload_group(api, repo_name, data_files, "data", skip_missing=True)

    # Upload all pickle files that sit directly in the cbow directory
    _upload_group(api, repo_name, sorted(glob.glob('cbow/*.pkl')), "cbow")

    # Upload configuration files (each one is optional)
    config_files = ['sweep.yaml', 'requirements.txt']
    _upload_group(api, repo_name, config_files, "config", skip_missing=True)

    # Upload source code files from the current directory
    _upload_group(api, repo_name, sorted(glob.glob('*.py')), "src")

    print(f"\nUpload complete! Files are available at: https://huggingface.co/{repo_name}")

if __name__ == "__main__":
    # Script entry point: read the two mandatory options and start the upload.
    from argparse import ArgumentParser

    cli = ArgumentParser(description='Upload model files to Hugging Face Hub')
    required_options = (
        ('--repo_name', 'Name of the repository on Hugging Face'),
        ('--token', 'Hugging Face API token'),
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, type=str, required=True, help=help_text)
    options = cli.parse_args()

    upload_to_huggingface(options.repo_name, options.token)