FuryAssassin committed on
Commit
ac6e54b
·
verified ·
1 Parent(s): 7b52fed

Upload folder using huggingface_hub

Browse files
.python_tmp/05ef2759-9606-4dbe-8aab-d60dc9b9acec.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Check the published MyAwesomeModel-CleanupRelease repo against local files.

Every ``config.json`` under ``checkpoints/`` is downloaded from the Hub and
compared byte-for-byte with the local ``checkpoints/step_100/config.json``.
The script also reports ``.swp`` files in the repo, whether the local editor
swap file still exists, and formatting details of the canonical config.
"""
import json
import os
import re

from huggingface_hub import HfApi, hf_hub_download

root = os.getcwd()
api = HfApi()
with open('hf_token.txt') as token_file:
    token = token_file.read().strip()
account = api.whoami(token=token)
username = account.get('name') or account.get('login')
repo_id = f"{username}/MyAwesomeModel-CleanupRelease"

checkpoints_dir = os.path.join(root, 'checkpoints')

# The step_100 config is the byte-level reference every other config must match.
with open(os.path.join(checkpoints_dir, 'step_100', 'config.json'), 'rb') as canon_file:
    canonical = canon_file.read()

# A directory holds at most one config.json, so a membership test per
# directory finds the same paths as scanning each filename.
local_configs = sorted(
    os.path.join(dirpath, 'config.json')
    for dirpath, _dirnames, filenames in os.walk(checkpoints_dir)
    if 'config.json' in filenames
)


def _remote_problem(rel):
    """Return a mismatch tuple for repo file *rel*, or None when it matches."""
    try:
        cached = hf_hub_download(repo_id=repo_id, filename=rel, token=token, repo_type='model')
    except Exception as err:
        return (rel, 'download_error', str(err))
    with open(cached, 'rb') as fh:
        payload = fh.read()
    if payload == canonical:
        return None
    return (rel, 'byte_mismatch', len(payload))


mismatches = []
for config_path in local_configs:
    problem = _remote_problem(os.path.relpath(config_path, root))
    if problem is not None:
        mismatches.append(problem)

# Swap files that made it into the repo.
files = api.list_repo_files(repo_id=repo_id, token=token)
swp_files = [name for name in files if name.endswith('.swp')]

# Local swap file that is expected to have been removed.
local_swap_exists = os.path.exists(os.path.join(root, 'evaluation', '.setup.py.swp'))

print('local_swap_exists', local_swap_exists)
print('repo_swp_files', swp_files)
print('mismatches', mismatches)

# Formatting details of the canonical file.
text = canonical.decode('utf-8')
text_lines = text.splitlines()
print('Canonical raw repr:', repr(canonical))
print('Canonical text:\n', text)
print('Line count', len(text_lines))
if len(text_lines) >= 2:
    # The leading whitespace of the second line shows the indentation style.
    leading = re.match(r"^(\s*)", text_lines[1])
    print('Indentation chars on line 2:', repr(leading.group(1)))
print('Ends with newline?', canonical.endswith(b'\n'))
.python_tmp/28a4b2a5-22d6-4929-a9c6-bb8ee74c9abe.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Create the MyAwesomeModel-CleanupRelease repo, upload the working tree, spot-check.

Steps: ensure the repo exists, upload the current working directory (without
editor swap files or the access token), list the repo files, and download
``checkpoints/step_100/config.json`` to confirm it is retrievable.  Each step
is best-effort: failures are printed and the script carries on so the later
diagnostics still run.
"""
import os

from huggingface_hub import HfApi, upload_folder, hf_hub_download

root = os.getcwd()
with open('hf_token.txt') as f:
    token = f.read().strip()
api = HfApi()
who = api.whoami(token=token)
username = who.get('name') or who.get('login')
repo_name = 'MyAwesomeModel-CleanupRelease'
repo_id = f"{username}/{repo_name}"

# create repo with correct signature
try:
    api.create_repo(repo_id=repo_id, token=token, exist_ok=True)
    print('create_repo OK')
except Exception as e:
    print('create_repo error', e)

# upload using upload_folder signature
try:
    # hf_token.txt is read from the directory being uploaded, so it must be
    # excluded explicitly or the access token is published with the model.
    upload_folder(
        repo_id=repo_id,
        folder_path=root,
        path_in_repo='.',
        token=token,
        repo_type='model',
        ignore_patterns=['*.swp', 'hf_token.txt'],
    )
    print('upload_folder OK')
except Exception as e:
    print('upload_folder error', e)

# list files
try:
    files = api.list_repo_files(repo_id=repo_id, token=token)
    print('files count', len(files))
    swp_files = [name for name in files if name.endswith('.swp')]
    print('.swp files in repo:', swp_files)
except Exception as e:
    print('list_repo_files error', e)

# download one config to verify
rel = os.path.relpath(os.path.join(root, 'checkpoints', 'step_100', 'config.json'), root)
print('rel', rel)
try:
    dl = hf_hub_download(repo_id=repo_id, filename=rel, token=token, repo_type='model')
    print('downloaded to', dl)
    with open(dl, 'rb') as f:
        b = f.read()
    print('size', len(b))
except Exception as e:
    print('download error', e)
.python_tmp/40c80db1-8de5-4cb2-8f64-de819548ed08.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Normalize checkpoint configs, remove the editor swap file, and publish to the Hub.

1. Collect every ``config.json`` under ``checkpoints/`` and take the most
   common byte content as canonical.
2. Rewrite any outlier whose parsed JSON equals the canonical JSON so that it
   is byte-identical; outliers with different content are reported, not
   touched.
3. Delete ``evaluation/.setup.py.swp`` if present.
4. Upload the working directory to ``<user>/MyAwesomeModel-CleanupRelease``
   (excluding swap files and the access token) and verify the uploaded
   configs byte-for-byte.

Exits with status 1 when no config files are found, when ``hf_token.txt`` is
missing, or when the account name cannot be determined.
"""
import json
import os
import sys
from collections import Counter

root = os.getcwd()

# find config files
config_paths = []
for dirpath, dirnames, filenames in os.walk(os.path.join(root, 'checkpoints')):
    for fn in filenames:
        if fn == 'config.json':
            config_paths.append(os.path.join(dirpath, fn))
config_paths.sort()
print('Found', len(config_paths), 'config.json files')

# read bytes
bytes_map = {}
for p in config_paths:
    with open(p, 'rb') as f:
        bytes_map[p] = f.read()

# determine most common bytes
counter = Counter(bytes_map.values())
most_common = counter.most_common(1)
if not most_common:
    print('No config files found, exiting')
    sys.exit(1)
canonical_bytes, count = most_common[0]
print('Canonical bytes occur', count, 'times out of', len(config_paths))

# show canonical as repr
print('Canonical bytes repr:\n', canonical_bytes)

# rewrite outliers after validating JSON content identical
rewritten = []
mismatched_content = []
for p, b in bytes_map.items():
    if b == canonical_bytes:
        continue
    # Only reformat a file when its parsed content equals the canonical
    # content; a real content difference must never be overwritten silently.
    try:
        js_current = json.loads(b.decode('utf-8'))
        js_canonical = json.loads(canonical_bytes.decode('utf-8'))
    except Exception as e:
        print('Failed to parse JSON for', p, 'err', e)
        mismatched_content.append(p)
        continue
    if js_current != js_canonical:
        print('JSON content differs for', p)
        mismatched_content.append(p)
        continue
    # write canonical bytes
    with open(p, 'wb') as f:
        f.write(canonical_bytes)
    rewritten.append(p)

print('Rewrote', len(rewritten), 'files')
if mismatched_content:
    print('Files with mismatched JSON content:', mismatched_content)

# remove swap file if exists
swap = os.path.join(root, 'evaluation', '.setup.py.swp')
if os.path.exists(swap):
    os.remove(swap)
    print('Removed swap file', swap)
else:
    print('No swap file found at', swap)

# Now upload to Hugging Face.  The import is deferred so the local cleanup
# above still runs when the library is not installed.
try:
    from huggingface_hub import HfApi, upload_folder, hf_hub_download
except ImportError as e:
    print('huggingface_hub not installed:', e)
    raise

# read token
token_path = os.path.join(root, 'hf_token.txt')
if not os.path.exists(token_path):
    print('hf_token.txt not found')
    sys.exit(1)
with open(token_path, 'r') as f:
    token = f.read().strip()
api = HfApi()
who = api.whoami(token=token)
username = who.get('name') or who.get('login')
if not username:
    print('Failed to get username from token whoami:', who)
    sys.exit(1)
repo_name = 'MyAwesomeModel-CleanupRelease'
repo_id = f"{username}/{repo_name}"
print('Repo id:', repo_id)

# create repo if needed
try:
    # create_repo identifies the repository through ``repo_id``.
    api.create_repo(repo_id=repo_id, token=token, exist_ok=True)
    print('Created or found repo')
except Exception as e:
    print('create_repo error (continuing if exists):', e)

# upload folder
print('Uploading folder... this may take a while')
upload_folder(
    repo_id=repo_id,
    repo_type='model',
    # upload_folder takes the local directory as ``folder_path``.
    folder_path=root,
    path_in_repo='.',
    token=token,
    # hf_token.txt sits in the uploaded directory; exclude it so the access
    # token is not published together with the model files.
    ignore_patterns=['*.swp', 'hf_token.txt'],
)
print('Upload finished')

# verify
files = api.list_repo_files(repo_id=repo_id, token=token)
print('Repo files count:', len(files))
# check .swp absence
swp_present = any(name.endswith('.swp') for name in files)
print('.swp present in repo files?', swp_present)

# For each checkpoints config, download and compare bytes
mismatches_repo = []
for p in config_paths:
    # compute repo path relative
    rel = os.path.relpath(p, root)
    # hf_hub_download expects repo_id and filename
    try:
        local_dl = hf_hub_download(repo_id=repo_id, filename=rel, token=token, repo_type='model')
    except Exception as e:
        print('Failed to download', rel, 'err', e)
        mismatches_repo.append(rel)
        continue
    with open(local_dl, 'rb') as f:
        b = f.read()
    if b != canonical_bytes:
        mismatches_repo.append(rel)

print('Mismatched files in repo:', mismatches_repo)

# Output summary
print('SUMMARY')
print('canonical_bytes_count=', count)
print('rewritten=', rewritten)
print('swap_removed=', not os.path.exists(swap))
print('swp_present_in_repo=', swp_present)
print('mismatched_in_repo=', mismatches_repo)
.python_tmp/54bed8a4-2e49-4458-983f-45b2d078a179.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Upload the working tree to MyAwesomeModel-CleanupRelease and verify the configs.

The upload skips editor swap files and ``hf_token.txt``.  Afterwards every
``config.json`` under ``checkpoints/`` is downloaded from the repo and compared
byte-for-byte with the local ``checkpoints/step_100/config.json``.
"""
import os

from huggingface_hub import HfApi, upload_folder, hf_hub_download

root = os.getcwd()
with open('hf_token.txt') as token_file:
    token = token_file.read().strip()
api = HfApi()
account = api.whoami(token=token)
username = account.get('name') or account.get('login')
repo_name = 'MyAwesomeModel-CleanupRelease'
repo_id = f"{username}/{repo_name}"

# Make sure the target repository exists before uploading.
api.create_repo(repo_id=repo_id, token=token, exist_ok=True)
print('Repo ready:', repo_id)

# Upload everything except swap files and the access token.  A failed upload
# is reported but does not stop the verification below.
excluded = ['*.swp', 'hf_token.txt']
try:
    upload_folder(
        repo_id=repo_id,
        folder_path=root,
        path_in_repo='.',
        token=token,
        repo_type='model',
        ignore_patterns=excluded,
    )
    print('Upload completed')
except Exception as err:
    print('Upload failed:', err)

# Report what is in the repo now, and any swap files among it.
files = api.list_repo_files(repo_id=repo_id, token=token)
print('Repo files count:', len(files))
swp_files = [name for name in files if name.endswith('.swp')]
print('.swp files in repo:', swp_files)

# The step_100 config is the byte-level reference.
checkpoints_dir = os.path.join(root, 'checkpoints')
with open(os.path.join(checkpoints_dir, 'step_100', 'config.json'), 'rb') as canon_file:
    canonical = canon_file.read()

# Compare each uploaded config with the reference, in directory-walk order.
mismatches = []
for dirpath, _dirnames, filenames in os.walk(checkpoints_dir):
    if 'config.json' not in filenames:
        continue
    rel = os.path.relpath(os.path.join(dirpath, 'config.json'), root)
    try:
        cached = hf_hub_download(repo_id=repo_id, filename=rel, token=token, repo_type='model')
    except Exception as err:
        mismatches.append((rel, 'download_error', str(err)))
        continue
    with open(cached, 'rb') as fh:
        payload = fh.read()
    if payload != canonical:
        mismatches.append((rel, 'byte_mismatch', len(payload)))

print('mismatches after upload:', mismatches)
.python_tmp/bf6a9154-6953-4e1f-85eb-c9a5fb22d8d2.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
"""Print the call signatures of the huggingface_hub helpers used for uploading."""
import inspect

from huggingface_hub import upload_folder, HfApi

print('upload_folder signature:')
print(inspect.signature(upload_folder))

print('\nHfApi.create_repo signature:')
print(inspect.signature(HfApi.create_repo))

# The method form may be missing from the installed version, so probe for it
# before asking for its signature.
has_method = hasattr(HfApi, 'upload_folder')
print('\nHfApi.upload_folder exists?', has_method)
if has_method:
    print('HfApi.upload_folder signature:', inspect.signature(HfApi.upload_folder))
checkpoints/step_200/config.json CHANGED
@@ -1,6 +1,4 @@
1
  {
2
  "model_type": "bert",
3
- "architectures": [
4
- "BertModel"
5
- ]
6
- }
 
1
  {
2
  "model_type": "bert",
3
+ "architectures": ["BertModel"]
4
+ }
 
 
checkpoints/step_300/config.json CHANGED
@@ -1,6 +1,4 @@
1
  {
2
  "model_type": "bert",
3
- "architectures": [
4
- "BertModel"
5
- ]
6
- }
 
1
  {
2
  "model_type": "bert",
3
+ "architectures": ["BertModel"]
4
+ }