Upload folder using huggingface_hub
Browse files
- .python_tmp/05ef2759-9606-4dbe-8aab-d60dc9b9acec.py +61 -0
- .python_tmp/28a4b2a5-22d6-4929-a9c6-bb8ee74c9abe.py +44 -0
- .python_tmp/40c80db1-8de5-4cb2-8f64-de819548ed08.py +142 -0
- .python_tmp/54bed8a4-2e49-4458-983f-45b2d078a179.py +51 -0
- .python_tmp/bf6a9154-6953-4e1f-85eb-c9a5fb22d8d2.py +9 -0
- checkpoints/step_200/config.json +2 -4
- checkpoints/step_300/config.json +2 -4
.python_tmp/05ef2759-9606-4dbe-8aab-d60dc9b9acec.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Post-upload verification script.
#
# Downloads every config.json found under checkpoints/ from the Hub repo and
# compares it byte-for-byte with the local checkpoints/step_100/config.json,
# lists any .swp files present in the repo, reports whether the local
# evaluation/.setup.py.swp still exists, and finally prints formatting
# details (repr, text, line count, indentation, trailing newline) of the
# canonical file.
from huggingface_hub import HfApi, hf_hub_download
import json
import os
import re

workdir = os.getcwd()
hub = HfApi()

# The access token is read from hf_token.txt in the current working directory.
with open('hf_token.txt') as token_file:
    token = token_file.read().strip()

account = hub.whoami(token=token)
owner = account.get('name') or account.get('login')
repo_id = f"{owner}/MyAwesomeModel-CleanupRelease"

# Reference bytes: the local step_100 config.
reference_path = os.path.join(workdir, 'checkpoints', 'step_100', 'config.json')
with open(reference_path, 'rb') as fh:
    reference = fh.read()

# Every config.json below checkpoints/, in sorted path order.
config_files = sorted(
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk(os.path.join(workdir, 'checkpoints'))
    for name in names
    if name == 'config.json'
)

# Each entry is (repo path, problem kind, detail); a failed download is
# recorded as a problem rather than aborting the run.
problems = []
for local_path in config_files:
    repo_path = os.path.relpath(local_path, workdir)
    try:
        fetched = hf_hub_download(
            repo_id=repo_id,
            filename=repo_path,
            token=token,
            repo_type='model',
        )
    except Exception as exc:
        problems.append((repo_path, 'download_error', str(exc)))
    else:
        with open(fetched, 'rb') as fh:
            remote_bytes = fh.read()
        if remote_bytes != reference:
            problems.append((repo_path, 'byte_mismatch', len(remote_bytes)))

# Swap files that made it into the repo.
repo_listing = hub.list_repo_files(repo_id=repo_id, token=token)
repo_swaps = [entry for entry in repo_listing if entry.endswith('.swp')]

# Is the editor swap file still present locally?
swap_still_there = os.path.exists(
    os.path.join(workdir, 'evaluation', '.setup.py.swp')
)

print('local_swap_exists', swap_still_there)
print('repo_swp_files', repo_swaps)
print('mismatches', problems)

# Formatting details of the canonical file.
text = reference.decode('utf-8')
text_lines = text.splitlines()
print('Canonical raw repr:', repr(reference))
print('Canonical text:\n', text)
print('Line count', len(text_lines))
# The leading whitespace of the second line shows the indentation in use.
if len(text_lines) >= 2:
    leading = re.match(r"^(\s*)", text_lines[1])
    print('Indentation chars on line 2:', repr(leading.group(1)))
# Trailing newline?
print('Ends with newline?', reference.endswith(b'\n'))
.python_tmp/28a4b2a5-22d6-4929-a9c6-bb8ee74c9abe.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Create the Hub repo, upload the working directory, list the repo files and
# download one config back as a spot check. Every step is best-effort: a
# failure is printed and the script moves on to the next step.
from huggingface_hub import HfApi, upload_folder, hf_hub_download
import os

root = os.getcwd()
with open('hf_token.txt') as f:
    token = f.read().strip()
api = HfApi()
who = api.whoami(token=token)
username = who.get('name') or who.get('login')
repo_name = 'MyAwesomeModel-CleanupRelease'
repo_id = f"{username}/{repo_name}"

# create repo with correct signature
try:
    api.create_repo(repo_id=repo_id, token=token, exist_ok=True)
    print('create_repo OK')
except Exception as e:
    print('create_repo error', e)

# upload using upload_folder signature
# hf_token.txt lives in the directory being uploaded (it is opened relative to
# the same working directory above), so it must be excluded or the access
# token ends up in the repo.
# NOTE(review): everything else under the working directory is still
# uploaded, including scratch directories - confirm that is intended.
try:
    upload_folder(
        repo_id=repo_id,
        folder_path=root,
        path_in_repo='.',
        token=token,
        repo_type='model',
        ignore_patterns=['*.swp', 'hf_token.txt'],
    )
    print('upload_folder OK')
except Exception as e:
    print('upload_folder error', e)

# list files
try:
    files = api.list_repo_files(repo_id=repo_id, token=token)
    print('files count', len(files))
    swp_files = [name for name in files if name.endswith('.swp')]
    print('.swp files in repo:', swp_files)
except Exception as e:
    print('list_repo_files error', e)

# download one config to verify
rel = os.path.relpath(os.path.join(root, 'checkpoints', 'step_100', 'config.json'), root)
print('rel', rel)
try:
    dl = hf_hub_download(repo_id=repo_id, filename=rel, token=token, repo_type='model')
    print('downloaded to', dl)
    with open(dl, 'rb') as f:
        b = f.read()
    print('size', len(b))
except Exception as e:
    print('download error', e)
.python_tmp/40c80db1-8de5-4cb2-8f64-de819548ed08.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Cleanup-and-release script.
#
# 1. Finds every config.json under checkpoints/ and takes the most common
#    byte content as the canonical form.
# 2. Rewrites files whose bytes differ but whose parsed JSON equals the
#    canonical JSON; files that fail to parse or differ in content are left
#    untouched and reported.
# 3. Removes evaluation/.setup.py.swp if present.
# 4. Creates the Hub repo, uploads the working directory, and downloads each
#    config back to compare it with the canonical bytes.
import os, sys, json
from collections import Counter

root = os.getcwd()

# find config files
config_paths = []
for dirpath, dirnames, filenames in os.walk(os.path.join(root, 'checkpoints')):
    for fn in filenames:
        if fn == 'config.json':
            config_paths.append(os.path.join(dirpath, fn))
config_paths.sort()
print('Found', len(config_paths), 'config.json files')

# read bytes
bytes_map = {}
for p in config_paths:
    with open(p, 'rb') as f:
        bytes_map[p] = f.read()

# determine most common bytes
counter = Counter(bytes_map.values())
most_common = counter.most_common(1)
if not most_common:
    print('No config files found, exiting')
    sys.exit(1)
canonical_bytes, count = most_common[0]
print('Canonical bytes occur', count, 'times out of', len(config_paths))

# show canonical as repr
print('Canonical bytes repr:\n', canonical_bytes)

# rewrite outliers after validating JSON content identical
rewritten = []
mismatched_content = []
for p, b in bytes_map.items():
    if b == canonical_bytes:
        continue
    # Only formatting may change: the parsed JSON must equal the canonical
    # JSON before the file is overwritten.
    try:
        js_current = json.loads(b.decode('utf-8'))
        js_canonical = json.loads(canonical_bytes.decode('utf-8'))
    except Exception as e:
        print('Failed to parse JSON for', p, 'err', e)
        mismatched_content.append(p)
        continue
    if js_current != js_canonical:
        print('JSON content differs for', p)
        mismatched_content.append(p)
        continue
    # write canonical bytes
    with open(p, 'wb') as f:
        f.write(canonical_bytes)
    rewritten.append(p)

print('Rewrote', len(rewritten), 'files')
if mismatched_content:
    print('Files with mismatched JSON content:', mismatched_content)

# remove swap file if exists
swap = os.path.join(root, 'evaluation', '.setup.py.swp')
if os.path.exists(swap):
    os.remove(swap)
    print('Removed swap file', swap)
else:
    print('No swap file found at', swap)

# Now upload to Hugging Face
try:
    from huggingface_hub import HfApi, upload_folder, hf_hub_download
except Exception as e:
    print('huggingface_hub not installed:', e)
    raise

# read token
token_path = os.path.join(root, 'hf_token.txt')
if not os.path.exists(token_path):
    print('hf_token.txt not found')
    sys.exit(1)
with open(token_path, 'r') as f:
    token = f.read().strip()
api = HfApi()
who = api.whoami(token=token)
username = who.get('name') or who.get('login')
if not username:
    print('Failed to get username from token whoami:', who)
    sys.exit(1)
repo_name = 'MyAwesomeModel-CleanupRelease'
repo_id = f"{username}/{repo_name}"
print('Repo id:', repo_id)

# create repo if needed
# The repository is identified with repo_id; passing name= made this call
# fail, and the error was swallowed by the except below.
try:
    api.create_repo(repo_id=repo_id, token=token, exist_ok=True)
    print('Created or found repo')
except Exception as e:
    print('create_repo error (continuing if exists):', e)

# upload folder
# The local directory is passed as folder_path (not local_dir), and
# hf_token.txt is excluded because it sits in the uploaded directory and
# would otherwise publish the access token.
# NOTE(review): everything else under the working directory is still
# uploaded, including scratch directories - confirm that is intended.
print('Uploading folder... this may take a while')
upload_folder(
    repo_id=repo_id,
    repo_type='model',
    folder_path=root,
    path_in_repo='.',
    token=token,
    ignore_patterns=['*.swp', 'hf_token.txt'],
)
print('Upload finished')

# verify
files = api.list_repo_files(repo_id=repo_id, token=token)
print('Repo files count:', len(files))
# check .swp absence
swp_present = any(name.endswith('.swp') for name in files)
print('.swp present in repo files?', swp_present)

# For each checkpoints config, download and compare bytes
mismatches_repo = []
for p in config_paths:
    # compute repo path relative
    rel = os.path.relpath(p, root)
    # hf_hub_download expects repo_id and filename
    try:
        local_dl = hf_hub_download(repo_id=repo_id, filename=rel, token=token, repo_type='model')
    except Exception as e:
        print('Failed to download', rel, 'err', e)
        mismatches_repo.append(rel)
        continue
    with open(local_dl, 'rb') as f:
        b = f.read()
    if b != canonical_bytes:
        mismatches_repo.append(rel)

print('Mismatched files in repo:', mismatches_repo)

# Output summary
print('SUMMARY')
print('canonical_bytes_count=', count)
print('rewritten=', rewritten)
print('swap_removed=', not os.path.exists(swap))
print('swp_present_in_repo=', swp_present)
print('mismatched_in_repo=', mismatches_repo)
.python_tmp/54bed8a4-2e49-4458-983f-45b2d078a179.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Upload the working directory to the Hub (skipping swap files and the token
# file), then download every checkpoints/**/config.json back and compare it
# byte-for-byte with the local checkpoints/step_100/config.json.
from huggingface_hub import HfApi, upload_folder, hf_hub_download
import os

workdir = os.getcwd()
with open('hf_token.txt') as token_file:
    token = token_file.read().strip()

hub = HfApi()
account = hub.whoami(token=token)
owner = account.get('name') or account.get('login')
repo_name = 'MyAwesomeModel-CleanupRelease'
repo_id = f"{owner}/{repo_name}"

# Make sure the repo exists; an error here is not caught and stops the script.
hub.create_repo(repo_id=repo_id, token=token, exist_ok=True)
print('Repo ready:', repo_id)

# Upload everything except *.swp and hf_token.txt. A failed upload is only
# reported; the checks below still run against whatever is in the repo.
try:
    upload_folder(
        repo_id=repo_id,
        folder_path=workdir,
        path_in_repo='.',
        token=token,
        repo_type='model',
        ignore_patterns=['*.swp', 'hf_token.txt'],
    )
except Exception as exc:
    print('Upload failed:', exc)
else:
    print('Upload completed')

# What is in the repo now, and did any swap file slip through?
repo_listing = hub.list_repo_files(repo_id=repo_id, token=token)
print('Repo files count:', len(repo_listing))
repo_swaps = [entry for entry in repo_listing if entry.endswith('.swp')]
print('.swp files in repo:', repo_swaps)

# Reference bytes come from the local step_100 config.
reference_path = os.path.join(workdir, 'checkpoints', 'step_100', 'config.json')
with open(reference_path, 'rb') as fh:
    reference = fh.read()

# Check each config in os.walk order; entries are (repo path, kind, detail).
problems = []
for folder, _subdirs, names in os.walk(os.path.join(workdir, 'checkpoints')):
    if 'config.json' not in names:
        continue
    repo_path = os.path.relpath(os.path.join(folder, 'config.json'), workdir)
    try:
        fetched = hf_hub_download(
            repo_id=repo_id,
            filename=repo_path,
            token=token,
            repo_type='model',
        )
    except Exception as exc:
        problems.append((repo_path, 'download_error', str(exc)))
        continue
    with open(fetched, 'rb') as fh:
        remote_bytes = fh.read()
    if remote_bytes != reference:
        problems.append((repo_path, 'byte_mismatch', len(remote_bytes)))

print('mismatches after upload:', problems)
.python_tmp/bf6a9154-6953-4e1f-85eb-c9a5fb22d8d2.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Print the call signatures of the huggingface_hub entry points used by the
# upload scripts, so the correct keyword arguments can be read off.
import inspect
from huggingface_hub import upload_folder, HfApi

print('upload_folder signature:')
print(inspect.signature(upload_folder))

print('\nHfApi.create_repo signature:')
print(inspect.signature(HfApi.create_repo))

# HfApi.upload_folder is only inspected when the attribute exists.
has_method = hasattr(HfApi, 'upload_folder')
print('\nHfApi.upload_folder exists?', has_method)
if has_method:
    method_signature = inspect.signature(HfApi.upload_folder)
    print('HfApi.upload_folder signature:', method_signature)
checkpoints/step_200/config.json
CHANGED
|
@@ -1,6 +1,4 @@
|
|
| 1 |
{
|
| 2 |
"model_type": "bert",
|
| 3 |
-
"architectures": [
|
| 4 |
-
|
| 5 |
-
]
|
| 6 |
-
}
|
|
|
|
| 1 |
{
|
| 2 |
"model_type": "bert",
|
| 3 |
+
"architectures": ["BertModel"]
|
| 4 |
+
}
|
|
|
|
|
|
checkpoints/step_300/config.json
CHANGED
|
@@ -1,6 +1,4 @@
|
|
| 1 |
{
|
| 2 |
"model_type": "bert",
|
| 3 |
-
"architectures": [
|
| 4 |
-
|
| 5 |
-
]
|
| 6 |
-
}
|
|
|
|
| 1 |
{
|
| 2 |
"model_type": "bert",
|
| 3 |
+
"architectures": ["BertModel"]
|
| 4 |
+
}
|
|
|
|
|
|