CheckpointCleanup-Release / .python_tmp /cfe9ba7d-1be2-4ef9-8a9f-b4106fadfff6.py
FuryAssassin's picture
Upload folder using huggingface_hub
f09153a verified
import os, re, json, hashlib, datetime, sys
from pathlib import Path
# Locate the checkpoint root; without it there is nothing to clean up.
root = Path('checkpoints')
if not root.exists():
    print('checkpoints directory not found')
    sys.exit(1)

# Gather every sub-directory named step_<digits>, ordered by the numeric
# step so that e.g. step_10 sorts after step_9.
step_dirs = sorted(
    (
        entry
        for entry in root.iterdir()
        if entry.is_dir() and re.match(r'step_\d+$', entry.name)
    ),
    key=lambda entry: int(entry.name.split('_')[1]),
)
print(f'Found {len(step_dirs)} step dirs')
# Group the step directories by the content of their config.json.
#   configs: md5 hex digest -> {'norm': normalized text,
#                               'steps': [(step number, step dir), ...]}
#   raw_map: step dir -> {'hash': digest, 'path': path to its config.json}
# The digest is used purely as a grouping key for identical configs.
configs = {}
raw_map = {}
for d in step_dirs:
    cfg = d / 'config.json'
    if not cfg.exists():
        print(f'No config in {d}')
        continue
    try:
        # read_bytes() closes the file for us, and json.loads() on bytes
        # applies JSON's own encoding detection instead of the locale's.
        obj = json.loads(cfg.read_bytes())
        # Canonical form: sorted keys, no insignificant whitespace, so that
        # formatting differences do not produce different hashes.
        norm = json.dumps(obj, sort_keys=True, separators=(',', ':'))
    except (ValueError, RecursionError):
        # Not parseable as JSON (JSONDecodeError and UnicodeDecodeError are
        # both ValueError): fall back to whitespace-normalized raw text.
        txt = cfg.read_text()
        norm = '\n'.join(line.strip() for line in txt.splitlines() if line.strip())
    h = hashlib.md5(norm.encode()).hexdigest()
    group = configs.setdefault(h, {'norm': norm, 'steps': []})
    group['steps'].append((int(d.name.split('_')[1]), d))
    raw_map[d] = {'hash': h, 'path': cfg}
# Report which step numbers share each distinct config hash, in ascending
# step order.
print('Groups:')
for digest, group in configs.items():
    step_numbers = [number for number, _ in group['steps']]
    step_numbers.sort()
    print(digest, '->', step_numbers)
# For each group of identical configs, keep config.json in the highest step
# and replace it in every other step with a config_pointer.txt that names the
# canonical copy. Only config.json is ever removed; pytorch_model.bin is never
# touched.
#
# Timestamp is naive-UTC ISO-8601 with a trailing 'Z'. datetime.utcnow() is
# deprecated, so take an aware UTC time and drop tzinfo to keep that format.
now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat() + 'Z'
for h, v in configs.items():
    steps = v['steps']
    max_step, max_dir = max(steps, key=lambda x: x[0])
    print(f'Canonical for hash {h} is step_{max_step}')
    for s, d in steps:
        cfg = d / 'config.json'
        if s == max_step:
            print(f'Keeping canonical config in {d}')
            continue
        # remove config.json but DO NOT delete pytorch_model.bin
        try:
            cfg.unlink()
        except OSError as e:
            # The config is still in place, so do not write a pointer file
            # claiming it was consolidated.
            print('Failed to remove', cfg, e)
            continue
        print(f'Removed {cfg}')
        # create pointer file; the path is relative to this step directory
        pointer = d / 'config_pointer.txt'
        rel = os.path.relpath(max_dir / 'config.json', d)
        content = (
            f"This config was consolidated during repository cleanup on {now}.\n"
            f"Canonical config retained at: {rel}\n"
            f"Original step: step_{s}\n"
            f"Canonical step: step_{max_step}\n"
            f"MD5: {h}\n"
        )
        pointer.write_text(content, encoding='utf-8')
        print(f'Wrote pointer {pointer}')
# Update README.md: insert a short note right after the heading
# '## 4. How to Run Locally', or append it at the end when that heading is
# missing. The file is read and written as UTF-8 explicitly because the note
# contains a non-ASCII dash, and the note is not inserted a second time if an
# earlier run already added it.
readme = Path('README.md')
if readme.exists():
    txt = readme.read_text(encoding='utf-8')
    insert_after = '## 4. How to Run Locally'
    note = '\n\n> Note: Checkpoints cleanup — duplicate config.json files across checkpoints have been consolidated. For each unique config, only the highest-numbered step retains the canonical config; other steps now contain a pointer file. No pytorch_model.bin files were removed.\n'
    if note.strip() in txt:
        # Re-running the cleanup must not stack duplicate notes.
        print('README.md already contains cleanup note; leaving it unchanged')
    elif insert_after in txt:
        # Split on the first occurrence only, so the note lands directly
        # below the first matching heading.
        head, marker, tail = txt.partition(insert_after)
        readme.write_text(head + marker + note + tail, encoding='utf-8')
        print('Updated README.md with cleanup note')
    else:
        print('Could not find section to insert note; appending at end')
        readme.write_text(txt + '\n\n' + note, encoding='utf-8')
else:
    print('README.md not found')
print('Done')