#!/usr/bin/env python3
"""Re-encode unified-clip images from PNG to JPEG@95 in place (reclaims disk).

Each PNG is deleted once its JPEG has been written; if a conversion fails,
the PNG is kept.

Usage:
    python scripts/reencode_jpeg.py --root data/unified/av2 --workers 20
"""
import argparse, glob, os
from multiprocessing import Pool
import imageio.v2 as imageio
def _conv(png):
try:
arr = imageio.imread(png)
jpg = png[:-4] + ".jpg"
imageio.imwrite(jpg, arr, quality=95)
os.remove(png)
return os.path.getsize(jpg)
except Exception as e: # leave the PNG if anything goes wrong
return f"ERR {png}: {e}"
def main():
    """Convert every ``**/images/*.png`` file under ``--root`` in parallel.

    Command-line options:
        --root     Directory to search recursively (required).
        --workers  Number of worker processes (default 20).

    Prints a start banner, the first five error messages, a progress line
    after every 20000 completed files, and a final converted/error summary.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--root", required=True)
    parser.add_argument("--workers", type=int, default=20)
    opts = parser.parse_args()

    pattern = os.path.join(opts.root, "**", "images", "*.png")
    targets = glob.glob(pattern, recursive=True)
    total = len(targets)
    print(f"re-encoding {total} PNGs under {opts.root} with {opts.workers} workers ...", flush=True)

    failures = 0
    with Pool(opts.workers) as workers:
        outcomes = workers.imap_unordered(_conv, targets, chunksize=64)
        for done, outcome in enumerate(outcomes, start=1):
            # _conv reports a failure as a string and a success as an int.
            if isinstance(outcome, str):
                failures += 1
                # Only the first few errors are printed to keep the log short.
                if failures <= 5:
                    print(outcome, flush=True)
            if done % 20000 == 0:
                print(f" {done}/{total}", flush=True)

    print(f"done: {total - failures} converted, {failures} errors", flush=True)
# Run the conversion only when this file is executed as a script, so that
# importing the module does not start the pool.
if __name__ == "__main__":
    main()
|