| |
| """Re-encode unified-clip images from PNG to JPEG@95 in place (reclaims disk). |
| |
| python scripts/reencode_jpeg.py --root data/unified/av2 --workers 20 |
| """ |
|
|
| import argparse, glob, os |
| from multiprocessing import Pool |
|
|
| import imageio.v2 as imageio |
|
|
|
|
def _conv(png):
    """Re-encode one PNG as a quality-95 JPEG next to it, then delete the PNG.

    The JPEG is first written to a temporary sibling file and only moved to
    its final ``<stem>.jpg`` name once the write has finished, so a failed or
    interrupted encode does not leave a truncated ``.jpg`` at the final path.
    The source PNG is removed only after the JPEG is in place.

    Args:
        png: Path of the PNG file to convert.

    Returns:
        The size in bytes of the written JPEG on success, or a string of the
        form ``"ERR <png>: <exception>"`` on any failure.
    """
    stem = os.path.splitext(png)[0]
    jpg = stem + ".jpg"
    # The temp name keeps the ".jpg" suffix so imageio still selects the JPEG
    # writer from the file extension.
    tmp = stem + ".tmp.jpg"
    try:
        arr = imageio.imread(png)
        imageio.imwrite(tmp, arr, quality=95)
        os.replace(tmp, jpg)
        # Read the size before deleting the source so that a failure here
        # still leaves the original PNG on disk.
        size = os.path.getsize(jpg)
        os.remove(png)
        return size
    except Exception as e:
        # Deliberately broad: failures are reported to the caller as a string
        # instead of being raised out of the worker.
        try:
            os.remove(tmp)
        except OSError:
            # Nothing to clean up (temp file was never created or is gone).
            pass
        return f"ERR {png}: {e}"
|
|
|
|
def main():
    """Convert every ``images/*.png`` found under ``--root`` using a worker pool.

    Command-line options:
        --root     Directory searched recursively (required).
        --workers  Number of pool processes (default 20).

    Prints the first five error messages, a progress line every 20000
    processed files, and a final converted/error summary.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--root", required=True)
    parser.add_argument("--workers", type=int, default=20)
    args = parser.parse_args()

    # Any "images" directory at any depth below the root is included.
    pattern = os.path.join(args.root, "**", "images", "*.png")
    pngs = glob.glob(pattern, recursive=True)
    print(f"re-encoding {len(pngs)} PNGs under {args.root} with {args.workers} workers ...", flush=True)

    errs = 0
    with Pool(args.workers) as workers:
        outcomes = workers.imap_unordered(_conv, pngs, chunksize=64)
        for i, outcome in enumerate(outcomes):
            # _conv signals failure by returning a message string.
            failed = isinstance(outcome, str)
            if failed:
                errs += 1
            # Only the first few failures are echoed to keep the log short.
            if failed and errs <= 5:
                print(outcome, flush=True)
            if (i + 1) % 20000 == 0:
                print(f" {i+1}/{len(pngs)}", flush=True)
    print(f"done: {len(pngs)-errs} converted, {errs} errors", flush=True)
|
|
|
|
# Run the conversion only when this file is executed as a script; importing
# the module must not start any work.
if __name__ == "__main__":
    main()
|
|