Spaces:
Running
Running
| """Package torn pieces + stitching ground-truth into a downloadable ZIP. | |
| Layout inside the archive: | |
| pieces/page_0001/piece_000.png ... | |
| manifest.json # global summary + per-piece placement (x, y, w, h) | |
| README.txt # how to reassemble | |
| The manifest IS the dataset label: each piece's (x, y) offset on its page is the | |
| exact stitching target. Reassembling = paste every piece at its offset. | |
| """ | |
| from __future__ import annotations | |
| import io | |
| import json | |
| import zipfile | |
| from datetime import datetime, timezone | |
| from .optimizer import encode_piece | |
| from .tearing import TornPage | |
def build_zip(
    pages: list[TornPage],
    *,
    source_name: str,
    dpi: int,
    noise_strength: float,
    noise_scale: float,
    lossy: bool,
) -> tuple[bytes, dict]:
    """Bundle every torn piece and its placement labels into one ZIP archive.

    Piece images are written as ``pieces/page_NNNN/piece_KKK.png`` (folders are
    numbered from 1, pieces from 0), followed by ``manifest.json`` and
    ``README.txt``.

    Args:
        pages: Torn pages. Each one is read for ``width``, ``height``,
            ``adjacency`` and ``pieces``; each piece is read for ``rgb``,
            ``mask``, ``x`` and ``y``.
        source_name: Stored under the manifest's ``source`` key.
        dpi: Stored in the manifest as given.
        noise_strength: Stored in the manifest as given.
        noise_scale: Stored in the manifest as given.
        lossy: Forwarded to ``encode_piece`` and stored in the manifest.

    Returns:
        ``(zip_bytes, manifest)`` where ``manifest`` is the same dict that was
        serialized into ``manifest.json``.
    """
    # Stamp the export before any encoding work starts.
    created_utc = datetime.now(timezone.utc).isoformat()

    page_entries = []
    piece_total = 0
    archive = io.BytesIO()

    with zipfile.ZipFile(
        archive, mode="w", compression=zipfile.ZIP_DEFLATED
    ) as bundle:
        for page_no, torn in enumerate(pages, start=1):
            folder = f"pieces/page_{page_no:04d}"
            records = []
            entry = {
                # The manifest index is 0-based; only the folder name is 1-based.
                "index": page_no - 1,
                "width": torn.width,
                "height": torn.height,
                # Undirected (i, j) piece-index pairs whose fragments share a
                # torn border: the positive pairs for pairwise/graph stitching
                # models. Any pair not listed counts as a negative.
                "adjacency": [[int(a), int(b)] for a, b in torn.adjacency],
                "pieces": records,
            }

            for piece_no, fragment in enumerate(torn.pieces):
                member = f"{folder}/piece_{piece_no:03d}.png"
                bundle.writestr(member, encode_piece(fragment.rgb, lossy=lossy))
                # mask.shape is (rows, cols), i.e. (height, width).
                rows, cols = fragment.mask.shape
                records.append(
                    {
                        "file": member,
                        "x": fragment.x,
                        "y": fragment.y,
                        "w": cols,
                        "h": rows,
                    }
                )

            piece_total += len(torn.pieces)
            page_entries.append(entry)

        manifest = {
            "generator": "Dataset-Maker",
            "created_utc": created_utc,
            "source": source_name,
            "dpi": dpi,
            "noise_strength": noise_strength,
            "noise_scale": noise_scale,
            "lossy": lossy,
            "pages": page_entries,
            "total_pieces": piece_total,
        }
        bundle.writestr("manifest.json", json.dumps(manifest, indent=2))
        bundle.writestr("README.txt", _README)

    return archive.getvalue(), manifest
# Text written verbatim as README.txt into every archive produced by build_zip.
# The indented example is meant to be copy-paste runnable from the root of the
# extracted archive (it needs Pillow).
_README = """Dataset-Maker export
=====================

Each page was torn into NON-OVERLAPPING fragments (a strict partition: every
pixel belongs to exactly one piece). Fragments sit on a black background.

Each page also carries `adjacency`: a list of [i, j] piece-index pairs that
share a torn border (4-connectivity, undirected, i < j). Use as positive pairs
for pairwise/graph-based stitching models; any pair not listed is a negative.

To reassemble a page (stitching ground truth), run this from the folder that
contains manifest.json:

    import json
    from PIL import Image, ImageChops

    with open("manifest.json") as f:
        m = json.load(f)
    for page in m["pages"]:
        # A new RGB canvas is black, same as the fragments' background.
        canvas = Image.new("RGB", (page["width"], page["height"]))
        for p in page["pieces"]:
            piece = Image.open(p["file"]).convert("RGB")
            x, y = p["x"], p["y"]
            box = (x, y, x + piece.width, y + piece.height)
            # Pieces never overlap and are black outside the fragment, so a
            # per-pixel max keeps exactly each fragment's own pixels.
            merged = ImageChops.lighter(canvas.crop(box), piece)
            canvas.paste(merged, box)
        canvas.save(f"reassembled_{page['index']}.png")
"""