|
|
|
|
|
""" |
|
|
Script to reconstruct the original model file from shards |
|
|
""" |
|
|
import json |
|
|
import hashlib |
|
|
from pathlib import Path |
|
|
|
|
|
def _copy_shard(shard_path, f_out, chunk_size=8 * 1024 * 1024):
    """Append one shard to *f_out* in fixed-size chunks.

    Streaming keeps memory use bounded by ``chunk_size`` instead of the
    size of the shard.

    Returns:
        A ``(sha256_hexdigest, bytes_copied)`` tuple for the shard.
    """
    digest = hashlib.sha256()
    copied = 0
    with open(shard_path, 'rb') as f_in:
        while True:
            chunk = f_in.read(chunk_size)
            if not chunk:
                break
            digest.update(chunk)
            f_out.write(chunk)
            copied += len(chunk)
    return digest.hexdigest(), copied


def reconstruct_file(shards_dir="."):
    """Rebuild the original file from its shards, verifying each shard.

    Looks for a ``*.shards.json`` metadata file in ``shards_dir`` and reads
    the keys ``original_file``, ``file_size``, ``num_shards`` and ``shards``
    from it; every shard entry supplies ``filename``, ``index`` and
    ``sha256``. Shards are concatenated in the order they are listed.

    The result is written to the path stored in ``original_file`` (a
    relative path is resolved against the current working directory, not
    ``shards_dir``). Data is first assembled in a sibling ``.partial`` file
    and only moved into place once every shard hash and the total size have
    been verified, so a failed run never leaves a truncated output behind
    and never clobbers an existing file.

    Args:
        shards_dir: Directory containing the metadata file and the shards.

    Returns:
        True when the file was reconstructed and verified; False when the
        metadata file or a shard is missing, a shard hash does not match,
        or the assembled size differs from ``file_size``.
    """
    shards_dir = Path(shards_dir)

    # Sorted so the choice is deterministic if several metadata files exist.
    metadata_files = sorted(shards_dir.glob("*.shards.json"))
    if not metadata_files:
        print("Error: No shards metadata file found")
        return False

    metadata_path = metadata_files[0]
    print(f"Loading metadata: {metadata_path}")

    with open(metadata_path, 'r', encoding='utf-8') as f:
        metadata = json.load(f)

    output_file = metadata["original_file"]
    print(f"Reconstructing: {output_file}")
    print(f" Expected size: {metadata['file_size'] / (1024**3):.2f} GB")
    print(f" Number of shards: {metadata['num_shards']}")

    shards = metadata["shards"]

    # Check for missing shards before creating any file on disk.
    for shard_info in shards:
        shard_path = shards_dir / shard_info["filename"]
        if not shard_path.exists():
            print(f"Error: Shard not found: {shard_path}")
            return False

    output_path = Path(output_file)
    partial_path = output_path.with_name(output_path.name + ".partial")
    total_bytes = 0
    completed = False
    try:
        with open(partial_path, 'wb') as f_out:
            for shard_info in shards:
                shard_path = shards_dir / shard_info["filename"]
                print(f" Processing shard {shard_info['index'] + 1}/{metadata['num_shards']}: {shard_info['filename']}")

                chunk_hash, copied = _copy_shard(shard_path, f_out)
                if chunk_hash != shard_info["sha256"]:
                    print(f"Error: Hash mismatch for {shard_info['filename']}")
                    print(f" Expected: {shard_info['sha256']}")
                    print(f" Got: {chunk_hash}")
                    return False

                total_bytes += copied

        # Per-shard hashes cannot detect a shard missing from the list.
        if total_bytes != metadata["file_size"]:
            print(f"Error: Size mismatch for {output_file}")
            print(f" Expected: {metadata['file_size']} bytes")
            print(f" Got: {total_bytes} bytes")
            return False

        # Publish the verified file in a single step.
        partial_path.replace(output_path)
        completed = True
    finally:
        # Runs on early returns and on exceptions alike.
        if not completed and partial_path.exists():
            partial_path.unlink()

    print(f"\n✓ Reconstruction complete: {output_file}")
    return True
|
|
|
|
|
if __name__ == "__main__":
    import sys

    # The optional first command-line argument is the shards directory;
    # it defaults to the current directory.
    shards_dir = sys.argv[1] if len(sys.argv) > 1 else "."
    success = reconstruct_file(shards_dir)

    # sys.exit rather than the bare exit() helper: the latter is injected
    # by the site module for interactive use and is not always available.
    sys.exit(0 if success else 1)
|
|
|