File size: 1,379 Bytes
09b2c2d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import argparse
from pathlib import Path

import torch
from huggingface_hub import hf_hub_download

from configuration_f2p_decoder import F2PDecoderConfig
from modeling_f2p_decoder import F2PDecoderModel


def convert(output_dir: str) -> None:
    """Convert the upstream decoder checkpoint into a Hugging Face artifact.

    Downloads ``model.pt`` from the ``nyu-visionx/siglip2_decoder`` Hub
    repository, prefixes every entry name with ``decoder.``, loads the result
    into an ``F2PDecoderModel`` built from a default ``F2PDecoderConfig`` and
    saves the model to ``output_dir`` with safe serialization enabled.

    Args:
        output_dir: Directory the converted model is written to.

    Raises:
        RuntimeError: If, after the tolerated entries are filtered out, the
            model reports missing or unexpected state-dict keys.
    """
    output_path = Path(output_dir)
    checkpoint_path = hf_hub_download("nyu-visionx/siglip2_decoder", "model.pt")

    # The checkpoint is a pickle file fetched over the network. Only a flat
    # name -> tensor mapping is needed, so restrict unpickling to tensors and
    # plain containers instead of allowing arbitrary object construction.
    raw_state_dict = torch.load(
        checkpoint_path,
        map_location="cpu",
        weights_only=True,
    )
    # The checkpoint names are re-rooted under the model's ``decoder`` prefix.
    state_dict = {f"decoder.{key}": value for key, value in raw_state_dict.items()}

    config = F2PDecoderConfig()
    model = F2PDecoderModel(config)

    # strict=False so the mismatch lists can be inspected and filtered here
    # rather than load_state_dict raising on the first discrepancy.
    missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
    # Empty key names are dropped from the unexpected list.
    unexpected_keys = [key for key in unexpected_keys if key]
    # "image_mean" / "image_std" may be absent from the checkpoint without
    # failing the conversion (presumably they are set by the model itself --
    # confirm against F2PDecoderModel).
    tolerated_missing = {"image_mean", "image_std"}
    missing_keys = [key for key in missing_keys if key not in tolerated_missing]
    if missing_keys or unexpected_keys:
        raise RuntimeError(
            "Checkpoint conversion mismatch: "
            f"missing={missing_keys}, unexpected={unexpected_keys}"
        )

    model.save_pretrained(output_path, safe_serialization=True)
    print(f"Saved Hugging Face artifact to {output_path}")


def main() -> None:
    """Read ``--output_dir`` from the command line and run the conversion."""
    cli = argparse.ArgumentParser()
    cli.add_argument("--output_dir", default="hf_artifacts/f2p_decoder")
    options = cli.parse_args()
    convert(options.output_dir)


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()