"""Re-export an existing TorchScript (JIT) model so that it is built on CUDA.

Pipeline:

1. Load the existing JIT file on CPU and read its ``state_dict()``.
2. Import the Python model class given by ``--py_module`` / ``--class_name``
   and instantiate it with ``MODEL_KWARGS``.
3. Load the extracted weights into the Python model.
4. Move the model to ``cuda:0``, switch it to eval mode and either trace it
   with a random integer example input or script it (``--use_script``).
5. Save the resulting JIT module to ``--out``.

Example::

    python <this_script>.py --jit old.pt --out new_cuda.pt \
        --py_module package.module --class_name GPTPyTorch
"""

import argparse
import importlib
import sys
from pathlib import Path
from typing import Optional, Sequence

import torch

# Keyword arguments passed to the model class constructor. Empty by default;
# edit this if the model class needs constructor arguments.
MODEL_KWARGS = {}


def build_parser() -> argparse.ArgumentParser:
    """Return the command-line parser for this script."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--jit", required=True, help="Path to existing JIT model (used to extract state_dict)")
    parser.add_argument("--out", required=True, help="Output path for new JIT model on CUDA")
    parser.add_argument("--py_module", required=False, help="Python import path for model (e.g. jirackkit.src.main.python.gpt2_jit.JiRack_H4_L2_V50257_D768_MSL8192_FF768x4)", default=None)
    parser.add_argument("--class_name", required=False, help="Name of model class in module", default=None)
    parser.add_argument("--seq_len", type=int, default=8, help="Sequence length for example input (short is fine for trace)")
    parser.add_argument("--vocab_size", type=int, default=50257, help="Vocab size for dummy input")
    parser.add_argument("--use_script", action="store_true", help="Use torch.jit.script instead of trace (requires model to be scriptable)")
    return parser


def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Parse ``argv`` (defaults to ``sys.argv[1:]``) into a namespace."""
    return build_parser().parse_args(argv)


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Run the re-export pipeline.

    Args:
        argv: Command-line arguments without the program name. ``None`` means
            ``sys.argv[1:]``.

    Returns:
        Process exit code: ``0`` on success, ``1`` when a precondition fails
        (missing JIT file, missing ``--py_module``/``--class_name``, no CUDA,
        state_dict could not be read, or weights could not be loaded).
    """
    args = parse_args(argv)
    jit_path = Path(args.jit)
    out_path = Path(args.out)

    # --- Cheap precondition checks first, before any expensive model load ---
    if not jit_path.exists():
        print("JIT file not found:", jit_path)
        return 1

    if args.py_module is None or args.class_name is None:
        print("ERROR: You must provide --py_module and --class_name to reconstruct the Python model.")
        print("Example: --py_module jirackkit.src.main.python.gpt2_jit.JiRack_H4_L2_V50257_D768_MSL8192_FF768x4 --class_name GPTPyTorch")
        return 1

    if not torch.cuda.is_available():
        print("CUDA not available on this machine. Aborting.")
        return 1

    # --- 1) Extract the weights from the existing JIT model (on CPU) ---
    print("Loading state_dict from existing JIT (cpu)...")
    jit = torch.jit.load(str(jit_path), map_location="cpu")
    try:
        sd = jit.state_dict()
    except Exception as e:
        print("Failed to obtain state_dict() from JIT:", e)
        # Continuing here would trace and save a model with freshly
        # initialised (random) weights, so stop instead.
        print("Cannot continue without weights; refusing to export an uninitialised model. Exiting.")
        return 1
    print("state_dict keys:", list(sd.keys())[:10], "...")

    # --- 2) Rebuild the Python model ---
    print("Importing Python model:", args.py_module, args.class_name)
    module = importlib.import_module(args.py_module)
    ModelClass = getattr(module, args.class_name)

    print("Instantiating Python model...")
    model = ModelClass(**MODEL_KWARGS)

    # --- 3) Load the extracted weights ---
    try:
        model.load_state_dict(sd)
    except Exception as e:
        print("Failed to load state_dict into Python model:", e)
        print("You may need to adapt keys or load partial weights. Exiting.")
        return 1
    print("Weights loaded into Python model from JIT.state_dict().")

    # --- 4) Move to CUDA and build the example input ---
    device = torch.device("cuda:0")
    model.to(device)
    model.eval()

    # Random token ids of shape (1, seq_len), created directly on the device.
    example_input = torch.randint(
        0, args.vocab_size, (1, args.seq_len), dtype=torch.long, device=device
    )

    # --- 5) Trace or script on CUDA ---
    print("Tracing/script-model on CUDA. This will produce a JIT module whose constants are on CUDA.")
    if args.use_script:
        print("Using torch.jit.script...")
        scripted = torch.jit.script(model)
    else:
        print("Using torch.jit.trace with example input of shape", example_input.shape)
        scripted = torch.jit.trace(model, example_input)

    # --- 6) Save ---
    out_path.parent.mkdir(parents=True, exist_ok=True)
    scripted.save(str(out_path))
    print("Saved new JIT (CUDA) model to:", out_path)
    print("Done. Replace your old model file with this one (keep backup).")
    return 0


if __name__ == "__main__":
    sys.exit(main())