JiRack_empty / source_jit /tools_retrace_to_cuda.py

Upload 16 files

c88fe21 verified 3 months ago

4.43 kB

	# Copyright (c) 2025 CMS Manhattan
	# All rights reserved.
	# Author: Konstantin Vladimirovich Grabko
	# Email: grabko@cmsmanhattan.com
	# Phone: +1(516)777-0945
	#
	# This program is free software: you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation, version 3 of the License.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program. If not, see <https://www.gnu.org/licenses/>.
	#
	# Additional terms:
	# Any commercial use or distribution of this software or derivative works
	# requires explicit written permission from the copyright holder.

	import argparse
	import torch
	from pathlib import Path
	import importlib
	import sys

	# Rebuild a Python model from the weights of an existing TorchScript file,
	# move it to cuda:0, trace (or script) it there, and save the result.
	#
	# Order of work:
	#   0) parse arguments and run the cheap checks (input file, required
	#      flags, CUDA availability) so a bad invocation fails before the
	#      JIT archive is loaded or the model module is imported
	#   1) read the state_dict from the old JIT file on CPU
	#   2) import the model class and instantiate it
	#   3) load the weights (best effort: skipped if step 1 produced none)
	#   4) move the model to CUDA and switch to eval mode
	#   5) build a dummy integer input on CUDA
	#   6) trace or script
	#   7) save and report
	#
	# Exit status is 1 for every failure this script detects itself.

	parser = argparse.ArgumentParser()
	parser.add_argument("--jit", required=True, help="Path to existing JIT model (used to extract state_dict)")
	parser.add_argument("--out", required=True, help="Output path for new JIT model on CUDA")
	parser.add_argument("--py_module", required=False, help="Python import path for model (e.g. jirackkit.src.main.python.gpt2_jit.JiRack_H4_L2_V50257_D768_MSL8192_FF768x4)", default=None)
	parser.add_argument("--class_name", required=False, help="Name of model class in module", default=None)
	parser.add_argument("--seq_len", type=int, default=8, help="Sequence length for example input (short is fine for trace)")
	parser.add_argument("--vocab_size", type=int, default=50257, help="Vocab size for dummy input")
	parser.add_argument("--use_script", action="store_true", help="Use torch.jit.script instead of trace (requires model to be scriptable)")
	args = parser.parse_args()

	# 0) cheap validation first
	jit_path = Path(args.jit)
	out_path = Path(args.out)
	if not jit_path.exists():
	    print("JIT file not found:", jit_path)
	    sys.exit(1)

	# argparse treats these two flags as optional, but the model cannot be
	# reconstructed without both of them.
	if args.py_module is None or args.class_name is None:
	    print("ERROR: You must provide --py_module and --class_name to reconstruct the Python model.")
	    print("Example: --py_module jirackkit.src.main.python.gpt2_jit.JiRack_H4_L2_V50257_D768_MSL8192_FF768x4 --class_name GPTPyTorch")
	    sys.exit(1)

	# The output is produced on cuda:0, so there is nothing useful to do
	# without a CUDA device.
	if not torch.cuda.is_available():
	    print("CUDA not available on this machine. Aborting.")
	    sys.exit(1)
	device = torch.device('cuda:0')

	# 1) load state_dict from existing JIT (safe: load on cpu)
	print("Loading state_dict from existing JIT (cpu)...")
	jit = torch.jit.load(str(jit_path), map_location='cpu')
	try:
	    sd = jit.state_dict()
	except Exception as e:
	    # Best effort: keep going without weights; step 3 and the final
	    # message report that the weights are missing.
	    print("Failed to obtain state_dict() from JIT:", e)
	    sd = None
	else:
	    print("state_dict keys:", list(sd.keys())[:10], "...")

	# 2) Import python module & create model instance
	print("Importing Python model:", args.py_module, args.class_name)
	module = importlib.import_module(args.py_module)
	ModelClass = getattr(module, args.class_name)

	# NOTE: Provide the correct constructor args for your model here if needed.
	MODEL_KWARGS = {}  # <-- EDIT if your model constructor requires arguments

	print("Instantiating Python model...")
	model = ModelClass(**MODEL_KWARGS)

	# 3) load weights if available
	weights_loaded = False
	if sd is not None:
	    try:
	        model.load_state_dict(sd)
	    except Exception as e:
	        print("Failed to load state_dict into Python model:", e)
	        print("You may need to adapt keys or load partial weights. Exiting.")
	        sys.exit(1)
	    weights_loaded = True
	    print("Weights loaded into Python model from JIT.state_dict().")
	else:
	    # Without this warning the run would look identical to a successful
	    # one even though the model still holds only what its constructor
	    # produced.
	    print("WARNING: no weights were recovered from the JIT file; the exported model keeps the parameters created by its constructor.")

	# 4) move to cuda
	model.to(device)
	model.eval()

	# 5) prepare example input on CUDA (batch=1)
	seq_len = args.seq_len
	vocab = args.vocab_size
	example_input = torch.randint(0, vocab, (1, seq_len), dtype=torch.long, device=device)

	# 6) trace or script
	print("Tracing/script-model on CUDA. This will produce a JIT module whose constants are on CUDA.")
	if args.use_script:
	    print("Using torch.jit.script...")
	    scripted = torch.jit.script(model)
	else:
	    print("Using torch.jit.trace with example input of shape", example_input.shape)
	    scripted = torch.jit.trace(model, example_input)

	# 7) save
	out_path.parent.mkdir(parents=True, exist_ok=True)
	scripted.save(str(out_path))
	print("Saved new JIT (CUDA) model to:", out_path)
	if weights_loaded:
	    print("Done. Replace your old model file with this one (keep backup).")
	else:
	    # Do not advise replacing the original with a model that never
	    # received its weights.
	    print("Done, but the saved model does NOT contain the original weights; do not replace your old model file with it.")