# Copyright (c) 2025 CMS Manhattan
# All rights reserved.
# Author: Konstantin Vladimirovich Grabko
# Email: grabko@cmsmanhattan.com
# Phone: +1(516)777-0945
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
# Additional terms:
# Any commercial use or distribution of this software or derivative works
# requires explicit written permission from the copyright holder.

import argparse
import torch
from pathlib import Path
import importlib
import sys

# NOTE: Provide the correct constructor args for your model here if needed.
MODEL_KWARGS = {}  # <-- EDIT if your model constructor requires arguments


def build_parser() -> argparse.ArgumentParser:
    """Return the command-line parser for this conversion script."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--jit", required=True, help="Path to existing JIT model (used to extract state_dict)")
    parser.add_argument("--out", required=True, help="Output path for new JIT model on CUDA")
    parser.add_argument("--py_module", required=False, help="Python import path for model (e.g. jirackkit.src.main.python.gpt2_jit.JiRack_H4_L2_V50257_D768_MSL8192_FF768x4)", default=None)
    parser.add_argument("--class_name", required=False, help="Name of model class in module", default=None)
    parser.add_argument("--seq_len", type=int, default=8, help="Sequence length for example input (short is fine for trace)")
    parser.add_argument("--vocab_size", type=int, default=50257, help="Vocab size for dummy input")
    parser.add_argument("--use_script", action="store_true", help="Use torch.jit.script instead of trace (requires model to be scriptable)")
    return parser


def _fail(*lines) -> int:
    """Print each entry of *lines* on its own line to stderr and return exit code 1."""
    for line in lines:
        print(line, file=sys.stderr)
    return 1


def main(argv=None) -> int:
    """Rebuild a TorchScript model on CUDA from an existing JIT file.

    Steps: extract the ``state_dict`` from the JIT file given by ``--jit``
    (loaded on CPU), instantiate the Python class named by ``--py_module`` /
    ``--class_name`` with ``MODEL_KWARGS``, load the weights into it, move it
    to ``cuda:0``, trace (or script) it, and save the result to ``--out``.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        ``0`` on success, ``1`` on any handled failure. Invalid command-line
        syntax still exits through argparse (``SystemExit``).
    """
    args = build_parser().parse_args(argv)

    jit_path = Path(args.jit)
    out_path = Path(args.out)

    # --- Cheap validation first, so we fail before loading any model. ---
    if not jit_path.is_file():
        return _fail(f"JIT file not found: {jit_path}")

    if args.py_module is None or args.class_name is None:
        return _fail(
            "ERROR: You must provide --py_module and --class_name to reconstruct the Python model.",
            "Example: --py_module jirackkit.src.main.python.gpt2_jit.JiRack_H4_L2_V50257_D768_MSL8192_FF768x4 --class_name GPTPyTorch",
        )

    if args.seq_len < 1 or args.vocab_size < 1:
        return _fail(
            f"ERROR: --seq_len and --vocab_size must be positive (got {args.seq_len}, {args.vocab_size})."
        )

    if not torch.cuda.is_available():
        return _fail("CUDA not available on this machine. Aborting.")

    # 1) load state_dict from existing JIT (safe: load on cpu)
    print("Loading state_dict from existing JIT (cpu)...")
    try:
        jit = torch.jit.load(str(jit_path), map_location="cpu")
    except Exception as e:  # top-level CLI boundary: report and exit
        return _fail(f"Failed to load JIT model {jit_path}: {e}")
    try:
        sd = jit.state_dict()
    except Exception as e:
        # Without the weights the exported model would carry freshly
        # initialised parameters, so continuing would silently produce a
        # useless file.
        return _fail(f"Failed to obtain state_dict() from JIT: {e}")
    print("state_dict keys:", list(sd.keys())[:10], "...")

    # 2) Import python module & create model instance
    print("Importing Python model:", args.py_module, args.class_name)
    try:
        module = importlib.import_module(args.py_module)
    except ImportError as e:
        return _fail(f"Failed to import module {args.py_module!r}: {e}")
    model_class = getattr(module, args.class_name, None)
    if model_class is None:
        return _fail(f"Module {args.py_module!r} has no attribute {args.class_name!r}.")

    print("Instantiating Python model...")
    try:
        model = model_class(**MODEL_KWARGS)
    except TypeError as e:
        return _fail(
            f"Failed to instantiate {args.class_name}: {e}",
            "If the constructor requires arguments, edit MODEL_KWARGS in this script.",
        )

    # 3) load weights
    try:
        model.load_state_dict(sd)
    except Exception as e:
        return _fail(
            f"Failed to load state_dict into Python model: {e}",
            "You may need to adapt keys or load partial weights. Exiting.",
        )
    print("Weights loaded into Python model from JIT.state_dict().")

    # 4) move to cuda
    device = torch.device("cuda:0")
    model.to(device)
    model.eval()

    # 5) prepare example input on CUDA (batch=1)
    example_input = torch.randint(
        0, args.vocab_size, (1, args.seq_len), dtype=torch.long, device=device
    )

    # 6) trace or script
    print("Tracing/script-model on CUDA. This will produce a JIT module whose constants are on CUDA.")
    if args.use_script:
        print("Using torch.jit.script...")
        scripted = torch.jit.script(model)
    else:
        print("Using torch.jit.trace with example input of shape", example_input.shape)
        # Gradients are not needed for an export pass.
        with torch.no_grad():
            scripted = torch.jit.trace(model, example_input)

    # 7) save
    out_path.parent.mkdir(parents=True, exist_ok=True)
    scripted.save(str(out_path))
    print("Saved new JIT (CUDA) model to:", out_path)
    print("Done. Replace your old model file with this one (keep backup).")
    return 0


if __name__ == "__main__":
    sys.exit(main())