"""Smoke-test script: load a locally fine-tuned causal LM from ``final_model``
and print one sampled completion for a fixed prompt."""

import torch

from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "final_model"

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    trust_remote_code=True,
)

print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    trust_remote_code=True,
)

# 🔥 FIXES
# Force fp32 in case the checkpoint was saved in reduced precision.
model = model.float()
# NOTE(review): overriding config fields after loading is a workaround for a
# checkpoint whose config apparently uses the GPT-2-style "n_layer" key
# instead of "num_hidden_layers" — confirm the checkpoint actually needs
# these patches; they silently default to 12 layers when "n_layer" is absent.
model.config.num_hidden_layers = getattr(model.config, "n_layer", 12)
model.config.is_encoder_decoder = False

# Inference mode: disable dropout so sampling variance comes only from
# temperature, not from active dropout layers.
model.eval()

prompt = "Write a Python function for binary search"
inputs = tokenizer(prompt, return_tensors="pt")

print("Generating...")
# no_grad: generation needs no autograd graph; this avoids wasting memory
# across the 200 decoding steps.
with torch.no_grad():
    output = model.generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.7,
        do_sample=True,
        # Explicit pad_token_id silences the "Setting pad_token_id to
        # eos_token_id" warning for checkpoints that define no pad token.
        pad_token_id=(
            tokenizer.pad_token_id
            if tokenizer.pad_token_id is not None
            else tokenizer.eos_token_id
        ),
    )

print("\n=== OUTPUT ===\n")
print(tokenizer.decode(output[0], skip_special_tokens=True))