#!/usr/bin/env python3
"""Remote eval script: run questions against base model and base+LoRA.

Usage:
    python3 eval_remote.py --base    Run base model only
    python3 eval_remote.py --lora    Run base + LoRA
    python3 eval_remote.py --both    Run both (default)

Reads:  /workspace/eval/questions.json
Writes: /workspace/eval/base_answers.json
        /workspace/eval/lora_answers.json
"""

import argparse
import json
import os
import torch
from pathlib import Path
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

QUESTIONS_PATH = "/workspace/eval/questions.json"
BASE_ANSWERS_PATH = "/workspace/eval/base_answers.json"
LORA_ANSWERS_PATH = "/workspace/eval/lora_answers.json"
CHECKPOINT_PATH = "/workspace/checkpoints/final"
# The model can be overridden through the MODEL environment variable.
MODEL_NAME = os.environ.get("MODEL", "Qwen/Qwen2.5-14B-Instruct")


def load_questions():
    """Return the parsed contents of QUESTIONS_PATH.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the locale of the machine running the evaluation.
    """
    with open(QUESTIONS_PATH, encoding="utf-8") as f:
        return json.load(f)


def run_inference(model, tokenizer, questions, max_new_tokens=256):
    """Generate one answer per question and return them as a list of strings.

    Args:
        model: Object exposing ``generate`` and ``device`` (here a causal LM,
            optionally wrapped by ``PeftModel``).
        tokenizer: Tokenizer exposing ``apply_chat_template`` and ``decode``.
        questions: Sequence of mappings; each must have a ``"question"`` key
            holding the prompt text.
        max_new_tokens: Upper bound on generated tokens per answer.

    Returns:
        A list with the stripped, decoded completion for each question, in
        input order.
    """
    answers = []
    total = len(questions)
    for i, q in enumerate(questions):
        prompt = q["question"]
        messages = [{"role": "user", "content": prompt}]
        text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        # The rendered chat template already contains the special tokens, so
        # the tokenizer must not add its own (avoids e.g. a duplicated BOS on
        # tokenizers that prepend one automatically).
        inputs = tokenizer(
            text, return_tensors="pt", add_special_tokens=False
        ).to(model.device)
        # NOTE(review): sampling at temperature 0.1 is not deterministic
        # across runs; confirm this is intended for the evaluation.
        with torch.no_grad():
            output = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=0.1,
                do_sample=True,
            )
        # generate() returns prompt + completion; keep only the new tokens.
        prompt_len = inputs["input_ids"].shape[1]
        response = tokenizer.decode(
            output[0][prompt_len:], skip_special_tokens=True
        )
        answers.append(response.strip())
        print(f" [{i+1}/{total}] {prompt[:60]}... \n-> {len(response)} chars")
    return answers


def _save_answers(answers, path, label):
    """Write *answers* to *path* as indented JSON and report what was saved."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(answers, f, indent=2)
    print(f"Saved {len(answers)} {label} answers to {path}")


def main():
    """Parse the command line, load the model, and run the selected passes."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--base", action="store_true", help="Run base model only")
    parser.add_argument("--lora", action="store_true", help="Run base + LoRA only")
    # --both is accepted for documentation purposes; it is the behaviour when
    # neither --base nor --lora is given, so its value is never consulted.
    parser.add_argument("--both", action="store_true", default=True, help="Run both (default)")
    args = parser.parse_args()

    # Precedence: --base wins over --lora; with neither flag both passes run.
    run_base = args.base or not args.lora
    run_lora = not args.base

    questions = load_questions()
    print(f"Loaded {len(questions)} questions")

    print(f"Loading tokenizer: {MODEL_NAME}")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

    print(f"Loading base model: {MODEL_NAME}")
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )

    if run_base:
        print("\n=== Base model inference ===")
        base_answers = run_inference(model, tokenizer, questions)
        _save_answers(base_answers, BASE_ANSWERS_PATH, "base")

    if run_lora:
        # The adapter is attached after the base pass so the base answers are
        # produced by the unmodified model.
        print(f"\n=== Loading LoRA from {CHECKPOINT_PATH} ===")
        model = PeftModel.from_pretrained(model, CHECKPOINT_PATH)
        print("=== LoRA model inference ===")
        lora_answers = run_inference(model, tokenizer, questions)
        _save_answers(lora_answers, LORA_ANSWERS_PATH, "LoRA")

    print("\nDone.")


if __name__ == "__main__":
    main()