aviation-lora / eval_remote.py
ziksy's picture
Upload eval_remote.py with huggingface_hub
363794c verified
#!/usr/bin/env python3
"""Remote eval script: run questions against base model and base+LoRA.

Usage:
    python3 eval_remote.py --base    Run base model only
    python3 eval_remote.py --lora    Run base + LoRA only
    python3 eval_remote.py --both    Run both (default)

Reads:  /workspace/eval/questions.json
Writes: /workspace/eval/base_answers.json
        /workspace/eval/lora_answers.json
"""
import argparse
import json
import os
import torch
from pathlib import Path
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
# Input: JSON file with the evaluation questions; each entry is read via its
# "question" key.
QUESTIONS_PATH = "/workspace/eval/questions.json"
# Outputs: JSON lists of generated answers, one file per model variant.
BASE_ANSWERS_PATH = "/workspace/eval/base_answers.json"
LORA_ANSWERS_PATH = "/workspace/eval/lora_answers.json"
# Location of the LoRA adapter that is loaded on top of the base model.
CHECKPOINT_PATH = "/workspace/checkpoints/final"
# Base model identifier; override by setting the MODEL environment variable.
MODEL_NAME = os.environ.get("MODEL", "Qwen/Qwen2.5-14B-Instruct")
def load_questions():
    """Load the evaluation questions from ``QUESTIONS_PATH``.

    Returns:
        The parsed JSON document. ``run_inference`` iterates over it and
        reads the ``"question"`` key of every entry.

    Raises:
        FileNotFoundError: If the questions file does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the locale default,
    # which can be ASCII in minimal containers and would then fail on
    # non-ASCII question text.
    with open(QUESTIONS_PATH, encoding="utf-8") as f:
        return json.load(f)
def run_inference(model, tokenizer, questions, max_new_tokens=256):
    """Generate one answer per question and return them in input order.

    Every question is sent as a single user turn: it is rendered through the
    tokenizer's chat template (with the generation prompt appended),
    tokenized, and completed with low-temperature sampling.

    Args:
        model: Object exposing ``.device`` and ``.generate(**inputs, ...)``.
        tokenizer: Object exposing ``apply_chat_template``, ``__call__`` and
            ``decode``.
        questions: Sized sequence of mappings, each with a ``"question"`` key.
        max_new_tokens: Upper bound on generated tokens per answer.

    Returns:
        A list of whitespace-stripped answer strings, one per question.
    """
    answers = []
    total = len(questions)
    for index, item in enumerate(questions, start=1):
        prompt = item["question"]
        messages = [{"role": "user", "content": prompt}]
        text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        # The rendered chat template already carries the special tokens it
        # needs; letting the tokenizer add its own again would duplicate
        # them (e.g. a second BOS) on tokenizers that prepend one.
        inputs = tokenizer(
            text, return_tensors="pt", add_special_tokens=False
        ).to(model.device)
        prompt_length = inputs["input_ids"].shape[1]
        with torch.no_grad():
            output = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=0.1,
                do_sample=True,
            )
        # The returned sequence starts with the prompt tokens; decode only
        # what comes after them.
        response = tokenizer.decode(
            output[0][prompt_length:], skip_special_tokens=True
        ).strip()
        answers.append(response)
        # Report the length of the answer as it is stored (after stripping).
        print(f" [{index}/{total}] {prompt[:60]}... -> {len(response)} chars")
    return answers
def _save_answers(answers, path, label):
    """Write ``answers`` to ``path`` as indented JSON and print a summary."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(answers, f, indent=2)
    print(f"Saved {len(answers)} {label} answers to {path}")


def main():
    """Run the questions against the base model, the LoRA model, or both.

    Command-line flags (mutually exclusive):
        --base  Run the base model only.
        --lora  Run the base model with the LoRA adapter only.
        --both  Run both; this is also what happens when no flag is given.

    Base answers are written to ``BASE_ANSWERS_PATH`` and LoRA answers to
    ``LORA_ANSWERS_PATH``.
    """
    parser = argparse.ArgumentParser()
    # The three modes contradict each other, so let argparse reject
    # combinations such as ``--base --lora`` instead of silently picking one.
    mode = parser.add_mutually_exclusive_group()
    mode.add_argument("--base", action="store_true", help="Run base model only")
    mode.add_argument("--lora", action="store_true", help="Run base + LoRA only")
    mode.add_argument("--both", action="store_true", help="Run both (default)")
    args = parser.parse_args()

    # With at most one flag set, "both" is simply the absence of a
    # restriction: each variant runs unless the other was requested alone.
    run_base = not args.lora
    run_lora = not args.base

    questions = load_questions()
    print(f"Loaded {len(questions)} questions")

    print(f"Loading tokenizer: {MODEL_NAME}")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

    print(f"Loading base model: {MODEL_NAME}")
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )

    # Base inference has to happen before the adapter is attached, because
    # the LoRA model is built on top of this same loaded base model.
    if run_base:
        print("\n=== Base model inference ===")
        base_answers = run_inference(model, tokenizer, questions)
        _save_answers(base_answers, BASE_ANSWERS_PATH, "base")

    if run_lora:
        print(f"\n=== Loading LoRA from {CHECKPOINT_PATH} ===")
        model = PeftModel.from_pretrained(model, CHECKPOINT_PATH)
        print("=== LoRA model inference ===")
        lora_answers = run_inference(model, tokenizer, questions)
        _save_answers(lora_answers, LORA_ANSWERS_PATH, "LoRA")

    print("\nDone.")
# Run the evaluation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()