ziksy
/

aviation-lora

Model card Files Files and versions

aviation-lora / eval_remote.py

ziksy's picture

Upload eval_remote.py with huggingface_hub

363794c verified about 2 months ago

history blame contribute delete

3.53 kB

	#!/usr/bin/env python3
	"""Remote eval script: run questions against base model and base+LoRA.

	Usage:
	    python3 eval_remote.py --base    # run the base model only
	    python3 eval_remote.py --lora    # run base + LoRA only
	    python3 eval_remote.py --both    # run both (default)

	Reads:  /workspace/eval/questions.json
	        /workspace/checkpoints/final  (LoRA adapter, LoRA run only)
	Writes: /workspace/eval/base_answers.json
	        /workspace/eval/lora_answers.json
	"""

	import argparse
	import json
	import os
	import torch
	from pathlib import Path
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from peft import PeftModel

	# Questions and both answer files share one evaluation directory.
	_EVAL_DIR = "/workspace/eval"
	QUESTIONS_PATH = f"{_EVAL_DIR}/questions.json"  # input: list of questions
	BASE_ANSWERS_PATH = f"{_EVAL_DIR}/base_answers.json"  # output of the base run
	LORA_ANSWERS_PATH = f"{_EVAL_DIR}/lora_answers.json"  # output of the LoRA run
	# Adapter location handed to PeftModel.from_pretrained() for the LoRA run.
	CHECKPOINT_PATH = "/workspace/checkpoints/final"
	# Base model id; the MODEL environment variable overrides the default.
	MODEL_NAME = os.getenv("MODEL", "Qwen/Qwen2.5-14B-Instruct")


	def load_questions():
	    """Load the evaluation questions from ``QUESTIONS_PATH``.

	    Returns:
	        The parsed JSON document. ``run_inference`` iterates over it and reads
	        ``item["question"]`` from every element, so it is expected to be a
	        list of objects that each carry a ``"question"`` key.

	    Raises:
	        OSError: If the questions file cannot be opened.
	        json.JSONDecodeError: If the file is not valid JSON.
	    """
	    # JSON is UTF-8 by specification; do not rely on the locale's default
	    # encoding, which may differ on the machine running the evaluation.
	    with open(QUESTIONS_PATH, encoding="utf-8") as f:
	        return json.load(f)


	def run_inference(model, tokenizer, questions, max_new_tokens=256,
	                  temperature=0.1, do_sample=True):
	    """Generate one answer per question with ``model``.

	    Each question is sent as a single-turn user message rendered through the
	    tokenizer's chat template; only the newly generated tokens are decoded.
	    A progress line is printed after every question.

	    Args:
	        model: Object exposing ``device`` and ``generate(**kwargs)``.
	        tokenizer: Callable tokenizer that also provides
	            ``apply_chat_template`` and ``decode``.
	        questions: Sized iterable of mappings, each with a ``"question"`` key.
	        max_new_tokens: Upper bound on generated tokens per answer.
	        temperature: Sampling temperature; forwarded only when ``do_sample``
	            is true.
	        do_sample: Whether to sample. The default keeps the previous
	            behaviour (sampling at temperature 0.1); pass ``False`` to turn
	            sampling off, e.g. for repeatable comparisons.

	    Returns:
	        A list of whitespace-stripped answer strings, in question order.

	    Raises:
	        KeyError: If a question entry has no ``"question"`` key.
	    """
	    generate_kwargs = {"max_new_tokens": max_new_tokens, "do_sample": do_sample}
	    if do_sample:
	        # Temperature only applies when sampling, so it is left out otherwise.
	        generate_kwargs["temperature"] = temperature

	    total = len(questions)
	    answers = []
	    for index, q in enumerate(questions, start=1):
	        prompt = q["question"]
	        messages = [{"role": "user", "content": prompt}]
	        text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
	        inputs = tokenizer(text, return_tensors="pt").to(model.device)

	        with torch.no_grad():
	            output = model.generate(**inputs, **generate_kwargs)

	        # Drop the prompt tokens so only the generated continuation is decoded.
	        prompt_len = inputs["input_ids"].shape[1]
	        answer = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()
	        answers.append(answer)
	        # Report the length of the answer that is actually stored.
	        print(f" [{index}/{total}] {prompt[:60]}... -> {len(answer)} chars")

	    return answers


	def main():
	    """Run the selected evaluation passes and write their answers to disk.

	    Command-line flags ``--base``, ``--lora`` and ``--both`` are mutually
	    exclusive; with no flag both passes run. The base pass runs before the
	    LoRA adapter is attached, so one loaded base model serves both passes.

	    Reads ``QUESTIONS_PATH``; writes ``BASE_ANSWERS_PATH`` and/or
	    ``LORA_ANSWERS_PATH`` as JSON lists of answer strings.
	    """
	    parser = argparse.ArgumentParser()
	    # The three modes contradict each other, so argparse rejects combinations
	    # instead of silently letting --base win over --lora.
	    mode = parser.add_mutually_exclusive_group()
	    mode.add_argument("--base", action="store_true", help="Run base model only")
	    mode.add_argument("--lora", action="store_true", help="Run base + LoRA only")
	    mode.add_argument("--both", action="store_true", help="Run both (default)")
	    args = parser.parse_args()

	    # No flag and --both mean the same thing: neither pass is excluded.
	    run_base = not args.lora
	    run_lora = not args.base

	    questions = load_questions()
	    print(f"Loaded {len(questions)} questions")

	    print(f"Loading tokenizer: {MODEL_NAME}")
	    # NOTE(review): trust_remote_code=True lets the tokenizer repo execute its
	    # own Python code, and MODEL_NAME can be overridden through the MODEL
	    # environment variable -- only point it at repositories you trust.
	    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

	    print(f"Loading base model: {MODEL_NAME}")
	    model = AutoModelForCausalLM.from_pretrained(
	        MODEL_NAME,
	        torch_dtype=torch.bfloat16,
	        device_map="auto",
	    )

	    if run_base:
	        print("\n=== Base model inference ===")
	        base_answers = run_inference(model, tokenizer, questions)
	        with open(BASE_ANSWERS_PATH, "w") as f:
	            json.dump(base_answers, f, indent=2)
	        print(f"Saved {len(base_answers)} base answers to {BASE_ANSWERS_PATH}")

	    if run_lora:
	        print(f"\n=== Loading LoRA from {CHECKPOINT_PATH} ===")
	        # Wrap the already-loaded base model with the adapter weights.
	        model = PeftModel.from_pretrained(model, CHECKPOINT_PATH)
	        print("=== LoRA model inference ===")
	        lora_answers = run_inference(model, tokenizer, questions)
	        with open(LORA_ANSWERS_PATH, "w") as f:
	            json.dump(lora_answers, f, indent=2)
	        print(f"Saved {len(lora_answers)} LoRA answers to {LORA_ANSWERS_PATH}")

	    print("\nDone.")


	# Run the evaluation only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()