"""
P4: Evaluation - Test zero-shot and fine-tuned model performance.
"""
import json

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
from modelscope import snapshot_download


def load_finetuned_model(base_model: str, lora_path: str):
    """Load base model + LoRA adapters."""
    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )
    model = PeftModel.from_pretrained(model, lora_path)
    model.eval()
    return model, tokenizer
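

# The module docstring promises a zero-shot baseline alongside the fine-tuned
# model, but the original script never loads one. A minimal sketch of that
# loader (an addition, not part of the original): it mirrors
# load_finetuned_model but skips the LoRA adapters.
def load_base_model(base_model: str):
    """Load only the base model, for zero-shot evaluation."""
    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )
    model.eval()
    return model, tokenizer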


def predict(model, tokenizer, item_info: str) -> str:
    """Run inference on a single item and return the model's Yes/No answer."""
    prompt = (
        "### Instruction:\n"
        "Based on the following item information, predict whether the user "
        "would be interested (Yes/No).\n\n"
        f"### Input:\n{item_info}\n\n"
        "### Response:\n"
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # Greedy decoding: temperature is ignored when do_sample=False,
        # so it is dropped here to avoid the transformers warning.
        outputs = model.generate(
            **inputs,
            max_new_tokens=10,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text includes the prompt; keep only the generated answer.
    if "### Response:" in response:
        response = response.split("### Response:")[-1].strip()
    return response


def evaluate(model, tokenizer, test_data: list) -> dict:
    """Evaluate the model on the test set and report accuracy."""
    correct = 0
    total = len(test_data)
    for sample in test_data:
        pred = predict(model, tokenizer, sample['input'])
        expected = sample['output'].strip().lower()
        # Compare only the first word of the reply (stripped of punctuation)
        # so that e.g. "No" is not spuriously matched inside "Not sure".
        words = pred.strip().lower().split()
        first_word = words[0].strip(".,!:;") if words else ""
        if first_word == expected:
            correct += 1
    accuracy = correct / total if total > 0 else 0
    return {"accuracy": accuracy, "correct": correct, "total": total}


if __name__ == "__main__":
    BASE_MODEL = snapshot_download("qwen/Qwen2-1.5B-Instruct")
    LORA_PATH = "./lora_output"

    print("Loading fine-tuned model...")
    model, tokenizer = load_finetuned_model(BASE_MODEL, LORA_PATH)

    # Load test data (use the last 100 samples of the training data as a pseudo-test set)
    with open("training_data.json", "r", encoding="utf-8") as f:
        all_data = json.load(f)
    test_data = all_data[-100:]

    print(f"Evaluating on {len(test_data)} samples...")
    results = evaluate(model, tokenizer, test_data)
    print(f"Results: Accuracy = {results['accuracy']*100:.1f}% ({results['correct']}/{results['total']})")