from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
def evaluate_model(model_path, model_name="Current Model"):
    """Run a quick qualitative evaluation of a causal language model.

    Loads the tokenizer and model from ``model_path``, then prints sampled
    continuations for a set of short image-description prompts (with the
    theme we expect each one to hit) and for a set of longer prompts used
    as a rough coherence check. All output goes to stdout.

    Args:
        model_path: Directory or hub id accepted by ``from_pretrained``.
        model_name: Human-readable label used only in the printed header.

    Returns:
        None. Returns early (after printing an error) if loading fails.
    """
    print(f"\n🧮 EVALUATING: {model_name}")
    print("=" * 50)
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForCausalLM.from_pretrained(model_path)
    except Exception as e:
        print(f"❌ Could not load model: {e}")
        return
    # Fix: put the model in inference mode so dropout does not add noise
    # to the generated samples.
    model.eval()

    # Test cases with expected themes.
    test_cases = [
        {"prompt": "Describe this image: a dog", "expected": "animal"},
        {"prompt": "Describe this image: a city at night", "expected": "urban"},
        {"prompt": "Describe this image: a beach with", "expected": "coastal"},
        {"prompt": "Describe this image: a person riding a", "expected": "action"},
        {"prompt": "Describe this image: food on a table", "expected": "culinary"},
    ]
    print("📝 Generation Test:")
    for i, case in enumerate(test_cases, 1):
        inputs = tokenizer(case["prompt"], return_tensors="pt")
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=60,
                num_return_sequences=1,
                pad_token_id=tokenizer.eos_token_id,
                temperature=0.7,
                do_sample=True,
                no_repeat_ngram_size=2,
            )
        full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # NOTE(review): char-length slicing assumes decode() reproduces the
        # prompt verbatim; a normalizing tokenizer could shift this boundary.
        generated = full_text[len(case["prompt"]):].strip()
        print(f"{i}. Prompt: {case['prompt']}")
        print(f" Generated: {generated}")
        print(f" Expected theme: {case['expected']}")
        print()

    # Test coherence with longer prompts.
    print("🔍 Coherence Test:")
    coherence_prompts = [
        "Describe this image: a wedding ceremony with",
        "Describe this image: a sports competition in",
        "Describe this image: a natural disaster showing",
    ]
    for prompt in coherence_prompts:
        inputs = tokenizer(prompt, return_tensors="pt")
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=80,
                num_return_sequences=1,
                pad_token_id=tokenizer.eos_token_id,
                temperature=0.6,
                # Fix: without do_sample=True transformers decodes greedily
                # and silently ignores the temperature setting.
                do_sample=True,
            )
        result = tokenizer.decode(outputs[0], skip_special_tokens=True)
        continuation = result[len(prompt):].strip()
        print(f" '{prompt}' → '{continuation}'")
if __name__ == "__main__":
    # Guard the script entry point so importing this module (e.g. to reuse
    # evaluate_model elsewhere) does not trigger a model load as a side effect.
    # Evaluate current model.
    evaluate_model("./outputs/first_run", "Phase 1 Model (124M)")
    print("\n🎯 NEXT: Run 'python training/scale_up_phase.py' to train a larger model!")
    print("💡 This will use 5K examples and a 355M parameter model for better quality!")