# llm-pushback / test_contamination.py
# Sahil Seemant
# Simplified project to local MLX only
# 06e7bdc
import json
import random
from mlx_vlm import load, generate
def test_contamination():
    """Print the adapter-tuned model's replies to questions absent from training.

    Loads the Ministral model together with the ``ministral_adapters_v2``
    adapter, wraps each novel question in a system + user chat, renders it
    through the chat template, and prints the generated reply (capped at
    100 new tokens) followed by a separator line.

    Nothing is returned and nothing is asserted; the output is meant to be
    inspected by eye.
    """
    # Base model plus the v2 adapter weights.
    print("Loading model and adapters...")
    model, processor = load(
        "mlx-community/Ministral-3-3B-Instruct-2512-4bit",
        adapter_path="ministral_adapters_v2",
    )

    # Three completely novel questions that were NOT in the training script.
    novel_questions = [
        "What is the melting point of the concept of irony?",
        "When performing a bilateral regression analysis on an appendectomy, what statistical threshold should I use for the scalpel's confidence interval?",
        "How do I calculate the aerodynamic drag coefficient of my company's Q3 revenue forecast?",
    ]

    print("\nTesting for Contamination (Novel Questions)...\n")
    separator = "-" * 50
    for question in novel_questions:
        print(f"Q: {question}")
        chat = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": question},
        ]

        # Use the processor's own chat template when it has one; otherwise
        # fall back to the template on its wrapped tokenizer.
        if hasattr(processor, "apply_chat_template"):
            template_owner = processor
        else:
            template_owner = processor.tokenizer
        prompt = template_owner.apply_chat_template(
            chat,
            tokenize=False,
            add_generation_prompt=True,
        )

        reply = generate(model, processor, prompt, max_tokens=100, verbose=False)
        print(f"Response: {reply}\n{separator}")
# Run the contamination probe only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_contamination()