Spaces:
Sleeping
Sleeping
File size: 1,476 Bytes
06e7bdc | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 | import json
import random
from mlx_vlm import load, generate
def test_contamination():
    """Ask the adapter-equipped model a few novel questions and print the replies.

    Loads ``mlx-community/Ministral-3-3B-Instruct-2512-4bit`` with the
    ``ministral_adapters_v2`` adapter, sends three hard-coded questions
    (each preceded by a generic system prompt) through the chat template,
    and prints every question followed by the generated reply, capped at
    100 new tokens. Nothing is asserted; the output is meant to be read
    by a person.

    Returns:
        None. All output goes to stdout.
    """
    # Load model and v2 adapter
    print("Loading model and adapters...")
    model, processor = load(
        "mlx-community/Ministral-3-3B-Instruct-2512-4bit",
        adapter_path="ministral_adapters_v2",
    )

    # 3 completely novel questions that were NOT in the training script
    novel_questions = [
        "What is the melting point of the concept of irony?",
        "When performing a bilateral regression analysis on an appendectomy, what statistical threshold should I use for the scalpel's confidence interval?",
        "How do I calculate the aerodynamic drag coefficient of my company's Q3 revenue forecast?",
    ]

    # The processor does not change between questions, so pick the
    # chat-template renderer once: the processor's own if it has one,
    # otherwise the wrapped tokenizer's.
    if hasattr(processor, "apply_chat_template"):
        apply_chat_template = processor.apply_chat_template
    else:
        apply_chat_template = processor.tokenizer.apply_chat_template

    print("\nTesting for Contamination (Novel Questions)...\n")
    for q in novel_questions:
        print(f"Q: {q}")
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": q},
        ]
        prompt = apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        response = generate(model, processor, prompt, max_tokens=100, verbose=False)

        # NOTE(review): newer mlx-vlm releases appear to return a result
        # object carrying the reply in ``.text`` rather than a plain string;
        # print the text in that case instead of the object's repr. A plain
        # ``str`` has no ``.text`` attribute and is printed unchanged.
        text = getattr(response, "text", response)
        print(f"Response: {text}\n{'-'*50}")
# Run the probe only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_contamination()
|