import json import random from mlx_vlm import load, generate def test_contamination(): # Load model and v2 adapter print("Loading model and adapters...") model, processor = load("mlx-community/Ministral-3-3B-Instruct-2512-4bit", adapter_path="ministral_adapters_v2") # 3 completely novel questions that were NOT in the training script novel_questions = [ "What is the melting point of the concept of irony?", "When performing a bilateral regression analysis on an appendectomy, what statistical threshold should I use for the scalpel's confidence interval?", "How do I calculate the aerodynamic drag coefficient of my company's Q3 revenue forecast?" ] print("\nTesting for Contamination (Novel Questions)...\n") for q in novel_questions: print(f"Q: {q}") messages = [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": q} ] if hasattr(processor, "apply_chat_template"): prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) else: prompt = processor.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) response = generate(model, processor, prompt, max_tokens=100, verbose=False) print(f"Response: {response}\n{'-'*50}") if __name__ == "__main__": test_contamination()