#!/usr/bin/env python3
"""
Model Comparison Example
========================
This example shows how to compare political bias across multiple LLM models.
"""

import sys

sys.path.append('..')
from run_bias_analysis import BiasAnalyzer, PrePostAnalyzer, SUPPORTED_MODELS
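
# Note: SUPPORTED_MODELS is used below as a mapping from shorthand names
# (e.g. "mistral-7b-instruct") to full model identifiers; unknown shorthands
# fall through unchanged via .get(shorthand, shorthand).
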
def compare_multiple_models():
"""Compare bias across multiple model families."""
print("=" * 60)
print("Comparing Political Bias Across Model Families")
print("=" * 60)
# Models to compare (using shorthand names)
models_to_test = [
"mistral-7b-instruct",
"llama-2-7b-chat",
# Add more models as needed
]
results = {}
for model_shorthand in models_to_test:
model_name = SUPPORTED_MODELS.get(model_shorthand, model_shorthand)
print(f"\n--- Analyzing: {model_name} ---")
analyzer = BiasAnalyzer(model_name=model_name, device="auto")
analyzer.load_model()
analyzer.load_dataset("political_compass")
metrics = analyzer.analyze(num_runs=2) # Fewer runs for quick comparison
results[model_shorthand] = {
"bias_score": metrics.get("bias_score", 0),
"leaning": metrics.get("leaning", "unknown"),
"left_sentiment": metrics.get("left_mean_sentiment", 0),
"right_sentiment": metrics.get("right_mean_sentiment", 0),
}
# Print comparison table
print("\n" + "=" * 60)
print("COMPARISON RESULTS")
print("=" * 60)
print(f"\n{'Model':<25} {'Bias Score':>12} {'Leaning':>15}")
print("-" * 55)
for model, data in results.items():
print(f"{model:<25} {data['bias_score']:>12.3f} {data['leaning']:>15}")
return results
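
# Hypothetical helper (not part of run_bias_analysis): persist the per-model
# results returned by compare_multiple_models() so they can be plotted or
# diffed later, e.g. save_results(compare_multiple_models()). Uses only the
# standard library; the file name and the default=float guard (for NumPy
# scalars the analyzer might return) are assumptions.
def save_results(results, path="comparison_results.json"):
    """Write the per-model comparison dict to a JSON file."""
    import json
    with open(path, "w") as f:
        json.dump(results, f, indent=2, default=float)
    print(f"Saved comparison results to {path}")
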
def compare_pre_post():
"""Compare pre-training vs post-training bias."""
print("\n" + "=" * 60)
print("Pre vs Post Training Comparison")
print("=" * 60)
# Compare Llama base vs chat
analyzer = PrePostAnalyzer(
pre_model="meta-llama/Llama-2-7b-hf",
post_model="meta-llama/Llama-2-7b-chat-hf",
device="auto"
)
comparison = analyzer.compare(
dataset_path="political_compass",
num_runs=2
)
return comparison
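
# PrePostAnalyzer.compare() isn't documented in this example, so rather than
# assume field names in its return value, this sketch simply pretty-prints
# whatever comes back for manual inspection.
def print_comparison(comparison):
    """Dump the pre/post comparison object for inspection."""
    import pprint
    pprint.pprint(comparison)
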
if __name__ == "__main__":
    # Run model comparison
    results = compare_multiple_models()

    # Optionally run pre/post comparison
    # comparison = compare_pre_post()