"""
Model Comparison Example
========================

This example shows how to compare political bias across multiple LLM models.
"""

import sys
sys.path.append('..')

from run_bias_analysis import BiasAnalyzer, PrePostAnalyzer, SUPPORTED_MODELS
|
def compare_multiple_models(models_to_test=None, num_runs=2):
    """Compare political bias across multiple model families.

    For each model: load it, run the ``political_compass`` dataset through
    ``BiasAnalyzer``, collect summary metrics, and print a comparison table.

    Args:
        models_to_test: Iterable of model identifiers — either shorthands
            (keys of ``SUPPORTED_MODELS``) or full model names. Defaults to
            a small Mistral / Llama-2 sample set (the original hard-coded
            list), so existing callers are unaffected.
        num_runs: Number of analysis runs forwarded to
            ``BiasAnalyzer.analyze``. Defaults to 2, as before.

    Returns:
        dict: Mapping of each requested model identifier to a summary dict
        with keys ``bias_score``, ``leaning``, ``left_sentiment`` and
        ``right_sentiment``.
    """
    if models_to_test is None:
        # Default sample set; built here rather than as a parameter default
        # to avoid the mutable-default-argument pitfall.
        models_to_test = [
            "mistral-7b-instruct",
            "llama-2-7b-chat",
        ]

    print("=" * 60)
    print("Comparing Political Bias Across Model Families")
    print("=" * 60)

    results = {}

    for model_shorthand in models_to_test:
        # Resolve a shorthand to its full model name; fall back to the raw
        # string so fully-qualified model names also work.
        model_name = SUPPORTED_MODELS.get(model_shorthand, model_shorthand)
        print(f"\n--- Analyzing: {model_name} ---")

        analyzer = BiasAnalyzer(model_name=model_name, device="auto")
        analyzer.load_model()
        analyzer.load_dataset("political_compass")

        metrics = analyzer.analyze(num_runs=num_runs)

        # Keep only the headline metrics; missing keys default to
        # neutral/unknown values rather than raising.
        results[model_shorthand] = {
            "bias_score": metrics.get("bias_score", 0),
            "leaning": metrics.get("leaning", "unknown"),
            "left_sentiment": metrics.get("left_mean_sentiment", 0),
            "right_sentiment": metrics.get("right_mean_sentiment", 0),
        }

    print("\n" + "=" * 60)
    print("COMPARISON RESULTS")
    print("=" * 60)

    print(f"\n{'Model':<25} {'Bias Score':>12} {'Leaning':>15}")
    print("-" * 55)

    for model, data in results.items():
        print(f"{model:<25} {data['bias_score']:>12.3f} {data['leaning']:>15}")

    return results
| |
|
| |
|
def compare_pre_post(
    pre_model="meta-llama/Llama-2-7b-hf",
    post_model="meta-llama/Llama-2-7b-chat-hf",
    num_runs=2,
):
    """Compare pre-training vs post-training bias for a model pair.

    Args:
        pre_model: Name of the base (pre-fine-tuning) model. Defaults to
            the original hard-coded Llama-2 base checkpoint.
        post_model: Name of the instruction-tuned (post-training) model.
            Defaults to the original hard-coded Llama-2 chat checkpoint.
        num_runs: Number of analysis runs forwarded to
            ``PrePostAnalyzer.compare``. Defaults to 2, as before.

    Returns:
        The comparison result produced by ``PrePostAnalyzer.compare``
        (structure defined by the project analyzer).
    """
    print("\n" + "=" * 60)
    print("Pre vs Post Training Comparison")
    print("=" * 60)

    analyzer = PrePostAnalyzer(
        pre_model=pre_model,
        post_model=post_model,
        device="auto"
    )

    comparison = analyzer.compare(
        dataset_path="political_compass",
        num_runs=num_runs
    )

    return comparison
| |
|
| |
|
if __name__ == "__main__":
    # Run the multi-model comparison example.
    # NOTE(review): compare_pre_post() is defined above but never invoked
    # here — confirm whether the pre/post example should also run.
    results = compare_multiple_models()
| |
|