"""
LLM Political Bias Analysis Pipeline - Main Entry Point

This script provides a CLI for running political bias analysis on LLMs,
using vLLM for efficient model serving.

Usage:
    # Start the vLLM server first (in a separate terminal):
    python -m vllm.entrypoints.openai.api_server --model mistralai/Mistral-7B-Instruct-v0.2

    # Then run the analysis:
    python run_pipeline.py --model mistral-7b-instruct --dataset political_compass

    # Or compare pre- vs post-training:
    python run_pipeline.py --pre-model llama-2-7b --post-model llama-2-7b-chat
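
Programmatic use (a minimal sketch mirroring the CLI wiring in
run_single_model_analysis below; the names come from this file, and the
PipelineConfig fields left unspecified are assumed to have defaults):

    from src.pipeline import BiasAnalysisPipeline, PipelineConfig

    config = PipelineConfig(
        model_name="mistral-7b-instruct",
        api_base="http://localhost:8000/v1",
    )
    pipeline = BiasAnalysisPipeline(config)
    pipeline.load_dataset("political_compass")
    pipeline.run(progress_bar=True)
    pipeline.print_summary()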

Author: Paris-Saclay University - Fairness in AI
"""
|
|
import argparse
import logging
import sys
import time
from pathlib import Path
|
|
# Make the repo-local src package importable regardless of the working directory
sys.path.insert(0, str(Path(__file__).parent))

from src.pipeline import BiasAnalysisPipeline, PrePostComparisonPipeline, PipelineConfig
from src.llms import VLLMServer, SUPPORTED_MODELS, MODEL_METADATA
|
|
# Default to INFO; --verbose raises this to DEBUG in main()
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
|
|
|
|
def print_banner():
    """Print welcome banner."""
    banner = """
    ╔══════════════════════════════════════════════════════════════╗
    ║             LLM Political Bias Analysis Pipeline              ║
    ║             ────────────────────────────────────              ║
    ║           Powered by vLLM | Paris-Saclay University           ║
    ╚══════════════════════════════════════════════════════════════╝
    """
    print(banner)
|
|
|
|
def list_models():
    """List all supported models."""
    print("\n📦 Supported Models:\n")
    print(f"{'Model Name':<25} {'HuggingFace ID':<45} {'Origin':<15} {'Type':<10}")
    print("-" * 100)

    for name, hf_id in SUPPORTED_MODELS.items():
        metadata = MODEL_METADATA.get(name, {})
        origin = metadata.get("origin", "Unknown")
        model_type = metadata.get("type", "unknown")
        print(f"{name:<25} {hf_id:<45} {origin:<15} {model_type:<10}")

    print()
|
|
|
|
def run_single_model_analysis(args):
    """Run analysis on a single model."""
    config = PipelineConfig(
        model_name=args.model,
        api_base=args.api_base,
        max_tokens=args.max_tokens,
        temperature=args.temperature,
        num_runs=args.num_runs,
        output_dir=args.output,
        sentiment_method=args.sentiment_method,
    )

    pipeline = BiasAnalysisPipeline(config)

    # --dataset defaults to "political_compass"; keep a fallback for
    # programmatic callers that pass an args object without it
    dataset = args.dataset or "political_compass"
    pipeline.load_dataset(dataset)

    logger.info(f"Running analysis on model: {args.model}")
    pipeline.run(progress_bar=True)

    pipeline.print_summary()

    if args.save:
        json_path, csv_path = pipeline.save_results()
        print("\n📁 Results saved to:")
        print(f"  - {json_path}")
        print(f"  - {csv_path}")

    return pipeline
|
|
|
|
def run_comparison_analysis(args):
    """Run pre- vs post-training comparison."""
    logger.info(f"Running comparison: {args.pre_model} vs {args.post_model}")

    comparison = PrePostComparisonPipeline(
        pre_model=args.pre_model,
        post_model=args.post_model,
        api_base=args.api_base,
        num_runs=args.num_runs,
        output_dir=args.output,
    )

    # Load the same dataset into both pipelines (falls back to the default)
    dataset = args.dataset or "political_compass"
    comparison.pre_pipeline.load_dataset(dataset)
    comparison.post_pipeline.load_dataset(dataset)

    comparison.run(dataset)

    comparison.print_comparison()

    return comparison
|
|
|
|
def start_vllm_server(args):
    """Start a vLLM server for the specified model."""
    model_name = args.serve_model

    # Resolve a shorthand name to its full HuggingFace ID for display
    hf_model_id = SUPPORTED_MODELS.get(model_name, model_name)

    print(f"\n🚀 Starting vLLM server for: {hf_model_id}")
    print(f"   Host: {args.host}")
    print(f"   Port: {args.port}")
    print(f"   Max model length: {args.max_model_len}")
    print(f"   GPU memory utilization: {args.gpu_memory_utilization}")
    print("\nPress Ctrl+C to stop the server.\n")

    server = VLLMServer(
        model_name=model_name,
        host=args.host,
        port=args.port,
        max_model_len=args.max_model_len,
        gpu_memory_utilization=args.gpu_memory_utilization,
        tensor_parallel_size=args.tensor_parallel_size,
    )

    try:
        server.start(wait_for_ready=True)

        # Keep the process alive until the user interrupts
        while True:
            time.sleep(1)

    except KeyboardInterrupt:
        print("\n\n🛑 Stopping server...")
        server.stop()
        print("Server stopped.")
|
|
|
def main():
    parser = argparse.ArgumentParser(
        description="LLM Political Bias Analysis Pipeline",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # List available models
  python run_pipeline.py --list-models

  # Run analysis on a single model
  python run_pipeline.py --model mistral-7b-instruct --dataset political_compass

  # Compare pre- vs post-training
  python run_pipeline.py --pre-model llama-2-7b --post-model llama-2-7b-chat

  # Start vLLM server
  python run_pipeline.py --serve mistral-7b-instruct --port 8000

  # Use custom dataset
  python run_pipeline.py --model qwen-7b-chat --dataset data/my_dataset.json
"""
    )

    # Model selection
    parser.add_argument(
        "--model", "-m",
        type=str,
        help="Model name or shorthand (use --list-models to see options)"
    )

    parser.add_argument(
        "--list-models",
        action="store_true",
        help="List all supported models"
    )

    # Pre/post-training comparison
    parser.add_argument(
        "--pre-model",
        type=str,
        help="Pre-training model for comparison"
    )

    parser.add_argument(
        "--post-model",
        type=str,
        help="Post-training model for comparison"
    )

    # Server mode
    parser.add_argument(
        "--serve",
        dest="serve_model",
        type=str,
        help="Start vLLM server for the specified model"
    )

    # Dataset and API
    parser.add_argument(
        "--dataset", "-d",
        type=str,
        default="political_compass",
        help="Dataset name or path to JSON file"
    )

    parser.add_argument(
        "--api-base",
        type=str,
        default="http://localhost:8000/v1",
        help="vLLM API base URL"
    )

    # Generation settings
    parser.add_argument(
        "--max-tokens",
        type=int,
        default=512,
        help="Maximum tokens to generate"
    )

    parser.add_argument(
        "--temperature",
        type=float,
        default=0.7,
        help="Generation temperature"
    )

    parser.add_argument(
        "--num-runs",
        type=int,
        default=3,
        help="Number of runs per question"
    )

    # Output options
    parser.add_argument(
        "--output", "-o",
        type=str,
        default="results",
        help="Output directory"
    )

    parser.add_argument(
        "--save",
        action="store_true",
        default=True,
        help="Save results to files (default)"
    )

    parser.add_argument(
        "--no-save",
        action="store_false",
        dest="save",
        help="Don't save results"
    )
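
    # Note: on Python 3.9+, this --save/--no-save pair could be a single
    # add_argument("--save", action=argparse.BooleanOptionalAction, default=True);
    # the explicit pair is kept for compatibility with older interpreters.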

    # Analysis options
    parser.add_argument(
        "--sentiment-method",
        type=str,
        default="vader",
        choices=["vader", "textblob", "transformers"],
        help="Sentiment analysis method"
    )

    # vLLM server options (used with --serve)
    parser.add_argument(
        "--host",
        type=str,
        default="localhost",
        help="vLLM server host"
    )

    parser.add_argument(
        "--port",
        type=int,
        default=8000,
        help="vLLM server port"
    )

    parser.add_argument(
        "--max-model-len",
        type=int,
        default=4096,
        help="Maximum model context length"
    )

    parser.add_argument(
        "--gpu-memory-utilization",
        type=float,
        default=0.9,
        help="GPU memory utilization (0-1)"
    )

    parser.add_argument(
        "--tensor-parallel-size",
        type=int,
        default=1,
        help="Number of GPUs for tensor parallelism"
    )

    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Verbose output"
    )

    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    print_banner()

    # Dispatch in priority order: list models, serve, comparison, single model
    if args.list_models:
        list_models()
        return

    if args.serve_model:
        start_vllm_server(args)
        return

    if args.pre_model and args.post_model:
        run_comparison_analysis(args)
        return

    if args.model:
        run_single_model_analysis(args)
        return

    # Nothing requested: show help
    parser.print_help()
|
|
|
|
if __name__ == "__main__":
    main()
|
|