#!/usr/bin/env python3
"""
LLM Political Bias Analysis Pipeline - Main Entry Point
This script provides a CLI for running political bias analysis on LLMs
using vLLM for efficient model serving.
Usage:
# Start vLLM server first (in a separate terminal):
python -m vllm.entrypoints.openai.api_server --model mistralai/Mistral-7B-Instruct-v0.2
# Then run analysis:
python run_pipeline.py --model mistral-7b-instruct --dataset political_compass
# Or compare pre vs post training:
python run_pipeline.py --pre-model llama-2-7b --post-model llama-2-7b-chat
Author: Paris-Saclay University - Fairness in AI
"""
import argparse
import logging
import sys
import time
from pathlib import Path

# Make the local src/ package importable when running from the repo root
sys.path.insert(0, str(Path(__file__).parent))

from src.pipeline import BiasAnalysisPipeline, PrePostComparisonPipeline, PipelineConfig
from src.llms import VLLMServer, SUPPORTED_MODELS, MODEL_METADATA
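
# Expected shapes of the registries imported above (entries are illustrative;
# the authoritative definitions live in src/llms.py):
#
#   SUPPORTED_MODELS: shorthand -> HuggingFace ID, e.g.
#       {"mistral-7b-instruct": "mistralai/Mistral-7B-Instruct-v0.2"}
#   MODEL_METADATA: shorthand -> metadata dict with "origin" and "type" keys
#       (both read by list_models() below), e.g.
#       {"mistral-7b-instruct": {"origin": "France", "type": "chat"}}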

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

def print_banner():
    """Print welcome banner."""
    banner = """
╔══════════════════════════════════════════════════════════════════╗
β•‘               LLM Political Bias Analysis Pipeline                β•‘
β•‘               ────────────────────────────────────                β•‘
β•‘            Powered by vLLM | Paris-Saclay University              β•‘
β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
"""
    print(banner)

def list_models():
    """List all supported models."""
    print("\nπŸ“¦ Supported Models:\n")
    print(f"{'Model Name':<25} {'HuggingFace ID':<45} {'Origin':<15} {'Type':<10}")
    print("-" * 100)
    for name, hf_id in SUPPORTED_MODELS.items():
        metadata = MODEL_METADATA.get(name, {})
        origin = metadata.get("origin", "Unknown")
        model_type = metadata.get("type", "unknown")
        print(f"{name:<25} {hf_id:<45} {origin:<15} {model_type:<10}")
    print()

def run_single_model_analysis(args):
    """Run analysis on a single model."""
    config = PipelineConfig(
        model_name=args.model,
        api_base=args.api_base,
        max_tokens=args.max_tokens,
        temperature=args.temperature,
        num_runs=args.num_runs,
        output_dir=args.output,
        sentiment_method=args.sentiment_method,
    )
    pipeline = BiasAnalysisPipeline(config)

    # Load dataset (argparse already defaults --dataset to "political_compass",
    # so the fallback only covers an explicitly empty value)
    pipeline.load_dataset(args.dataset or "political_compass")

    # Run analysis
    logger.info(f"Running analysis on model: {args.model}")
    pipeline.run(progress_bar=True)

    # Print summary
    pipeline.print_summary()

    # Save results
    if args.save:
        json_path, csv_path = pipeline.save_results()
        print("\nπŸ“ Results saved to:")
        print(f" - {json_path}")
        print(f" - {csv_path}")

    return pipeline
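
# Optional sanity check that the vLLM endpoint is reachable before a long run.
# A minimal sketch using the openai client (assumes the client is installed
# and that the model id matches what the server actually loaded):
#
#   from openai import OpenAI
#   client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
#   resp = client.chat.completions.create(
#       model="mistralai/Mistral-7B-Instruct-v0.2",
#       messages=[{"role": "user", "content": "ping"}],
#       max_tokens=8,
#   )
#   print(resp.choices[0].message.content)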

def run_comparison_analysis(args):
    """Run pre- vs. post-training comparison."""
    logger.info(f"Running comparison: {args.pre_model} vs {args.post_model}")
    comparison = PrePostComparisonPipeline(
        pre_model=args.pre_model,
        post_model=args.post_model,
        api_base=args.api_base,
        num_runs=args.num_runs,
        output_dir=args.output,
    )

    # Load the dataset into both sub-pipelines
    if args.dataset:
        comparison.pre_pipeline.load_dataset(args.dataset)
        comparison.post_pipeline.load_dataset(args.dataset)

    # Run comparison
    comparison.run(args.dataset or "political_compass")

    # Print comparison
    comparison.print_comparison()

    return comparison
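
# A typical pre/post pair is a base checkpoint and its instruction-tuned
# counterpart, as in the module docstring:
#
#   python run_pipeline.py --pre-model llama-2-7b --post-model llama-2-7b-chat --num-runs 5
#
# (--num-runs 5 is illustrative; the CLI default is 3.)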

def start_vllm_server(args):
    """Start a vLLM server for the specified model."""
    model_name = args.serve_model
    # Resolve a shorthand to its HuggingFace ID for display; VLLMServer is
    # handed the original name and is expected to resolve it the same way.
    hf_model_id = SUPPORTED_MODELS.get(model_name, model_name)

    print(f"\nπŸš€ Starting vLLM server for: {hf_model_id}")
    print(f" Host: {args.host}")
    print(f" Port: {args.port}")
    print(f" Max model length: {args.max_model_len}")
    print(f" GPU memory utilization: {args.gpu_memory_utilization}")
    print("\nPress Ctrl+C to stop the server.\n")

    server = VLLMServer(
        model_name=model_name,
        host=args.host,
        port=args.port,
        max_model_len=args.max_model_len,
        gpu_memory_utilization=args.gpu_memory_utilization,
        tensor_parallel_size=args.tensor_parallel_size,
    )

    try:
        server.start(wait_for_ready=True)
        # Keep the process alive until interrupted
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        print("\n\nπŸ›‘ Stopping server...")
        server.stop()
        print("Server stopped.")

def main():
    parser = argparse.ArgumentParser(
        description="LLM Political Bias Analysis Pipeline",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # List available models
  python run_pipeline.py --list-models

  # Run analysis on a single model
  python run_pipeline.py --model mistral-7b-instruct --dataset political_compass

  # Compare pre vs post training
  python run_pipeline.py --pre-model llama-2-7b --post-model llama-2-7b-chat

  # Start vLLM server
  python run_pipeline.py --serve mistral-7b-instruct --port 8000

  # Use custom dataset
  python run_pipeline.py --model qwen-7b-chat --dataset data/my_dataset.json
"""
    )

    # Model selection
    parser.add_argument(
        "--model", "-m",
        type=str,
        help="Model name or shorthand (use --list-models to see options)"
    )
    parser.add_argument(
        "--list-models",
        action="store_true",
        help="List all supported models"
    )

    # Comparison mode
    parser.add_argument(
        "--pre-model",
        type=str,
        help="Pre-training model for comparison"
    )
    parser.add_argument(
        "--post-model",
        type=str,
        help="Post-training model for comparison"
    )

    # Server mode
    parser.add_argument(
        "--serve",
        dest="serve_model",
        type=str,
        help="Start vLLM server for the specified model"
    )

    # Dataset
    parser.add_argument(
        "--dataset", "-d",
        type=str,
        default="political_compass",
        help="Dataset name or path to JSON file"
    )

    # API settings
    parser.add_argument(
        "--api-base",
        type=str,
        default="http://localhost:8000/v1",
        help="vLLM API base URL"
    )

    # Generation settings
    parser.add_argument(
        "--max-tokens",
        type=int,
        default=512,
        help="Maximum tokens to generate"
    )
    parser.add_argument(
        "--temperature",
        type=float,
        default=0.7,
        help="Generation temperature"
    )
    parser.add_argument(
        "--num-runs",
        type=int,
        default=3,
        help="Number of runs per question"
    )

    # Output settings
    parser.add_argument(
        "--output", "-o",
        type=str,
        default="results",
        help="Output directory"
    )
    parser.add_argument(
        "--save",
        action="store_true",
        default=True,
        help="Save results to files (default)"
    )
    parser.add_argument(
        "--no-save",
        action="store_false",
        dest="save",
        help="Don't save results"
    )

    # Analysis settings
    parser.add_argument(
        "--sentiment-method",
        type=str,
        default="vader",
        choices=["vader", "textblob", "transformers"],
        help="Sentiment analysis method"
    )

    # vLLM server settings
    parser.add_argument(
        "--host",
        type=str,
        default="localhost",
        help="vLLM server host"
    )
    parser.add_argument(
        "--port",
        type=int,
        default=8000,
        help="vLLM server port"
    )
    parser.add_argument(
        "--max-model-len",
        type=int,
        default=4096,
        help="Maximum model context length"
    )
    parser.add_argument(
        "--gpu-memory-utilization",
        type=float,
        default=0.9,
        help="GPU memory utilization (0-1)"
    )
    parser.add_argument(
        "--tensor-parallel-size",
        type=int,
        default=1,
        help="Number of GPUs for tensor parallelism"
    )

    # Verbosity
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Verbose output"
    )

    args = parser.parse_args()

    # Set logging level
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Print banner
    print_banner()

    # Handle the mutually exclusive modes, most specific first
    if args.list_models:
        list_models()
        return

    if args.serve_model:
        start_vllm_server(args)
        return

    if args.pre_model and args.post_model:
        run_comparison_analysis(args)
        return

    if args.model:
        run_single_model_analysis(args)
        return

    # No mode specified
    parser.print_help()

if __name__ == "__main__":
    main()