""" Configuration for RGB RAG Evaluation """ import os from typing import List # Data directory DATA_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data") # Results directory RESULTS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "results") # Dataset files DATASETS = { "noise_robustness": "en_refine.json", "negative_rejection": "en_refine.json", "information_integration": "en_int.json", "counterfactual_robustness": "en_fact.json", } # Dataset URLs (from GitHub) DATASET_URLS = { "en_refine.json": "https://raw.githubusercontent.com/chen700564/RGB/main/data/en_refine.json", "en_int.json": "https://raw.githubusercontent.com/chen700564/RGB/main/data/en_int.json", "en_fact.json": "https://raw.githubusercontent.com/chen700564/RGB/main/data/en_fact.json", } # Default models to evaluate (first 5 as primary) DEFAULT_MODELS: List[str] = [ "meta-llama/llama-4-maverick-17b-128e-instruct", # Llama 4 Maverick 17B "meta-llama/llama-prompt-guard-2-86m", # Llama Prompt Guard 2 86M "llama-3.1-8b-instant", # Llama 3.1 8B - Fast "openai/gpt-oss-120b", # GPT OSS 120B "moonshotai/kimi-k2-instruct", # Moonshot Kimi K2 Instruct ] # Additional available models ADDITIONAL_MODELS: List[str] = [ "moonshotai/kimi-k2-instruct-0905", # Kimi K2 Instruct 0905 "moonshotai/kimi-k2-instruct", # Kimi K2 Instruct "llama-3.3-70b-versatile", # Llama 3.3 70B "meta-llama/llama-4-scout-17b-16e-instruct", # Llama 4 Scout 17B "qwen/qwen3-32b", # Qwen 3 32B ] # All available models (for UI dropdown) ALL_MODELS: List[str] = DEFAULT_MODELS + ADDITIONAL_MODELS # Evaluation settings EVALUATION_CONFIG = { "temperature": 0.0, # Use deterministic outputs for reproducibility "max_tokens": 1024, # Maximum response tokens "rate_limit_delay": 0.5, # Seconds between API calls "retry_count": 3, # Number of retries on failure } # Metrics to report METRICS = { "noise_robustness": ["accuracy"], "negative_rejection": ["rejection_rate"], "information_integration": ["accuracy"], "counterfactual_robustness": ["error_detection_rate", "error_correction_rate"], }