"""Settings for the CAIA benchmark leaderboard.

Every value is computed once, at import time. Anything read from the
environment therefore reflects the process environment at that moment;
changing a variable afterwards has no effect on these constants.
"""
import os

# Namespace that prefixes every repository / dataset id below.
OWNER = "cyberco"
# Version tag, used as the directory prefix of the versioned data files.
VERSION = "2025_v1"

REPO_ID = f"{OWNER}/CAIA-Benchmark-Leaderboard"
# Read from the HF_TOKEN environment variable; None when it is unset.
TOKEN = os.getenv("HF_TOKEN")

SUBMISSION_DATASET_PUBLIC = f"{OWNER}/public_submissions"

# Variables that were previously missing.
INTERNAL_DATASET = f"{OWNER}/caia_internal"
EVALUATE_RESULT_DATASET = f"{OWNER}/public_results"
SUBMISSION_DATASET = f"{OWNER}/submissions_internal"
CONTACT_DATASET = f"{OWNER}/contact_info"

# File names, each overridable through an environment variable and falling
# back to a bundled example name. The first two live under VERSION/.
BENCHMARK_INTERNAL_EVALUATE_DATASET_FILE = (
    f"{VERSION}/"
    f"{os.getenv('BENCHMARK_INTERNAL_EVALUATE_DATASET', 'example_evaluate_data.json')}"
)
# NOTE(review): the environment variable EVALUATE_RESULT_DATASET shares its
# name with the constant above but supplies a *file name*, not a dataset id.
# The name is kept as-is so existing deployments keep working.
EVALUATE_RESULT_DATASET_FILE = (
    f"{VERSION}/{os.getenv('EVALUATE_RESULT_DATASET', 'example_result.json')}"
)
# Not prefixed with VERSION, unlike the two file names above.
CONTACT_DATASET_FILE = os.getenv("CONTACT_DATASET_FILE", "example_contact_info.json")

# LLM settings: a single "parse" model plus a list of "evaluate" models.
# Each entry carries the model name, an API key read from the environment
# (None when the variable is unset) and model-specific call parameters.
llm_config = {
    "parse_llm_config": {
        "model_name": "gpt-4.1-mini-2025-04-14",
        "api_key": os.getenv("OPENAI_API_KEY"),
        "model_params": {
            "temperature": 0,
        },
    },
    "evaluate_llm_configs": [
        {
            "model_name": "o3-2025-04-16",
            "api_key": os.getenv("OPENAI_API_KEY"),
            "model_params": {
                "reasoning_effort": "medium",
            },
        },
        {
            "model_name": "gpt-4.1",
            "api_key": os.getenv("OPENAI_API_KEY"),
            "model_params": {
                "temperature": 0.2,
            },
        },
        {
            # The only entry with its own endpoint: both the key and the
            # base URL come from DEEPSEEK_* environment variables.
            "model_name": "deepseek-r1-250120",
            "api_key": os.getenv("DEEPSEEK_API_KEY"),
            "base_url": os.getenv("DEEPSEEK_BASE_URL"),
            "model_params": {
                "temperature": 0.2,
            },
        },
    ],
}