"""Environment configuration for the CAIA-evaluate benchmark leaderboard.

Defines the Hugging Face repository/dataset identifiers and the LLM
configuration used by the evaluation pipeline. All secrets (HF and LLM API
keys) are read from environment variables, never hard-coded.

Author: Zhejian
Initial commit: f3e6f32
"""
import os
# Hugging Face organization that owns every repo/dataset referenced below.
OWNER = "cyberco"
# Benchmark data version; used as the directory prefix for versioned files.
VERSION = "2025_v1"
# Repo hosting the public leaderboard Space.
REPO_ID = f"{OWNER}/CAIA-Benchmark-Leaderboard"
# Hugging Face API token; must be set in the environment for any write access.
TOKEN = os.getenv("HF_TOKEN")
SUBMISSION_DATASET_PUBLIC = f"{OWNER}/public_submissions" # add the missing variable
# Private dataset with the benchmark's internal evaluation data.
INTERNAL_DATASET = f"{OWNER}/caia_internal"
# Public dataset where evaluation results are published.
EVALUATE_RESULT_DATASET = f"{OWNER}/public_results"
# Private mirror of submissions (internal-only copy).
SUBMISSION_DATASET = f"{OWNER}/submissions_internal"
# Dataset storing submitter contact information.
CONTACT_DATASET = f"{OWNER}/contact_info"
# Paths inside the datasets; each filename is overridable via an env var,
# falling back to the bundled example file.
BENCHMARK_INTERNAL_EVALUATE_DATASET_FILE = f"{VERSION}/{os.getenv('BENCHMARK_INTERNAL_EVALUATE_DATASET', 'example_evaluate_data.json')}"
EVALUATE_RESULT_DATASET_FILE = f"{VERSION}/{os.getenv('EVALUATE_RESULT_DATASET', 'example_result.json')}"
CONTACT_DATASET_FILE = f"{os.getenv('CONTACT_DATASET_FILE', 'example_contact_info.json')}"
# LLM settings for the evaluation pipeline.
#   - "parse_llm_config": single deterministic model used to parse submissions.
#   - "evaluate_llm_configs": panel of judge models; each entry carries its own
#     API key (and base_url for non-OpenAI providers) plus sampling params.
# API keys/base URLs are resolved from environment variables at import time.
llm_config = {
    "parse_llm_config": {
        "api_key": os.getenv("OPENAI_API_KEY", None),
        "model_name": "gpt-4.1-mini-2025-04-14",
        # temperature 0 keeps parsing deterministic
        "model_params": {"temperature": 0},
    },
    "evaluate_llm_configs": [
        {
            "api_key": os.getenv("OPENAI_API_KEY", None),
            "model_name": "o3-2025-04-16",
            "model_params": {"reasoning_effort": "medium"},
        },
        {
            "api_key": os.getenv("OPENAI_API_KEY", None),
            "model_name": "gpt-4.1",
            "model_params": {"temperature": 0.2},
        },
        {
            "api_key": os.getenv("DEEPSEEK_API_KEY", None),
            # DeepSeek is served via an OpenAI-compatible endpoint, hence base_url
            "base_url": os.getenv("DEEPSEEK_BASE_URL", None),
            "model_name": "deepseek-r1-250120",
            "model_params": {"temperature": 0.2},
        },
    ],
}