Spaces:
Runtime error
Runtime error
File size: 6,769 Bytes
f3e6f32 e23f952 f3e6f32 5002e45 f3e6f32 bb41fcd 5002e45 f3e6f32 5002e45 f3e6f32 5002e45 f3e6f32 e23f952 f3e6f32 e23f952 f3e6f32 5002e45 f3e6f32 5002e45 f3e6f32 a7c1a2f f3e6f32 e23f952 f3e6f32 e23f952 f3e6f32 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 |
import asyncio
import datetime
import threading
import time
from huggingface_hub import HfApi, list_repo_files
from env import (
REPO_ID, TOKEN, SUBMISSION_DATASET,
INTERNAL_DATASET, BENCHMARK_INTERNAL_EVALUATE_DATASET_FILE, EVALUATE_RESULT_DATASET,
llm_config, EVALUATE_RESULT_DATASET_FILE
)
from loguru import logger
from schemas import AgentOutputItem, EnsembleEvaluateScore
from score import init_evaluators, score_in_threadpool
from datasets import load_dataset, VerificationMode, Dataset, concatenate_datasets
from utils import parse_eval_dataset
# Shared Hugging Face Hub client, constructed with TOKEN.
API = HfApi(token=TOKEN)
# Both datasets below are fetched at import time, with a forced re-download
# and with verification checks disabled (see the load_dataset arguments).
benchmark_internal_evaluate_dataset = load_dataset(INTERNAL_DATASET, data_files=BENCHMARK_INTERNAL_EVALUATE_DATASET_FILE, token=TOKEN, verification_mode=VerificationMode.NO_CHECKS, download_mode="force_redownload",trust_remote_code=True)
# Previously published evaluation results; on_new_files reads its 'train' split.
eval_results = load_dataset(EVALUATE_RESULT_DATASET, data_files=EVALUATE_RESULT_DATASET_FILE, token=TOKEN, verification_mode=VerificationMode.NO_CHECKS, download_mode="force_redownload",trust_remote_code=True)
# NOTE(review): parse_eval_dataset presumably converts the raw dataset into the
# structure the evaluators expect -- confirm against utils.parse_eval_dataset.
benchmark_dataset = parse_eval_dataset(benchmark_internal_evaluate_dataset) # type: ignore
# Evaluators are built once here from the benchmark data and llm_config.
evaluator_list = init_evaluators(benchmark_dataset, llm_config)
def get_hf_dataset_files(dataset_name):
    """Return the set of file paths currently present in a Hub dataset repo.

    Args:
        dataset_name: Repo id of the dataset on the Hugging Face Hub.

    Returns:
        set[str]: Every file path the Hub reports for that dataset.
    """
    # Pass TOKEN explicitly, like every other Hub call in this file, so that
    # listing also works when the dataset is private or gated.
    return set(list_repo_files(dataset_name, repo_type="dataset", token=TOKEN))
def format_score_result(score_results: "list[EnsembleEvaluateScore]") -> tuple[float, float, float, float]:
    """Average the ensemble scores overall and per difficulty level.

    Args:
        score_results: Scored items; each exposes ``level`` and
            ``total_score``. Only levels 1, 2 and 3 are counted, items with
            any other level are ignored.

    Returns:
        ``(total, level1, level2, level3)`` mean scores, each rounded to two
        decimals. A level with no items -- and the total when no item has a
        counted level -- is reported as ``0.0``.
    """
    buckets = {1: [], 2: [], 3: []}
    for result in score_results:
        bucket = buckets.get(result.level)
        if bucket is not None:
            bucket.append(result.total_score)

    def _mean(values):
        return round(sum(values) / len(values), 2) if values else 0.0

    # Guard the overall mean as well: a non-empty input whose items all carry
    # an unknown level used to divide by zero here.
    counted = sum(len(bucket) for bucket in buckets.values())
    grand_total = sum(sum(bucket) for bucket in buckets.values())
    total_score = round(grand_total / counted, 2) if counted else 0.0
    return total_score, _mean(buckets[1]), _mean(buckets[2]), _mean(buckets[3])
# Results published so far by this process. Seeded lazily from the dataset
# loaded at import time, then extended with every newly scored submission.
_known_eval_results = None


def _parse_submission_name(file):
    """Extract ``(model, organization)`` from a submission file path.

    The last path component is split on ``<``; the model is the text of the
    first part up to any ``>``, the organization the text of the second part
    up to any ``>``.

    Raises:
        ValueError: If the file name contains no ``<`` separator.
    """
    file_name = file.split('/')[-1]
    names = file_name.split('<')
    if len(names) < 2:
        raise ValueError(f"unexpected submission file name: {file_name!r}")
    return names[0].split('>')[0], names[1].split('>')[0]


def on_new_files(new_files):
    """Score every newly uploaded submission and publish the updated results.

    For each file: parse model/organization from its name, download and parse
    the JSON, score it with the shared evaluators, append a result record and
    upload the full result list.

    Args:
        new_files: Iterable of file paths inside the submission dataset.

    Files with an unexpected name, unreadable content, or that fail scoring
    are logged and skipped so that one bad upload cannot stop the others.
    """
    global _known_eval_results
    logger.info(f"New Files Found {new_files}")
    if _known_eval_results is None:
        _known_eval_results = list(eval_results['train'])

    for file in new_files:
        try:
            model, organization = _parse_submission_name(file)
        except ValueError as e:
            # Repo files that are not submissions (README, .gitattributes, ...)
            # end up here as well.
            logger.warning(f"Skipping {file}: {e}")
            continue

        json_data = read_json_file(file)
        if not json_data:
            continue

        try:
            agent_outputs = [AgentOutputItem(**item) for item in json_data]
            score_results: list[EnsembleEvaluateScore] = asyncio.run(score_in_threadpool(
                evaluator_list=evaluator_list,
                agent_output_list=agent_outputs,
                benchmark_data=benchmark_dataset
            ))
        except Exception:
            logger.exception(f"Failed to score submission {file}")
            continue

        total_score, l1_total_score, l2_total_score, l3_total_score = format_score_result(score_results)
        new_eval_result = {
            "model": model,
            "model_family": "",
            "url": "",
            "organisation": organization,
            "score": total_score,
            "score_level1": l1_total_score,
            "score_level2": l2_total_score,
            "score_level3": l3_total_score,
            "date": datetime.datetime.now().strftime("%Y-%m-%d")
        }
        logger.info(f"New evaluation result: {new_eval_result}")

        # Append to the accumulated list rather than to a fresh copy of the
        # import-time dataset; otherwise each upload would overwrite the
        # results added for earlier submissions.
        _known_eval_results.append(new_eval_result)
        update_eval_results_json(_known_eval_results)
def update_eval_results_json(eval_results_list):
    """Serialize the evaluation results to JSON and upload them to the Hub.

    The payload replaces EVALUATE_RESULT_DATASET_FILE in the
    EVALUATE_RESULT_DATASET dataset repo.

    Args:
        eval_results_list: List of result records. Values json cannot encode
            natively are stringified with ``str``.

    Upload failures are logged and swallowed (best effort); serialization
    errors propagate to the caller.
    """
    import json

    # Serialize in memory and hand the bytes straight to upload_file: no
    # temporary file to create, clean up, or leak when serialization fails.
    payload = json.dumps(
        eval_results_list, ensure_ascii=False, indent=4, default=str
    ).encode("utf-8")
    try:
        API.upload_file(
            path_or_fileobj=payload,
            path_in_repo=EVALUATE_RESULT_DATASET_FILE,
            repo_id=EVALUATE_RESULT_DATASET,
            repo_type='dataset',
            token=TOKEN,
            commit_message="更新评测结果json"
        )
    except Exception as e:
        # Message reads: "failed to upload evaluation results json".
        logger.error(f"上传评测结果json失败: {e}")
def read_json_file(file_path):
    """
    Download a JSON file from the submission dataset and parse it.

    Args:
        file_path (str): Path of the file inside the submission dataset repo
    Returns:
        dict/list: Parsed JSON content, or None if downloading or parsing
        failed (the error is logged)
    """
    import json
    from huggingface_hub import hf_hub_download

    parsed = None
    try:
        # Resolve the repo file to a local copy first
        cached_copy = hf_hub_download(
            filename=file_path,
            repo_id=SUBMISSION_DATASET,
            repo_type='dataset',
            token=TOKEN,
        )
        with open(cached_copy, 'r', encoding='utf-8') as handle:
            parsed = json.load(handle)
        logger.info(f"Successfully read file: {file_path}")
    except Exception as err:
        logger.error(f"Error reading file {file_path}: {str(err)}")
        return None
    return parsed
def monitor_hf_dataset(dataset_name, interval=60):
    """Poll a Hub dataset forever and hand newly appeared files to on_new_files.

    The first successful listing is taken as the baseline; only files that
    show up in a later listing are reported.

    Args:
        dataset_name: Repo id of the dataset to watch.
        interval: Seconds to sleep between two polls.

    This function never returns. Errors while listing the repo or while
    processing new files are logged and the loop keeps polling, so a
    transient Hub/network failure does not end the monitoring for good.
    """
    last_files = None
    while True:
        try:
            current_files = get_hf_dataset_files(dataset_name)
        except Exception:
            logger.exception(f"Failed to list files of dataset {dataset_name}")
        else:
            logger.debug(f"Dataset {dataset_name} files: {current_files}")
            new_files = current_files - last_files if last_files is not None else set()
            # Advance the snapshot before processing: a file that makes
            # on_new_files fail must not be re-scored on every poll.
            last_files = current_files
            if new_files:
                try:
                    on_new_files(new_files)
                except Exception:
                    logger.exception(f"Failed to process new files {new_files}")
        time.sleep(interval)
def start_monitoring_delayed(delay_seconds=30):
    """Start the dataset monitor in a background thread after a delay.

    The delay lets the Space finish starting up before polling begins.

    Args:
        delay_seconds: Seconds to wait before the monitor starts polling.

    Returns:
        threading.Thread: The already-started daemon thread that waits out
        the delay and then runs the monitor. Join it to block for as long as
        monitoring runs.
    """
    def start_monitor():
        # Wait inside the worker itself so the returned thread is alive (and
        # joinable) right away.
        time.sleep(delay_seconds)
        logger.info("开始监控 HuggingFace 数据集变化...")
        monitor_hf_dataset(SUBMISSION_DATASET, interval=60)

    monitor_thread = threading.Thread(target=start_monitor, daemon=True)
    monitor_thread.start()
    logger.info(f"监控任务将在 {delay_seconds} 秒后启动")
    return monitor_thread


if __name__ == "__main__":
    # The monitor runs in a daemon thread, so the main thread has to block on
    # it; otherwise the interpreter exits and takes the monitor down with it.
    start_monitoring_delayed(30).join()
|