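"""Submission handler for the TRUEBench leaderboard Space.

Validates an incoming model submission (email, submitter rights, rate
limits, model card/license, YAML run configuration) and writes the request
as a JSON file to either the eval queue or the failed-eval queue dataset
repo.
"""
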
import json
import os
import re
from datetime import datetime, timedelta, timezone
from typing import Optional

import gradio as gr
import yaml
from huggingface_hub import snapshot_download

from src.display.formatting import styled_error, styled_message, styled_warning
from src.envs import (
    API,
    EVAL_REQUESTS_PATH,
    FAILED_EVAL_REQUESTS_PATH,
    FAILED_QUEUE_REPO,
    QUEUE_REPO,
    REPO_ID,
    TOKEN,
)
from src.submission.check_validity import (
    already_submitted_models,
    check_model_card,
    get_model_size,
)
from utils import download_with_restart

REQUESTED_MODELS = None
USERS_TO_SUBMISSION_DATES = None


def restart_space():
    API.restart_space(repo_id=REPO_ID)


def add_new_eval_option(
    contact_email: str,
    model: str,
    model_type: str,
    think_type: str,
    precision: str,
    response_prefix: str,
    requirements: str,
    user_state: str,
    organization_list: list,
    yml_textbox: str,
    upbox,
):
    # First-error-wins: every check below records an error only if none has
    # been recorded yet, so the user is shown the earliest failing validation.
    ERROR_MESSAGE = None

    # Validate email format
    email_regex = r"^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$"
    if not re.match(email_regex, contact_email):
        if ERROR_MESSAGE is None:
            ERROR_MESSAGE = "Please provide a valid email address."

    # Synchronize: just before processing the submission, pull the latest
    # QUEUE_REPO into EVAL_REQUESTS_PATH and the latest FAILED_QUEUE_REPO
    # into FAILED_EVAL_REQUESTS_PATH.
    download_with_restart(
        snapshot_download,
        repo_id=QUEUE_REPO,
        local_dir=EVAL_REQUESTS_PATH,
        repo_type="dataset",
        token=TOKEN,
        restart_func=restart_space,
    )
    download_with_restart(
        snapshot_download,
        repo_id=FAILED_QUEUE_REPO,
        local_dir=FAILED_EVAL_REQUESTS_PATH,
        repo_type="dataset",
        token=TOKEN,
        restart_func=restart_space,
    )
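
    # `download_with_restart` (from utils) is assumed to invoke the wrapped
    # `snapshot_download` and fall back to `restart_func` if the download
    # fails irrecoverably. Likewise, `already_submitted_models` is assumed to
    # return (requested-model identifiers, {user_or_org: [submission
    # entries]}); only the per-user entries are consumed by the rate-limit
    # and duplicate checks below.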
    REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)

    user_name = ""
    model_path = model
    if "/" in model:
        user_name = model.split("/")[0]
        model_path = model.split("/")[1]

    precision = precision.split(" ")[0]  # keep only the first token of the precision label
    KST = timezone(timedelta(hours=9))
    current_time = datetime.now(KST).strftime("%Y-%m-%dT%H:%M:%S %z")  # e.g. "2025-01-01T09:30:00 +0900"
    benchmark = "TRUEBench"

    # Check submitter qualification
    if user_name != user_state and user_name not in organization_list:
        if ERROR_MESSAGE is None:
            ERROR_MESSAGE = "The submitter does not have submission rights for this model."

    # Has the organization already submitted three times in the last 24 hours?
    submission_times = [
        item["submitted_time"]
        for item in USERS_TO_SUBMISSION_DATES.get(user_name, [])  # first-time submitters have no history
        if item["benchmark"] == benchmark
    ]
    submission_cnt = 0
    for submitted_time in submission_times:
        hours_diff = (
            datetime.strptime(current_time, "%Y-%m-%dT%H:%M:%S %z")
            - datetime.strptime(submitted_time, "%Y-%m-%dT%H:%M:%S %z")
        ).total_seconds() / 3600
        if hours_diff <= 24:
            submission_cnt += 1
    if submission_cnt >= 3:
        if ERROR_MESSAGE is None:
            ERROR_MESSAGE = "The organization has already submitted three times for this benchmark today."

    # Does the model actually exist, and is its info retrievable?
    revision = "main"
    model_info = None
    model_size = "Unknown"
    try:
        model_info = API.model_info(repo_id=model, revision=revision)
        model_size = get_model_size(model_info=model_info, precision=precision)
    except Exception:
        if ERROR_MESSAGE is None:
            ERROR_MESSAGE = "Could not get your model information. Please make sure it is filled in correctly."

    # Were the model card and license filled in?
    license = "Unknown"
    if model_info is not None:
        try:
            license = model_info.cardData["license"]
        except Exception:
            if ERROR_MESSAGE is None:
                ERROR_MESSAGE = "Please select a license for your model."

    modelcard_OK, error_msg = check_model_card(model)
    if not modelcard_OK:
        if ERROR_MESSAGE is None:
            ERROR_MESSAGE = error_msg

    # Response prefix: required when 'Think' is on; cleared otherwise.
    if think_type == "On":
        if response_prefix == "":
            if ERROR_MESSAGE is None:
                ERROR_MESSAGE = "It is required to fill in the response prefix when 'Think' is 'On'."
    else:
        response_prefix = ""
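    # Which prefix applies is model-specific; for many thinking-mode models it
    # would be an opening tag such as "<think>" (an illustrative assumption --
    # this handler only enforces that the field is non-empty).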

    # Handle YAML config input: an uploaded file takes precedence over the textbox.
    config_dict = None
    if upbox is not None and getattr(upbox, "name", ""):
        # Case 1: a file was uploaded
        file_name = upbox.name
        if not file_name.lower().endswith((".yaml", ".yml")):
            if ERROR_MESSAGE is None:
                ERROR_MESSAGE = "Please submit a .yaml or .yml file."
        try:
            with open(file_name, "r", encoding="utf-8") as f:
                config_dict = yaml.safe_load(f)
        except yaml.YAMLError:
            if ERROR_MESSAGE is None:
                ERROR_MESSAGE = "The file is not valid YAML."
        except Exception as e:
            if ERROR_MESSAGE is None:
                ERROR_MESSAGE = f"An error occurred while reading the file: {e}"
        if config_dict is None:
            if ERROR_MESSAGE is None:
                ERROR_MESSAGE = "The YAML file is empty or invalid."
    else:
        # Case 2: no file uploaded, fall back to the textbox
        if not yml_textbox or not yml_textbox.strip():
            if ERROR_MESSAGE is None:
                ERROR_MESSAGE = "Please fill in the configuration box or submit a YAML file."
        try:
            # `or ""` guards against a None textbox value, which yaml.safe_load
            # cannot handle; safe_load("") simply returns None.
            config_dict = yaml.safe_load(yml_textbox or "")
        except yaml.YAMLError:
            if ERROR_MESSAGE is None:
                ERROR_MESSAGE = "Please provide a valid configuration."
        if config_dict is None:
            if ERROR_MESSAGE is None:
                ERROR_MESSAGE = "Please provide a valid configuration."

    # Restrict the config to the supported top-level sections.
    allowed_keys = {"llm_serve_args", "sampling_params", "extra_body"}
    if not isinstance(config_dict, dict):
        if ERROR_MESSAGE is None:
            ERROR_MESSAGE = "The configuration must be a YAML dictionary at the top level."
    else:
        # Guarding with isinstance avoids an AttributeError when the config
        # failed to parse (config_dict is None) or is not a mapping.
        extra_keys = set(config_dict.keys()) - allowed_keys
        if extra_keys:
            if ERROR_MESSAGE is None:
                ERROR_MESSAGE = (
                    "Only the following keys are allowed in the configuration: "
                    "llm_serve_args, sampling_params, extra_body. "
                    f"Found invalid keys: {', '.join(sorted(extra_keys))}."
                )
    configs = json.dumps(config_dict, indent=4, ensure_ascii=False)
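
    # An accepted config, for reference (the values are illustrative only;
    # which fields each section supports depends on the serving backend):
    #
    #   llm_serve_args:
    #     max_model_len: 8192
    #   sampling_params:
    #     temperature: 0.7
    #     top_p: 0.95
    #   extra_body:
    #     repetition_penalty: 1.05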

    # Check for a duplicate submission of the same model
    submission_times = [
        item["submitted_time"]
        for item in USERS_TO_SUBMISSION_DATES.get(user_name, [])
        if item["benchmark"] == benchmark and item["model"] == model
    ]
    submission_total_cnt = len(submission_times)
    submission_cnt = 0
    for submitted_time in submission_times:
        hours_diff = (
            datetime.strptime(current_time, "%Y-%m-%dT%H:%M:%S %z")
            - datetime.strptime(submitted_time, "%Y-%m-%dT%H:%M:%S %z")
        ).total_seconds() / 3600
        if hours_diff <= 24:
            submission_cnt += 1
    if submission_cnt >= 1:
        if ERROR_MESSAGE is None:
            ERROR_MESSAGE = "This model has already been submitted within the last 24 hours."
    if submission_total_cnt >= 3:
        if ERROR_MESSAGE is None:
            ERROR_MESSAGE = "This model has already been submitted three times for this benchmark."
| print("Creating eval file") | |
| if ERROR_MESSAGE is None: | |
| OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}/{benchmark}_{model_path}" | |
| else: | |
| OUT_DIR = f"{FAILED_EVAL_REQUESTS_PATH}/{user_name}/{benchmark}_{model_path}" | |
| os.makedirs(OUT_DIR, exist_ok=True) | |
| current_time_replaced = current_time.replace("-", "").replace(":", "").replace("T", "_").split()[0] | |
| out_path = f"{OUT_DIR}/{current_time_replaced}.json" | |

    # Seems good; create the eval entry. Failed submissions are recorded too,
    # with the validation error attached, so they can be inspected later.
    print("Adding new eval")
    eval_entry = {
        "benchmark": benchmark,
        "contact_email": contact_email,
        "model": model,
        "type": "open",
        "model_type": model_type,
        "think_type": think_type,
        "precision": precision,
        "response_prefix": response_prefix,
        "requirements": requirements,
        "status": "PENDING" if ERROR_MESSAGE is None else "Failed",
        "submitted_time": current_time,
        "likes": getattr(model_info, "likes", -1),
        "params": model_size,
        "license": license,
        "private": False,
        "configs": configs,
    }
    if ERROR_MESSAGE is not None:
        eval_entry["error_message"] = ERROR_MESSAGE

    with open(out_path, "w") as f:
        f.write(json.dumps(eval_entry))
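
    # NOTE: path_in_repo below is derived by splitting out_path on
    # "eval-queue/" / "failed-eval-queue/", so EVAL_REQUESTS_PATH and
    # FAILED_EVAL_REQUESTS_PATH are assumed to contain those directory
    # names; otherwise the split raises IndexError.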
| print("Uploading eval file") | |
| if ERROR_MESSAGE is None: | |
| API.upload_file( | |
| path_or_fileobj=out_path, | |
| path_in_repo=out_path.split("eval-queue/")[1], | |
| repo_id=QUEUE_REPO, | |
| repo_type="dataset", | |
| commit_message=f"Add {model} to eval queue", | |
| ) | |
| else: | |
| API.upload_file( | |
| path_or_fileobj=out_path, | |
| path_in_repo=out_path.split("failed-eval-queue/")[1], | |
| repo_id=FAILED_QUEUE_REPO, | |
| repo_type="dataset", | |
| commit_message=f"Add {model} to failed eval queue", | |
| ) | |
| # Remove the local file | |
| os.remove(out_path) | |

    if ERROR_MESSAGE is None:
        return styled_message("Your request has been submitted to the evaluation queue!")
    else:
        return styled_error(ERROR_MESSAGE)
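
# How a Gradio UI might wire up the handler above -- a minimal sketch for
# reference only. The component names and layout are assumptions; the actual
# app (presumably app.py in this Space) defines its own components:
#
#     with gr.Blocks() as demo:
#         contact_email = gr.Textbox(label="Contact email")
#         model = gr.Textbox(label="Model (org/name)")
#         yml_textbox = gr.Textbox(label="YAML config")
#         upbox = gr.File(label="YAML config file")
#         submit_btn = gr.Button("Submit")
#         result = gr.Markdown()
#         submit_btn.click(
#             add_new_eval_option,
#             inputs=[contact_email, model, ...],  # remaining fields elided
#             outputs=result,
#         )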