import json
import os
import time
from datetime import datetime, timezone

from huggingface_hub import ModelCard, snapshot_download

from src.display.formatting import styled_error, styled_message, styled_warning
from src.envs import (
    API,
    EVAL_REQUESTS_PATH,
    DYNAMIC_INFO_PATH,
    DYNAMIC_INFO_FILE_PATH,
    DYNAMIC_INFO_REPO,
    H4_TOKEN,
    QUEUE_REPO,
    RATE_LIMIT_PERIOD,
    RATE_LIMIT_QUOTA,
    REPO,
    GIT_REQUESTS_PATH,
    GIT_STATUS_PATH,
    GLOBAL_COND,
)
from src.leaderboard.filter_models import DO_NOT_SUBMIT_MODELS
from src.submission.check_validity import (
    already_submitted_models,
    check_model_card,
    get_model_size,
    get_quantized_model_parameters_memory,
    is_model_on_hub,
    is_gguf_on_hub,
    user_submission_permission,
    get_model_tags,
)
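# Caches of prior submissions; populated lazily on the first call below.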
REQUESTED_MODELS = None
USERS_TO_SUBMISSION_DATES = None

def add_new_eval(
    model: str,
    revision: str,
    private: bool,
    compute_dtype: str = "float16",
    precision: str = "4bit",
    weight_dtype: str = "int4",
    gguf_ftype: str = "*Q4_0.gguf",
):
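    """Validate a submitted model and queue it for evaluation.

    Checks rate limits, Hub availability, the license, and the quantization
    config, then writes the request JSON both to the dataset queue and to the
    git requests/status trees. Returns a styled HTML message for the UI.
    """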
    global REQUESTED_MODELS
    global USERS_TO_SUBMISSION_DATES
    if not REQUESTED_MODELS:
        REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(GIT_STATUS_PATH)

    quant_type = None
    user_name = ""
    model_path = model
    if "/" in model:
        user_name = model.split("/")[0]
        model_path = model.split("/")[1]

    precision = precision.split(" ")[0]
    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    # Is the user rate limited?
    if user_name != "":
        user_can_submit, error_msg = user_submission_permission(
            user_name, USERS_TO_SUBMISSION_DATES, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA
        )
        if not user_can_submit:
            return styled_error(error_msg)
    # Did the model authors forbid its submission to the leaderboard?
    if model in DO_NOT_SUBMIT_MODELS:
        return styled_warning("Model authors have requested that their model not be submitted to the leaderboard.")
    # Does the model actually exist?
    if revision == "":
        revision = "main"

    architecture = "?"
    downloads = 0
    created_at = ""
    gguf_on_hub, error, gguf_files, new_gguf_ftype = is_gguf_on_hub(repo_id=model, filename=gguf_ftype)
    if new_gguf_ftype is not None:
        gguf_ftype = new_gguf_ftype
    model_on_hub, error, model_config = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True)

    # Is the model on the hub?
    if (not model_on_hub or model_config is None) and (not gguf_on_hub or gguf_files is None):
        return styled_error(f'Model "{model}" {error}')

    if model_config is not None:
        architectures = getattr(model_config, "architectures", None)
        if architectures:
            architecture = ";".join(architectures)
        downloads = getattr(model_config, "downloads", 0)
        created_at = getattr(model_config, "created_at", "")
        quantization_config = getattr(model_config, "quantization_config", None)
    if gguf_files is not None:
        # GGUF repos ship no transformers config, so treat them as llama.cpp models.
        architecture = "?"
        downloads = 0
        created_at = ""
        quantization_config = None
        quant_type = "llama.cpp"
    # Is the model info correctly filled?
    try:
        model_info = API.model_info(repo_id=model, revision=revision)
    except Exception:
        return styled_error("Could not get your model information. Please make sure it is filled out properly.")

    # Were the model card and license filled?
    try:
        if model_info.cardData is None:
            license = "unknown"
        else:
            license = model_info.cardData.get("license", "unknown")
    except Exception:
        return styled_error("Please select a license for your model.")

    modelcard_OK, error_msg, model_card = check_model_card(model)
    # The model card may legitimately be missing, so this check is disabled:
    # if not modelcard_OK:
    #     return styled_error(error_msg)

    tags = get_model_tags(model_card, model)
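    # The block below parses quantization_config, which on the Hub often
    # looks like this (illustrative GPTQ values, not from any specific model):
    #     {"quant_method": "gptq", "bits": 4, "group_size": 128}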
    # Seems good, creating the eval
    print("Adding new eval")

    script = "ITREX"
    hardware = "cpu"
    precision = "4bit"
    if quantization_config is not None:
        quant_method = quantization_config.get("quant_method", None)
        if "bnb_4bit_quant_type" in quantization_config:
            quant_method = "bitsandbytes"
            quant_type = "bitsandbytes"
            hardware = "gpu"
            # Default to False so an absent flag does not flip the precision.
            if quantization_config.get("load_in_4bit", False):
                precision = "4bit"
            if quantization_config.get("load_in_8bit", False):
                precision = "8bit"
        if quant_method == "gptq":
            hardware = "cpu"
            quant_type = "GPTQ"
            precision = f"{quantization_config.get('bits', 4)}bit"
        if quant_method == "awq":
            hardware = "gpu"
            quant_type = "AWQ"
            precision = f"{quantization_config.get('bits', 4)}bit"
        if quant_method == "aqlm":
            hardware = "gpu"
            quant_type = "AQLM"
            # Effective bits per weight = codebook bits * codebooks / group size.
            nbits_per_codebook = quantization_config.get("nbits_per_codebook")
            num_codebooks = quantization_config.get("num_codebooks")
            in_group_size = quantization_config.get("in_group_size")
            bits = int(nbits_per_codebook * num_codebooks / in_group_size)
            precision = f"{bits}bit"
        if quant_method and "auto-round" in quant_method:
            hardware = "gpu"
            quant_type = "AutoRound"
            precision = f"{quantization_config.get('bits', 4)}bit"
        if precision == "4bit":
            weight_dtype = "int4"
        elif precision == "3bit":
            weight_dtype = "int3"
        elif precision == "2bit":
            weight_dtype = "int2"

    if quant_type is None or quant_type == "":
        # Unquantized fp32/fp16/bf16 models are evaluated as-is.
        # return styled_error("Please select a quantization method such as GPTQ, AWQ, etc.")
        quant_type = None
    if quant_type is None:
        weight_dtype = str(getattr(model_config, "torch_dtype", "float16"))
        if weight_dtype in ["torch.float16", "float16"]:
            weight_dtype = "float16"
            precision = "16bit"
        elif weight_dtype in ["torch.bfloat16", "bfloat16"]:
            weight_dtype = "bfloat16"
            precision = "16bit"
        else:
            # torch.float32 and any unrecognized dtype fall back to fp32.
            weight_dtype = "float32"
            precision = "32bit"
        model_type = "original"
        model_params, model_size = get_model_size(model_info=model_info, precision=precision)
    else:
        model_params, model_size = get_quantized_model_parameters_memory(
            model_info,
            quant_method=quant_type.lower(),
            bits=precision,
        )
        model_type = "quantization"
        if quant_type == "llama.cpp":
            hardware = "cpu"
            script = "llama_cpp"
            tags = "llama.cpp"
        else:
            hardware = "gpu"

    if compute_dtype == "?":
        compute_dtype = "float16"
    eval_entry = {
        "model": model,
        "revision": revision,
        "private": private,
        "params": model_size,
        "architectures": architecture,
        "quant_type": quant_type,
        "precision": precision,
        "model_params": model_params,
        "model_size": model_size,
        "weight_dtype": weight_dtype,
        "compute_dtype": compute_dtype,
        "gguf_ftype": gguf_ftype,
        "hardware": hardware,
        "status": "Pending",
        "submitted_time": current_time,
        "model_type": model_type,
        "job_id": -1,
        "job_start_time": None,
        "scripts": script,
    }

    supplementary_info = {
        "likes": model_info.likes,
        "license": license,
        "still_on_hub": True,
        "tags": tags,
        "downloads": downloads,
        "created_at": created_at,
    }
    print(eval_entry)

    # TODO: need open
    # Check for duplicate submission
    if f"{model}_{revision}_{quant_type}_{precision}_{weight_dtype}_{compute_dtype}" in REQUESTED_MODELS:
        return styled_warning("This model has already been submitted.")

    print("Creating huggingface/dataset eval file")
    OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
    os.makedirs(OUT_DIR, exist_ok=True)
    out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{quant_type}_{precision}_{weight_dtype}_{compute_dtype}.json"
    with open(out_path, "w") as f:
        f.write(json.dumps(eval_entry))
| print("Uploading eval file") | |
| try: | |
| API.upload_file( | |
| path_or_fileobj=out_path, | |
| path_in_repo=out_path.split("eval-queue/")[1], | |
| repo_id=QUEUE_REPO, | |
| repo_type="dataset", | |
| commit_message=f"Add {model} to eval queue", | |
| ) | |
| except Exception as e: | |
| print(str(e)) | |
| print("upload error........") | |
| print("Creating git eval file") | |
| OUT_DIR = f"{GIT_REQUESTS_PATH}/{user_name}" | |
| os.makedirs(OUT_DIR, exist_ok=True) | |
| req_out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{quant_type}_{precision}_{weight_dtype}_{compute_dtype}.json" | |
| req_git_path = "/".join(req_out_path.split('/')[1:]) | |
| print("Creating status file") | |
| OUT_DIR = f"{GIT_STATUS_PATH}/{user_name}" | |
| os.makedirs(OUT_DIR, exist_ok=True) | |
| sta_out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{quant_type}_{precision}_{weight_dtype}_{compute_dtype}.json" | |
| sta_git_path = "/".join(sta_out_path.split('/')[1:]) | |
| print("Uploading eval file") | |
| try: | |
| print("git-push get lock..............") | |
| GLOBAL_COND.acquire() | |
| branch = REPO.active_branch.name | |
| REPO.remotes.origin.pull(branch) | |
| REPO.index.remove("requests", False, r=True) | |
| with open(req_out_path, "w") as f: | |
| f.write(json.dumps(eval_entry, indent=4)) | |
| with open(sta_out_path, "w") as f: | |
| f.write(json.dumps(eval_entry, indent=4)) | |
| REPO.index.add([req_git_path, sta_git_path]) | |
| commit = REPO.index.commit(f"Add {model} to eval requests/status.") | |
| REPO.remotes.origin.push(branch) | |
| time.sleep(10) | |
| print("git-push release lock..............") | |
| GLOBAL_COND.release() | |
| except Exception as e: | |
| print(str(e)) | |
| print("git-push error........") | |
| GLOBAL_COND.release() | |
| return styled_message( | |
| "Your request has been submitted to the evaluation queue!\nPlease wait for up to 3 hours for the model to show in the PENDING list." | |
| ) | |
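
# A minimal usage sketch (hypothetical repo id and settings, not from this
# repo's UI wiring):
#
#     html = add_new_eval(
#         model="ExampleOrg/example-7b-gptq",  # hypothetical Hub repo id
#         revision="main",
#         private=False,
#         compute_dtype="float16",
#         precision="4bit",
#         weight_dtype="int4",
#         gguf_ftype="*Q4_0.gguf",
#     )
#     # `html` is a styled message to render in the submission form.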