"""Real-time duplicate-detection and license-validation helpers.

These are wired to the model-name textbox's ``.change()`` event so the user
sees instant feedback while typing, before clicking *Submit*.
"""

from __future__ import annotations

import logging
import re

import gradio as gr

from src.app_helpers.auth import is_allowed_to_resubmit_failed
from src.display.utils import EVAL_COLS, QUANT_COLS
from src.envs import GIT_RESULTS_PATH, GIT_STATUS_PATH, HF_TOKEN
from src.populate import get_evaluation_queue_df
from src.submission.check_validity import check_model_card, is_license_approved

logger = logging.getLogger(__name__)

# Maps an accordion element id to the human-readable status it represents.
_STATUS_TO_LABEL = {
    "accordion-finished-quant": "Finished",
    "accordion-running-quant": "Running",
    "accordion-pending-quant": "Pending",
    "accordion-failed-quant": "Failed",
    "accordion-finished-eval": "Finished",
    "accordion-running-eval": "Running",
    "accordion-pending-eval": "Pending",
    "accordion-failed-eval": "Failed",
}

# Order in which the per-status frames returned by ``get_evaluation_queue_df``
# are paired with status labels below.
_STATUS_ORDER = ("Finished", "Running", "Pending", "Failed")

# The "model" column stores an HTML tag; this strips markup down to plain text.
_HTML_TAG_RE = re.compile(r"<[^>]+>")

# Method assumed for queue entries that carry no usable ``method`` value.
_LEGACY_METHOD = "RTN"

# Upper-cased string forms of "no value" (empty, NaN, None, pandas NA).
_MISSING_METHOD_TOKENS = frozenset({"", "NAN", "NONE", "<NA>"})


def _normalize_method(value: object) -> str:
    """Return the upper-cased method key for a queue row.

    Missing values (``None``, NaN, blank) map to ``"RTN"`` so that legacy
    entries are treated as RTN even when other rows in the same frame do
    have a ``method`` value.
    """
    text = str(value).strip().upper()
    return _LEGACY_METHOD if text in _MISSING_METHOD_TOKENS else text


def _pre_check_duplicate(
    model: str,
    queue_type: str,
    scheme: str = "",
    method: str = "",
    username: str = "",
    oauth_token: gr.OAuthToken | None = None,
):
    """Check if *model* (+ optional *scheme* + *method*) already exists in the queue.

    For ``quant`` a duplicate requires the model name to match and, when they
    are provided, ``quant_scheme`` and ``method`` to match as well. For any
    other ``queue_type`` the evaluation queue is searched and only the model
    name is compared. Model names are compared case-insensitively after HTML
    tags are stripped from the stored cell.

    ``username`` and ``oauth_token`` are accepted for signature compatibility
    and are not used by this function.

    Returns ``(dup_found, modal_gr_update, message_text, accordion_elem_id)``.
    The message built here is informational only; callers decide whether a
    duplicate should block submission.
    """
    if not model or not model.strip():
        return False, gr.update(visible=False), "", ""

    model_clean = model.strip()
    model_key = model_clean.lower()
    scheme_clean = scheme.strip().lower() if scheme else ""
    method_clean = method.strip().upper() if method else ""
    is_quant = queue_type == "quant"

    if is_quant:
        cols, request_type, queue_label = QUANT_COLS, "quant", "Quantization"
    else:
        cols, request_type, queue_label = EVAL_COLS, "eval", "Evaluation"

    dfs = get_evaluation_queue_df(
        GIT_STATUS_PATH, cols, request_type=request_type, results_path=GIT_RESULTS_PATH
    )

    for status_label, df in zip(_STATUS_ORDER, dfs):
        if df.empty or "model" not in df.columns:
            continue

        has_scheme = "quant_scheme" in df.columns
        has_method = "method" in df.columns

        for idx, cell in enumerate(df["model"]):
            plain = _HTML_TAG_RE.sub("", str(cell)).strip()
            if plain.lower() != model_key:
                continue

            # For quant: also require scheme to match when provided.
            if is_quant and scheme_clean and has_scheme:
                row_scheme = str(df.iloc[idx]["quant_scheme"]).strip().lower()
                if row_scheme != scheme_clean:
                    continue

            # For quant: also require method to match when provided. Rows
            # without a usable method value count as legacy RTN entries.
            if is_quant and method_clean:
                raw_method = df.iloc[idx]["method"] if has_method else None
                if _normalize_method(raw_method) != method_clean:
                    continue

            scheme_hint = f" ({scheme.strip()})" if scheme_clean and is_quant else ""
            msg = (
                f"### ℹ️ Model already in queue\n\n"
                f"**`{model_clean}`{scheme_hint}** is currently in the **{status_label} {queue_label}** queue.\n\n"
                f"Submitting again will create a new entry while keeping the existing record."
            )
            # Same ids as the keys of ``_STATUS_TO_LABEL``.
            acc_id = f"accordion-{status_label.lower()}-{request_type}"
            return True, gr.update(visible=True), msg, acc_id

    return False, gr.update(visible=False), "", ""


def inline_dup_check(
    model: str,
    queue_type: str,
    scheme: str = "",
    method: str = "",
    username: str = "",
    oauth_token: gr.OAuthToken | None = None,
):
    """Inline real-time duplicate check (triggered on user input).

    Only queries when the model string contains '/' to avoid disk reads on
    every keystroke.

    Returns ``(warning_update, btn_update)``:

    * Empty model or no '/' in it → no banner, button disabled.
    * No duplicate → no banner, button pass-through.
    * Duplicate with status=Failed AND the user is allowed to re-submit failed
      models → no banner, button pass-through.
    * Any other duplicate → banner shown, button disabled.
    """
    model = (model or "").strip()
    if not model or "/" not in model:
        return gr.update(visible=False, value=""), gr.update(interactive=False)

    dup_found, _, _, acc_id = _pre_check_duplicate(model, queue_type, scheme, method, username, oauth_token)
    if not dup_found:
        return gr.update(visible=False, value=""), gr.update()

    status_label = _STATUS_TO_LABEL.get(acc_id, "queue")

    # Permitted re-submission: previously Failed + user has permission.
    if status_label == "Failed" and is_allowed_to_resubmit_failed(username):
        return gr.update(visible=False, value=""), gr.update()

    queue_label = "Quantization" if queue_type == "quant" else "Evaluation"
    # Use the stripped scheme so a whitespace-only value does not render " ()".
    scheme_text = (scheme or "").strip()
    scheme_hint = f" ({scheme_text})" if scheme_text and queue_type == "quant" else ""

    if status_label == "Failed":
        msg = (
            f"⚠️ **`{model}`{scheme_hint}** is already in the {queue_label} queue "
            f"(status: **Failed**). You do not have permission to re-submit failed models — "
            "please contact an administrator."
        )
    else:
        msg = (
            f"⚠️ **`{model}`{scheme_hint}** is already in the {queue_label} queue "
            f"(status: **{status_label}**). Duplicate submissions are not allowed."
        )
    return gr.update(visible=True, value=msg), gr.update(interactive=False)


def inline_dup_check_quant(model, scheme="", method_choice="", username="", oauth_token=None):
    """Run the inline duplicate check against the quantization queue.

    ``method_choice`` is the UI display value; it maps to ``"TUNING"`` when it
    contains ``"Tuning"`` and to ``"RTN"`` otherwise (including when empty).
    """
    # Parse UI display value to internal method key.
    method = "TUNING" if method_choice and "Tuning" in method_choice else "RTN"
    return inline_dup_check(model, "quant", scheme, method, username, oauth_token)


def inline_dup_check_eval(model, scheme="", username="", oauth_token=None):
    """Run the inline duplicate check against the evaluation queue (no method)."""
    return inline_dup_check(model, "eval", scheme, "", username, oauth_token)


def inline_license_check(model: str, dup_warning: str = ""):
    """Real-time license check triggered as the user types the model name.

    Only queries HuggingFace when the model string looks like ``org/name``.

    Returns ``(warning_update, btn_update)`` — ``btn_update`` disables submit
    when the model card check fails, the license is not approved, or an
    exception is raised. When all checks pass the button is re-enabled, unless
    ``dup_warning`` is non-blank, in which case the button is left unchanged
    so the duplicate-check result is preserved.
    """
    model = (model or "").strip()
    if not model or "/" not in model:
        return gr.update(visible=False, value=""), gr.update()

    parts = model.split("/", 1)
    if not parts[0] or not parts[1]:
        return gr.update(visible=False, value=""), gr.update()

    _faq_hint = (
        ' For details, see https://opensource.org/licenses.'
    )

    try:
        ok, err_msg, card = check_model_card(model, token=HF_TOKEN)
        if not ok:
            logger.warning("[inline-check] check_model_card failed: model=%s reason=%r", model, err_msg)
            return gr.update(visible=True, value=f"⚠️ {err_msg}{_faq_hint}"), gr.update(interactive=False)

        license_val = card.data.license if card else None
        logger.info("[inline-check] model=%s license=%r", model, license_val)

        if license_val and not is_license_approved(license_val):
            return (
                gr.update(
                    visible=True,
                    value=(
                        f"⚠️ **License `{license_val}` is not in the approved list.**"
                        " Please use an open-source license." + _faq_hint
                    ),
                ),
                gr.update(interactive=False),
            )

        # All checks passed — re-enable submit only if no duplicate warning
        # is active.
        if dup_warning and str(dup_warning).strip():
            return gr.update(visible=False, value=""), gr.update()
        return gr.update(visible=False, value=""), gr.update(interactive=True)

    except Exception as exc:
        # UI boundary: never let a lookup failure propagate into the event
        # handler; log it and show a readable reason instead.
        logger.warning("[inline-check] unexpected error for model=%s: %s", model, exc, exc_info=True)
        exc_name = exc.__class__.__name__
        # Classify by exception class name or status code found in the text.
        if "RepositoryNotFound" in exc_name or "404" in str(exc):
            reason = f"Model **`{model}`** was not found on the Hugging Face Hub. Please check the name."
        elif "GatedRepo" in exc_name or "401" in str(exc) or "403" in str(exc):
            reason = (
                f"Cannot access **`{model}`** (private or gated repo). "
                "Please make the model public or grant access."
            )
        else:
            reason = (
                f"Could not validate **`{model}`** ({exc_name}). "
                "Please verify the model name and try again."
            )
        return gr.update(visible=True, value=f"⚠️ {reason}"), gr.update(interactive=False)