"""Real-time duplicate-detection and license-validation helpers.
These are wired to the model-name textbox's ``.change()`` event so the user
sees instant feedback while typing, before clicking *Submit*.
"""
from __future__ import annotations
import logging
import re
import gradio as gr
from src.app_helpers.auth import is_allowed_to_resubmit_failed
from src.display.utils import EVAL_COLS, QUANT_COLS
from src.envs import GIT_RESULTS_PATH, GIT_STATUS_PATH, HF_TOKEN
from src.populate import get_evaluation_queue_df
from src.submission.check_validity import check_model_card, is_license_approved
logger = logging.getLogger(__name__)
# Maps each queue accordion's ``elem_id`` ("accordion-<status>-<queue>") to the
# human-readable status label used in the inline warning messages.
_STATUS_TO_LABEL = {
    f"accordion-{label.lower()}-{queue}": label
    for queue in ("quant", "eval")
    for label in ("Finished", "Running", "Pending", "Failed")
}
def _pre_check_duplicate(
    model: str,
    queue_type: str,
    scheme: str = "",
    method: str = "",
    username: str = "",
    oauth_token: gr.OAuthToken | None = None,
):
    """Check whether *model* (+ optional *scheme* + *method*) is already queued.

    For ``queue_type == "quant"`` a row counts as a duplicate only when the
    model name matches and, if provided, the ``quant_scheme`` and ``method``
    match as well. For any other ``queue_type`` the evaluation queue is used
    and only the model name is compared. Comparisons are case-insensitive.

    Args:
        model: Model identifier typed by the user; blank input short-circuits.
        queue_type: ``"quant"`` selects the quantization queue, anything else
            the evaluation queue.
        scheme: Optional quantization scheme to match (quant queue only).
        method: Optional quantization method to match (quant queue only).
        username: Accepted for signature compatibility; not used here.
        oauth_token: Accepted for signature compatibility; not used here.

    Returns:
        ``(dup_found, modal_gr_update, message_text, accordion_elem_id)``.
        When no duplicate exists the tuple is
        ``(False, gr.update(visible=False), "", "")``.

    NOTE(review): the message produced here is informational ("Submitting
    again will create a new entry"), whereas ``inline_dup_check`` disables the
    submit button for most duplicates — confirm which policy is intended.
    """
    if not model or not model.strip():
        return False, gr.update(visible=False), "", ""

    model_clean = model.strip()
    model_key = model_clean.lower()  # hoisted: invariant across every row
    scheme_clean = scheme.strip().lower() if scheme else ""
    method_clean = method.strip().upper() if method else ""
    is_quant = queue_type == "quant"

    if is_quant:
        dfs = get_evaluation_queue_df(
            GIT_STATUS_PATH, QUANT_COLS, request_type="quant", results_path=GIT_RESULTS_PATH
        )
        queue_label = "Quantization"
        acc_ids = [
            "accordion-finished-quant",
            "accordion-running-quant",
            "accordion-pending-quant",
            "accordion-failed-quant",
        ]
    else:
        dfs = get_evaluation_queue_df(
            GIT_STATUS_PATH, EVAL_COLS, request_type="eval", results_path=GIT_RESULTS_PATH
        )
        queue_label = "Evaluation"
        acc_ids = [
            "accordion-finished-eval",
            "accordion-running-eval",
            "accordion-pending-eval",
            "accordion-failed-eval",
        ]

    # Assumes get_evaluation_queue_df yields the frames in this exact order
    # (finished, running, pending, failed) — TODO confirm against src.populate.
    status_labels = ["Finished", "Running", "Pending", "Failed"]
    html_tag = re.compile(r"<[^>]+>")

    for status_label, df, acc_id in zip(status_labels, dfs, acc_ids):
        if df.empty or "model" not in df.columns:
            continue
        has_scheme_col = "quant_scheme" in df.columns
        has_method_col = "method" in df.columns

        for idx, cell in enumerate(df["model"]):
            # The model column stores an HTML tag; strip HTML to plain text.
            plain = html_tag.sub("", str(cell)).strip()
            if plain.lower() != model_key:
                continue

            # For quant: also require scheme to match when provided.
            if is_quant and scheme_clean and has_scheme_col:
                row_scheme = str(df.iloc[idx]["quant_scheme"]).strip().lower()
                if row_scheme != scheme_clean:
                    continue

            # For quant: also require method to match when provided. Rows with
            # no recorded method (column absent OR cell empty/NaN) are legacy
            # entries and count as RTN.
            if is_quant and method_clean:
                raw_method = df.iloc[idx]["method"] if has_method_col else None
                if _normalize_queue_method(raw_method) != method_clean:
                    continue

            scheme_hint = f" ({scheme.strip()})" if scheme_clean and is_quant else ""
            msg = (
                f"### ℹ️ Model already in queue\n\n"
                f"**`{model_clean}`{scheme_hint}** is currently in the **{status_label} {queue_label}** queue.\n\n"
                f"Submitting again will create a new entry while keeping the existing record."
            )
            return True, gr.update(visible=True), msg, acc_id

    return False, gr.update(visible=False), "", ""


def _normalize_queue_method(value) -> str:
    """Return the upper-cased method of a queue row, defaulting to ``"RTN"``.

    A missing value may reach this helper as ``None``, a float NaN or a pandas
    NA scalar; ``str()`` renders those as ``"None"``, ``"nan"`` and ``"<NA>"``.
    All of them (and blank strings) are treated as the legacy default ``RTN``.
    """
    text = "" if value is None else str(value).strip().upper()
    if text in {"", "NAN", "NONE", "<NA>"}:
        return "RTN"
    return text
def inline_dup_check(
    model: str,
    queue_type: str,
    scheme: str = "",
    method: str = "",
    username: str = "",
    oauth_token: gr.OAuthToken | None = None,
):
    """Inline real-time duplicate check (triggered on user input).

    The queue is only consulted once the model string contains ``/``, so that
    partially typed names do not trigger a lookup on every keystroke.

    Args:
        model: Model identifier typed by the user (``None`` is tolerated).
        queue_type: ``"quant"`` or anything else for the evaluation queue.
        scheme: Optional quantization scheme, forwarded to the duplicate check.
        method: Optional quantization method, forwarded to the duplicate check.
        username: Passed to ``is_allowed_to_resubmit_failed`` for Failed rows.
        oauth_token: Forwarded unchanged to ``_pre_check_duplicate``.

    Returns:
        ``(warning_update, btn_update)``:

        * Empty model or no ``/`` in it → banner hidden, button disabled.
        * No duplicate → banner hidden, button pass-through (``gr.update()``).
        * Duplicate with status Failed AND the user is allowed to re-submit
          failed models → banner hidden, button pass-through.
        * Any other duplicate → banner shown, button disabled.
    """
    model = (model or "").strip()
    if not model or "/" not in model:
        return gr.update(visible=False, value=""), gr.update(interactive=False)

    dup_found, _, _, acc_id = _pre_check_duplicate(model, queue_type, scheme, method, username, oauth_token)
    if not dup_found:
        return gr.update(visible=False, value=""), gr.update()

    status_label = _STATUS_TO_LABEL.get(acc_id, "queue")

    # Permitted re-submission: previously Failed + user has permission.
    if status_label == "Failed" and is_allowed_to_resubmit_failed(username):
        return gr.update(visible=False, value=""), gr.update()

    queue_label = "Quantization" if queue_type == "quant" else "Evaluation"
    # Test the *stripped* scheme so a whitespace-only value does not render an
    # empty " ()" hint (matches the check used by _pre_check_duplicate).
    scheme_text = scheme.strip() if scheme else ""
    scheme_hint = f" ({scheme_text})" if scheme_text and queue_type == "quant" else ""

    if status_label == "Failed":
        msg = (
            f"⚠️ **`{model}`{scheme_hint}** is already in the {queue_label} queue "
            f"(status: **Failed**). You do not have permission to re-submit failed models — "
            "please contact an administrator."
        )
    else:
        msg = (
            f"⚠️ **`{model}`{scheme_hint}** is already in the {queue_label} queue "
            f"(status: **{status_label}**). Duplicate submissions are not allowed."
        )
    return gr.update(visible=True, value=msg), gr.update(interactive=False)
def inline_dup_check_quant(model, scheme="", method_choice="", username="", oauth_token=None):
    """Run the inline duplicate check against the quantization queue.

    ``method_choice`` is the UI display value; it is translated to the
    internal method key before delegating: any value containing the substring
    ``"Tuning"`` becomes ``"TUNING"``, everything else (including an empty
    value) becomes ``"RTN"``.
    """
    method = "TUNING" if method_choice and "Tuning" in method_choice else "RTN"
    return inline_dup_check(model, "quant", scheme, method, username, oauth_token)
def inline_dup_check_eval(model, scheme="", username="", oauth_token=None):
    """Run the inline duplicate check against the evaluation queue.

    No quantization method applies here, so an empty method is forwarded.
    """
    return inline_dup_check(
        model=model,
        queue_type="eval",
        scheme=scheme,
        method="",
        username=username,
        oauth_token=oauth_token,
    )
def inline_license_check(model: str, dup_warning: str = ""):
    """Validate the model card and license while the user types the model name.

    The model card is only fetched when the input has the shape ``org/name``
    (non-empty text on both sides of the first ``/``).

    Args:
        model: Model identifier typed by the user (``None`` is tolerated).
        dup_warning: Current text of the duplicate-warning banner, if any.

    Returns:
        ``(warning_update, btn_update)``:

        * Input not shaped like ``org/name`` → banner hidden, button untouched.
        * Model-card check fails, license not approved, or any exception →
          banner shown with the reason, button disabled.
        * All checks pass → banner hidden; the button is re-enabled only when
          ``dup_warning`` is blank, otherwise it is left untouched so an
          active duplicate warning keeps it in its current state.
    """

    def _hidden(button_update):
        # Banner cleared; the caller decides what happens to the button.
        return gr.update(visible=False, value=""), button_update

    def _blocked(text):
        # Banner shown with *text*; submit button disabled.
        return gr.update(visible=True, value=text), gr.update(interactive=False)

    model = (model or "").strip()
    org, slash, repo = model.partition("/")
    if not (slash and org and repo):
        return _hidden(gr.update())

    faq_hint = " For details, see https://opensource.org/licenses."

    try:
        ok, err_msg, card = check_model_card(model, token=HF_TOKEN)
        if not ok:
            logger.warning("[inline-check] check_model_card failed: model=%s reason=%r", model, err_msg)
            return _blocked(f"⚠️ {err_msg}{faq_hint}")

        license_val = card.data.license if card else None
        logger.info("[inline-check] model=%s license=%r", model, license_val)

        if license_val and not is_license_approved(license_val):
            return _blocked(
                f"⚠️ **License `{license_val}` is not in the approved list.**"
                " Please use an open-source license."
                + faq_hint
            )

        # All checks passed — re-enable submit only if no duplicate warning
        # is active.
        duplicate_active = bool(dup_warning and str(dup_warning).strip())
        if duplicate_active:
            return _hidden(gr.update())
        return _hidden(gr.update(interactive=True))
    except Exception as exc:
        logger.warning("[inline-check] unexpected error for model=%s: %s", model, exc, exc_info=True)
        exc_name = type(exc).__name__
        exc_text = str(exc)
        # Classify by exception class name first, then by status code text.
        if "RepositoryNotFound" in exc_name or "404" in exc_text:
            reason = f"Model **`{model}`** was not found on the Hugging Face Hub. Please check the name."
        elif "GatedRepo" in exc_name or "401" in exc_text or "403" in exc_text:
            reason = (
                f"Cannot access **`{model}`** (private or gated repo). "
                "Please make the model public or grant access."
            )
        else:
            reason = (
                f"Could not validate **`{model}`** ({exc_name}). "
                "Please verify the model name and try again."
            )
        return _blocked(f"⚠️ {reason}")