"""Central config for the CV Parser dashboard. A model "ref" is resolved by lib/model.py as: - a local directory (e.g. exported_models/roberta-base-ner) -> load from disk - any other string with a "/" -> a Hugging Face Hub repo id - None -> FALLBACK_MODEL (roberta-base + random head; flagged as demo in the UI) On the deployed HF Space there are no local folders, so the app loads the team's "best model" repo (PRIMARY_MODEL_ID) from the Hub. Teammates update that repo via the password-gated Manage Model page (or by pushing to it directly), and the app picks up the new weights — no redeploy needed. """ import os # ---- Model resolution ------------------------------------------------------- # Hugging Face owner + the canonical "best model" repo the app loads by default # and that the Manage Model page overwrites. HF_OWNER = os.environ.get("HF_OWNER", "Zeqhx") # Two published models, selectable in the UI. Both are dataset-4 retrains. BERT_V2_ID = os.environ.get("DASHBOARD_BERT_V2_ID", f"{HF_OWNER}/cv-parser-bert-v2") ROBERTA_V2_ID = os.environ.get("DASHBOARD_ROBERTA_V2_ID", f"{HF_OWNER}/cv-parser-roberta-v2") # Default model the app loads and the Manage Model page overwrites: best = robertav2. PRIMARY_MODEL_ID = os.environ.get("DASHBOARD_MODEL_ID", ROBERTA_V2_ID) # Back-compat single-ref overrides (used by load_model()'s config path). MODEL_PATH = os.environ.get("DASHBOARD_MODEL_PATH", "") MODEL_ID = PRIMARY_MODEL_ID FALLBACK_MODEL = "roberta-base" DEMO_LABEL = "Demo — untrained roberta-base" # Toggle registry. Each entry: (label, kind, ref). "local" entries are only # offered when the folder exists (dev machines); "hub" entries are always offered. MODEL_REGISTRY = [ ("bertv2", "hub", BERT_V2_ID), ("robertav2", "hub", ROBERTA_V2_ID), ] def available_models(): """Ordered {label: ref} of selectable models, plus the demo fallback. ``ref=None`` marks the demo/fallback. Local entries appear only when present on disk; Hub entries always appear (a missing/private repo degrades to demo, which the UI flags). """ found = {} local_found = {} hub_found = {} for label, kind, ref in MODEL_REGISTRY: if kind == "local" and not os.path.isdir(ref): continue if kind == "local": local_found[label] = ref else: hub_found[label] = ref found.update(local_found) found.update(hub_found) found[DEMO_LABEL] = None return found # ---- Inference -------------------------------------------------------------- MAX_LENGTH = 512 STRIDE = 128 # matches the project's sliding-window preprocessing # ---- Label scheme (must match training; see project README) ----------------- LABELS = [ "O", "B-JOB_TITLE", "I-JOB_TITLE", "B-SKILL", "I-SKILL", "B-EDUCATION", "I-EDUCATION", ] ID2LABEL = {i: l for i, l in enumerate(LABELS)} LABEL2ID = {l: i for i, l in enumerate(LABELS)} # Entity types (BIO prefix stripped) + display colours ENTITY_TYPES = ["JOB_TITLE", "SKILL", "EDUCATION"] ENTITY_COLORS = { "JOB_TITLE": "#ffb703", # amber "SKILL": "#2a9d8f", # teal "EDUCATION": "#4361ee", # blue } ENTITY_LABELS = { "JOB_TITLE": "Job Title", "SKILL": "Skill", "EDUCATION": "Education", } SUPPORTED_EXTS = [".pdf", ".docx", ".txt"] SAMPLE_RESUME = """John Carter Senior Software Engineer Summary Experienced Software Engineer and Team Lead with 8 years building scalable backend systems. Skilled in Python, Java, Kubernetes, and distributed systems. Experience Senior Software Engineer, Acme Corp (2019 - present) - Designed microservices using Python, FastAPI and PostgreSQL. - Led a team of 5 engineers and introduced CI/CD with Docker and Jenkins. Data Scientist, Globex (2016 - 2019) - Built machine learning models with TensorFlow and scikit-learn. Education Master of Science in Computer Science, Stanford University (2016) Bachelor of Engineering in Software Engineering, MIT (2014) Skills Python, Java, SQL, Machine Learning, Kubernetes, Docker, AWS, Leadership """