Spaces:
Running
Running
"""Central config for the CV Parser dashboard.
A model "ref" is resolved by lib/model.py as:
- a local directory (e.g. exported_models/roberta-base-ner) -> load from disk
- any other string with a "/" -> a Hugging Face Hub repo id
- None -> FALLBACK_MODEL (roberta-base + random head; flagged as demo in the UI)
On the deployed HF Space there are no local folders, so the app loads the team's
"best model" repo (PRIMARY_MODEL_ID) from the Hub. Teammates update that repo via
the password-gated Manage Model page (or by pushing to it directly), and the app
picks up the new weights — no redeploy needed.
"""
| import os | |
# ---- Model resolution -------------------------------------------------------
# Hub namespace that owns the published models. Every default repo id below is
# derived from it, and each id can be overridden through its own env variable.
HF_OWNER = os.getenv("HF_OWNER", "Zeqhx")

# The four published models that can be picked in the UI:
#   v1 -> the earlier (dataset-2) models
#   v2 -> the latest (dataset-4) retrains
BERT_V1_ID = os.getenv("DASHBOARD_BERT_V1_ID", f"{HF_OWNER}/cv-parser-bert-v1")
ROBERTA_V1_ID = os.getenv("DASHBOARD_ROBERTA_V1_ID", f"{HF_OWNER}/cv-parser-roberta-v1")
BERT_V2_ID = os.getenv("DASHBOARD_BERT_V2_ID", f"{HF_OWNER}/cv-parser-bert-v2")
ROBERTA_V2_ID = os.getenv("DASHBOARD_ROBERTA_V2_ID", f"{HF_OWNER}/cv-parser-roberta-v2")

# The canonical "best model" repo: loaded by default and overwritten by the
# Manage Model page. Unless DASHBOARD_MODEL_ID says otherwise, that is robertav2.
PRIMARY_MODEL_ID = os.getenv("DASHBOARD_MODEL_ID", ROBERTA_V2_ID)

# Single-ref overrides kept for backwards compatibility (consumed by the config
# path of load_model()). MODEL_PATH is the empty string when the variable is unset.
MODEL_PATH = os.getenv("DASHBOARD_MODEL_PATH", "")
MODEL_ID = PRIMARY_MODEL_ID

# Base checkpoint and UI label used for the demo/fallback entry.
FALLBACK_MODEL = "roberta-base"
DEMO_LABEL = "Demo — untrained roberta-base"

# Toggle registry: one (label, kind, ref) tuple per selectable model.
# Entries of kind "local" are offered only when their folder exists (dev
# machines); entries of kind "hub" are always offered.
MODEL_REGISTRY = [
    ("bertv1", "hub", BERT_V1_ID),
    ("robertav1", "hub", ROBERTA_V1_ID),
    ("bertv2", "hub", BERT_V2_ID),
    ("robertav2", "hub", ROBERTA_V2_ID),
]
def available_models():
    """Return the selectable models as an ordered ``{label: ref}`` mapping.

    Walks ``MODEL_REGISTRY`` once. An entry of kind ``"local"`` is kept only
    when its ref is an existing directory; entries of any other kind are kept
    unconditionally (no check is made here that the repo is reachable).

    Ordering of the result: kept local entries first, then the remaining
    entries, each group in registry order, and finally ``DEMO_LABEL`` mapped
    to ``None`` — the ``None`` ref marks the demo/fallback model.
    """
    on_disk = {}
    remote = {}
    for name, source, target in MODEL_REGISTRY:
        if source != "local":
            remote[name] = target
        elif os.path.isdir(target):
            on_disk[name] = target
    # Later keys win on value but keep their first position, exactly like
    # successive dict.update() calls would.
    return {**on_disk, **remote, DEMO_LABEL: None}
# ---- Inference --------------------------------------------------------------
MAX_LENGTH = 512
# Same stride as the project's sliding-window preprocessing.
STRIDE = 128

# ---- Label scheme (must match training; see project README) -----------------
# BIO tags; a tag's position in this list is its integer class id.
LABELS = [
    "O",
    "B-JOB_TITLE",
    "I-JOB_TITLE",
    "B-SKILL",
    "I-SKILL",
    "B-EDUCATION",
    "I-EDUCATION",
]
# Lookup tables in both directions, derived from LABELS.
ID2LABEL = dict(enumerate(LABELS))
LABEL2ID = {tag: index for index, tag in enumerate(LABELS)}

# Entity types (the tags above without their B-/I- prefix).
ENTITY_TYPES = ["JOB_TITLE", "SKILL", "EDUCATION"]
# Display colour per entity type.
ENTITY_COLORS = {
    "JOB_TITLE": "#ffb703",  # amber
    "SKILL": "#2a9d8f",  # teal
    "EDUCATION": "#4361ee",  # blue
}
# Human-readable name per entity type.
ENTITY_LABELS = {
    "JOB_TITLE": "Job Title",
    "SKILL": "Skill",
    "EDUCATION": "Education",
}

SUPPORTED_EXTS = [".pdf", ".docx", ".txt"]
# Built-in example CV text covering job titles, skills and education lines.
# NOTE(review): where this is consumed is not visible in this file — presumably
# a demo input for the dashboard; confirm against the UI code.
SAMPLE_RESUME = """John Carter
Senior Software Engineer
Summary
Experienced Software Engineer and Team Lead with 8 years building scalable
backend systems. Skilled in Python, Java, Kubernetes, and distributed systems.
Experience
Senior Software Engineer, Acme Corp (2019 - present)
- Designed microservices using Python, FastAPI and PostgreSQL.
- Led a team of 5 engineers and introduced CI/CD with Docker and Jenkins.
Data Scientist, Globex (2016 - 2019)
- Built machine learning models with TensorFlow and scikit-learn.
Education
Master of Science in Computer Science, Stanford University (2016)
Bachelor of Engineering in Software Engineering, MIT (2014)
Skills
Python, Java, SQL, Machine Learning, Kubernetes, Docker, AWS, Leadership
"""