File size: 4,076 Bytes
c59578d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b091c09
e99bcfe
 
 
 
 
c59578d
 
 
 
 
 
 
 
 
 
 
e99bcfe
 
c59578d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
"""Central config for the CV Parser dashboard.

A model "ref" is resolved by lib/model.py as:
  - a local directory (e.g. exported_models/roberta-base-ner) -> load from disk
  - any other string with a "/" -> a Hugging Face Hub repo id
  - None -> FALLBACK_MODEL (roberta-base + random head; flagged as demo in the UI)

On the deployed HF Space there are no local folders, so the app loads the team's
"best model" repo (PRIMARY_MODEL_ID) from the Hub. Teammates update that repo via
the password-gated Manage Model page (or by pushing to it directly), and the app
picks up the new weights — no redeploy needed.
"""
import os

# ---- Model resolution -------------------------------------------------------
# Hugging Face namespace under which the team's model repos are published.
HF_OWNER = os.getenv("HF_OWNER", "Zeqhx")

# The two published models (both dataset-4 retrains) that the UI lets the user
# pick between. Each repo id can be overridden through its own env variable.
BERT_V2_ID = os.getenv(
    "DASHBOARD_BERT_V2_ID",
    HF_OWNER + "/cv-parser-bert-v2",
)
ROBERTA_V2_ID = os.getenv(
    "DASHBOARD_ROBERTA_V2_ID",
    HF_OWNER + "/cv-parser-roberta-v2",
)

# The canonical "best model" repo: loaded by default and overwritten by the
# Manage Model page. Unless DASHBOARD_MODEL_ID says otherwise, that is robertav2.
PRIMARY_MODEL_ID = os.getenv("DASHBOARD_MODEL_ID", ROBERTA_V2_ID)

# Back-compat single-ref overrides (used by load_model()'s config path).
MODEL_PATH = os.getenv("DASHBOARD_MODEL_PATH", "")
MODEL_ID = PRIMARY_MODEL_ID
FALLBACK_MODEL = "roberta-base"

# UI label under which the demo/fallback model is listed.
DEMO_LABEL = "Demo — untrained roberta-base"

# Toggle registry: one (label, kind, ref) tuple per selectable model.
#   kind == "local" -> offered only when the ref folder exists (dev machines)
#   kind == "hub"   -> always offered
MODEL_REGISTRY = [
    ("bertv2", "hub", BERT_V2_ID),
    ("robertav2", "hub", ROBERTA_V2_ID),
]


def available_models():
    """Build the ordered ``{label: ref}`` mapping of models the UI can offer.

    Registry entries of kind ``"local"`` are kept only when their ref is an
    existing directory; every other entry (the Hub ones) is always kept. A
    missing or private Hub repo degrades to the demo model, which the UI flags.

    Ordering: local entries first, then the remaining entries, each group in
    registry order. ``DEMO_LABEL`` is added last with ``ref=None``, the marker
    for the demo/fallback model.
    """
    on_disk = {
        name: target
        for name, kind, target in MODEL_REGISTRY
        if kind == "local" and os.path.isdir(target)
    }
    remote = {
        name: target
        for name, kind, target in MODEL_REGISTRY
        if kind != "local"
    }
    # Later unpackings override earlier values but keep the first key position,
    # exactly like successive dict.update() calls.
    return {**on_disk, **remote, DEMO_LABEL: None}

# ---- Inference --------------------------------------------------------------
MAX_LENGTH = 512
# STRIDE matches the project's sliding-window preprocessing.
STRIDE = 128

# ---- Label scheme (must match training; see project README) -----------------
# BIO tags; a tag's position in this list is its integer class id.
LABELS = [
    "O",
    "B-JOB_TITLE",
    "I-JOB_TITLE",
    "B-SKILL",
    "I-SKILL",
    "B-EDUCATION",
    "I-EDUCATION",
]
ID2LABEL = dict(enumerate(LABELS))
LABEL2ID = {tag: idx for idx, tag in ID2LABEL.items()}

# Entity types (the tags above with the BIO prefix stripped), with the display
# colour and the human-readable name used for each.
ENTITY_TYPES = ["JOB_TITLE", "SKILL", "EDUCATION"]
ENTITY_COLORS = dict(
    JOB_TITLE="#ffb703",  # amber
    SKILL="#2a9d8f",  # teal
    EDUCATION="#4361ee",  # blue
)
ENTITY_LABELS = dict(
    JOB_TITLE="Job Title",
    SKILL="Skill",
    EDUCATION="Education",
)

# Supported file extensions (lower-case, with the leading dot).
SUPPORTED_EXTS = [".pdf", ".docx", ".txt"]

# Sample resume text.
SAMPLE_RESUME = """John Carter
Senior Software Engineer

Summary
Experienced Software Engineer and Team Lead with 8 years building scalable
backend systems. Skilled in Python, Java, Kubernetes, and distributed systems.

Experience
Senior Software Engineer, Acme Corp (2019 - present)
- Designed microservices using Python, FastAPI and PostgreSQL.
- Led a team of 5 engineers and introduced CI/CD with Docker and Jenkins.

Data Scientist, Globex (2016 - 2019)
- Built machine learning models with TensorFlow and scikit-learn.

Education
Master of Science in Computer Science, Stanford University (2016)
Bachelor of Engineering in Software Engineering, MIT (2014)

Skills
Python, Java, SQL, Machine Learning, Kubernetes, Docker, AWS, Leadership
"""