File size: 5,915 Bytes
ca223e7
 
79eb202
ca223e7
 
 
 
 
c8c828c
79eb202
ca223e7
c8c828c
ca223e7
 
 
 
c8c828c
e311ed1
eef10e7
c8c828c
e311ed1
c8c828c
 
 
 
 
 
 
 
 
 
 
 
 
 
ca223e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8c828c
ca223e7
c8c828c
 
ca223e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8c828c
ca223e7
c8c828c
ca223e7
c8c828c
 
ca223e7
 
 
 
 
 
 
 
c8c828c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca223e7
 
 
 
 
 
 
 
c8c828c
 
 
ca223e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8c828c
ca223e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
import json
import re
from pathlib import Path

import torch
import gradio as gr
from huggingface_hub import snapshot_download
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

import spaces  # provided automatically on HF Spaces

# -----------------------------
# 1. Constants
# -----------------------------
# LoRA adapter fine-tuned for CV / job-description matching.
PEFT_MODEL_ID = "LlamaFactoryAI/cv-job-description-matching"
# Base causal LM the adapter is applied on top of.
BASE_MODEL_NAME = "akjindal53244/Llama-3.1-Storm-8B"

# System prompt instructing the model to emit one raw JSON object and nothing else.
# NOTE(review): the required-keys list says "Total score" and "email adress" (sic),
# while the constraints below and the fallback dicts elsewhere in this file use
# "score" — the model's output keys may therefore vary; confirm which spelling
# downstream consumers expect before normalizing.
SYSTEM_PROMPT = (
    "You analyze how well a CV matches a job description for No Skill Jobs. "
    "education is not much relevant unless specified."
    "Your ONLY output must be a single JSON object with EXACTLY these keys: "
    "matching_analysis, description, Total score, recommendation, name, email adress, phone number.\n\n"
    "Constraints:\n"
    "- matching_analysis: at most 3 short bullet-like points, max 20 words each.\n"
    "- description: at most 2 sentences, max 35 words total.\n"
    "- score: integer from 0 to 100.\n"
    "- recommendation: at most 2 sentences, max 35 words total.\n\n"
    "Very important:\n"
    "- Do NOT include the full CV or job description text.\n"
    "- Do NOT wrap the JSON in backticks or any extra text.\n"
    "- Output ONLY raw JSON, nothing before or after."
)

# -----------------------------
# 2. Download & patch adapter (CPU only, safe in main process)
# -----------------------------
# Fetch the LoRA adapter snapshot; returns the local cache directory.
print("Downloading adapter...")
adapter_path = snapshot_download(PEFT_MODEL_ID)

# The published adapter config does not carry the task type PEFT needs to
# attach the adapter to a causal-LM head, so patch it in place.
# (pathlib + explicit UTF-8 instead of string path concatenation.)
config_path = Path(adapter_path) / "adapter_config.json"
cfg = json.loads(config_path.read_text(encoding="utf-8"))
cfg["task_type"] = "CAUSAL_LM"
config_path.write_text(json.dumps(cfg, indent=2), encoding="utf-8")

print("Patched adapter_config.json → task_type = CAUSAL_LM")
print("Adapter path:", adapter_path)

# -----------------------------
# 3. Globals for lazy GPU init
# -----------------------------
# Deliberately None at import time: on Stateless GPU Spaces all CUDA work must
# happen inside the @spaces.GPU-decorated request, not in the main process.
tokenizer = None
model = None

def build_messages(cv: str, job_description: str):
    """Assemble the chat messages for the matcher.

    Returns a two-message conversation: the fixed system prompt, then the
    CV and job description wrapped in sentinel tags the adapter was
    trained on.
    """
    user_content = (
        f"<CV> {cv} </CV>\n"
        f"<job_description> {job_description} </job_description>"
    )
    return [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_content},
    ]


def extract_json_from_text(text: str):
    """Best-effort extraction of a JSON object from raw model output.

    Greedily grabs the span between the first '{' and the last '}' and
    tries to parse it; when nothing parses, returns a fallback dict that
    carries the first 200 characters of the raw text.
    """
    found = re.search(r"\{.*\}", text, flags=re.DOTALL)
    candidate = text if found is None else found.group(0)

    try:
        parsed = json.loads(candidate)
    except Exception:
        # Non-JSON output: degrade gracefully instead of raising.
        parsed = {
            "matching_analysis": [
                "Model output could not be parsed as JSON.",
            ],
            "description": text[:200],
            "score": 0,
            "recommendation": "Please try again; the model returned non-JSON output.",
        }
    return parsed


# -----------------------------
# 4. Main inference function (GPU)
# -----------------------------
def _ensure_model_loaded():
    """Lazily build the 4-bit base model + LoRA adapter on first GPU call.

    On Stateless GPU Spaces all CUDA work must run inside the
    @spaces.GPU-decorated request, so nothing here executes at import time.
    Populates the module-level `tokenizer` and `model`; later calls no-op.
    """
    global tokenizer, model

    if tokenizer is not None and model is not None:
        return

    print("Initializing tokenizer + model on GPU...")
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
    )

    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)

    # Llama tokenizers ship without a pad token; generate() needs one.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL_NAME,
        quantization_config=bnb_config,
        device_map="auto",
    )
    base_model.config.pad_token_id = tokenizer.pad_token_id

    model_ = PeftModel.from_pretrained(
        base_model,
        adapter_path,
        device_map="auto",
    )
    model_.eval()
    torch.set_grad_enabled(False)  # inference only; never train here

    model = model_
    print("Model + LoRA adapter loaded successfully on GPU.")


@spaces.GPU  # required for Stateless GPU Spaces
def match_cv_job(cv: str, job_description: str):
    """Score how well a CV matches a job description.

    Args:
        cv: Raw CV text from the UI (may be empty, or None from a cleared box).
        job_description: Raw job-description text.

    Returns:
        dict parsed from the model's JSON output, or a fallback dict with
        matching_analysis / description / score / recommendation keys.
    """
    # Guard against None as well as blank strings: a cleared Gradio textbox
    # can deliver None, and None.strip() would raise AttributeError.
    if not (cv or "").strip() or not (job_description or "").strip():
        return {
            "matching_analysis": ["Please provide both a CV and a job description."],
            "description": "",
            "score": 0,
            "recommendation": "Fill both text boxes and run again.",
        }

    _ensure_model_loaded()

    messages = build_messages(cv, job_description)

    # Render the chat template to a plain prompt string, then tokenize.
    prompt = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=False,
    )
    encoded = tokenizer(prompt, return_tensors="pt")
    # Move tensors to the same device the (device_map="auto") model lives on.
    encoded = {k: v.to(model.device) for k, v in encoded.items()}

    with torch.inference_mode():
        outputs = model.generate(
            **encoded,
            max_new_tokens=256,  # ample for the size-constrained JSON answer
            pad_token_id=tokenizer.pad_token_id,
        )

    # Decode only the newly generated tokens (skip the echoed prompt).
    input_len = encoded["input_ids"].shape[1]
    generated_text = tokenizer.decode(outputs[0][input_len:], skip_special_tokens=True)

    return extract_json_from_text(generated_text)


# -----------------------------
# 5. Gradio interface
# -----------------------------
# Input widgets: a tall box for the CV, a shorter one for the job posting.
cv_input = gr.Textbox(
    lines=18,
    label="CV",
    placeholder="Paste the candidate's CV here...",
)
jd_input = gr.Textbox(
    lines=8,
    label="Job Description",
    placeholder="Paste the job description here...",
)

# The handler returns a plain dict, which gr.JSON pretty-prints.
output_json = gr.JSON(label="Matching result (JSON)")

demo = gr.Interface(
    fn=match_cv_job,
    title="CV–Job Description Matching API",
    description=(
        "Paste a CV and a job description. The model returns a JSON object with "
        "`matching_analysis`, `description`, `score`, and `recommendation`."
    ),
    inputs=[cv_input, jd_input],
    outputs=output_json,
)

# Launch only when run as a script (HF Spaces also imports this module).
if __name__ == "__main__":
    demo.launch()