Spaces:
Running
on
Zero
Running
on
Zero
File size: 5,915 Bytes
ca223e7 79eb202 ca223e7 c8c828c 79eb202 ca223e7 c8c828c ca223e7 c8c828c e311ed1 eef10e7 c8c828c e311ed1 c8c828c ca223e7 c8c828c ca223e7 c8c828c ca223e7 c8c828c ca223e7 c8c828c ca223e7 c8c828c ca223e7 c8c828c ca223e7 c8c828c ca223e7 c8c828c ca223e7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
import json
import os
import re

import gradio as gr
import spaces  # provided automatically on HF Spaces
import torch
from huggingface_hub import snapshot_download
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# -----------------------------
# 1. Constants
# -----------------------------

# Hub IDs for the LoRA adapter and the base model it was trained on.
PEFT_MODEL_ID = "LlamaFactoryAI/cv-job-description-matching"
BASE_MODEL_NAME = "akjindal53244/Llama-3.1-Storm-8B"

# System prompt forcing the model to emit one raw JSON object.
# Fixes vs. previous version: the key list said "Total score" while the
# constraints, the parser fallback, and the UI all use "score"; "email adress"
# typo corrected to "email_address" (snake_case like the other keys); a
# missing space between the concatenated literals ("specified.Your") added.
SYSTEM_PROMPT = (
    "You analyze how well a CV matches a job description for No Skill Jobs. "
    "Education is not much relevant unless specified. "
    "Your ONLY output must be a single JSON object with EXACTLY these keys: "
    "matching_analysis, description, score, recommendation, name, email_address, phone_number.\n\n"
    "Constraints:\n"
    "- matching_analysis: at most 3 short bullet-like points, max 20 words each.\n"
    "- description: at most 2 sentences, max 35 words total.\n"
    "- score: integer from 0 to 100.\n"
    "- recommendation: at most 2 sentences, max 35 words total.\n\n"
    "Very important:\n"
    "- Do NOT include the full CV or job description text.\n"
    "- Do NOT wrap the JSON in backticks or any extra text.\n"
    "- Output ONLY raw JSON, nothing before or after."
)
# -----------------------------
# 2. Download & patch adapter (CPU only, safe in main process)
# -----------------------------
print("Downloading adapter...")
# Snapshot the adapter repo locally; returns the local cache directory path.
adapter_path = snapshot_download(PEFT_MODEL_ID)

# The published adapter config carries the wrong task_type; PeftModel needs
# CAUSAL_LM to attach the adapter to a causal-LM head, so patch it in place.
# Use os.path.join and an explicit UTF-8 encoding instead of manual "/"
# concatenation and the platform-default codec.
config_path = os.path.join(adapter_path, "adapter_config.json")
with open(config_path, "r", encoding="utf-8") as f:
    cfg = json.load(f)
cfg["task_type"] = "CAUSAL_LM"
with open(config_path, "w", encoding="utf-8") as f:
    json.dump(cfg, f, indent=2)

print("Patched adapter_config.json → task_type = CAUSAL_LM")
print("Adapter path:", adapter_path)
# -----------------------------
# 3. Globals for lazy GPU init
# -----------------------------
# Both stay None at import time so no CUDA work happens in the main process
# (required on Stateless GPU Spaces); match_cv_job() populates them on first call.
tokenizer = None
model = None
def build_messages(cv: str, job_description: str):
    """Assemble the two-message chat payload for one matching request.

    The CV and job description are wrapped in explicit tags so the model
    can tell the two documents apart inside a single user turn.
    """
    user_content = f"<CV> {cv} </CV>\n<job_description> {job_description} </job_description>"
    return [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_content},
    ]
def extract_json_from_text(text: str):
    """Best-effort extraction of a JSON object from raw model output.

    Greedily grabs the outermost ``{...}`` span (DOTALL, so it may cross
    newlines); if nothing parses, returns a fallback dict in the same
    schema so callers always receive valid JSON-like data.
    """
    found = re.search(r"\{.*\}", text, flags=re.DOTALL)
    candidate = text if found is None else found.group(0)
    try:
        parsed = json.loads(candidate)
    except Exception:
        # Non-JSON output: surface a truncated echo of the raw text instead.
        parsed = {
            "matching_analysis": [
                "Model output could not be parsed as JSON.",
            ],
            "description": text[:200],
            "score": 0,
            "recommendation": "Please try again; the model returned non-JSON output.",
        }
    return parsed
# -----------------------------
# 4. Main inference function (GPU)
# -----------------------------
@spaces.GPU # required for Stateless GPU Spaces
def match_cv_job(cv: str, job_description: str):
    """Score how well `cv` matches `job_description`; returns a dict.

    On the first call, lazily loads the 4-bit-quantized base model and the
    LoRA adapter onto the GPU and caches them in module globals; subsequent
    calls reuse them. Output is whatever extract_json_from_text() parses
    from the generation (its fallback schema on parse failure).
    """
    global tokenizer, model
    # Guard clause: refuse blank input with a schema-shaped error dict.
    if not cv.strip() or not job_description.strip():
        return {
            "matching_analysis": ["Please provide both a CV and a job description."],
            "description": "",
            "score": 0,
            "recommendation": "Fill both text boxes and run again.",
        }
    # Lazy GPU initialization: all CUDA-related stuff happens ONLY here
    if tokenizer is None or model is None:
        print("Initializing tokenizer + model on GPU...")
        # 4-bit weight loading with fp16 compute keeps the 8B base model
        # within the Space's VRAM budget.
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
        )
        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)
        # Llama-family tokenizers ship without a pad token; reuse EOS.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        base_model = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL_NAME,
            quantization_config=bnb_config,
            device_map="auto",
        )
        base_model.config.pad_token_id = tokenizer.pad_token_id
        # Attach the adapter whose config was patched at import time.
        model_ = PeftModel.from_pretrained(
            base_model,
            adapter_path,
            device_map="auto",
        )
        model_.eval()
        torch.set_grad_enabled(False)
        # Publish to the global only after full setup so a failed init
        # doesn't leave a half-built model behind.
        model = model_
        print("Model + LoRA adapter loaded successfully on GPU.")
    messages = build_messages(cv, job_description)
    # Render the chat template to a plain string; tokenization happens below.
    prompt = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=False,
    )
    encoded = tokenizer(prompt, return_tensors="pt")
    # Move tensors to the same device as the model
    encoded = {k: v.to(model.device) for k, v in encoded.items()}
    # NOTE(review): no sampling args passed, so generation uses the model's
    # default config (presumably greedy) — confirm that is intended.
    with torch.inference_mode():
        outputs = model.generate(
            **encoded,
            max_new_tokens=256,
            pad_token_id=tokenizer.pad_token_id,
        )
    # Slice off the prompt tokens so only newly generated text is decoded.
    input_len = encoded["input_ids"].shape[1]
    generated_tokens = outputs[0][input_len:]
    generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    result = extract_json_from_text(generated_text)
    return result
# -----------------------------
# 5. Gradio interface
# -----------------------------
# Multi-line text inputs; the CV box is taller since CVs run longer.
cv_input = gr.Textbox(
    label="CV",
    placeholder="Paste the candidate's CV here...",
    lines=18,
)
jd_input = gr.Textbox(
    label="Job Description",
    placeholder="Paste the job description here...",
    lines=8,
)
# Displays the dict returned by match_cv_job as formatted JSON.
output_json = gr.JSON(label="Matching result (JSON)")
# Wire the inference function to the two inputs and the JSON output.
demo = gr.Interface(
    fn=match_cv_job,
    inputs=[cv_input, jd_input],
    outputs=output_json,
    title="CV–Job Description Matching API",
    description=(
        "Paste a CV and a job description. The model returns a JSON object with "
        "`matching_analysis`, `description`, `score`, and `recommendation`."
    ),
)

# Launch only when run as a script (Spaces also imports this module).
if __name__ == "__main__":
    demo.launch()
|