# cv_jd / app.py
from fastapi import FastAPI
from pydantic import BaseModel
import torch
import json
import re
from huggingface_hub import snapshot_download
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

app = FastAPI(title="CV–Job Description Matching API")

# ---------- Request body ----------
class MatchRequest(BaseModel):
    cv: str
    job_description: str

# ---------- Load model once ----------
BASE_MODEL = "akjindal53244/Llama-3.1-Storm-8B"
ADAPTER_MODEL = "LlamaFactoryAI/cv-job-description-matching"

model = None
tokenizer = None

def load_model():
    """Download the LoRA adapter and load the 4-bit quantized base model once."""
    global model, tokenizer
    if model is not None:
        return

    print("Downloading adapter...")
    adapter_path = snapshot_download(ADAPTER_MODEL)

    # Patch adapter_config.json so PEFT treats the adapter as a causal-LM head.
    cfg_path = adapter_path + "/adapter_config.json"
    with open(cfg_path, "r") as f:
        cfg = json.load(f)
    cfg["task_type"] = "CAUSAL_LM"
    with open(cfg_path, "w") as f:
        json.dump(cfg, f, indent=2)

    print("Loading tokenizer & base model...")
    bnb = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    if tokenizer.pad_token is None:
        # Llama tokenizers ship without a pad token; reuse EOS for padding.
        tokenizer.pad_token = tokenizer.eos_token

    base = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        quantization_config=bnb,
        device_map="auto",
    )
    base.config.pad_token_id = tokenizer.pad_token_id

    print("Loading LoRA adapter...")
    model = PeftModel.from_pretrained(base, adapter_path, device_map="auto")
    model.eval()
    torch.set_grad_enabled(False)
    print("Model is ready.")

# Warm the model at startup so the first request doesn't pay the load cost.
# Note: newer FastAPI versions prefer lifespan handlers over on_event.
@app.on_event("startup")
def startup_event():
    load_model()

# ---------- System prompt ----------
SYSTEM_PROMPT = (
    "You analyze how well a CV matches a job description. "
    "Your ONLY output must be JSON with keys: "
    "matching_analysis, description, score, recommendation."
)
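
# Illustrative shape of the JSON the prompt requests. The field values below
# are hypothetical; the model's actual analysis, wording, and score will vary:
#
#   {
#     "matching_analysis": "Strong overlap on Python and ML tooling ...",
#     "description": "Mid-level backend role vs. 4 years of Python experience.",
#     "score": 82,
#     "recommendation": "Surface the candidate's cloud experience more clearly."
#   }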
# ---------- Run inference ----------
def run_inference(cv, jd):
    global model, tokenizer

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"<CV> {cv} </CV><job_description> {jd} </job_description>"},
    ]
    prompt = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=False,
    )
    encoded = tokenizer(prompt, return_tensors="pt")
    encoded = {k: v.to(model.device) for k, v in encoded.items()}

    with torch.inference_mode():
        out = model.generate(
            **encoded,
            max_new_tokens=256,
            pad_token_id=tokenizer.pad_token_id,
        )

    # Decode only the newly generated tokens, skipping the echoed prompt.
    input_len = encoded["input_ids"].shape[1]
    generated = tokenizer.decode(out[0][input_len:], skip_special_tokens=True)

    # Extract the first JSON object; fall back to the raw text if the model
    # emitted no JSON or the JSON is malformed.
    match = re.search(r"\{.*\}", generated, re.DOTALL)
    if match:
        try:
            return json.loads(match.group(0))
        except json.JSONDecodeError:
            pass
    return {"raw_output": generated}

# ---------- API route ----------
@app.post("/match")
def match(request: MatchRequest):
    return run_inference(request.cv, request.job_description)
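
# Example call (hypothetical payload; adjust host/port to your deployment):
#
#   curl -X POST http://localhost:7860/match \
#     -H "Content-Type: application/json" \
#     -d '{"cv": "Python developer, 5 years...", "job_description": "Backend engineer..."}'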

@app.get("/")
def root():
    return {"message": "API running. POST /match to use it."}
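
# Minimal sketch for running locally. The Space itself is assumed to start
# the app via its own entrypoint (e.g. a Dockerfile CMD), so this guard only
# matters for local development; 7860 is the usual Spaces port.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)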