from fastapi import FastAPI
from pydantic import BaseModel
import torch
import json
import re
from huggingface_hub import snapshot_download
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
app = FastAPI(title="CV–Job Description Matching API")


# ---------- Request body ----------
class MatchRequest(BaseModel):
    cv: str
    job_description: str

# ---------- Load model once ----------
BASE_MODEL = "akjindal53244/Llama-3.1-Storm-8B"
ADAPTER_MODEL = "LlamaFactoryAI/cv-job-description-matching"

model = None
tokenizer = None


def load_model():
    global model, tokenizer
    if model is not None:
        return

    print("Downloading adapter...")
    adapter_path = snapshot_download(ADAPTER_MODEL)

    # Patch adapter_config.json so PEFT loads the adapter as a causal-LM adapter
    cfg_path = adapter_path + "/adapter_config.json"
    with open(cfg_path, "r") as f:
        cfg = json.load(f)
    cfg["task_type"] = "CAUSAL_LM"
    with open(cfg_path, "w") as f:
        json.dump(cfg, f, indent=2)

    print("Loading tokenizer & base model...")
    # 4-bit quantization keeps the 8B base model within a single-GPU memory budget
    bnb = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    base = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        quantization_config=bnb,
        device_map="auto",
    )
    base.config.pad_token_id = tokenizer.pad_token_id

    print("Loading LoRA adapter...")
    model = PeftModel.from_pretrained(base, adapter_path, device_map="auto")
    model.eval()
    torch.set_grad_enabled(False)
    print("Model is ready.")

@app.on_event("startup")
def startup_event():
    load_model()

# ---------- System prompt ----------
SYSTEM_PROMPT = (
    "You analyze how well a CV matches a job description. "
    "Your ONLY output must be JSON with keys: "
    "matching_analysis, description, score, recommendation."
)
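# The adapter is expected to reply with a JSON object shaped roughly like the
# following (illustrative values only; the keys come from the prompt above):
#   {
#       "matching_analysis": "...",
#       "description": "...",
#       "score": "...",
#       "recommendation": "..."
#   }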

# ---------- Run inference ----------
def run_inference(cv, jd):
    global model, tokenizer
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"<CV> {cv} </CV><job_description> {jd} </job_description>"},
    ]
    prompt = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=False,
    )
    encoded = tokenizer(prompt, return_tensors="pt")
    encoded = {k: v.to(model.device) for k, v in encoded.items()}

    with torch.inference_mode():
        out = model.generate(
            **encoded,
            max_new_tokens=256,
            pad_token_id=tokenizer.pad_token_id,
        )

    # Decode only the newly generated tokens, not the echoed prompt
    input_len = encoded["input_ids"].shape[1]
    generated = tokenizer.decode(out[0][input_len:], skip_special_tokens=True)

    # Extract the first JSON object from the generation; fall back to the raw
    # text if the model produced no parseable JSON
    match = re.search(r"\{.*\}", generated, re.DOTALL)
    if match:
        try:
            return json.loads(match.group(0))
        except json.JSONDecodeError:
            pass
    return {"raw_output": generated}

# ---------- API route ----------
@app.post("/match")
def match(request: MatchRequest):
    return run_inference(request.cv, request.job_description)

@app.get("/")
def root():
    return {"message": "API running. POST /match to use it."}
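
# ---------- Example request (illustration only) ----------
# A minimal sketch of how a client might call this endpoint once the app is
# running. The URL is a placeholder (7860 is the usual Spaces port) and the
# payload values are hypothetical; requires the `requests` package.
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/match",
#       json={"cv": "Senior Python developer...", "job_description": "We need..."},
#   )
#   print(resp.json())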