Irfaniiioo committed
Commit 373e459 · verified · 1 Parent(s): 11de515

Update app.py

Files changed (1): app.py (+80 -81)
app.py CHANGED
@@ -1,93 +1,85 @@
-# app.py
-
-import json
-import re
-import torch
 from fastapi import FastAPI
 from pydantic import BaseModel
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+import torch
+import json
+import re
+
 from huggingface_hub import snapshot_download
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from peft import PeftModel
 
-# -----------------------------
-# Load Model at Startup
-# -----------------------------
+app = FastAPI(title="CV–Job Description Matching API")
+
+# ---------- Request body ----------
+class MatchRequest(BaseModel):
+    cv: str
+    job_description: str
 
+# ---------- Load model once ----------
 BASE_MODEL = "akjindal53244/Llama-3.1-Storm-8B"
 ADAPTER_MODEL = "LlamaFactoryAI/cv-job-description-matching"
 
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_compute_dtype=torch.float16,
-)
-
-print("Downloading adapter...")
-adapter_path = snapshot_download(ADAPTER_MODEL)
-
-# Patch adapter_config.json exactly like in Kaggle
-config_path = adapter_path + "/adapter_config.json"
-with open(config_path, "r") as f:
-    cfg = json.load(f)
-cfg["task_type"] = "CAUSAL_LM"
-with open(config_path, "w") as f:
-    json.dump(cfg, f, indent=2)
-print("Patched adapter_config.json")
-
-print("Loading tokenizer + base model...")
-tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
-if tokenizer.pad_token is None:
-    tokenizer.pad_token = tokenizer.eos_token
-
-base_model = AutoModelForCausalLM.from_pretrained(
-    BASE_MODEL,
-    quantization_config=bnb_config,
-    device_map="auto",
-)
-base_model.config.pad_token_id = tokenizer.pad_token_id
-
-print("Applying LoRA adapter...")
-model = PeftModel.from_pretrained(
-    base_model,
-    adapter_path,
-    device_map="auto"
-)
-model.eval()
-torch.set_grad_enabled(False)
-
-print("Model ready.")
-
-# -----------------------------
-# FastAPI Setup
-# -----------------------------
-
-app = FastAPI()
-
-class MatchRequest(BaseModel):
-    cv: str
-    job_description: str
-
-@app.get("/")
-def root():
-    return {"status": "ok", "message": "CV Matching API running"}
-
-@app.post("/predict")
-def predict(req: MatchRequest):
+model = None
+tokenizer = None
+
+def load_model():
+    global model, tokenizer
+
+    if model is not None:
+        return
+
+    print("Downloading adapter...")
+    adapter_path = snapshot_download(ADAPTER_MODEL)
+
+    # Patch adapter_config.json
+    cfg_path = adapter_path + "/adapter_config.json"
+    with open(cfg_path, "r") as f:
+        cfg = json.load(f)
+    cfg["task_type"] = "CAUSAL_LM"
+    with open(cfg_path, "w") as f:
+        json.dump(cfg, f, indent=2)
+
+    print("Loading tokenizer & base model...")
+    bnb = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)
+
+    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
+
+    base = AutoModelForCausalLM.from_pretrained(
+        BASE_MODEL,
+        quantization_config=bnb,
+        device_map="auto",
+    )
+    base.config.pad_token_id = tokenizer.pad_token_id
+
+    print("Loading LoRA adapter...")
+    model = PeftModel.from_pretrained(base, adapter_path, device_map="auto")
+    model.eval()
+    torch.set_grad_enabled(False)
+
+    print("Model is ready.")
+
+@app.on_event("startup")
+def startup_event():
+    load_model()
+
+# ---------- System prompt ----------
+SYSTEM_PROMPT = (
+    "You analyze how well a CV matches a job description. "
+    "Your ONLY output must be JSON with keys: "
+    "matching_analysis, description, score, recommendation."
+)
+
+# ---------- Run inference ----------
+def run_inference(cv, jd):
+    global model, tokenizer
+
     messages = [
-        {
-            "role": "system",
-            "content": (
-                "You analyze how well a CV matches a job description. "
-                "Your ONLY output must be JSON with the keys: "
-                "matching_analysis, description, score, recommendation."
-            ),
-        },
-        {
-            "role": "user",
-            "content": f"<CV> {req.cv} </CV>\n<job_description> {req.job_description} </job_description>",
-        },
+        {"role": "system", "content": SYSTEM_PROMPT},
+        {"role": "user", "content": f"<CV> {cv} </CV><job_description> {jd} </job_description>"}
     ]
 
-    # Build chat prompt
     prompt = tokenizer.apply_chat_template(
         messages,
         add_generation_prompt=True,
@@ -98,20 +90,27 @@ def predict(req: MatchRequest):
     encoded = {k: v.to(model.device) for k, v in encoded.items()}
 
     with torch.inference_mode():
-        output = model.generate(
+        out = model.generate(
             **encoded,
             max_new_tokens=256,
             pad_token_id=tokenizer.pad_token_id,
         )
 
     input_len = encoded["input_ids"].shape[1]
-    generated = tokenizer.decode(output[0][input_len:], skip_special_tokens=True)
-
-    # Try to parse JSON
-    try:
-        start = generated.index("{")
-        end = generated.rindex("}") + 1
-        json_text = generated[start:end]
-        return json.loads(json_text)
-    except Exception:
-        return {"raw_output": generated}
+    generated = tokenizer.decode(out[0][input_len:], skip_special_tokens=True)
+
+    # Extract JSON
+    match = re.search(r"\{.*\}", generated, re.DOTALL)
+    if match:
+        return json.loads(match.group(0))
+
+    return {"raw_output": generated}
+
+# ---------- API route ----------
+@app.post("/match")
+def match(request: MatchRequest):
+    return run_inference(request.cv, request.job_description)
+
+@app.get("/")
+def root():
+    return {"message": "API running. POST /match to use it."}
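
Note on the startup hook: recent FastAPI releases deprecate @app.on_event("startup") in favor of a lifespan handler. If this Space's FastAPI version warns about it, the equivalent wiring would look roughly like the sketch below, reusing this commit's load_model; treat it as an illustration, not a required change.

from contextlib import asynccontextmanager
from fastapi import FastAPI

@asynccontextmanager
async def lifespan(app: FastAPI):
    load_model()  # load weights once, before the first request is served
    yield         # the app serves requests while suspended here; cleanup goes after the yield

app = FastAPI(title="CV–Job Description Matching API", lifespan=lifespan)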
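
For reference, a reply that satisfies the system prompt's contract might look like the dict below. The values are invented for illustration, and the prompt does not constrain score to a number or a range, so clients should validate rather than assume.

# Hypothetical example of a well-formed model reply: not real output.
example_reply = {
    "matching_analysis": "Strong overlap on Python and NLP; little cloud experience.",
    "description": "Mid-level ML engineer CV compared against an LLM-tooling role.",
    "score": 72,
    "recommendation": "Shortlist; probe deployment experience in the interview.",
}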