import torch
import re
import time
import json
import json5
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
# Model paths
base_model_id = "Qwen/Qwen3-0.6B"
lora_model_id = "Rithankoushik/Qwen-0.6-Job-parser-Model"
# Load tokenizer and make sure a pad token is set (reuse EOS) so generation
# does not warn or mis-pad
tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id
# Load base model, attach the LoRA adapter, then merge for faster inference
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    trust_remote_code=True,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)
model = PeftModel.from_pretrained(base_model, lora_model_id, device_map="auto")
model = model.merge_and_unload()  # fold LoRA weights into the base model
model.eval()
def extract_and_clean_json(text):
    """Extract JSON from LLM output, even if extra text is present."""
    matches = re.findall(r"\{[\s\S]*\}", text)
    if not matches:
        return None
    json_str = matches[0]  # take the first (greedy) brace-delimited span
    # Convert Python-style literals the model sometimes emits into JSON ones
    json_str = json_str.replace("None", "null")
    json_str = json_str.replace("True", "true").replace("False", "false")
    # Drop trailing commas before a closing brace/bracket
    json_str = re.sub(r",(\s*[}\]])", r"\1", json_str)
    try:
        return json5.loads(json_str)
    except Exception as e:
        print(f"JSON parse error: {e}")
        return None
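
# Example of what the cleaner recovers (hypothetical model reply, not real
# model output):
#   extract_and_clean_json('Here you go: {"title": "ML Engineer", "remote": True,}')
#   -> {'title': 'ML Engineer', 'remote': True}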
def infer_from_text(jd_text: str):
    """Run inference on a job description; return (output, seconds elapsed)."""
    start_time = time.time()
    system_prompt = "Extract structured information from the following job description and return it as JSON."
    user_prompt = f"Job Description:\n{jd_text}"
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    # Build the prompt via the model's own chat template rather than by hand
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    raw_inputs = tokenizer(prompt, return_tensors="pt")
    device = model.device
    inputs = {k: v.to(device) for k, v in raw_inputs.items()}

    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=1000,
            do_sample=False,  # greedy decoding; sampling params such as temperature would be ignored
            pad_token_id=tokenizer.pad_token_id,
        )

    # Decode only the newly generated tokens, not the echoed prompt
    gen_tokens = out[0][inputs["input_ids"].shape[1]:]
    response_text = tokenizer.decode(gen_tokens, skip_special_tokens=True)
    duration = round(time.time() - start_time, 2)

    parsed = extract_and_clean_json(response_text)
    if parsed is not None:
        return json.dumps(parsed, indent=2), duration
    return response_text, duration
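
# Minimal usage sketch; the sample job description below is illustrative only.
if __name__ == "__main__":
    sample_jd = (
        "Senior Python Developer at Acme Corp, Chennai. "
        "5+ years of experience with Django and PostgreSQL. Remote friendly."
    )
    output, seconds = infer_from_text(sample_jd)
    print(output)
    print(f"Inference took {seconds}s")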