# NOTE: "Spaces: Running" page-status text from the Hugging Face Spaces file
# viewer was captured here during export; it is not part of the module.
| import torch | |
| import json | |
| import numpy as np | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| from typing import Dict, List, Union | |
| import re | |
| import os | |
class ResumeInferenceEngine:
    """Inference engine for resume extraction and matching.

    Wraps a fine-tuned causal LM — either a full checkpoint or a PEFT/LoRA
    adapter applied on top of its base model — and exposes helpers that turn
    model text output into structured dictionaries.
    """

    def __init__(self, model_path: str = "models/checkpoints/final"):
        """Load the fine-tuned model and tokenizer from ``model_path``.

        If ``model_path`` contains an ``adapter_config.json`` it is treated as
        a PEFT adapter: the base model named in that config is loaded first
        and the adapter weights are applied on top of it.
        """
        print(f"Loading model from {model_path}...")
        # CPU-only environments (common on Windows laptops) can hit PEFT/accelerate
        # offload edge-cases when using device_map="auto". Prefer a simple CPU load.
        #
        # RAM optimization: prefer bfloat16 (the best half-precision dtype on
        # CPU) to roughly halve resident memory versus float32.
        # BUG FIX: the previous condition `torch.cuda.is_available() or
        # hasattr(torch, 'bfloat16')` was always True; the intent is simply
        # "use bfloat16 whenever this torch build provides it".
        dtype = torch.bfloat16 if hasattr(torch, "bfloat16") else torch.float32
        device_map = "auto" if torch.cuda.is_available() else None
        low_cpu_mem_usage = True  # always stream weights in to avoid RAM spikes
        adapter_config_path = os.path.join(model_path, "adapter_config.json")
        is_adapter = os.path.exists(adapter_config_path)
        # Prefer the tokenizer saved alongside the adapter/model (the training
        # notebook saves the tokenizer into final/).
        self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
        if self.tokenizer.pad_token is None and self.tokenizer.eos_token is not None:
            # Causal LMs often ship without a pad token; reuse EOS for padding.
            self.tokenizer.pad_token = self.tokenizer.eos_token
        if is_adapter:
            from peft import PeftModel
            with open(adapter_config_path, "r", encoding="utf-8") as f:
                adapter_cfg = json.load(f)
            base_model_name = (
                adapter_cfg.get("base_model_name_or_path")
                or adapter_cfg.get("base_model")
                or "microsoft/phi-2"
            )
            base_model = AutoModelForCausalLM.from_pretrained(
                base_model_name,
                dtype=dtype,
                device_map=device_map,
                low_cpu_mem_usage=low_cpu_mem_usage,
                trust_remote_code=True,
            )
            # DEBUG: inspect the safetensors file before loading the adapter,
            # to diagnose truncated/corrupt uploads.
            adapter_file = os.path.join(model_path, "adapter_model.safetensors")
            if os.path.exists(adapter_file):
                size = os.path.getsize(adapter_file)
                print(f"DEBUG: adapter_model.safetensors size: {size} bytes")
                with open(adapter_file, "rb") as f:
                    header = f.read(100)
                print(f"DEBUG: adapter_model.safetensors header: {header}")
            else:
                print(f"DEBUG: adapter_model.safetensors NOT FOUND at {adapter_file}")
            self.model = PeftModel.from_pretrained(base_model, model_path)
        else:
            self.model = AutoModelForCausalLM.from_pretrained(
                model_path,
                dtype=dtype,
                device_map=device_map,
                low_cpu_mem_usage=low_cpu_mem_usage,
                trust_remote_code=True,
            )
        self.model.eval()

    def extract_resume(self, resume_text: str) -> Dict:
        """Extract structured information from a resume as a dict."""
        prompt = f"""Instruction: Extract structured information from the resume. Return valid JSON with fields: name, email, phone, skills, experience, education, certifications.
Input:
{resume_text}
Output:"""
        # Speed Optimization: Drastic reduction to 200 to GUARANTEE finish before Heroku 30s timeout
        output = self._generate(prompt, max_length=200)
        return self._parse_json_output(output)

    def match_resume_to_job(self, resume_text: str, job_description: str) -> Dict:
        """Score how well a resume matches a job description (0-100)."""
        prompt = f"""Instruction: Compare the resume against the job description and provide a match score (0-100) with reasoning. Return valid JSON with fields: match_score, matching_skills, missing_skills, recommendation.
Input:
Resume:
{resume_text}
Job Description:
{job_description}
Output:"""
        # Use a lower temperature to improve format adherence.
        output = self._generate(prompt, max_length=256, temperature=0.3)
        return self._parse_json_output(output)

    def _generate(self, prompt: str, max_length: int = 512, temperature: float = 0.7) -> str:
        """Generate a continuation for ``prompt`` and return only the new text.

        ``max_length`` is interpreted as a generation budget (max_new_tokens)
        for backward compatibility, clamped to [32, 150] for CPU latency.
        """
        # When using device_map="auto", pick the device of the first parameter.
        input_device = next(iter(self.model.parameters())).device
        tokenized = self.tokenizer(prompt, return_tensors="pt")
        tokenized = {k: v.to(input_device) for k, v in tokenized.items()}
        input_len = tokenized["input_ids"].shape[1]
        # Cap at 150 to be safe for CPU inference latency.
        max_new_tokens = max(32, min(150, int(max_length)))
        with torch.inference_mode():
            sequences = self.model.generate(
                **tokenized,
                max_new_tokens=max_new_tokens,
                min_new_tokens=8,
                temperature=temperature,
                top_p=0.95,
                num_beams=1,
                do_sample=True,
                pad_token_id=self.tokenizer.pad_token_id,
                eos_token_id=self.tokenizer.eos_token_id,
            )
        # Decode ONLY the generated continuation; avoids returning an empty string when the
        # prompt already contains the delimiter text (e.g., "Output:").
        gen_tokens = sequences[0][input_len:]
        gen_text = self.tokenizer.decode(gen_tokens, skip_special_tokens=True).strip()
        if gen_text:
            return gen_text
        # Fallback: full decode so callers can see what happened.
        full_text = self.tokenizer.decode(sequences[0], skip_special_tokens=True)
        return full_text.strip()

    def _parse_json_output(self, output: str) -> Dict:
        """Parse model output into a dict.

        Tries, in order: (1) the first JSON object embedded in the text,
        (2) "key: value" lines, (3) a plain-text "match score: N" pattern.
        Scores are normalized to 0-100, skill fields to lists of strings, and
        the raw model text is preserved under "raw_output" for debugging.
        """
        def _split_skills(v: Union[str, List[str], None]) -> List[str]:
            # Accept either a list of skills or a comma-separated string.
            if v is None:
                return []
            if isinstance(v, list):
                return [str(s).strip() for s in v if str(s).strip()]
            v = str(v).strip()
            if not v or v.lower() in {"none", "n/a", "na"}:
                return []
            return [s.strip() for s in v.split(",") if s.strip()]

        def _normalize(d: Dict) -> Dict:
            if not isinstance(d, dict):
                return {"raw_output": output}
            # Normalize match_score to the 0-100 range.
            if "match_score" in d:
                try:
                    score = d["match_score"]
                    if isinstance(score, str):
                        score = float(re.findall(r"[0-9]*\.?[0-9]+", score)[0])
                    else:
                        score = float(score)
                    if score <= 1.0:
                        score *= 100.0  # the model emitted a 0-1 fraction
                    d["match_score"] = score
                except Exception:
                    pass  # best effort: keep whatever the model produced
            # Normalize skills fields to lists.
            if "matching_skills" in d:
                d["matching_skills"] = _split_skills(d.get("matching_skills"))
            if "missing_skills" in d:
                d["missing_skills"] = _split_skills(d.get("missing_skills"))
            # Preserve raw output for debugging.
            d.setdefault("raw_output", output)
            return d

        # (1) First embedded JSON object.
        # BUG FIX: the previous greedy regex r'\{.*\}' spanned from the FIRST
        # "{" to the LAST "}" in the text, so a stray brace or a second object
        # after the JSON corrupted the candidate and parsing failed.
        # raw_decode() parses exactly one object and ignores trailing text.
        try:
            start = output.index("{")
            parsed, _ = json.JSONDecoder().raw_decode(output[start:])
            return _normalize(parsed)
        except (ValueError, TypeError, AttributeError):
            # ValueError covers both a missing "{" and json.JSONDecodeError;
            # TypeError/AttributeError cover a non-string ``output``.
            pass
        # (2) Fallback: parse simple key:value lines (common when the model
        # doesn't emit JSON). Example:
        #   match_score: 0.85
        #   matching_skills: Python, TensorFlow
        if isinstance(output, str):
            kv = {}
            for raw_line in output.splitlines():
                line = raw_line.strip()
                if not line or ":" not in line:
                    continue
                key, value = line.split(":", 1)
                key = key.strip().strip('"').strip("'").lower()
                value = value.strip().strip('"').strip("'")
                if not key:
                    continue
                kv[key] = value
            if kv:
                # Normalize known fields.
                if "match_score" in kv:
                    try:
                        score = float(re.findall(r"[0-9]*\.?[0-9]+", kv["match_score"])[0])
                        if score <= 1.0:
                            score *= 100.0
                        kv["match_score"] = score
                    except Exception:
                        pass
                if "matching_skills" in kv:
                    kv["matching_skills"] = _split_skills(kv["matching_skills"])
                if "missing_skills" in kv:
                    kv["missing_skills"] = _split_skills(kv["missing_skills"])
                # Keep a copy of the original raw output for debugging.
                kv["raw_output"] = output
                return kv
        # (3) Fallback: try to parse a match score from plain text.
        m = re.search(r"match\s*score\s*[:=]\s*([0-9]*\.?[0-9]+)", output or "", flags=re.IGNORECASE)
        if m:
            score = float(m.group(1))
            if score <= 1.0:
                score *= 100.0
            return {"match_score": score, "raw_output": output}
        # Return structured response if parsing fails.
        return {"error": "Failed to parse output", "raw_output": output}

    def batch_extract(self, resumes: List[str]) -> List[Dict]:
        """Extract from multiple resumes"""
        results = []
        for i, resume in enumerate(resumes):
            print(f"Processing resume {i+1}/{len(resumes)}...")
            results.append(self.extract_resume(resume))
        return results

    def batch_match(self, resume_pairs: List[tuple]) -> List[Dict]:
        """Match multiple resume-job pairs"""
        results = []
        for i, (resume, job) in enumerate(resume_pairs):
            print(f"Processing pair {i+1}/{len(resume_pairs)}...")
            results.append(self.match_resume_to_job(resume, job))
        return results
# Flask API for serving predictions
def create_api(model_path: str = "models/checkpoints/final"):
    """Create a Flask API for inference.

    Routes: POST /extract, POST /match, GET /health.

    BUG FIX: the handler functions were previously defined but never bound to
    any URL (no ``@app.route`` decorators), so the returned app served no
    endpoints at all.
    """
    from flask import Flask, request, jsonify
    from flask_cors import CORS

    app = Flask(__name__)
    CORS(app)  # Enable CORS for all routes
    engine = ResumeInferenceEngine(model_path)

    @app.route("/extract", methods=["POST"])
    def extract():
        """Extract information from resume"""
        data = request.json
        resume = data.get("resume", "")
        if not resume:
            return jsonify({"error": "Resume text required"}), 400
        result = engine.extract_resume(resume)
        return jsonify(result)

    @app.route("/match", methods=["POST"])
    def match():
        """Match resume to job description"""
        data = request.json
        resume = data.get("resume", "")
        job = data.get("job_description", "")
        if not resume or not job:
            return jsonify({"error": "Resume and job description required"}), 400
        result = engine.match_resume_to_job(resume, job)
        return jsonify(result)

    @app.route("/health", methods=["GET"])
    def health():
        """Liveness probe for the hosting platform."""
        return jsonify({"status": "healthy"})

    return app
def main():
    """CLI entry point: run a one-shot extract/match, or launch the Flask API."""
    import argparse
    parser = argparse.ArgumentParser()
    # BUG FIX: the help text advertised a "batch" mode that was never implemented.
    parser.add_argument("--mode", default="cli", help="Mode: cli or api")
    parser.add_argument("--model-path", default="models/checkpoints/final", help="Path to model")
    parser.add_argument("--task", choices=["extract", "match"], default="extract")
    parser.add_argument("--resume-file", help="Path to resume file")
    parser.add_argument("--job-file", help="Path to job description file")
    parser.add_argument("--port", type=int, default=5000, help="API port")
    args = parser.parse_args()
    if args.mode == "cli":
        # BUG FIX: the engine used to be constructed before dispatching on
        # --mode, so "api" mode loaded the heavyweight model twice (once here
        # and again inside create_api). Construct it only where it is used.
        engine = ResumeInferenceEngine(args.model_path)
        if args.task == "extract":
            with open(args.resume_file) as f:
                resume = f.read()
            result = engine.extract_resume(resume)
        else:  # args.task == "match" (argparse restricts choices)
            with open(args.resume_file) as f:
                resume = f.read()
            with open(args.job_file) as f:
                job = f.read()
            result = engine.match_resume_to_job(resume, job)
        print(json.dumps(result, indent=2))
    elif args.mode == "api":
        app = create_api(args.model_path)
        print(f"Starting API on port {args.port}...")
        app.run(host="0.0.0.0", port=args.port, debug=False)
    else:
        # BUG FIX: unknown modes previously exited silently; fail loudly.
        parser.error(f"unsupported --mode: {args.mode!r}")


if __name__ == "__main__":
    main()