import json
import re
import sys

import joblib
import numpy as np
from sentence_transformers import SentenceTransformer

from utils import extract_deep_features, parse_sections
|
|
|
class YoePredictor:
    """Predict a job posting's experience level and minimum years of experience.

    A sentence embedding of the posting text is concatenated with scaled
    heuristic features (from ``extract_deep_features``) and passed to a level
    classifier and a YOE regressor loaded from disk. Each prediction also
    carries a manual-review flag driven by whether a YOE value could be
    extracted and by the classifier's confidence.
    """

    # Heuristic feature names, in the order they are handed to the scaler.
    # NOTE(review): presumably this must match the column order used when the
    # scaler/models were trained - confirm against the training script.
    _META_COLS = (
        'min_yoe_found',
        'max_yoe_found',
        'regex_count',
        'has_explicit_yoe',
        'extraction_quality',
        'in_req_section',
        'has_phd',
        'has_masters',
        'is_manager',
    )

    # Whole-word match for "intern", "interns", "internship(s)". A plain
    # substring test would also hit "International", "Internal", "Internet".
    _INTERN_TITLE_RE = re.compile(r"\bintern(?:s|ships?)?\b", re.IGNORECASE)

    def __init__(self):
        """Load all model artifacts from the current working directory.

        Prints an error and terminates the process with exit status 1 if any
        model fails to load. The confidence-threshold config file is optional;
        when it is absent or invalid the default threshold of 0.55 is used.
        """
        print("Loading models into memory...")
        try:
            # NOTE: joblib.load unpickles arbitrary objects; only load
            # artifacts that come from a trusted source.
            self.scaler = joblib.load('meta_scaler.pkl')
            self.clf = joblib.load('level_classifier.pkl')
            self.le = joblib.load('label_encoder.pkl')
            self.reg = joblib.load('yoe_regressor.pkl')
            self.sbert = SentenceTransformer('all-MiniLM-L6-v2')
            self.low_conf_threshold = self._load_confidence_threshold(
                'confidence_config.json', 0.55
            )
        except Exception as e:
            print(f"Error loading models. Did you run train_improved.py? Error: {e}")
            sys.exit(1)

    @staticmethod
    def _load_confidence_threshold(path, default):
        """Return ``low_confidence_threshold`` from the JSON file at *path*.

        Falls back to *default* when the file is missing (silently, since the
        file is optional) or when it cannot be read/parsed or holds a
        non-numeric value (with a warning on stderr).
        """
        try:
            with open(path, 'r', encoding='utf-8') as f:
                cfg = json.load(f)
            return float(cfg.get('low_confidence_threshold', default))
        except FileNotFoundError:
            return default
        except (OSError, ValueError, TypeError, AttributeError, OverflowError) as exc:
            # Best-effort: a broken config must not prevent predictions, but
            # it should not go unnoticed either.
            print(
                f"Warning: ignoring invalid confidence config {path!r}: {exc}",
                file=sys.stderr,
            )
            return default

    @classmethod
    def _is_intern_title(cls, title):
        """Return True when *title* contains intern/internship as a whole word."""
        return bool(cls._INTERN_TITLE_RE.search(title or ""))

    @staticmethod
    def _reconcile_yoe(model_yoe, extracted_min):
        """Combine the regressor estimate with the extracted minimum YOE.

        Returns ``(yoe, overridden)`` where ``yoe`` is a plain Python float,
        never below the extracted minimum, clamped at 0 and rounded to one
        decimal, and ``overridden`` tells whether the extracted minimum
        replaced the model estimate.
        """
        overridden = bool(extracted_min > model_yoe)
        # Convert to a built-in float *before* rounding so NumPy float32
        # scalars do not leave binary noise (2.3 -> 2.2999999523...).
        yoe = float(extracted_min) if overridden else float(model_yoe)
        return max(0.0, round(yoe, 1)), overridden

    @staticmethod
    def _expected_range(yoe):
        """Return ``(low, high)``: *yoe* +/- 20% (at least 1 year), low clamped at 0."""
        margin = max(1.0, round(yoe * 0.2, 1))
        # Round the bounds so float arithmetic artifacts are not displayed.
        return max(0.0, round(yoe - margin, 1)), round(yoe + margin, 1)

    def predict(self, title, description, silent=False):
        """Predict YOE and review flag for a job description.

        Args:
            title: Job title. ``None`` is treated as an empty string.
            description: Job description text. ``None`` is treated as empty.
            silent: When True, skip printing the prediction report.

        Returns:
            dict with keys:
            - level: predicted experience level label
            - yoe: predicted minimum years of experience (float, 1 decimal)
            - extracted_yoe: heuristic extracted minimum years, or None when
              the extracted value is negative (presumably the "not found"
              sentinel of ``extract_deep_features`` - confirm in utils)
            - needs_manual_review: True when extraction is missing or model
              confidence is low
            - reason: comma-separated tags explaining the routing decision
            - confidence: level-classifier confidence (max probability)
            - confidence_threshold: threshold below which confidence is low
        """
        # Missing fields become empty text rather than raising TypeError.
        title = title or ""
        description = description or ""
        raw_text = (title + " " + description).lower()

        deep_feats = extract_deep_features(raw_text)

        # Embed the requirements section when it is substantial; otherwise
        # fall back to the first 1536 characters of the whole posting.
        sections = parse_sections(raw_text)
        requirements = sections['requirements']
        sbert_context = requirements if len(requirements) > 100 else raw_text[:1536]
        embedding = self.sbert.encode([sbert_context])

        meta_vals = [deep_feats[k] for k in self._META_COLS]
        meta_scaled = self.scaler.transform([meta_vals])

        X = np.hstack([embedding, meta_scaled])

        level_idx = self.clf.predict(X)[0]
        level_probs = self.clf.predict_proba(X)[0]
        level_confidence = float(np.max(level_probs))
        level = self.le.classes_[level_idx]

        reason_tags = []

        # ravel() copes with regressors that return shape (n,) or (n, 1).
        model_yoe = np.ravel(self.reg.predict(X))[0]
        yoe_pred, overridden = self._reconcile_yoe(model_yoe, deep_feats['min_yoe_found'])
        if overridden:
            reason_tags.append('heuristic_override_to_extracted_min')

        # Internships are always entry level with no experience required.
        if self._is_intern_title(title):
            level = 'entry'
            yoe_pred = 0.0
            reason_tags.append('intern_title_override')

        extracted_yoe = int(deep_feats['min_yoe_found']) if deep_feats['min_yoe_found'] >= 0 else None
        extraction_missing = extracted_yoe is None
        low_confidence = level_confidence < self.low_conf_threshold
        needs_manual_review = extraction_missing or low_confidence
        if extraction_missing:
            reason_tags.append('missing_explicit_yoe_extraction')
        if low_confidence:
            reason_tags.append('low_model_confidence')
        if not reason_tags:
            reason_tags.append('model_prediction_confident')

        result = {
            'level': level,
            'yoe': yoe_pred,
            'extracted_yoe': extracted_yoe,
            'needs_manual_review': needs_manual_review,
            'reason': ", ".join(reason_tags),
            'confidence': round(level_confidence, 4),
            'confidence_threshold': round(self.low_conf_threshold, 4),
        }

        if not silent:
            range_low, range_high = self._expected_range(yoe_pred)
            print("\n--- PREDICTION REPORT ---")
            print(f"Title: {title}")
            print(f"Experience Level: {result['level'].upper()}")
            print(f"Estimated YOE: {result['yoe']}")
            print(f"Expected Range: {range_low} - {range_high} years")
            print(f"Extracted YOE: {result['extracted_yoe']}")
            print(f"Confidence: {result['confidence']} (threshold={result['confidence_threshold']})")
            print(f"Needs Manual Review: {result['needs_manual_review']}")
            print(f"Reason: {result['reason']}")

        return result
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: expects a job title and a job description as
    # the first two arguments. With fewer than two arguments a built-in demo
    # posting is scored instead. The predictor is constructed first so model
    # loading happens (and can fail) before the inputs are chosen.
    predictor = YoePredictor()

    if len(sys.argv) >= 3:
        job_title, job_description = sys.argv[1], sys.argv[2]
    else:
        job_title = "Staff Software Engineer"
        job_description = "Looking for a technical leader with at least twelve years of industry experience. Founded 5 years ago."

    predictor.predict(job_title, job_description)