# applai-confit / confit_service.py
# Provenance (HF Hub page residue): uploaded by Smutypi3 via huggingface_hub,
# commit 67e322a (verified).
"""ConFiT service — bidirectional cross-modal embedding alignment.
Architecture: BidirectionalAlignmentModel (from confit hyperparameter tuning notebook).
- Shared transforms: transform_resume_to_jd_shared / transform_jd_to_resume_shared
- Per-feature transforms: transform_resume_to_jd[feat] / transform_jd_to_resume[feat]
- Per-feature LayerNorm, learned blend weights, and feature scale parameters
- Features: full, education, experience, leadership (768-dim each)
Forward:
model(resume_features_dict, jd_features_dict)
→ (resume_to_jd_dict, jd_to_resume_dict)
Directions are independent — only the relevant side needs to be populated:
direction='to_jd' → pass resume spans as resume_features, get resume_to_jd
direction='to_resume' → pass jd spans as jd_features, get jd_to_resume
Weights: backend/ai_models/confit_best_model_weights.pt
Supported formats:
- Raw state dict
- Checkpoint dict with key 'model_state_dict'
"""
from __future__ import annotations
from pathlib import Path
import torch
import torch.nn as nn
import torch.nn.functional as F
# Weights file produced by the confit hyperparameter-tuning notebook.
_WEIGHTS_PATH = Path(__file__).parent.parent / "confit" / "confit_best_model_weights.pt"
# Embedding dimensionality shared by every feature span.
_DIM = 768
# Feature spans the model was trained on; keys passed to the public API must
# be a subset of these.
_FEATURE_NAMES = ["full", "education", "experience", "leadership"]
# Lazily-initialised process-wide singleton (see _get_model).  The forward
# reference to BidirectionalAlignmentModel is legal here only because of
# `from __future__ import annotations`.
_model: BidirectionalAlignmentModel | None = None
_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# ---------------------------------------------------------------------------
# Model definition — exact copy from confit hyperparameter tuning notebook
# ---------------------------------------------------------------------------
class BidirectionalAlignmentModel(nn.Module):
    """Bidirectional alignment with shared + per-feature transformations.

    Each feature gets:
      - A shared transform (resume→JD and JD→resume)
      - A feature-specific transform (resume→JD and JD→resume)
      - A learned blend weight that mixes shared vs feature-specific output
      - A LayerNorm applied before transformation
      - A learned feature scale applied after LayerNorm
    """

    def __init__(self, dim: int, feature_names: list[str]) -> None:
        super().__init__()
        self.dim = dim
        self.feature_names = feature_names
        # Shared transformations (one per direction), orthogonally initialised
        # so they start out norm-preserving.
        self.transform_resume_to_jd_shared = nn.Linear(dim, dim, bias=False)
        self.transform_jd_to_resume_shared = nn.Linear(dim, dim, bias=False)
        nn.init.orthogonal_(self.transform_resume_to_jd_shared.weight)
        nn.init.orthogonal_(self.transform_jd_to_resume_shared.weight)
        # Per-feature transformations.
        self.transform_resume_to_jd = nn.ModuleDict({
            feat: nn.Linear(dim, dim, bias=False) for feat in feature_names
        })
        self.transform_jd_to_resume = nn.ModuleDict({
            feat: nn.Linear(dim, dim, bias=False) for feat in feature_names
        })
        # Layer normalisation per feature (applied before any transform).
        self.feature_norm = nn.ModuleDict({
            feat: nn.LayerNorm(dim) for feat in feature_names
        })
        # Learned blend weights (passed through sigmoid → [0, 1] in forward).
        self.feature_blend_weights = nn.ParameterDict({
            feat: nn.Parameter(torch.tensor(0.5)) for feat in feature_names
        })
        # Feature scaling.  NOTE(review): the original comment said
        # "softplus → positive" but _align() actually uses abs() + 0.1; the
        # abs form is kept so trained weights behave identically.
        self.feature_scale = nn.ParameterDict({
            feat: nn.Parameter(torch.ones(1)) for feat in feature_names
        })
        for feat in feature_names:
            nn.init.orthogonal_(self.transform_resume_to_jd[feat].weight)
            nn.init.orthogonal_(self.transform_jd_to_resume[feat].weight)

    def _align(
        self,
        feat: str,
        x: torch.Tensor,
        shared: nn.Module,
        specific: nn.Module,
    ) -> torch.Tensor:
        """Normalise and scale `x`, then blend shared vs feature-specific transforms.

        Shared by both directions; `shared`/`specific` select the direction's
        linear layers.  Identical op order to the original duplicated branches.
        """
        normed = self.feature_norm[feat](x)
        # abs() + 0.1 keeps the per-feature scale strictly positive.
        scaled = normed * (self.feature_scale[feat].abs() + 0.1)
        blend = torch.sigmoid(self.feature_blend_weights[feat])
        return (1 - blend) * shared(scaled) + blend * specific(scaled)

    def forward(
        self,
        resume_features: dict[str, torch.Tensor],
        jd_features: dict[str, torch.Tensor],
    ) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]:
        """Align whichever side(s) are populated.

        Features absent from an input dict are simply skipped, so callers may
        populate only the direction they need (see module docstring).

        Returns:
            (resume_to_jd, jd_to_resume) dicts of aligned tensors.
        """
        resume_to_jd: dict[str, torch.Tensor] = {}
        jd_to_resume: dict[str, torch.Tensor] = {}
        for feat in self.feature_names:
            if feat in resume_features:
                resume_to_jd[feat] = self._align(
                    feat,
                    resume_features[feat],
                    self.transform_resume_to_jd_shared,
                    self.transform_resume_to_jd[feat],
                )
            if feat in jd_features:
                jd_to_resume[feat] = self._align(
                    feat,
                    jd_features[feat],
                    self.transform_jd_to_resume_shared,
                    self.transform_jd_to_resume[feat],
                )
        return resume_to_jd, jd_to_resume
# ---------------------------------------------------------------------------
# Lazy model loader
# ---------------------------------------------------------------------------
def _get_model() -> BidirectionalAlignmentModel:
    """Return the process-wide ConFiT model, loading weights on first use.

    Returns:
        The singleton BidirectionalAlignmentModel in eval mode on `_device`.

    Raises:
        FileNotFoundError: if the weights file is missing.
        RuntimeError: if the checkpoint does not match the architecture.
    """
    global _model
    if _model is None:
        if not _WEIGHTS_PATH.exists():
            raise FileNotFoundError(
                f"ConFiT weights not found: {_WEIGHTS_PATH}\n"
                "Upload the file to that path (or to HuggingFace Hub and load via hf_hub_download)."
            )
        model = BidirectionalAlignmentModel(_DIM, _FEATURE_NAMES).to(_device)
        checkpoint = torch.load(str(_WEIGHTS_PATH), map_location=_device, weights_only=True)
        # Handle both raw state dicts and checkpoint dicts ({'model_state_dict': ...}).
        state = checkpoint.get("model_state_dict", checkpoint) if isinstance(checkpoint, dict) else checkpoint
        model.load_state_dict(state)
        model.eval()
        # Publish to the global only after the weights loaded successfully.
        # The original assigned `_model` before loading, so a failed
        # torch.load()/load_state_dict() left a randomly-initialised model
        # cached for every subsequent call.
        _model = model
    return _model
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _to_tensor_dict(spans: dict[str, list[float]]) -> dict[str, torch.Tensor]:
    """Turn each float-list span into a (1, dim) float32 tensor on the service device."""
    tensors: dict[str, torch.Tensor] = {}
    for name, values in spans.items():
        tensors[name] = torch.tensor(values, dtype=torch.float32).unsqueeze(0).to(_device)
    return tensors
def _from_tensor_dict(tensor_dict: dict[str, torch.Tensor]) -> dict[str, list[float]]:
"""Convert batched tensor dict back to float-list dict."""
return {key: t[0].cpu().tolist() for key, t in tensor_dict.items()}
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
@torch.no_grad()
def align_embedding(embedding: list[float], feature: str, direction: str) -> list[float]:
    """Align one 768-dim embedding for a single named feature.

    Thin wrapper around align_spans() for the single-feature case.

    Args:
        embedding: 768-dim float list from SBERT or LayoutLM.
        feature: One of 'full', 'education', 'experience', 'leadership'.
        direction: 'to_jd' — resume embedding → JD space
                   'to_resume' — JD embedding → resume space

    Returns:
        Aligned 768-dim float list.
    """
    return align_spans({feature: embedding}, direction)[feature]
def align_spans(spans: dict[str, list[float]], direction: str) -> dict[str, list[float]]:
    """Align all embedding spans using BidirectionalAlignmentModel.

    Args:
        spans: Dict of feature_name → 768-dim float list.
            Keys must be a subset of ['full', 'education', 'experience', 'leadership'].
        direction: 'to_jd' — resume embeddings → JD space
                   'to_resume' — JD embeddings → resume space

    Returns:
        Same structure with aligned embeddings.

    Raises:
        ValueError: if `direction` is not 'to_jd' or 'to_resume'.  (The
            original silently treated any other value as 'to_resume', so a
            typo produced embeddings in the wrong space with no error.)
    """
    if direction not in ("to_jd", "to_resume"):
        raise ValueError(f"direction must be 'to_jd' or 'to_resume', got {direction!r}")
    model = _get_model()
    features = _to_tensor_dict(spans)
    with torch.no_grad():
        if direction == "to_jd":
            # Resume → JD: only the resume_features side needs to be populated.
            resume_to_jd, _ = model(features, {})
            return _from_tensor_dict(resume_to_jd)
        # JD → Resume: only the jd_features side needs to be populated.
        _, jd_to_resume = model({}, features)
        return _from_tensor_dict(jd_to_resume)