# applai-confit / confit_service.py
# Provenance (HF Hub page residue): uploaded by Smutypi3 via huggingface_hub,
# commit 67e322a (verified).
"""ConFiT service — bidirectional cross-modal embedding alignment.
Architecture: BidirectionalAlignmentModel (from confit hyperparameter tuning notebook).
- Shared transforms: transform_resume_to_jd_shared / transform_jd_to_resume_shared
- Per-feature transforms: transform_resume_to_jd[feat] / transform_jd_to_resume[feat]
- Per-feature LayerNorm, learned blend weights, and feature scale parameters
- Features: full, education, experience, leadership (768-dim each)
Forward:
model(resume_features_dict, jd_features_dict)
→ (resume_to_jd_dict, jd_to_resume_dict)
Directions are independent — only the relevant side needs to be populated:
direction='to_jd' → pass resume spans as resume_features, get resume_to_jd
direction='to_resume' → pass jd spans as jd_features, get jd_to_resume
Weights: backend/ai_models/confit_best_model_weights.pt
Supported formats:
- Raw state dict
- Checkpoint dict with key 'model_state_dict'
"""
from __future__ import annotations
from pathlib import Path
import torch
import torch.nn as nn
import torch.nn.functional as F
# Weights file produced by the confit hyperparameter-tuning notebook.
_WEIGHTS_PATH = Path(__file__).parent.parent / "confit" / "confit_best_model_weights.pt"
# Embedding dimensionality shared by every feature span.
_DIM = 768
# Feature spans the model was trained on; keys passed to the public API must
# be a subset of these.
_FEATURE_NAMES = ["full", "education", "experience", "leadership"]
# Lazily-initialised process-wide singleton (see _get_model).  The forward
# reference to BidirectionalAlignmentModel is legal here only because of
# `from __future__ import annotations`.
_model: BidirectionalAlignmentModel | None = None
_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# ---------------------------------------------------------------------------
# Model definition — exact copy from confit hyperparameter tuning notebook
# ---------------------------------------------------------------------------
class BidirectionalAlignmentModel(nn.Module):
    """Bidirectional alignment with shared + per-feature transformations.

    Each feature gets:
      - A shared transform (resume→JD and JD→resume)
      - A feature-specific transform (resume→JD and JD→resume)
      - A learned blend weight that mixes shared vs feature-specific output
      - A LayerNorm applied before transformation
      - A learned feature scale applied after LayerNorm
    """

    def __init__(self, dim: int, feature_names: list[str]) -> None:
        super().__init__()
        self.dim = dim
        self.feature_names = feature_names
        # Shared transformations (one per direction), orthogonally initialised
        # so they start out norm-preserving.
        self.transform_resume_to_jd_shared = nn.Linear(dim, dim, bias=False)
        self.transform_jd_to_resume_shared = nn.Linear(dim, dim, bias=False)
        nn.init.orthogonal_(self.transform_resume_to_jd_shared.weight)
        nn.init.orthogonal_(self.transform_jd_to_resume_shared.weight)
        # Per-feature transformations.
        self.transform_resume_to_jd = nn.ModuleDict({
            feat: nn.Linear(dim, dim, bias=False) for feat in feature_names
        })
        self.transform_jd_to_resume = nn.ModuleDict({
            feat: nn.Linear(dim, dim, bias=False) for feat in feature_names
        })
        # Layer normalisation per feature (applied before any transform).
        self.feature_norm = nn.ModuleDict({
            feat: nn.LayerNorm(dim) for feat in feature_names
        })
        # Learned blend weights (passed through sigmoid → [0, 1] in forward).
        self.feature_blend_weights = nn.ParameterDict({
            feat: nn.Parameter(torch.tensor(0.5)) for feat in feature_names
        })
        # Feature scaling.  NOTE(review): the original comment said
        # "softplus → positive" but _align() actually uses abs() + 0.1; the
        # abs form is kept so trained weights behave identically.
        self.feature_scale = nn.ParameterDict({
            feat: nn.Parameter(torch.ones(1)) for feat in feature_names
        })
        for feat in feature_names:
            nn.init.orthogonal_(self.transform_resume_to_jd[feat].weight)
            nn.init.orthogonal_(self.transform_jd_to_resume[feat].weight)

    def _align(
        self,
        feat: str,
        x: torch.Tensor,
        shared: nn.Module,
        specific: nn.Module,
    ) -> torch.Tensor:
        """Normalise and scale `x`, then blend shared vs feature-specific transforms.

        Shared by both directions; `shared`/`specific` select the direction's
        linear layers.  Identical op order to the original duplicated branches.
        """
        normed = self.feature_norm[feat](x)
        # abs() + 0.1 keeps the per-feature scale strictly positive.
        scaled = normed * (self.feature_scale[feat].abs() + 0.1)
        blend = torch.sigmoid(self.feature_blend_weights[feat])
        return (1 - blend) * shared(scaled) + blend * specific(scaled)

    def forward(
        self,
        resume_features: dict[str, torch.Tensor],
        jd_features: dict[str, torch.Tensor],
    ) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]:
        """Align whichever side(s) are populated.

        Features absent from an input dict are simply skipped, so callers may
        populate only the direction they need (see module docstring).

        Returns:
            (resume_to_jd, jd_to_resume) dicts of aligned tensors.
        """
        resume_to_jd: dict[str, torch.Tensor] = {}
        jd_to_resume: dict[str, torch.Tensor] = {}
        for feat in self.feature_names:
            if feat in resume_features:
                resume_to_jd[feat] = self._align(
                    feat,
                    resume_features[feat],
                    self.transform_resume_to_jd_shared,
                    self.transform_resume_to_jd[feat],
                )
            if feat in jd_features:
                jd_to_resume[feat] = self._align(
                    feat,
                    jd_features[feat],
                    self.transform_jd_to_resume_shared,
                    self.transform_jd_to_resume[feat],
                )
        return resume_to_jd, jd_to_resume
# ---------------------------------------------------------------------------
# Lazy model loader
# ---------------------------------------------------------------------------
def _get_model() -> BidirectionalAlignmentModel:
    """Return the process-wide ConFiT model, loading weights on first use.

    Returns:
        The singleton BidirectionalAlignmentModel in eval mode on `_device`.

    Raises:
        FileNotFoundError: if the weights file is missing.
        RuntimeError: if the checkpoint does not match the architecture.
    """
    global _model
    if _model is None:
        if not _WEIGHTS_PATH.exists():
            raise FileNotFoundError(
                f"ConFiT weights not found: {_WEIGHTS_PATH}\n"
                "Upload the file to that path (or to HuggingFace Hub and load via hf_hub_download)."
            )
        model = BidirectionalAlignmentModel(_DIM, _FEATURE_NAMES).to(_device)
        checkpoint = torch.load(str(_WEIGHTS_PATH), map_location=_device, weights_only=True)
        # Handle both raw state dicts and checkpoint dicts ({'model_state_dict': ...}).
        state = checkpoint.get("model_state_dict", checkpoint) if isinstance(checkpoint, dict) else checkpoint
        model.load_state_dict(state)
        model.eval()
        # Publish to the global only after the weights loaded successfully.
        # The original assigned `_model` before loading, so a failed
        # torch.load()/load_state_dict() left a randomly-initialised model
        # cached for every subsequent call.
        _model = model
    return _model
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _to_tensor_dict(spans: dict[str, list[float]]) -> dict[str, torch.Tensor]:
    """Turn each float-list span into a (1, dim) float32 tensor on the service device."""
    tensors: dict[str, torch.Tensor] = {}
    for name, values in spans.items():
        tensors[name] = torch.tensor(values, dtype=torch.float32).unsqueeze(0).to(_device)
    return tensors
def _from_tensor_dict(tensor_dict: dict[str, torch.Tensor]) -> dict[str, list[float]]:
"""Convert batched tensor dict back to float-list dict."""
return {key: t[0].cpu().tolist() for key, t in tensor_dict.items()}
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
@torch.no_grad()
def align_embedding(embedding: list[float], feature: str, direction: str) -> list[float]:
    """Align one 768-dim embedding for a single named feature.

    Thin wrapper around align_spans() for the single-feature case.

    Args:
        embedding: 768-dim float list from SBERT or LayoutLM.
        feature: One of 'full', 'education', 'experience', 'leadership'.
        direction: 'to_jd' — resume embedding → JD space
                   'to_resume' — JD embedding → resume space

    Returns:
        Aligned 768-dim float list.
    """
    return align_spans({feature: embedding}, direction)[feature]
def align_spans(spans: dict[str, list[float]], direction: str) -> dict[str, list[float]]:
    """Align all embedding spans using BidirectionalAlignmentModel.

    Args:
        spans: Dict of feature_name → 768-dim float list.
            Keys must be a subset of ['full', 'education', 'experience', 'leadership'].
        direction: 'to_jd' — resume embeddings → JD space
                   'to_resume' — JD embeddings → resume space

    Returns:
        Same structure with aligned embeddings.

    Raises:
        ValueError: if `direction` is not 'to_jd' or 'to_resume'.  (The
            original silently treated any other value as 'to_resume', so a
            typo produced embeddings in the wrong space with no error.)
    """
    if direction not in ("to_jd", "to_resume"):
        raise ValueError(f"direction must be 'to_jd' or 'to_resume', got {direction!r}")
    model = _get_model()
    features = _to_tensor_dict(spans)
    with torch.no_grad():
        if direction == "to_jd":
            # Resume → JD: only the resume_features side needs to be populated.
            resume_to_jd, _ = model(features, {})
            return _from_tensor_dict(resume_to_jd)
        # JD → Resume: only the jd_features side needs to be populated.
        _, jd_to_resume = model({}, features)
        return _from_tensor_dict(jd_to_resume)