import json import logging from datetime import datetime from collections import defaultdict logger = logging.getLogger(__name__) class ContextualScoringEngine: """ Moteur de scoring qui maintient la compatibilité avec l'ancienne interface tout en offrant les nouvelles fonctionnalités. """ ALPHA = 0.5 BETA = 0.3 GAMMA = 0.2 CONTEXT_VALUES = { "formations": 0.3, "projets": 0.6, "experiences_professionnelles": 0.8, } def __init__(self, parsed_cv_data: dict): self.cv_data = parsed_cv_data.get("candidat", {}) if not self.cv_data: raise ValueError("Données du candidat non trouvées dans le CV parsé.") def _normalize_score(self, value: float) -> float: """Normalise une valeur sur une échelle de 0 à 1.""" return 1 - (1 / (1 + float(value))) def _parse_date(self, date_str: str) -> datetime | None: """Parse une date de manière robuste.""" if not date_str or not isinstance(date_str, str): return None date_str_lower = date_str.lower() if date_str_lower in ["aujourd'hui", "maintenant", "en cours", "current"]: return datetime.now() for fmt in ("%m/%Y", "%Y"): try: return datetime.strptime(date_str, fmt) except ValueError: continue return None def _calculate_duration_in_years(self, start_date_str: str, end_date_str: str) -> float: """Calcule la durée d'une expérience en années.""" start_date = self._parse_date(start_date_str) end_date = self._parse_date(end_date_str) if start_date and end_date: if end_date < start_date: return 0.0 return (end_date - start_date).days / 365.25 return 0.0 def calculate_scores(self) -> dict: """ Calcule les scores pour toutes les compétences. Maintient la compatibilité avec l'ancienne interface. """ skills_data = self.cv_data.get("compétences", {}) skills_list = [] if isinstance(skills_data, dict): skills_list.extend(skills_data.get("hard_skills", [])) skills_list.extend(skills_data.get("soft_skills", [])) elif isinstance(skills_data, list): skills_list = [item.get("nom") for item in skills_data if item.get("nom")] if not skills_list: logger.warning("Aucune compétence à analyser dans le CV.") return {"analyse_competences": []} skill_metrics = { skill.lower(): { "original_name": skill, "contexts": set(), "frequency": 0, "max_duration": 0.0 } for skill in skills_list if skill } experiences_key = "expériences" if "expériences" in self.cv_data else "experiences_professionnelles" for exp in self.cv_data.get(experiences_key, []): exp_text = json.dumps(exp, ensure_ascii=False).lower() duration = self._calculate_duration_in_years( exp.get("date_debut", exp.get("start_date", "")), exp.get("date_fin", exp.get("end_date", "")) ) for skill in skill_metrics: if skill in exp_text: skill_metrics[skill]["contexts"].add("experiences_professionnelles") skill_metrics[skill]["frequency"] += exp_text.count(skill) if duration > skill_metrics[skill]["max_duration"]: skill_metrics[skill]["max_duration"] = duration projects_data = self.cv_data.get("projets", {}) if isinstance(projects_data, dict): for project_type in ["professional", "personal"]: for project in projects_data.get(project_type, []): project_text = json.dumps(project, ensure_ascii=False).lower() for skill in skill_metrics: if skill in project_text: skill_metrics[skill]["contexts"].add("projets") skill_metrics[skill]["frequency"] += project_text.count(skill) for formation in self.cv_data.get("formations", []): formation_text = json.dumps(formation, ensure_ascii=False).lower() for skill in skill_metrics: if skill in formation_text: skill_metrics[skill]["contexts"].add("formations") skill_metrics[skill]["frequency"] += formation_text.count(skill) final_scores = [] for skill, metrics in skill_metrics.items(): if metrics["frequency"] == 0: continue context_score = max((self.CONTEXT_VALUES.get(c, 0) for c in metrics["contexts"]), default=0.1) if len(metrics["contexts"]) > 1: context_score = 1.0 normalized_frequency = self._normalize_score(metrics["frequency"]) normalized_depth = self._normalize_score(metrics["max_duration"]) final_score = (self.ALPHA * context_score) + \ (self.BETA * normalized_frequency) + \ (self.GAMMA * normalized_depth) final_scores.append({ "skill": metrics["original_name"], "score": round(final_score, 2), "details": { "context_score": round(context_score, 2), "contexts_found": list(metrics["contexts"]), "frequency": metrics["frequency"], "max_duration_years": round(metrics["max_duration"], 1) } }) final_scores.sort(key=lambda x: x["score"], reverse=True) logger.info(f"Scoring terminé pour {len(final_scores)} compétences.") return {"analyse_competences": final_scores} OptimizedContextualScoringEngine = ContextualScoringEngine