Spaces:
Sleeping
Sleeping
File size: 6,101 Bytes
a8ee0db 7b0113e a8ee0db 7b0113e a8ee0db 7b0113e 8f14c45 7b0113e 8f14c45 7b0113e 8f14c45 7b0113e 8f14c45 7b0113e 8f14c45 7b0113e a8ee0db 7b0113e 8f14c45 a8ee0db 7b0113e 8f14c45 7b0113e 8f14c45 7b0113e a8ee0db 8f14c45 a8ee0db 7b0113e a8ee0db 7b0113e 8f14c45 7b0113e a8ee0db 7b0113e 8f14c45 7b0113e 8f14c45 7b0113e 8f14c45 7b0113e 8f14c45 7b0113e 8f14c45 7b0113e 8f14c45 7b0113e 8f14c45 7b0113e 8f14c45 7b0113e 8f14c45 7b0113e 8f14c45 7b0113e 8f14c45 7b0113e 8f14c45 7b0113e 8f14c45 7b0113e 8f14c45 a8ee0db 8f14c45 7b0113e 8f14c45 a8ee0db 7b0113e 8f14c45 7b0113e 8f14c45 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
import json
import logging
import re
from collections import defaultdict
from datetime import datetime
# Module-level logger, named after this module so that the host application's
# logging configuration decides where scoring messages go.
logger = logging.getLogger(name=__name__)
class ContextualScoringEngine:
"""
Moteur de scoring qui maintient la compatibilité avec l'ancienne interface
tout en offrant les nouvelles fonctionnalités.
"""
ALPHA = 0.5
BETA = 0.3
GAMMA = 0.2
CONTEXT_VALUES = {
"formations": 0.3,
"projets": 0.6,
"experiences_professionnelles": 0.8,
}
def __init__(self, parsed_cv_data: dict):
self.cv_data = parsed_cv_data.get("candidat", {})
if not self.cv_data:
raise ValueError("Données du candidat non trouvées dans le CV parsé.")
def _normalize_score(self, value: float) -> float:
"""Normalise une valeur sur une échelle de 0 à 1."""
return 1 - (1 / (1 + float(value)))
def _parse_date(self, date_str: str) -> datetime | None:
"""Parse une date de manière robuste."""
if not date_str or not isinstance(date_str, str):
return None
date_str_lower = date_str.lower()
if date_str_lower in ["aujourd'hui", "maintenant", "en cours", "current"]:
return datetime.now()
for fmt in ("%m/%Y", "%Y"):
try:
return datetime.strptime(date_str, fmt)
except ValueError:
continue
return None
def _calculate_duration_in_years(self, start_date_str: str, end_date_str: str) -> float:
"""Calcule la durée d'une expérience en années."""
start_date = self._parse_date(start_date_str)
end_date = self._parse_date(end_date_str)
if start_date and end_date:
if end_date < start_date:
return 0.0
return (end_date - start_date).days / 365.25
return 0.0
def calculate_scores(self) -> dict:
"""
Calcule les scores pour toutes les compétences.
Maintient la compatibilité avec l'ancienne interface.
"""
skills_data = self.cv_data.get("compétences", {})
skills_list = []
if isinstance(skills_data, dict):
skills_list.extend(skills_data.get("hard_skills", []))
skills_list.extend(skills_data.get("soft_skills", []))
elif isinstance(skills_data, list):
skills_list = [item.get("nom") for item in skills_data if item.get("nom")]
if not skills_list:
logger.warning("Aucune compétence à analyser dans le CV.")
return {"analyse_competences": []}
skill_metrics = {
skill.lower(): {
"original_name": skill,
"contexts": set(),
"frequency": 0,
"max_duration": 0.0
}
for skill in skills_list if skill
}
experiences_key = "expériences" if "expériences" in self.cv_data else "experiences_professionnelles"
for exp in self.cv_data.get(experiences_key, []):
exp_text = json.dumps(exp, ensure_ascii=False).lower()
duration = self._calculate_duration_in_years(
exp.get("date_debut", exp.get("start_date", "")),
exp.get("date_fin", exp.get("end_date", ""))
)
for skill in skill_metrics:
if skill in exp_text:
skill_metrics[skill]["contexts"].add("experiences_professionnelles")
skill_metrics[skill]["frequency"] += exp_text.count(skill)
if duration > skill_metrics[skill]["max_duration"]:
skill_metrics[skill]["max_duration"] = duration
projects_data = self.cv_data.get("projets", {})
if isinstance(projects_data, dict):
for project_type in ["professional", "personal"]:
for project in projects_data.get(project_type, []):
project_text = json.dumps(project, ensure_ascii=False).lower()
for skill in skill_metrics:
if skill in project_text:
skill_metrics[skill]["contexts"].add("projets")
skill_metrics[skill]["frequency"] += project_text.count(skill)
for formation in self.cv_data.get("formations", []):
formation_text = json.dumps(formation, ensure_ascii=False).lower()
for skill in skill_metrics:
if skill in formation_text:
skill_metrics[skill]["contexts"].add("formations")
skill_metrics[skill]["frequency"] += formation_text.count(skill)
final_scores = []
for skill, metrics in skill_metrics.items():
if metrics["frequency"] == 0:
continue
context_score = max((self.CONTEXT_VALUES.get(c, 0) for c in metrics["contexts"]), default=0.1)
if len(metrics["contexts"]) > 1:
context_score = 1.0
normalized_frequency = self._normalize_score(metrics["frequency"])
normalized_depth = self._normalize_score(metrics["max_duration"])
final_score = (self.ALPHA * context_score) + \
(self.BETA * normalized_frequency) + \
(self.GAMMA * normalized_depth)
final_scores.append({
"skill": metrics["original_name"],
"score": round(final_score, 2),
"details": {
"context_score": round(context_score, 2),
"contexts_found": list(metrics["contexts"]),
"frequency": metrics["frequency"],
"max_duration_years": round(metrics["max_duration"], 1)
}
})
final_scores.sort(key=lambda x: x["score"], reverse=True)
logger.info(f"Scoring terminé pour {len(final_scores)} compétences.")
return {"analyse_competences": final_scores}
# Alias under which the scoring engine is also exposed.
OptimizedContextualScoringEngine = ContextualScoringEngine