Spaces:
Sleeping
Sleeping
import json
import logging
from datetime import datetime
# NOTE(review): defaultdict is not used anywhere in this chunk — it may be
# used elsewhere in the file; confirm before removing.
from collections import defaultdict

# Module-level logger following the standard `getLogger(__name__)` convention.
logger = logging.getLogger(__name__)
| class ContextualScoringEngine: | |
| """ | |
| Moteur de scoring qui maintient la compatibilité avec l'ancienne interface | |
| tout en offrant les nouvelles fonctionnalités. | |
| """ | |
| ALPHA = 0.5 | |
| BETA = 0.3 | |
| GAMMA = 0.2 | |
| CONTEXT_VALUES = { | |
| "formations": 0.3, | |
| "projets": 0.6, | |
| "experiences_professionnelles": 0.8, | |
| } | |
| def __init__(self, parsed_cv_data: dict): | |
| self.cv_data = parsed_cv_data.get("candidat", {}) | |
| if not self.cv_data: | |
| raise ValueError("Données du candidat non trouvées dans le CV parsé.") | |
| def _normalize_score(self, value: float) -> float: | |
| """Normalise une valeur sur une échelle de 0 à 1.""" | |
| return 1 - (1 / (1 + float(value))) | |
| def _parse_date(self, date_str: str) -> datetime | None: | |
| """Parse une date de manière robuste.""" | |
| if not date_str or not isinstance(date_str, str): | |
| return None | |
| date_str_lower = date_str.lower() | |
| if date_str_lower in ["aujourd'hui", "maintenant", "en cours", "current"]: | |
| return datetime.now() | |
| for fmt in ("%m/%Y", "%Y"): | |
| try: | |
| return datetime.strptime(date_str, fmt) | |
| except ValueError: | |
| continue | |
| return None | |
| def _calculate_duration_in_years(self, start_date_str: str, end_date_str: str) -> float: | |
| """Calcule la durée d'une expérience en années.""" | |
| start_date = self._parse_date(start_date_str) | |
| end_date = self._parse_date(end_date_str) | |
| if start_date and end_date: | |
| if end_date < start_date: | |
| return 0.0 | |
| return (end_date - start_date).days / 365.25 | |
| return 0.0 | |
| def calculate_scores(self) -> dict: | |
| """ | |
| Calcule les scores pour toutes les compétences. | |
| Maintient la compatibilité avec l'ancienne interface. | |
| """ | |
| skills_data = self.cv_data.get("compétences", {}) | |
| skills_list = [] | |
| if isinstance(skills_data, dict): | |
| skills_list.extend(skills_data.get("hard_skills", [])) | |
| skills_list.extend(skills_data.get("soft_skills", [])) | |
| elif isinstance(skills_data, list): | |
| skills_list = [item.get("nom") for item in skills_data if item.get("nom")] | |
| if not skills_list: | |
| logger.warning("Aucune compétence à analyser dans le CV.") | |
| return {"analyse_competences": []} | |
| skill_metrics = { | |
| skill.lower(): { | |
| "original_name": skill, | |
| "contexts": set(), | |
| "frequency": 0, | |
| "max_duration": 0.0 | |
| } | |
| for skill in skills_list if skill | |
| } | |
| experiences_key = "expériences" if "expériences" in self.cv_data else "experiences_professionnelles" | |
| for exp in self.cv_data.get(experiences_key, []): | |
| exp_text = json.dumps(exp, ensure_ascii=False).lower() | |
| duration = self._calculate_duration_in_years( | |
| exp.get("date_debut", exp.get("start_date", "")), | |
| exp.get("date_fin", exp.get("end_date", "")) | |
| ) | |
| for skill in skill_metrics: | |
| if skill in exp_text: | |
| skill_metrics[skill]["contexts"].add("experiences_professionnelles") | |
| skill_metrics[skill]["frequency"] += exp_text.count(skill) | |
| if duration > skill_metrics[skill]["max_duration"]: | |
| skill_metrics[skill]["max_duration"] = duration | |
| projects_data = self.cv_data.get("projets", {}) | |
| if isinstance(projects_data, dict): | |
| for project_type in ["professional", "personal"]: | |
| for project in projects_data.get(project_type, []): | |
| project_text = json.dumps(project, ensure_ascii=False).lower() | |
| for skill in skill_metrics: | |
| if skill in project_text: | |
| skill_metrics[skill]["contexts"].add("projets") | |
| skill_metrics[skill]["frequency"] += project_text.count(skill) | |
| for formation in self.cv_data.get("formations", []): | |
| formation_text = json.dumps(formation, ensure_ascii=False).lower() | |
| for skill in skill_metrics: | |
| if skill in formation_text: | |
| skill_metrics[skill]["contexts"].add("formations") | |
| skill_metrics[skill]["frequency"] += formation_text.count(skill) | |
| final_scores = [] | |
| for skill, metrics in skill_metrics.items(): | |
| if metrics["frequency"] == 0: | |
| continue | |
| context_score = max((self.CONTEXT_VALUES.get(c, 0) for c in metrics["contexts"]), default=0.1) | |
| if len(metrics["contexts"]) > 1: | |
| context_score = 1.0 | |
| normalized_frequency = self._normalize_score(metrics["frequency"]) | |
| normalized_depth = self._normalize_score(metrics["max_duration"]) | |
| final_score = (self.ALPHA * context_score) + \ | |
| (self.BETA * normalized_frequency) + \ | |
| (self.GAMMA * normalized_depth) | |
| final_scores.append({ | |
| "skill": metrics["original_name"], | |
| "score": round(final_score, 2), | |
| "details": { | |
| "context_score": round(context_score, 2), | |
| "contexts_found": list(metrics["contexts"]), | |
| "frequency": metrics["frequency"], | |
| "max_duration_years": round(metrics["max_duration"], 1) | |
| } | |
| }) | |
| final_scores.sort(key=lambda x: x["score"], reverse=True) | |
| logger.info(f"Scoring terminé pour {len(final_scores)} compétences.") | |
| return {"analyse_competences": final_scores} | |
# Backward-compatibility alias: callers importing the "optimized" name get
# the same engine class.
OptimizedContextualScoringEngine = ContextualScoringEngine