QuentinL52 committed on
Commit
c3a4252
·
verified ·
1 Parent(s): bc6588d

Delete src/scoring_engine.py

Browse files
Files changed (1) hide show
  1. src/scoring_engine.py +0 -149
src/scoring_engine.py DELETED
@@ -1,149 +0,0 @@
1
import json
import logging
import re
from collections import defaultdict
from datetime import datetime
5
-
6
- logger = logging.getLogger(__name__)
7
-
8
- class ContextualScoringEngine:
9
- """
10
- Moteur de scoring qui maintient la compatibilité avec l'ancienne interface
11
- tout en offrant les nouvelles fonctionnalités.
12
- """
13
-
14
- ALPHA = 0.5
15
- BETA = 0.3
16
- GAMMA = 0.2
17
-
18
- CONTEXT_VALUES = {
19
- "formations": 0.3,
20
- "projets": 0.6,
21
- "experiences_professionnelles": 0.8,
22
- }
23
-
24
- def __init__(self, parsed_cv_data: dict):
25
- self.cv_data = parsed_cv_data.get("candidat", {})
26
- if not self.cv_data:
27
- raise ValueError("Données du candidat non trouvées dans le CV parsé.")
28
-
29
- def _normalize_score(self, value: float) -> float:
30
- """Normalise une valeur sur une échelle de 0 à 1."""
31
- return 1 - (1 / (1 + float(value)))
32
-
33
- def _parse_date(self, date_str: str) -> datetime | None:
34
- """Parse une date de manière robuste."""
35
- if not date_str or not isinstance(date_str, str):
36
- return None
37
-
38
- date_str_lower = date_str.lower()
39
- if date_str_lower in ["aujourd'hui", "maintenant", "en cours", "current"]:
40
- return datetime.now()
41
-
42
- for fmt in ("%m/%Y", "%Y"):
43
- try:
44
- return datetime.strptime(date_str, fmt)
45
- except ValueError:
46
- continue
47
- return None
48
-
49
- def _calculate_duration_in_years(self, start_date_str: str, end_date_str: str) -> float:
50
- """Calcule la durée d'une expérience en années."""
51
- start_date = self._parse_date(start_date_str)
52
- end_date = self._parse_date(end_date_str)
53
-
54
- if start_date and end_date:
55
- if end_date < start_date:
56
- return 0.0
57
- return (end_date - start_date).days / 365.25
58
- return 0.0
59
-
60
- def calculate_scores(self) -> dict:
61
- """
62
- Calcule les scores pour toutes les compétences.
63
- Maintient la compatibilité avec l'ancienne interface.
64
- """
65
- skills_data = self.cv_data.get("compétences", {})
66
- skills_list = []
67
-
68
- if isinstance(skills_data, dict):
69
- skills_list.extend(skills_data.get("hard_skills", []))
70
- skills_list.extend(skills_data.get("soft_skills", []))
71
- elif isinstance(skills_data, list):
72
- skills_list = [item.get("nom") for item in skills_data if item.get("nom")]
73
-
74
- if not skills_list:
75
- logger.warning("Aucune compétence à analyser dans le CV.")
76
- return {"analyse_competences": []}
77
-
78
- skill_metrics = {
79
- skill.lower(): {
80
- "original_name": skill,
81
- "contexts": set(),
82
- "frequency": 0,
83
- "max_duration": 0.0
84
- }
85
- for skill in skills_list if skill
86
- }
87
-
88
- experiences_key = "expériences" if "expériences" in self.cv_data else "experiences_professionnelles"
89
- for exp in self.cv_data.get(experiences_key, []):
90
- exp_text = json.dumps(exp, ensure_ascii=False).lower()
91
- duration = self._calculate_duration_in_years(
92
- exp.get("date_debut", exp.get("start_date", "")),
93
- exp.get("date_fin", exp.get("end_date", ""))
94
- )
95
-
96
- for skill in skill_metrics:
97
- if skill in exp_text:
98
- skill_metrics[skill]["contexts"].add("experiences_professionnelles")
99
- skill_metrics[skill]["frequency"] += exp_text.count(skill)
100
- if duration > skill_metrics[skill]["max_duration"]:
101
- skill_metrics[skill]["max_duration"] = duration
102
-
103
- projects_data = self.cv_data.get("projets", {})
104
- if isinstance(projects_data, dict):
105
- for project_type in ["professional", "personal"]:
106
- for project in projects_data.get(project_type, []):
107
- project_text = json.dumps(project, ensure_ascii=False).lower()
108
- for skill in skill_metrics:
109
- if skill in project_text:
110
- skill_metrics[skill]["contexts"].add("projets")
111
- skill_metrics[skill]["frequency"] += project_text.count(skill)
112
- for formation in self.cv_data.get("formations", []):
113
- formation_text = json.dumps(formation, ensure_ascii=False).lower()
114
- for skill in skill_metrics:
115
- if skill in formation_text:
116
- skill_metrics[skill]["contexts"].add("formations")
117
- skill_metrics[skill]["frequency"] += formation_text.count(skill)
118
- final_scores = []
119
- for skill, metrics in skill_metrics.items():
120
- if metrics["frequency"] == 0:
121
- continue
122
-
123
- context_score = max((self.CONTEXT_VALUES.get(c, 0) for c in metrics["contexts"]), default=0.1)
124
- if len(metrics["contexts"]) > 1:
125
- context_score = 1.0
126
-
127
- normalized_frequency = self._normalize_score(metrics["frequency"])
128
- normalized_depth = self._normalize_score(metrics["max_duration"])
129
-
130
- final_score = (self.ALPHA * context_score) + \
131
- (self.BETA * normalized_frequency) + \
132
- (self.GAMMA * normalized_depth)
133
-
134
- final_scores.append({
135
- "skill": metrics["original_name"],
136
- "score": round(final_score, 2),
137
- "details": {
138
- "context_score": round(context_score, 2),
139
- "contexts_found": list(metrics["contexts"]),
140
- "frequency": metrics["frequency"],
141
- "max_duration_years": round(metrics["max_duration"], 1)
142
- }
143
- })
144
-
145
- final_scores.sort(key=lambda x: x["score"], reverse=True)
146
- logger.info(f"Scoring terminé pour {len(final_scores)} compétences.")
147
-
148
- return {"analyse_competences": final_scores}
149
- OptimizedContextualScoringEngine = ContextualScoringEngine