QuentinL52 committed on
Commit
4972689
·
verified ·
1 Parent(s): 22d1c60

Update src/agents/scoring_agent.py

Browse files
Files changed (1) hide show
  1. src/agents/scoring_agent.py +124 -154
src/agents/scoring_agent.py CHANGED
@@ -6,22 +6,31 @@ from typing import Dict, List, Any
6
 
7
  logger = logging.getLogger(__name__)
8
 
9
- class ScoringAgent:
10
- ALPHA = 0.5
11
- BETA = 0.3
12
- GAMMA = 0.2
13
-
14
- CONTEXT_VALUES = {
15
- "formations": 0.3,
16
- "projets": 0.6,
17
- "experiences_professionnelles": 0.8,
18
- }
19
-
20
  def calculate_scores(self, candidat_data: Dict[str, Any]) -> Dict[str, List[Dict[str, Any]]]:
21
  if not candidat_data or not isinstance(candidat_data, dict):
22
  return {"analyse_competences": []}
23
 
24
  skills_data = candidat_data.get("compétences", {})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  skills_list = []
26
 
27
  if isinstance(skills_data, dict):
@@ -30,36 +39,46 @@ class ScoringAgent:
30
  elif isinstance(skills_data, list):
31
  skills_list = [item.get("nom") for item in skills_data if item.get("nom")]
32
 
33
- # Filtrer les skills valides
34
- skills_list = [skill for skill in skills_list if skill and isinstance(skill, str) and skill.strip()]
35
-
36
- if not skills_list:
37
- return {"analyse_competences": []}
38
-
39
- skill_metrics = {
40
- skill.lower(): {
41
- "original_name": skill,
42
- "contexts": set(),
43
- "frequency": 0,
44
- "max_duration": 0.0
45
- }
46
- for skill in skills_list if skill
47
- }
48
 
49
- self._analyze_experiences(candidat_data, skill_metrics)
50
- self._analyze_projects(candidat_data, skill_metrics)
51
- self._analyze_formations(candidat_data, skill_metrics)
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
- final_scores = self._calculate_final_scores(skill_metrics)
 
 
 
54
 
55
- return {"analyse_competences": final_scores}
 
 
 
 
56
 
57
- def _analyze_experiences(self, candidat_data: Dict[str, Any], skill_metrics: Dict[str, Any]):
 
 
 
 
58
  experiences_key = "expériences" if "expériences" in candidat_data else "experiences_professionnelles"
59
  experiences = candidat_data.get(experiences_key, [])
60
 
61
  if not isinstance(experiences, list):
62
- return
63
 
64
  for exp in experiences:
65
  if not isinstance(exp, dict):
@@ -67,85 +86,83 @@ class ScoringAgent:
67
 
68
  exp_text = json.dumps(exp, ensure_ascii=False).lower()
69
 
70
- # Récupération sécurisée des dates
71
- start_date_str = exp.get("date_debut", exp.get("start_date", ""))
72
- end_date_str = exp.get("date_fin", exp.get("end_date", ""))
73
-
74
- # S'assurer que ce sont des strings
75
- if not isinstance(start_date_str, str):
76
- start_date_str = str(start_date_str) if start_date_str else ""
77
- if not isinstance(end_date_str, str):
78
- end_date_str = str(end_date_str) if end_date_str else ""
79
-
80
- duration = self._calculate_duration_in_years(start_date_str, end_date_str)
81
-
82
- for skill in skill_metrics:
83
- if skill in exp_text:
84
- skill_metrics[skill]["contexts"].add("experiences_professionnelles")
85
- skill_metrics[skill]["frequency"] += exp_text.count(skill)
86
- if duration > skill_metrics[skill]["max_duration"]:
87
- skill_metrics[skill]["max_duration"] = duration
88
-
89
- def _analyze_projects(self, candidat_data: Dict[str, Any], skill_metrics: Dict[str, Any]):
90
- projects_data = candidat_data.get("projets", {})
91
 
92
- if isinstance(projects_data, dict):
93
- for project_type in ["professional", "personal"]:
94
- for project in projects_data.get(project_type, []):
95
- project_text = json.dumps(project, ensure_ascii=False).lower()
96
- for skill in skill_metrics:
97
- if skill in project_text:
98
- skill_metrics[skill]["contexts"].add("projets")
99
- skill_metrics[skill]["frequency"] += project_text.count(skill)
100
 
101
- def _analyze_formations(self, candidat_data: Dict[str, Any], skill_metrics: Dict[str, Any]):
102
- for formation in candidat_data.get("formations", []):
103
- formation_text = json.dumps(formation, ensure_ascii=False).lower()
104
- for skill in skill_metrics:
105
- if skill in formation_text:
106
- skill_metrics[skill]["contexts"].add("formations")
107
- skill_metrics[skill]["frequency"] += formation_text.count(skill)
108
-
109
- def _calculate_final_scores(self, skill_metrics: Dict[str, Any]) -> List[Dict[str, Any]]:
110
- final_scores = []
111
 
112
- for skill, metrics in skill_metrics.items():
113
- if metrics["frequency"] == 0:
114
  continue
115
-
116
- context_score = max(
117
- (self.CONTEXT_VALUES.get(c, 0) for c in metrics["contexts"]),
118
- default=0.1
119
- )
120
-
121
- if len(metrics["contexts"]) > 1:
122
- context_score = 1.0
123
-
124
- normalized_frequency = self._normalize_score(metrics["frequency"])
125
- normalized_depth = self._normalize_score(metrics["max_duration"])
126
-
127
- final_score = (self.ALPHA * context_score) + \
128
- (self.BETA * normalized_frequency) + \
129
- (self.GAMMA * normalized_depth)
130
 
131
- final_scores.append({
132
- "skill": metrics["original_name"],
133
- "score": round(final_score, 2),
134
- "details": {
135
- "context_score": round(context_score, 2),
136
- "contexts_found": list(metrics["contexts"]),
137
- "frequency": metrics["frequency"],
138
- "max_duration_years": round(metrics["max_duration"], 1)
139
- }
140
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
- final_scores.sort(key=lambda x: x["score"], reverse=True)
143
- return final_scores
 
 
 
 
 
 
 
 
 
144
 
145
- def _normalize_score(self, value: float) -> float:
146
- return 1 - (1 / (1 + float(value)))
 
 
 
 
 
 
 
 
 
147
 
148
  def _parse_date(self, date_str: str) -> datetime:
 
149
  if not date_str or not isinstance(date_str, str):
150
  return None
151
 
@@ -153,15 +170,7 @@ class ScoringAgent:
153
  if date_str_lower in ["aujourd'hui", "maintenant", "en cours", "current", "présent", "actuellement"]:
154
  return datetime.now()
155
 
156
- # Nettoyer la chaîne de date
157
- date_str_clean = date_str.strip()
158
-
159
- # Validation préalable avant parsing
160
- validated_date = self._validate_and_parse_date(date_str_clean)
161
- if validated_date:
162
- return validated_date
163
-
164
- # Tentative d'extraction de l'année seulement
165
  year_match = re.search(r'\b(20\d{2}|19\d{2})\b', date_str)
166
  if year_match:
167
  year = int(year_match.group(1))
@@ -169,45 +178,6 @@ class ScoringAgent:
169
 
170
  return None
171
 
172
- def _validate_and_parse_date(self, date_str: str) -> datetime:
173
- """Valide et parse une date en vérifiant le format avant parsing"""
174
-
175
- # Format YYYY
176
- if re.match(r'^\d{4}$', date_str):
177
- year = int(date_str)
178
- if 1900 <= year <= 2030:
179
- return datetime(year, 1, 1)
180
-
181
- # Format MM/YYYY
182
- if re.match(r'^\d{1,2}/\d{4}$', date_str):
183
- parts = date_str.split('/')
184
- month, year = int(parts[0]), int(parts[1])
185
- if 1 <= month <= 12 and 1900 <= year <= 2030:
186
- return datetime(year, month, 1)
187
-
188
- # Format YYYY-MM
189
- if re.match(r'^\d{4}-\d{1,2}$', date_str):
190
- parts = date_str.split('-')
191
- year, month = int(parts[0]), int(parts[1])
192
- if 1 <= month <= 12 and 1900 <= year <= 2030:
193
- return datetime(year, month, 1)
194
-
195
- # Format DD/MM/YYYY
196
- if re.match(r'^\d{1,2}/\d{1,2}/\d{4}$', date_str):
197
- parts = date_str.split('/')
198
- day, month, year = int(parts[0]), int(parts[1]), int(parts[2])
199
- if 1 <= day <= 31 and 1 <= month <= 12 and 1900 <= year <= 2030:
200
- return datetime(year, month, day)
201
-
202
- return None
203
-
204
- def _calculate_duration_in_years(self, start_date_str: str, end_date_str: str) -> float:
205
- start_date = self._parse_date(start_date_str)
206
- end_date = self._parse_date(end_date_str)
207
-
208
- if start_date and end_date:
209
- if end_date < start_date:
210
- return 0.0
211
- return (end_date - start_date).days / 365.25
212
-
213
- return 0.0
 
6
 
7
  logger = logging.getLogger(__name__)
8
 
9
+ class SimpleScoringAgent:
10
+
 
 
 
 
 
 
 
 
 
11
  def calculate_scores(self, candidat_data: Dict[str, Any]) -> Dict[str, List[Dict[str, Any]]]:
12
  if not candidat_data or not isinstance(candidat_data, dict):
13
  return {"analyse_competences": []}
14
 
15
  skills_data = candidat_data.get("compétences", {})
16
+ skills_list = self._extract_skills_list(skills_data)
17
+
18
+ if not skills_list:
19
+ return {"analyse_competences": []}
20
+
21
+ skill_analysis = []
22
+
23
+ for skill in skills_list:
24
+ level = self._determine_skill_level(skill, candidat_data)
25
+ skill_analysis.append({
26
+ "skill": skill,
27
+ "level": level
28
+ })
29
+
30
+ return {"analyse_competences": skill_analysis}
31
+
32
+ def _extract_skills_list(self, skills_data: Dict[str, Any]) -> List[str]:
33
+ """Extrait la liste des compétences"""
34
  skills_list = []
35
 
36
  if isinstance(skills_data, dict):
 
39
  elif isinstance(skills_data, list):
40
  skills_list = [item.get("nom") for item in skills_data if item.get("nom")]
41
 
42
+ return [skill for skill in skills_list if skill and isinstance(skill, str) and skill.strip()]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
def _determine_skill_level(self, skill: str, candidat_data: Dict[str, Any]) -> str:
    """Map a skill to one of four level labels using fixed thresholds.

    Args:
        skill: Skill name as declared by the candidate.
        candidat_data: Parsed CV passed through to the helper methods.

    Returns:
        "expert", "avance", "intermediaire" or "debutant".
    """
    # All three signals are always computed, in this order.
    mentions = self._count_skill_mentions(skill, candidat_data)
    longest_years = self._get_max_duration_for_skill(skill, candidat_data)
    used_professionally = self._has_professional_experience(skill, candidat_data)

    # The two top levels require professional use plus a minimum duration.
    if used_professionally:
        if longest_years >= 3.0:
            return "expert"
        if longest_years >= 1.0:
            return "avance"

    # Otherwise frequent mentions or at least half a year of use is enough
    # for the middle level.
    if mentions >= 3 or longest_years >= 0.5:
        return "intermediaire"
    return "debutant"
60
 
61
def _count_skill_mentions(self, skill: str, candidat_data: Dict[str, Any]) -> int:
    """Count case-insensitive occurrences of a skill in the CV text.

    Args:
        skill: Skill name to look for.
        candidat_data: Parsed CV; its text is obtained from
            ``_get_all_text_content``.

    Returns:
        Number of non-overlapping occurrences of the lowercased skill in
        the lowercased text.
    """
    needle = skill.lower()
    haystack = self._get_all_text_content(candidat_data).lower()
    # Plain substring counting: a short name is also counted inside longer
    # words that contain it.
    return haystack.count(needle)
71
 
72
+ def _get_max_duration_for_skill(self, skill: str, candidat_data: Dict[str, Any]) -> float:
73
+ """Trouve la durée maximum d'utilisation de la compétence"""
74
+ skill_lower = skill.lower()
75
+ max_duration = 0.0
76
+
77
  experiences_key = "expériences" if "expériences" in candidat_data else "experiences_professionnelles"
78
  experiences = candidat_data.get(experiences_key, [])
79
 
80
  if not isinstance(experiences, list):
81
+ return 0.0
82
 
83
  for exp in experiences:
84
  if not isinstance(exp, dict):
 
86
 
87
  exp_text = json.dumps(exp, ensure_ascii=False).lower()
88
 
89
+ if skill_lower in exp_text:
90
+ duration = self._calculate_experience_duration(exp)
91
+ max_duration = max(max_duration, duration)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
+ return max_duration
 
 
 
 
 
 
 
94
 
95
+ def _has_professional_experience(self, skill: str, candidat_data: Dict[str, Any]) -> bool:
96
+ """Vérifie si la compétence a été utilisée en contexte professionnel"""
97
+ skill_lower = skill.lower()
98
+
99
+ experiences_key = "expériences" if "expériences" in candidat_data else "experiences_professionnelles"
100
+ experiences = candidat_data.get(experiences_key, [])
101
+
102
+ if not isinstance(experiences, list):
103
+ return False
 
104
 
105
+ for exp in experiences:
106
+ if not isinstance(exp, dict):
107
  continue
108
+
109
+ exp_text = json.dumps(exp, ensure_ascii=False).lower()
110
+ if skill_lower in exp_text:
111
+ return True
112
+
113
+ return False
 
 
 
 
 
 
 
 
 
114
 
115
+ def _get_all_text_content(self, candidat_data: Dict[str, Any]) -> str:
116
+ """Récupère tout le contenu textuel du CV"""
117
+ all_content = []
118
+
119
+ # Expériences
120
+ experiences_key = "expériences" if "expériences" in candidat_data else "experiences_professionnelles"
121
+ for exp in candidat_data.get(experiences_key, []):
122
+ if isinstance(exp, dict):
123
+ all_content.append(json.dumps(exp, ensure_ascii=False))
124
+
125
+ # Projets
126
+ projects = candidat_data.get("projets", {})
127
+ if isinstance(projects, dict):
128
+ for project_type in ["professional", "personal"]:
129
+ for project in projects.get(project_type, []):
130
+ if isinstance(project, dict):
131
+ all_content.append(json.dumps(project, ensure_ascii=False))
132
+
133
+ # Formations
134
+ for formation in candidat_data.get("formations", []):
135
+ if isinstance(formation, dict):
136
+ all_content.append(json.dumps(formation, ensure_ascii=False))
137
+
138
+ return " ".join(all_content)
139
 
140
def _calculate_experience_duration(self, exp: Dict[str, Any]) -> float:
    """Return the length of one experience entry in years.

    Args:
        exp: Experience entry. The start is read from "date_debut", falling
            back to "start_date" when that key is absent; the end is read
            from "date_fin", falling back to "end_date".

    Returns:
        The value computed by ``_calculate_duration_in_years`` from the two
        date strings.
    """

    def _as_text(raw: Any) -> str:
        """Pass strings through; stringify truthy values; map the rest to ""."""
        if isinstance(raw, str):
            return raw
        return str(raw) if raw else ""

    start_raw = exp.get("date_debut", exp.get("start_date", ""))
    end_raw = exp.get("date_fin", exp.get("end_date", ""))

    return self._calculate_duration_in_years(_as_text(start_raw), _as_text(end_raw))
151
 
152
def _calculate_duration_in_years(self, start_date_str: str, end_date_str: str) -> float:
    """Return the span between two date strings, expressed in years.

    Args:
        start_date_str: Start date text, parsed with ``_parse_date``.
        end_date_str: End date text, parsed with ``_parse_date``.

    Returns:
        The day difference divided by 365.25, or 0.0 when either date
        fails to parse or the end precedes the start.
    """
    begin = self._parse_date(start_date_str)
    finish = self._parse_date(end_date_str)

    # Unparseable dates and reversed ranges both count as no duration.
    if not (begin and finish) or finish < begin:
        return 0.0

    return (finish - begin).days / 365.25
163
 
164
  def _parse_date(self, date_str: str) -> datetime:
165
+ """Parse une date de manière simple"""
166
  if not date_str or not isinstance(date_str, str):
167
  return None
168
 
 
170
  if date_str_lower in ["aujourd'hui", "maintenant", "en cours", "current", "présent", "actuellement"]:
171
  return datetime.now()
172
 
173
+ # Extraction simple de l'année
 
 
 
 
 
 
 
 
174
  year_match = re.search(r'\b(20\d{2}|19\d{2})\b', date_str)
175
  if year_match:
176
  year = int(year_match.group(1))
 
178
 
179
  return None
180
 
181
+ # Backward-compatibility aliases: the ScoringAgent and ImprovedScoringAgent names both resolve to SimpleScoringAgent
182
+ ScoringAgent = SimpleScoringAgent
183
+ ImprovedScoringAgent = SimpleScoringAgent