| | import logging |
| | import re |
| | from typing import Dict, List, Any, Tuple |
| | from src.services.semantic_service import SemanticService |
| |
|
# Module-level logger, named after this module so log records can be filtered per module.
logger = logging.getLogger(__name__)
| |
|
class SearchService:
    """Research agent (RAG & grounding).

    Runs the gap analysis between a CV and a job description and flags
    candidates who appear to be in a career change ("reconversion").
    """

    # Keywords hinting at each job family. They are matched case-insensitively
    # at the start of a word (see ``_keyword_pattern``).
    JOB_TYPES = {
        "DATA_ENGINEER": ["engineer", "ingénieur data", "mlops", "architecte", "platform"],
        # NOTE(review): "chercher" (to search) may have been meant as
        # "chercheur" (researcher) -- confirm with the author.
        "DATA_SCIENTIST": ["scientist", "science", "nlp", "computer vision", "chercher", "research"],
        "DATA_ANALYST": ["analyst", "analytics", "bi", "business intelligence", "dashboard"],
    }

    # Action verbs (French infinitives) looked for in the CV, per job family.
    VERB_MAPPINGS = {
        "DATA_ENGINEER": [
            "optimiser", "déployer", "industrialiser", "automatiser", "architecturer",
            "monitorer", "scaler", "refactorer", "migrer", "contraindre",
        ],
        "DATA_SCIENTIST": [
            "entraîner", "finetuner", "expérimenter", "évaluer", "modéliser",
            "optimiser", "analyser", "comparer", "implémenter",
        ],
        "DATA_ANALYST": [
            "visualiser", "présenter", "identifier", "extraire", "recommander",
            "analyser", "synthétiser", "automatiser", "reporter",
        ],
    }

    # Verb list used when no job family is detected ("GENERAL_TECH").
    # It contains "optimiser" twice; ``_extract_action_verbs`` de-duplicates.
    DEFAULT_VERBS = VERB_MAPPINGS["DATA_ENGINEER"] + VERB_MAPPINGS["DATA_SCIENTIST"]

    # CV substrings that mark a career change; checked in this order.
    RECONVERSION_KEYWORDS = (
        "reconversion",
        "bootcamp",
        "formation intensive",
        "rncp",
        "transition professionnelle",
    )

    # Number of distinct action verbs that yields a production score of 1.0.
    PRODUCTION_VERB_TARGET = 4

    # Keywords this short (e.g. "bi") must match a whole word; as a prefix or
    # substring they hit unrelated words such as "bien" or "responsibilities".
    _SHORT_KEYWORD_MAX_LEN = 3

    def __init__(self, semantic_service: Any = None):
        """Create the service.

        Args:
            semantic_service: Optional object exposing
                ``compute_similarity(cv_text, job_description)``. When omitted,
                a ``SemanticService`` is instantiated, as before.
        """
        if semantic_service is None:
            semantic_service = SemanticService()
        self.semantic_service = semantic_service

    def analyze_gap(self, cv_text: str, job_description: str) -> Dict[str, Any]:
        """Run the gap analysis between a CV and a job description.

        Args:
            cv_text: Raw text of the candidate's CV.
            job_description: Raw text of the job posting.

        Returns:
            A dict with the keys ``job_type``, ``semantic_score``,
            ``production_verbs_found``, ``production_mindset_score``,
            ``is_reconversion``, ``reconversion_reason`` and
            ``hidden_skill_gaps`` (currently a fixed placeholder string).
        """
        logger.info("Starting Gap Analysis...")

        job_type = self._detect_job_type(job_description)
        logger.info("Detected Job Type: %s", job_type)

        # "GENERAL_TECH" has no dedicated verb list and falls back to the default.
        target_verbs = self.VERB_MAPPINGS.get(job_type, self.DEFAULT_VERBS)
        found_verbs = self._extract_action_verbs(cv_text, target_verbs)

        # Ratio of verbs found to the target count, capped at 1.0.
        production_score = min(1.0, len(found_verbs) / float(self.PRODUCTION_VERB_TARGET))

        semantic_score = self.semantic_service.compute_similarity(cv_text, job_description)

        is_reconversion, reconversion_reason = self._detect_reconversion(cv_text, job_description)

        return {
            "job_type": job_type,
            "semantic_score": semantic_score,
            "production_verbs_found": found_verbs,
            "production_mindset_score": production_score,
            "is_reconversion": is_reconversion,
            "reconversion_reason": reconversion_reason,
            "hidden_skill_gaps": "Analyse à compléter par LLM",
        }

    @classmethod
    def _keyword_pattern(cls, keyword: str) -> str:
        """Build the regex source used to look for ``keyword`` in lowercased text.

        The keyword must start at a word boundary. Inner whitespace matches any
        run of whitespace, and keywords of at most ``_SHORT_KEYWORD_MAX_LEN``
        characters must also end at a word boundary.
        """
        body = r"\s+".join(re.escape(part) for part in keyword.split())
        if len(keyword) <= cls._SHORT_KEYWORD_MAX_LEN:
            return r"\b" + body + r"\b"
        return r"\b" + body

    def _detect_job_type(self, job_desc: str) -> str:
        """Classify the job description into one of the ``JOB_TYPES`` families.

        Each family scores one point per keyword present in the description.
        The highest score wins; ties go to the family declared first in
        ``JOB_TYPES``.

        Returns:
            The winning ``JOB_TYPES`` key, or ``"GENERAL_TECH"`` when no
            keyword matches (including empty or ``None`` input).
        """
        text_lower = (job_desc or "").lower()

        scores = {
            j_type: sum(
                1
                for kw in keywords
                if re.search(self._keyword_pattern(kw), text_lower)
            )
            for j_type, keywords in self.JOB_TYPES.items()
        }

        best_match = max(scores, key=scores.get)
        if scores[best_match] == 0:
            return "GENERAL_TECH"
        return best_match

    def _extract_action_verbs(self, text: str, target_verbs: List[str]) -> List[str]:
        """Return the target verbs that occur in ``text``.

        A verb counts as present when it appears at the start of a word,
        case-insensitively.

        Args:
            text: Text to scan; ``None`` or empty yields an empty list.
            target_verbs: Candidate verbs; duplicates are ignored.

        Returns:
            The verbs found, without duplicates, in ``target_verbs`` order.
        """
        text_lower = (text or "").lower()
        # NOTE(review): only the infinitive (plus any suffix) is matched, so
        # conjugated forms such as "optimisé" are not detected -- confirm
        # whether stemming is wanted.
        # dict.fromkeys de-duplicates while keeping a stable order.
        return [
            verb
            for verb in dict.fromkeys(target_verbs)
            if re.search(r"\b" + re.escape(verb), text_lower)
        ]

    def _detect_reconversion(self, cv_text: str, job_desc: str) -> Tuple[bool, str]:
        """Detect whether the candidate appears to be in a career change.

        The check is a case-insensitive substring search of the CV for the
        entries of ``RECONVERSION_KEYWORDS``; the first hit, in declaration
        order, is reported.

        Args:
            cv_text: Raw CV text; ``None`` is treated as empty.
            job_desc: Job description. Currently unused; kept for interface
                compatibility.

        Returns:
            ``(True, reason naming the keyword)`` on a hit, otherwise
            ``(False, "Parcours classique apparent")``.
        """
        cv_lower = (cv_text or "").lower()

        for kw in self.RECONVERSION_KEYWORDS:
            if kw in cv_lower:
                return True, f"Mot-clé détecté : '{kw}'"

        return False, "Parcours classique apparent"
| |
|