File size: 4,954 Bytes
4e9b744
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import logging
import re
from typing import Dict, List, Any, Tuple
from src.services.semantic_service import SemanticService

logger = logging.getLogger(__name__)

class SearchService:
    """
    Research agent (RAG & grounding).

    Runs the gap analysis between a CV and a job description and flags
    CVs that contain career-change ("reconversion") markers.
    """

    # Lowercase keywords; each one found in a job description adds one point
    # to the corresponding job type.
    JOB_TYPES = {
        "DATA_ENGINEER": ["engineer", "ingénieur data", "mlops", "architecte", "platform"],
        "DATA_SCIENTIST": ["scientist", "science", "nlp", "computer vision", "chercher", "research"],
        "DATA_ANALYST": ["analyst", "analytics", "bi", "business intelligence", "dashboard"],
    }

    # Action verbs (French infinitives) searched for in the CV, per job type.
    VERB_MAPPINGS = {
        "DATA_ENGINEER": [
            "optimiser", "déployer", "industrialiser", "automatiser", "architecturer",
            "monitorer", "scaler", "refactorer", "migrer", "contraindre"
        ],
        "DATA_SCIENTIST": [
            "entraîner", "finetuner", "expérimenter", "évaluer", "modéliser",
            "optimiser", "analyser", "comparer", "implémenter"
        ],
        "DATA_ANALYST": [
            "visualiser", "présenter", "identifier", "extraire", "recommander",
            "analyser", "synthétiser", "automatiser", "reporter"
        ]
    }

    # Fallback to general verbs if no type detected
    DEFAULT_VERBS = VERB_MAPPINGS["DATA_ENGINEER"] + VERB_MAPPINGS["DATA_SCIENTIST"]

    # Number of distinct action verbs at which the production score reaches 1.0.
    PRODUCTION_VERBS_TARGET = 4

    # Keywords of at most this many characters (e.g. "bi", "nlp") must match a
    # whole word; longer keywords only need to start a word so that inflected
    # forms ("engineers", "analyste") still count.
    _EXACT_MATCH_MAX_LEN = 3

    def __init__(self):
        self.semantic_service = SemanticService()

    def analyze_gap(self, cv_text: str, job_description: str) -> Dict[str, Any]:
        """
        Run the gap analysis between a CV and a job description.

        Args:
            cv_text: Raw text of the candidate's CV.
            job_description: Raw text of the job posting.

        Returns:
            A dictionary with the keys ``job_type``, ``semantic_score``,
            ``production_verbs_found``, ``production_mindset_score``,
            ``is_reconversion``, ``reconversion_reason`` and
            ``hidden_skill_gaps`` (a fixed placeholder string).
        """
        logger.info("Starting Gap Analysis...")

        # 0. Job type detection
        job_type = self._detect_job_type(job_description)
        logger.info("Detected Job Type: %s", job_type)

        # 1. Action verbs extraction; the verb list depends on the job type,
        #    with DEFAULT_VERBS used for types that have no dedicated list.
        target_verbs = self.VERB_MAPPINGS.get(job_type, self.DEFAULT_VERBS)
        found_verbs = self._extract_action_verbs(cv_text, target_verbs)

        # Score grows linearly with the number of distinct verbs found and is
        # capped at 1.0 once PRODUCTION_VERBS_TARGET verbs are present.
        production_score = min(1.0, len(found_verbs) / self.PRODUCTION_VERBS_TARGET)

        # 2. Semantic grounding
        semantic_score = self.semantic_service.compute_similarity(cv_text, job_description)

        # 3. Reconversion reporting
        is_reconversion, reconversion_reason = self._detect_reconversion(cv_text, job_description)

        return {
            "job_type": job_type,
            "semantic_score": semantic_score,
            "production_verbs_found": found_verbs,
            "production_mindset_score": production_score,
            "is_reconversion": is_reconversion,
            "reconversion_reason": reconversion_reason,
            "hidden_skill_gaps": "Analyse à compléter par LLM"
        }

    def _keyword_in_text(self, keyword: str, text_lower: str) -> bool:
        """Return True if ``keyword`` occurs in ``text_lower`` on a word boundary."""
        # Anchoring on the left prevents hits in the middle of unrelated words
        # (plain substring search found "bi" inside "mobile").
        pattern = r'\b' + re.escape(keyword)
        if len(keyword) <= self._EXACT_MATCH_MAX_LEN:
            # Short keywords are too ambiguous as prefixes ("bi" -> "bien").
            pattern += r'\b'
        return re.search(pattern, text_lower) is not None

    def _detect_job_type(self, job_desc: str) -> str:
        """
        Determine the job type from the job description.

        Each job type scores one point per keyword of ``JOB_TYPES`` found in
        the description (case-insensitive, word-boundary aware).

        Returns:
            The key of ``JOB_TYPES`` with the highest score, or
            ``"GENERAL_TECH"`` when no keyword matches at all.
        """
        text_lower = job_desc.lower()

        scores = {
            j_type: sum(1 for kw in keywords if self._keyword_in_text(kw, text_lower))
            for j_type, keywords in self.JOB_TYPES.items()
        }

        # max() returns the first key holding the maximum, so ties are resolved
        # in favour of the type declared first in JOB_TYPES.
        best_match = max(scores, key=scores.get)
        if scores[best_match] == 0:
            return "GENERAL_TECH"

        return best_match

    def _extract_action_verbs(self, text: str, target_verbs: List[str]) -> List[str]:
        """
        Return the verbs of ``target_verbs`` that appear in ``text``.

        A verb counts as present when it occurs at the start of a word
        (case-insensitive). The result contains no duplicates and keeps the
        order of ``target_verbs``, so the output is stable across runs.
        """
        text_lower = text.lower()
        matched = [
            verb for verb in target_verbs
            if re.search(r'\b' + re.escape(verb), text_lower)
        ]
        # dict.fromkeys de-duplicates while preserving first-seen order
        # (target lists such as DEFAULT_VERBS may repeat a verb).
        return list(dict.fromkeys(matched))

    def _detect_reconversion(self, cv_text: str, job_desc: str) -> Tuple[bool, str]:
        """
        Flag a CV that mentions a career-change marker.

        The check is a plain case-insensitive keyword search in the CV;
        ``job_desc`` is accepted but currently not used.

        Returns:
            ``(True, reason)`` naming the first marker found, otherwise
            ``(False, reason)``.
        """
        cv_lower = cv_text.lower()

        reconversion_keywords = ["reconversion", "bootcamp", "formation intensive", "rncp", "transition professionnelle"]
        for kw in reconversion_keywords:
            if kw in cv_lower:
                return True, f"Mot-clé détecté : '{kw}'"

        # Note: A more robust check would involve parsing dates and titles,
        # but this simple heuristic allows flagging potential profiles for the Agents to confirm.
        return False, "Parcours classique apparent"