File size: 3,967 Bytes
1e0aa4f
 
eb7ed70
1e0aa4f
 
eb7ed70
1e0aa4f
 
 
 
 
 
 
eb7ed70
1e0aa4f
 
 
eb7ed70
fc1fade
1e0aa4f
fc1fade
1e0aa4f
fc1fade
eb7ed70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1e0aa4f
fc1fade
 
eb7ed70
 
 
 
 
 
 
 
 
 
 
1e0aa4f
eb7ed70
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import json
import logging
from typing import Dict, Any, List
from src.config import load_pdf
from src.agents.cv_agents import CVAgentOrchestrator
from src.agents.scoring_agent import SimpleScoringAgent

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)

class CVParsingService:
    """Turn a CV PDF into structured candidate data enriched with skill levels.

    The service loads the PDF text, asks the agent orchestrator to split it
    into sections and extract structured data, validates the result, and then
    merges the output of the scoring agent into the ``"candidat"`` entry.
    When extraction yields nothing usable, a fallback result is returned
    instead of raising.
    """

    def __init__(self, models: Dict[str, Any], fallback_extractor: Any = None):
        """Build the service.

        Args:
            models: Mapping of model handles; the ``"llm"`` entry (possibly
                ``None``) is handed to ``CVAgentOrchestrator``.
            fallback_extractor: Optional object exposing
                ``extract_basic_info(cv_text)``, used when agent extraction
                fails. When omitted, an empty skeleton result is returned
                on failure instead.
        """
        self.models = models
        self.orchestrator = CVAgentOrchestrator(models.get("llm"))
        self.scoring_agent = SimpleScoringAgent()
        # Previously read in parse_cv() without ever being assigned, which
        # raised AttributeError on the fallback path.
        self.fallback_extractor = fallback_extractor

    def parse_cv(self, pdf_path: str) -> Dict[str, Any]:
        """Parse the CV at *pdf_path* and return the extracted data.

        Args:
            pdf_path: Path passed unchanged to ``load_pdf``.

        Returns:
            The extracted data with ``"analyse_competences"`` set under
            ``"candidat"`` (empty list when no levels were computed), or a
            fallback result when the PDF has no text or extraction is not
            usable.
        """
        cv_text = load_pdf(pdf_path)
        if not cv_text or not cv_text.strip():
            logger.warning("No text extracted from PDF, returning fallback data")
            return self._create_fallback_data()

        logger.info("CV text loaded: %s characters", len(cv_text))
        sections = self.orchestrator.split_cv_sections(cv_text)
        logger.info("Sections extracted: %s", list(sections.keys()))
        cv_data = self.orchestrator.extract_all_sections(sections)
        logger.info("CV data extracted: %s", cv_data is not None)

        # _is_valid_extraction() already rejects a missing/empty "candidat".
        if not cv_data or not self._is_valid_extraction(cv_data):
            logger.warning("Agent extraction failed or incomplete, using fallback extraction")
            return self._fallback_extraction(cv_text)

        logger.info("Calculating skill levels...")
        scores = self.scoring_agent.calculate_scores(cv_data["candidat"])
        analysis = scores.get("analyse_competences") if scores else None
        if analysis:
            cv_data["candidat"].update(scores)
            logger.info(
                "Skill levels calculated: %s skills - %s",
                len(analysis),
                self._get_levels_summary(analysis),
            )
        else:
            logger.warning("No skill levels calculated, adding empty analysis")
            cv_data["candidat"]["analyse_competences"] = []
        return cv_data

    def _fallback_extraction(self, cv_text: str) -> Dict[str, Any]:
        """Return fallback data for *cv_text* when agent extraction failed."""
        # getattr() keeps this safe for subclasses that skip __init__.
        extractor = getattr(self, "fallback_extractor", None)
        if extractor is None:
            return self._create_fallback_data()
        return extractor.extract_basic_info(cv_text)

    def _create_fallback_data(self) -> Dict[str, Any]:
        """Return a fresh, empty result skeleton.

        The shape mirrors the keys this class reads and writes elsewhere.
        NOTE(review): downstream consumers may expect additional fields --
        confirm against the orchestrator's output schema.
        """
        return {
            "candidat": {
                "informations_personnelles": {"nom": ""},
                "compétences": {"hard_skills": [], "soft_skills": []},
                "expériences": [],
                "analyse_competences": [],
            }
        }

    def _get_levels_summary(self, competences: List[Dict[str, Any]]) -> str:
        """Summarise skill levels as ``"<count> <level>, ..."``.

        Entries without a ``"level"`` key are counted as ``"unknown"``.
        Levels appear in first-seen order; an empty list gives ``""``.
        """
        levels_count: Dict[Any, int] = {}
        for comp in competences:
            level = comp.get("level", "unknown")
            levels_count[level] = levels_count.get(level, 0) + 1
        return ", ".join(f"{count} {level}" for level, count in levels_count.items())

    @staticmethod
    def _dict_or_empty(value: Any) -> Dict[str, Any]:
        """Return *value* if it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    def _is_valid_extraction(self, cv_data: Dict[str, Any]) -> bool:
        """Check whether the extraction holds any usable data.

        The extraction is valid when the candidate has a non-blank name, at
        least one hard or soft skill, or at least one experience. Missing,
        ``None`` or non-dict sections are treated as empty rather than
        raising.
        """
        candidat = cv_data.get("candidat")
        if not isinstance(candidat, dict):
            return False

        personal = self._dict_or_empty(candidat.get("informations_personnelles"))
        name = personal.get("nom")
        # "nom" may be present but None; only a non-blank string counts.
        has_info = isinstance(name, str) and bool(name.strip())

        skills = self._dict_or_empty(candidat.get("compétences"))
        has_skills = bool(skills.get("hard_skills") or skills.get("soft_skills"))

        has_experience = bool(candidat.get("expériences"))
        return has_info or has_skills or has_experience