File size: 3,815 Bytes
ef9eba2
 
 
 
 
 
 
 
36b7ac5
 
ef9eba2
 
 
e075a63
36b7ac5
ef9eba2
ca2bc16
ef9eba2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36b7ac5
 
 
 
ef9eba2
36b7ac5
 
 
 
 
ca2bc16
 
 
 
 
ef9eba2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import json
import logging
import os
from datetime import datetime
from typing import Dict, Any, List
from src.config import load_pdf
from src.agents.cv_agents import CVAgentOrchestrator
from src.agents.scoring_agent import SimpleScoringAgent
from src.agents.analysis_agent import AnalysisAgent


# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)

async def parse_cv(pdf_path: str) -> Dict[str, Any]:
    """Parse a CV PDF into a structured candidate dictionary.

    The pipeline loads the PDF text, splits it into sections and extracts
    them through ``CVAgentOrchestrator``, adds the scores produced by
    ``SimpleScoringAgent`` and finally attaches the result of
    ``AnalysisAgent.analyze_candidate`` under ``"analyse_globale"``.

    Args:
        pdf_path: Path of the PDF file, passed as-is to ``load_pdf``.

    Returns:
        A dictionary with a ``"candidat"`` entry. On the successful path it
        also carries ``"execution_time"`` (elapsed seconds as a float). When
        the PDF yields no text or the extraction is rejected, the placeholder
        from ``_create_fallback_data()`` is returned instead, without
        ``"execution_time"``.

    NOTE(review): the function is declared ``async`` but contains no
    ``await``; the calls below run synchronously -- confirm this is intended.
    """
    # Local import keeps this block self-contained; perf_counter is monotonic,
    # so the measured duration cannot be skewed by wall-clock adjustments.
    import time

    orchestrator = CVAgentOrchestrator(llm=None)
    scoring_agent = SimpleScoringAgent()
    started = time.perf_counter()

    cv_text = load_pdf(pdf_path)
    if not cv_text or not cv_text.strip():
        # Make the silent fallback visible in the logs.
        logger.warning(
            "No text extracted from PDF '%s', using fallback data", pdf_path
        )
        return _create_fallback_data()

    logger.info("CV text loaded: %d characters", len(cv_text))
    sections = orchestrator.split_cv_sections(cv_text)
    logger.info("Sections extracted: %s", list(sections.keys()))
    cv_data = orchestrator.extract_all_sections(sections)
    logger.info("CV data extracted: %s", cv_data is not None)

    if (
        not cv_data
        or not cv_data.get("candidat")
        or not _is_valid_extraction(cv_data)
    ):
        logger.warning(
            "Agent extraction failed or incomplete, using fallback extraction"
        )
        return _create_fallback_data()

    candidate = cv_data["candidat"]

    logger.info("Calculating skill levels...")
    scores = scoring_agent.calculate_scores(candidate)
    analysed_skills = scores.get("analyse_competences") if scores else None
    if analysed_skills:
        # Merge every key returned by the scoring agent into the candidate.
        candidate.update(scores)
        logger.info(
            "Skill levels calculated: %d skills - %s",
            len(analysed_skills),
            _get_levels_summary(analysed_skills),
        )
    else:
        logger.warning("No skill levels calculated, adding empty analysis")
        candidate["analyse_competences"] = []

    logger.info("Lancement de l'analyse enrichie du profil...")
    analysis_agent = AnalysisAgent()
    enriched_analysis = analysis_agent.analyze_candidate(cv_data)

    if enriched_analysis:
        candidate["analyse_globale"] = enriched_analysis
        logger.info("Analyse enrichie ajoutée avec succès")
    else:
        logger.warning("Aucune analyse enrichie générée")

    duration = time.perf_counter() - started
    cv_data["execution_time"] = duration

    logger.info("CV parsed in %.2f seconds", duration)
    return cv_data

def _create_fallback_data() -> Dict[str, Any]:
    return {
        "candidat": {
            "informations_personnelles": {
                "nom": "Données non extraites",
                "email": "N/A",
                "numero_de_telephone": "N/A",
                "localisation": "N/A"
            },
            "compétences": {
                "hard_skills": [],
                "soft_skills": []
            },
            "expériences": [],
            "projets": [],
            "formations": [],
            "reconversion": {
                "is_reconversion": False,
                "analysis": "N/A"
            },
            "analyse_competences": []
        }
    }

def _get_levels_summary(competences: List[Dict[str, Any]]) -> str:
    levels_count = {}
    for comp in competences:
        level = comp.get("level", "unknown")
        levels_count[level] = levels_count.get(level, 0) + 1
    return ", ".join([f"{count} {level}" for level, count in levels_count.items()])

def _is_valid_extraction(cv_data: Dict[str, Any]) -> bool:
    candidat = cv_data.get("candidat", {})
    has_info = bool(candidat.get("informations_personnelles", {}).get("nom", "").strip())
    has_skills = bool(candidat.get("compétences", {}).get("hard_skills", []) or 
                     candidat.get("compétences", {}).get("soft_skills", []))
    has_experience = bool(candidat.get("expériences", []))
    return has_info or has_skills or has_experience