# cv_parser/src/services/cv_service.py
import json
import logging
import os
from collections import Counter
from datetime import datetime
from typing import Dict, Any, List

from src.agents.analysis_agent import AnalysisAgent
from src.agents.cv_agents import CVAgentOrchestrator
from src.agents.scoring_agent import SimpleScoringAgent
from src.config import load_pdf
# Module-level logger named after this module, per stdlib convention.
logger = logging.getLogger(__name__)
async def parse_cv(pdf_path: str) -> Dict[str, Any]:
    """Parse a CV PDF into a structured candidate payload.

    Pipeline: load the PDF text, split it into sections, run the agent
    extraction, score the extracted skills, enrich the result with a
    global analysis, and stamp the total execution time. Falls back to a
    placeholder payload when the PDF yields no text or the extraction is
    unusable.

    Args:
        pdf_path: Filesystem path of the CV PDF to parse.

    Returns:
        Dict with a "candidat" entry plus an "execution_time" (seconds).
    """
    # Agents are built before the timer starts, so construction cost is
    # excluded from the reported execution time.
    orchestrator = CVAgentOrchestrator(llm=None)
    scoring_agent = SimpleScoringAgent()
    start_time = datetime.now()

    # --- Text loading -----------------------------------------------------
    cv_text = load_pdf(pdf_path)
    if not cv_text or not cv_text.strip():
        return _create_fallback_data()
    logger.info(f"CV text loaded: {len(cv_text)} characters")

    # --- Agent extraction -------------------------------------------------
    sections = orchestrator.split_cv_sections(cv_text)
    logger.info(f"Sections extracted: {list(sections.keys())}")
    cv_data = orchestrator.extract_all_sections(sections)
    logger.info(f"CV data extracted: {cv_data is not None}")

    extraction_unusable = (
        not cv_data
        or not cv_data.get("candidat")
        or not _is_valid_extraction(cv_data)
    )
    if extraction_unusable:
        logger.warning("Agent extraction failed or incomplete, using fallback extraction")
        return _create_fallback_data()

    # --- Skill scoring ----------------------------------------------------
    logger.info("Calculating skill levels...")
    scores = scoring_agent.calculate_scores(cv_data["candidat"])
    if scores and scores.get("analyse_competences"):
        cv_data["candidat"].update(scores)
        competences = scores.get("analyse_competences", [])
        skills_count = len(competences)
        levels_summary = _get_levels_summary(competences)
        logger.info(f"Skill levels calculated: {skills_count} skills - {levels_summary}")
    else:
        # Keep the key present so downstream consumers can rely on it.
        logger.warning("No skill levels calculated, adding empty analysis")
        cv_data["candidat"]["analyse_competences"] = []

    # --- Enriched profile analysis ---------------------------------------
    logger.info("Lancement de l'analyse enrichie du profil...")
    enriched_analysis = AnalysisAgent().analyze_candidate(cv_data)
    if enriched_analysis:
        cv_data["candidat"]["analyse_globale"] = enriched_analysis
        logger.info("Analyse enrichie ajoutée avec succès")
    else:
        logger.warning("Aucune analyse enrichie générée")

    # --- Timing -----------------------------------------------------------
    duration = (datetime.now() - start_time).total_seconds()
    cv_data["execution_time"] = duration
    logger.info(f"CV parsed in {duration:.2f} seconds")
    return cv_data
def _create_fallback_data() -> Dict[str, Any]:
return {
"candidat": {
"informations_personnelles": {
"nom": "Données non extraites",
"email": "N/A",
"numero_de_telephone": "N/A",
"localisation": "N/A"
},
"compétences": {
"hard_skills": [],
"soft_skills": []
},
"expériences": [],
"projets": [],
"formations": [],
"reconversion": {
"is_reconversion": False,
"analysis": "N/A"
},
"analyse_competences": []
}
}
def _get_levels_summary(competences: List[Dict[str, Any]]) -> str:
levels_count = {}
for comp in competences:
level = comp.get("level", "unknown")
levels_count[level] = levels_count.get(level, 0) + 1
return ", ".join([f"{count} {level}" for level, count in levels_count.items()])
def _is_valid_extraction(cv_data: Dict[str, Any]) -> bool:
candidat = cv_data.get("candidat", {})
has_info = bool(candidat.get("informations_personnelles", {}).get("nom", "").strip())
has_skills = bool(candidat.get("compétences", {}).get("hard_skills", []) or
candidat.get("compétences", {}).get("soft_skills", []))
has_experience = bool(candidat.get("expériences", []))
return has_info or has_skills or has_experience