import json
import logging
from collections import Counter
from typing import Dict, Any, List

from src.config import load_pdf
from src.agents.cv_agents import CVAgentOrchestrator
from src.agents.scoring_agent import SimpleScoringAgent

logger = logging.getLogger(__name__)


class CVParsingService:
    """Turn a CV PDF into a structured dict and attach a skill-level analysis.

    The service loads the PDF text, asks the orchestrator to split it into
    sections and extract structured data, validates the result, and finally
    merges the scoring agent's output into ``cv_data["candidat"]``.
    """

    def __init__(self, models: Dict[str, Any]):
        """Store the model registry and build the collaborating agents.

        Args:
            models: Mapping of model names to model objects; the entry under
                ``"llm"`` (or ``None`` when absent) is handed to the
                orchestrator.
        """
        self.models = models
        self.orchestrator = CVAgentOrchestrator(models.get("llm"))
        self.scoring_agent = SimpleScoringAgent()

    def parse_cv(self, pdf_path: str) -> Dict[str, Any]:
        """Parse the CV at *pdf_path* and return the extracted data.

        Args:
            pdf_path: Path passed to ``load_pdf``.

        Returns:
            The extracted ``cv_data`` dict, whose ``"candidat"`` entry is
            updated with the scoring agent's result (or given an empty
            ``"analyse_competences"`` list when nothing was scored). When the
            PDF yields no text, or the extraction is empty/invalid, fallback
            data is returned instead.
        """
        cv_text = load_pdf(pdf_path)
        if not cv_text or not cv_text.strip():
            # NOTE(review): `_create_fallback_data` is not defined in the
            # visible part of this file -- confirm it exists on this class.
            return self._create_fallback_data()

        logger.info("CV text loaded: %d characters", len(cv_text))

        sections = self.orchestrator.split_cv_sections(cv_text)
        logger.info("Sections extracted: %s", list(sections.keys()))

        cv_data = self.orchestrator.extract_all_sections(sections)
        logger.info("CV data extracted: %s", cv_data is not None)

        extraction_ok = (
            cv_data
            and cv_data.get("candidat")
            and self._is_valid_extraction(cv_data)
        )
        if not extraction_ok:
            logger.warning(
                "Agent extraction failed or incomplete, using fallback extraction"
            )
            # `fallback_extractor` is never assigned in `__init__`; use it only
            # when something else (subclass, injection) has provided one, so a
            # failed extraction no longer ends in an AttributeError.
            extractor = getattr(self, "fallback_extractor", None)
            if extractor is not None:
                return extractor.extract_basic_info(cv_text)
            return self._create_fallback_data()

        logger.info("Calculating skill levels...")
        candidate = cv_data["candidat"]
        scores = self.scoring_agent.calculate_scores(candidate)
        analysis = scores.get("analyse_competences") if scores else None

        if analysis:
            candidate.update(scores)
            logger.info(
                "Skill levels calculated: %d skills - %s",
                len(analysis),
                self._get_levels_summary(analysis),
            )
        else:
            logger.warning("No skill levels calculated, adding empty analysis")
            candidate["analyse_competences"] = []

        return cv_data

    def _get_levels_summary(self, competences: List[Dict[str, Any]]) -> str:
        """Build a short summary of how many skills fall into each level.

        Args:
            competences: Skill entries; each one's ``"level"`` value is
                counted, with ``"unknown"`` used when the key is missing.

        Returns:
            A comma-separated string such as ``"2 expert, 1 unknown"``, in
            order of first appearance; empty when *competences* is empty.
        """
        level_counts = Counter(
            comp.get("level", "unknown") for comp in competences
        )
        return ", ".join(
            f"{count} {level}" for level, count in level_counts.items()
        )

    def _is_valid_extraction(self, cv_data: Dict[str, Any]) -> bool:
        """Check whether the extraction contains any usable data.

        The extraction counts as valid when at least one of these is present:
        a non-blank candidate name, any hard or soft skill, or any experience.

        Args:
            cv_data: Extraction result; read through its ``"candidat"`` entry.

        Returns:
            ``True`` if any of the three signals is present, else ``False``.
        """
        # `or {}` / `or ""` also cover keys that are present but set to None,
        # which would otherwise break the chained `.get` / `.strip` calls.
        candidat = cv_data.get("candidat") or {}
        personal_info = candidat.get("informations_personnelles") or {}
        skills = candidat.get("compétences") or {}

        name = personal_info.get("nom") or ""
        has_info = bool(str(name).strip())
        has_skills = bool(
            skills.get("hard_skills") or skills.get("soft_skills")
        )
        has_experience = bool(candidat.get("expériences"))

        return has_info or has_skills or has_experience