Spaces:
Sleeping
Sleeping
Update src/services/cv_service.py
Browse files- src/services/cv_service.py +52 -13
src/services/cv_service.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
import json
|
| 2 |
import logging
|
| 3 |
-
from typing import Dict, Any
|
| 4 |
from src.config import load_pdf
|
| 5 |
from src.agents.cv_agents import CVAgentOrchestrator
|
| 6 |
-
from src.agents.scoring_agent import
|
|
|
|
| 7 |
|
| 8 |
logger = logging.getLogger(__name__)
|
| 9 |
|
|
@@ -11,26 +12,64 @@ class CVParsingService:
|
|
| 11 |
def __init__(self, models: Dict[str, Any]):
|
| 12 |
self.models = models
|
| 13 |
self.orchestrator = CVAgentOrchestrator(models.get("llm"))
|
| 14 |
-
self.scoring_agent =
|
| 15 |
-
|
| 16 |
def parse_cv(self, pdf_path: str) -> Dict[str, Any]:
|
| 17 |
cv_text = load_pdf(pdf_path)
|
| 18 |
-
|
| 19 |
if not cv_text or not cv_text.strip():
|
| 20 |
-
return self._create_fallback_data()
|
| 21 |
-
|
| 22 |
logger.info(f"CV text loaded: {len(cv_text)} characters")
|
| 23 |
sections = self.orchestrator.split_cv_sections(cv_text)
|
| 24 |
logger.info(f"Sections extracted: {list(sections.keys())}")
|
| 25 |
cv_data = self.orchestrator.extract_all_sections(sections)
|
| 26 |
logger.info(f"CV data extracted: {cv_data is not None}")
|
| 27 |
-
if cv_data
|
| 28 |
-
logger.
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
cv_data["candidat"].update(scores)
|
| 31 |
logger.info(f"Scores calculated: {len(scores.get('analyse_competences', []))} skills scored")
|
| 32 |
else:
|
| 33 |
-
logger.warning("No
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
-
return
|
|
|
|
| 1 |
import json
|
| 2 |
import logging
|
| 3 |
+
from typing import Dict, Any, List
|
| 4 |
from src.config import load_pdf
|
| 5 |
from src.agents.cv_agents import CVAgentOrchestrator
|
| 6 |
+
from src.agents.scoring_agent import SimpleScoringAgent
|
| 7 |
+
from src.utils.fallback_extractor import FallbackExtractor
|
| 8 |
|
| 9 |
logger = logging.getLogger(__name__)
|
| 10 |
|
|
|
|
| 12 |
def __init__(self, models: Dict[str, Any]):
    """Keep the model registry and build the collaborators used for parsing.

    Args:
        models: Mapping of model names to model objects. The value stored
            under ``"llm"`` (``None`` when the key is absent) is passed to
            the agent orchestrator.
    """
    llm = models.get("llm")

    self.models = models
    self.orchestrator = CVAgentOrchestrator(llm)
    self.scoring_agent = SimpleScoringAgent()
    self.fallback_extractor = FallbackExtractor()
|
| 17 |
def parse_cv(self, pdf_path: str) -> Dict[str, Any]:
    """Parse the CV stored at ``pdf_path`` and return the extracted data.

    The PDF text is loaded, split into sections and handed to the agent
    orchestrator. When the agent result is empty, has no ``"candidat"``
    entry, or is rejected by ``_is_valid_extraction``, the output of the
    fallback extractor is returned instead. Otherwise the scoring agent's
    output is merged into ``cv_data["candidat"]``; when no skill analysis
    is produced, ``"analyse_competences"`` is set to an empty list.

    Args:
        pdf_path: Path of the PDF file passed to ``load_pdf``.

    Returns:
        The extracted CV dictionary, or whatever the fallback path returns.
    """
    raw_text = load_pdf(pdf_path)
    text_is_blank = not raw_text or not raw_text.strip()
    if text_is_blank:
        # NOTE(review): `_create_fallback_data` is not defined in the part of
        # the class visible here -- confirm it exists, otherwise this path
        # raises AttributeError.
        return self._create_fallback_data()

    logger.info(f"CV text loaded: {len(raw_text)} characters")

    sections = self.orchestrator.split_cv_sections(raw_text)
    logger.info(f"Sections extracted: {list(sections.keys())}")

    cv_data = self.orchestrator.extract_all_sections(sections)
    logger.info(f"CV data extracted: {cv_data is not None}")

    # Short-circuits in the same order as the individual checks: presence of
    # a result, presence of a candidate entry, then content validation.
    extraction_usable = (
        bool(cv_data)
        and bool(cv_data.get("candidat"))
        and self._is_valid_extraction(cv_data)
    )
    if not extraction_usable:
        logger.warning("Agent extraction failed or incomplete, using fallback extraction")
        return self.fallback_extractor.extract_basic_info(raw_text)

    logger.info("Calculating skill levels...")
    candidate = cv_data["candidat"]
    scores = self.scoring_agent.calculate_scores(candidate)

    analysis = scores.get("analyse_competences") if scores else None
    if not analysis:
        logger.warning("No skill levels calculated, adding empty analysis")
        candidate["analyse_competences"] = []
        return cv_data

    # Merge every key produced by the scoring agent into the candidate entry.
    candidate.update(scores)
    skills_count = len(analysis)
    levels_summary = self._get_levels_summary(analysis)
    logger.info(f"Skill levels calculated: {skills_count} skills - {levels_summary}")
    return cv_data
|
| 40 |
+
|
| 41 |
+
def _get_levels_summary(self, competences: List[Dict[str, Any]]) -> str:
|
| 42 |
+
"""Génère un résumé des niveaux de compétences"""
|
| 43 |
+
levels_count = {}
|
| 44 |
+
for comp in competences:
|
| 45 |
+
level = comp.get("level", "unknown")
|
| 46 |
+
levels_count[level] = levels_count.get(level, 0) + 1
|
| 47 |
+
return ", ".join([f"{count} {level}" for level, count in levels_count.items()])
|
| 48 |
+
|
| 49 |
+
def _is_valid_extraction(self, cv_data: Dict[str, Any]) -> bool:
|
| 50 |
+
"""Vérifie si l'extraction contient des données valides"""
|
| 51 |
+
candidat = cv_data.get("candidat", {})
|
| 52 |
+
has_info = bool(candidat.get("informations_personnelles", {}).get("nom", "").strip())
|
| 53 |
+
has_skills = bool(candidat.get("compétences", {}).get("hard_skills", []) or
|
| 54 |
+
candidat.get("compétences", {}).get("soft_skills", []))
|
| 55 |
+
has_experience = bool(candidat.get("expériences", []))
|
| 56 |
+
return has_info or has_skills or has_experience
|
| 57 |
+
logger.info("Calculating scores...")
|
| 58 |
+
scores = self.scoring_agent.calculate_scores(cv_data["candidat"])
|
| 59 |
+
if scores and scores.get("analyse_competences"):
|
| 60 |
cv_data["candidat"].update(scores)
|
| 61 |
logger.info(f"Scores calculated: {len(scores.get('analyse_competences', []))} skills scored")
|
| 62 |
else:
|
| 63 |
+
logger.warning("No scores calculated, adding empty analysis")
|
| 64 |
+
cv_data["candidat"]["analyse_competences"] = []
|
| 65 |
+
return cv_data
|
| 66 |
+
|
| 67 |
+
def _is_valid_extraction(self, cv_data: Dict[str, Any]) -> bool:
|
| 68 |
+
"""Vérifie si l'extraction contient des données valides"""
|
| 69 |
+
candidat = cv_data.get("candidat", {})
|
| 70 |
+
has_info = bool(candidat.get("informations_personnelles", {}).get("nom", "").strip())
|
| 71 |
+
has_skills = bool(candidat.get("compétences", {}).get("hard_skills", []) or
|
| 72 |
+
candidat.get("compétences", {}).get("soft_skills", []))
|
| 73 |
+
has_experience = bool(candidat.get("expériences", []))
|
| 74 |
|
| 75 |
+
return has_info or has_skills or has_experience
|