Spaces:
Sleeping
Sleeping
Update src/services/cv_service.py
Browse files- src/services/cv_service.py +40 -24
src/services/cv_service.py
CHANGED
|
@@ -1,6 +1,9 @@
|
|
| 1 |
import json
|
| 2 |
import logging
|
|
|
|
|
|
|
| 3 |
from typing import Dict, Any, List
|
|
|
|
| 4 |
from src.config import load_pdf
|
| 5 |
from src.agents.cv_agents import CVAgentOrchestrator
|
| 6 |
from src.agents.scoring_agent import SimpleScoringAgent
|
|
@@ -12,18 +15,31 @@ class CVParsingService:
|
|
| 12 |
self.models = models
|
| 13 |
self.orchestrator = CVAgentOrchestrator(models.get("llm"))
|
| 14 |
self.scoring_agent = SimpleScoringAgent()
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
cv_text = load_pdf(pdf_path)
|
| 17 |
if not cv_text or not cv_text.strip():
|
| 18 |
return self._create_fallback_data()
|
|
|
|
| 19 |
logger.info(f"CV text loaded: {len(cv_text)} characters")
|
| 20 |
sections = self.orchestrator.split_cv_sections(cv_text)
|
| 21 |
logger.info(f"Sections extracted: {list(sections.keys())}")
|
| 22 |
cv_data = self.orchestrator.extract_all_sections(sections)
|
| 23 |
logger.info(f"CV data extracted: {cv_data is not None}")
|
|
|
|
| 24 |
if not cv_data or not cv_data.get("candidat") or not self._is_valid_extraction(cv_data):
|
| 25 |
logger.warning("Agent extraction failed or incomplete, using fallback extraction")
|
| 26 |
-
return self.
|
|
|
|
| 27 |
logger.info("Calculating skill levels...")
|
| 28 |
scores = self.scoring_agent.calculate_scores(cv_data["candidat"])
|
| 29 |
if scores and scores.get("analyse_competences"):
|
|
@@ -34,10 +50,30 @@ class CVParsingService:
|
|
| 34 |
else:
|
| 35 |
logger.warning("No skill levels calculated, adding empty analysis")
|
| 36 |
cv_data["candidat"]["analyse_competences"] = []
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
return cv_data
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
def _get_levels_summary(self, competences: List[Dict[str, Any]]) -> str:
|
| 40 |
-
"""Génère un résumé des niveaux de compétences"""
|
| 41 |
levels_count = {}
|
| 42 |
for comp in competences:
|
| 43 |
level = comp.get("level", "unknown")
|
|
@@ -45,29 +81,9 @@ class CVParsingService:
|
|
| 45 |
return ", ".join([f"{count} {level}" for level, count in levels_count.items()])
|
| 46 |
|
| 47 |
def _is_valid_extraction(self, cv_data: Dict[str, Any]) -> bool:
|
| 48 |
-
"""Vérifie si l'extraction contient des données valides"""
|
| 49 |
candidat = cv_data.get("candidat", {})
|
| 50 |
has_info = bool(candidat.get("informations_personnelles", {}).get("nom", "").strip())
|
| 51 |
has_skills = bool(candidat.get("compétences", {}).get("hard_skills", []) or
|
| 52 |
candidat.get("compétences", {}).get("soft_skills", []))
|
| 53 |
has_experience = bool(candidat.get("expériences", []))
|
| 54 |
-
return has_info or has_skills or has_experience
|
| 55 |
-
logger.info("Calculating scores...")
|
| 56 |
-
scores = self.scoring_agent.calculate_scores(cv_data["candidat"])
|
| 57 |
-
if scores and scores.get("analyse_competences"):
|
| 58 |
-
cv_data["candidat"].update(scores)
|
| 59 |
-
logger.info(f"Scores calculated: {len(scores.get('analyse_competences', []))} skills scored")
|
| 60 |
-
else:
|
| 61 |
-
logger.warning("No scores calculated, adding empty analysis")
|
| 62 |
-
cv_data["candidat"]["analyse_competences"] = []
|
| 63 |
-
return cv_data
|
| 64 |
-
|
| 65 |
-
def _is_valid_extraction(self, cv_data: Dict[str, Any]) -> bool:
|
| 66 |
-
"""Vérifie si l'extraction contient des données valides"""
|
| 67 |
-
candidat = cv_data.get("candidat", {})
|
| 68 |
-
has_info = bool(candidat.get("informations_personnelles", {}).get("nom", "").strip())
|
| 69 |
-
has_skills = bool(candidat.get("compétences", {}).get("hard_skills", []) or
|
| 70 |
-
candidat.get("compétences", {}).get("soft_skills", []))
|
| 71 |
-
has_experience = bool(candidat.get("expériences", []))
|
| 72 |
-
|
| 73 |
return has_info or has_skills or has_experience
|
|
|
|
| 1 |
import json
|
| 2 |
import logging
|
| 3 |
+
import os
|
| 4 |
+
from datetime import datetime
|
| 5 |
from typing import Dict, Any, List
|
| 6 |
+
from pymongo import MongoClient
|
| 7 |
from src.config import load_pdf
|
| 8 |
from src.agents.cv_agents import CVAgentOrchestrator
|
| 9 |
from src.agents.scoring_agent import SimpleScoringAgent
|
|
|
|
| 15 |
self.models = models
|
| 16 |
self.orchestrator = CVAgentOrchestrator(models.get("llm"))
|
| 17 |
self.scoring_agent = SimpleScoringAgent()
|
| 18 |
+
|
| 19 |
+
# Initialisation MongoDB
|
| 20 |
+
try:
|
| 21 |
+
self.client = MongoClient(os.getenv("MONGO_URI"))
|
| 22 |
+
self.db = self.client[os.getenv("MONGO_DB_NAME")]
|
| 23 |
+
self.candidate_collection = self.db[os.getenv("MONGO_CV_COLLECTION")]
|
| 24 |
+
except:
|
| 25 |
+
self.client = None
|
| 26 |
+
self.candidate_collection = None
|
| 27 |
+
|
| 28 |
+
def parse_cv(self, pdf_path: str, user_id: str = None) -> Dict[str, Any]:
|
| 29 |
cv_text = load_pdf(pdf_path)
|
| 30 |
if not cv_text or not cv_text.strip():
|
| 31 |
return self._create_fallback_data()
|
| 32 |
+
|
| 33 |
logger.info(f"CV text loaded: {len(cv_text)} characters")
|
| 34 |
sections = self.orchestrator.split_cv_sections(cv_text)
|
| 35 |
logger.info(f"Sections extracted: {list(sections.keys())}")
|
| 36 |
cv_data = self.orchestrator.extract_all_sections(sections)
|
| 37 |
logger.info(f"CV data extracted: {cv_data is not None}")
|
| 38 |
+
|
| 39 |
if not cv_data or not cv_data.get("candidat") or not self._is_valid_extraction(cv_data):
|
| 40 |
logger.warning("Agent extraction failed or incomplete, using fallback extraction")
|
| 41 |
+
return self._create_fallback_data()
|
| 42 |
+
|
| 43 |
logger.info("Calculating skill levels...")
|
| 44 |
scores = self.scoring_agent.calculate_scores(cv_data["candidat"])
|
| 45 |
if scores and scores.get("analyse_competences"):
|
|
|
|
| 50 |
else:
|
| 51 |
logger.warning("No skill levels calculated, adding empty analysis")
|
| 52 |
cv_data["candidat"]["analyse_competences"] = []
|
| 53 |
+
|
| 54 |
+
# Stockage automatique dans MongoDB
|
| 55 |
+
self._save_profile(cv_data["candidat"], user_id)
|
| 56 |
+
|
| 57 |
return cv_data
|
| 58 |
+
|
| 59 |
+
def _save_profile(self, candidat_data: Dict[str, Any], user_id: str = None):
|
| 60 |
+
if not self.candidate_collection or not isinstance(candidat_data, dict):
|
| 61 |
+
return
|
| 62 |
+
|
| 63 |
+
try:
|
| 64 |
+
profile_data = candidat_data.copy()
|
| 65 |
+
profile_data["created_at"] = datetime.utcnow()
|
| 66 |
+
profile_data["updated_at"] = datetime.utcnow()
|
| 67 |
+
|
| 68 |
+
if user_id:
|
| 69 |
+
profile_data["user_id"] = user_id
|
| 70 |
+
|
| 71 |
+
self.candidate_collection.insert_one(profile_data)
|
| 72 |
+
logger.info("CV stocké dans MongoDB avec succès")
|
| 73 |
+
except Exception as e:
|
| 74 |
+
logger.error(f"Erreur stockage CV: {e}")
|
| 75 |
+
|
| 76 |
def _get_levels_summary(self, competences: List[Dict[str, Any]]) -> str:
|
|
|
|
| 77 |
levels_count = {}
|
| 78 |
for comp in competences:
|
| 79 |
level = comp.get("level", "unknown")
|
|
|
|
| 81 |
return ", ".join([f"{count} {level}" for level, count in levels_count.items()])
|
| 82 |
|
| 83 |
def _is_valid_extraction(self, cv_data: Dict[str, Any]) -> bool:
|
|
|
|
| 84 |
candidat = cv_data.get("candidat", {})
|
| 85 |
has_info = bool(candidat.get("informations_personnelles", {}).get("nom", "").strip())
|
| 86 |
has_skills = bool(candidat.get("compétences", {}).get("hard_skills", []) or
|
| 87 |
candidat.get("compétences", {}).get("soft_skills", []))
|
| 88 |
has_experience = bool(candidat.get("expériences", []))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
return has_info or has_skills or has_experience
|