Spaces:

QuentinL52
/

interview_agents_api

Sleeping

App Files Files Community

QuentinL52 committed on Jul 16, 2025

Commit

f9e10a2

verified ·

1 Parent(s): 1ebeab4

Update src/cv_parsing_agents.py

Browse files

Files changed (1) hide show

src/cv_parsing_agents.py +1 -168

src/cv_parsing_agents.py CHANGED Viewed

@@ -50,59 +50,30 @@ class CvParserAgent:
     Cette classe traite un fichier PDF de CV et en extrait les informations
     structurées (compétences, expériences, formations, etc.)
     """
     def __init__(self, pdf_path: str):
-        """
-        Initialise l'agent de parsing de CV.
-        Args:
-            pdf_path (str): Chemin vers le fichier PDF à traiter
-        Raises:
-            ValueError: Si le chemin du fichier est invalide
-            ImportError: Si les dépendances nécessaires ne sont pas disponibles
-        """
         if not pdf_path or not isinstance(pdf_path, str):
             raise ValueError("Le chemin du fichier PDF doit être une chaîne non vide")
         self.pdf_path = pdf_path
-        # Vérifier que les dépendances sont disponibles
         if not CREW_POOL_AVAILABLE:
             logger.warning("CrewAI crew_pool non disponible - mode dégradé")
         if not CONFIG_AVAILABLE:
             logger.warning("Module config non disponible - mode dégradé")
     def process(self) -> dict:
-        """
-        Traite le fichier PDF pour en extraire le contenu sous forme de JSON.
-        Returns:
-            dict: Dictionnaire contenant les données extraites du CV,
-                  ou données de fallback en cas d'erreur
-        """
         logger.info(f"Début du traitement du CV : {self.pdf_path}")
-        # Vérifier que le fichier existe
         if not os.path.exists(self.pdf_path):
             logger.error(f"Fichier PDF non trouvé: {self.pdf_path}")
             return self._create_fallback_data()
-        # Vérifier les dépendances
         if not CREW_POOL_AVAILABLE or not CONFIG_AVAILABLE:
             logger.error("Dépendances manquantes pour le traitement complet")
             return self._create_fallback_data()
         try:
-            # Charger le contenu du PDF
             cv_text_content = load_pdf(self.pdf_path)
             if not cv_text_content or not cv_text_content.strip():
                 logger.error("Le PDF semble vide ou illisible")
                 return self._create_fallback_data()
             logger.info(f"PDF chargé, {len(cv_text_content)} caractères extraits")
-            # Analyser avec CrewAI
             crew_output = analyse_cv(cv_text_content)
             if not crew_output or not hasattr(crew_output, 'raw') or not crew_output.raw.strip():
@@ -132,26 +103,13 @@ class CvParserAgent:
             return self._create_fallback_data()
     def _clean_json_string(self, raw_string: str) -> str:
-        """
-        Nettoie une chaîne JSON brute en supprimant les blocs de code markdown.
-        Args:
-            raw_string (str): Chaîne brute à nettoyer
-        Returns:
-            str: Chaîne JSON nettoyée
-        """
         json_string_cleaned = raw_string.strip()
-        # Supprimer les blocs de code markdown si présents
         if '```' in raw_string:
             try:
-                # Chercher le bloc json
                 if '```json' in raw_string:
                     json_part = raw_string.split('```json')[1].split('```')[0]
                     json_string_cleaned = json_part.strip()
                 else:
-                    # Prendre le premier bloc de code
                     parts = raw_string.split('```')
                     if len(parts) >= 3:
                         json_string_cleaned = parts[1].strip()
@@ -160,132 +118,7 @@ class CvParserAgent:
         return json_string_cleaned
-    def _create_fallback_data(self) -> dict:
-        """
-        Crée des données de CV de fallback en cas d'erreur de traitement.
-        Returns:
-            dict: Structure de données de CV par défaut
-        """
-        logger.info("Création de données de fallback pour le CV")
-        return {
-            "candidat": {
-                "informations_personnelles": {
-                    "nom": "Candidat Test",
-                    "email": "test@example.com",
-                    "numero_de_telephone": "Non spécifié",
-                    "localisation": "Non spécifiée"
-                },
-                "compétences": {
-                    "hard_skills": ["Python", "FastAPI", "Data Analysis"],
-                    "soft_skills": ["Communication", "Travail d'équipe", "Adaptabilité"]
-                },
-                "expériences": [
-                    {
-                        "Poste": "Développeur",
-                        "Entreprise": "Entreprise Test",
-                        "start_date": "2022",
-                        "end_date": "Aujourd'hui",
-                        "responsabilités": ["Développement d'applications", "Maintenance du code"]
-                    }
-                ],
-                "projets": {
-                    "professional": [
-                        {
-                            "title": "Projet Test",
-                            "role": "Développeur principal",
-                            "technologies": ["Python", "FastAPI"],
-                            "outcomes": ["Application fonctionnelle"]
-                        }
-                    ],
-                    "personal": []
-                },
-                "formations": [
-                    {
-                        "degree": "Formation en Informatique",
-                        "institution": "École Test",
-                        "start_date": "2020",
-                        "end_date": "2022"
-                    }
-                ],
-                "reconversion": {
-                    "is_reconversion": False,
-                    "analysis": "Pas de reconversion détectée - données de test"
-                }
-            }
-        }
-# Fonction utilitaire pour créer des données de fallback
-def create_fallback_cv_data(pdf_path: str = None) -> dict:
-    """
-    Fonction utilitaire pour créer des données de CV de fallback.
-    Args:
-        pdf_path (str, optional): Chemin du fichier PDF (non utilisé dans le fallback)
-    Returns:
-        dict: Structure de données de CV par défaut
-    """
-    return {
-        "candidat": {
-            "informations_personnelles": {
-                "nom": "Candidat Test",
-                "email": "test@example.com",
-                "numero_de_telephone": "Non spécifié",
-                "localisation": "Non spécifiée"
-            },
-            "compétences": {
-                "hard_skills": ["Python", "FastAPI", "Data Analysis"],
-                "soft_skills": ["Communication", "Travail d'équipe", "Adaptabilité"]
-            },
-            "expériences": [
-                {
-                    "Poste": "Développeur",
-                    "Entreprise": "Entreprise Test",
-                    "start_date": "2022",
-                    "end_date": "Aujourd'hui",
-                    "responsabilités": ["Développement d'applications", "Maintenance du code"]
-                }
-            ],
-            "projets": {
-                "professional": [
-                    {
-                        "title": "Projet Test",
-                        "role": "Développeur principal",
-                        "technologies": ["Python", "FastAPI"],
-                        "outcomes": ["Application fonctionnelle"]
-                    }
-                ],
-                "personal": []
-            },
-            "formations": [
-                {
-                    "degree": "Formation en Informatique",
-                    "institution": "École Test",
-                    "start_date": "2020",
-                    "end_date": "2022"
-                }
-            ],
-            "reconversion": {
-                "is_reconversion": False,
-                "analysis": "Pas de reconversion détectée - données de test"
-            }
-        }
-    }
-# Test des imports au chargement du module
 if __name__ == "__main__":
     logger.info("Test du module cv_parsing_agents")
     logger.info(f"CREW_POOL_AVAILABLE: {CREW_POOL_AVAILABLE}")
-    logger.info(f"CONFIG_AVAILABLE: {CONFIG_AVAILABLE}")
-    # Test de création d'une instance
-    try:
-        agent = CvParserAgent("/tmp/test.pdf")
-        logger.info("✅ CvParserAgent créé avec succès")
-    except Exception as e:
-        logger.error(f"❌ Erreur création CvParserAgent: {e}")
-    # Test des données de fallback
-    fallback_data = create_fallback_cv_data()
-    logger.info(f"✅ Données de fallback créées: {len(fallback_data)} clés")

     Cette classe traite un fichier PDF de CV et en extrait les informations
     structurées (compétences, expériences, formations, etc.)
     """
     def __init__(self, pdf_path: str):
         """Store the path of the CV PDF and warn about unavailable dependencies.

         Args:
             pdf_path: Path to the PDF file to parse; must be a non-empty string.

         Raises:
             ValueError: If ``pdf_path`` is empty or is not a string.
         """
         path_is_valid = bool(pdf_path) and isinstance(pdf_path, str)
         if not path_is_valid:
             raise ValueError("Le chemin du fichier PDF doit être une chaîne non vide")
         self.pdf_path = pdf_path

         # Unavailable dependencies are only logged here; construction still succeeds.
         degraded_mode_notices = (
             (CREW_POOL_AVAILABLE, "CrewAI crew_pool non disponible - mode dégradé"),
             (CONFIG_AVAILABLE, "Module config non disponible - mode dégradé"),
         )
         for is_available, notice in degraded_mode_notices:
             if not is_available:
                 logger.warning(notice)
     def process(self) -> dict:
         logger.info(f"Début du traitement du CV : {self.pdf_path}")
         if not os.path.exists(self.pdf_path):
             logger.error(f"Fichier PDF non trouvé: {self.pdf_path}")
             return self._create_fallback_data()
         if not CREW_POOL_AVAILABLE or not CONFIG_AVAILABLE:
             logger.error("Dépendances manquantes pour le traitement complet")
             return self._create_fallback_data()
         try:
             cv_text_content = load_pdf(self.pdf_path)
             if not cv_text_content or not cv_text_content.strip():
                 logger.error("Le PDF semble vide ou illisible")
                 return self._create_fallback_data()
             logger.info(f"PDF chargé, {len(cv_text_content)} caractères extraits")
             crew_output = analyse_cv(cv_text_content)
             if not crew_output or not hasattr(crew_output, 'raw') or not crew_output.raw.strip():
             return self._create_fallback_data()
     def _clean_json_string(self, raw_string: str) -> str:
         json_string_cleaned = raw_string.strip()
         if '```' in raw_string:
             try:
                 if '```json' in raw_string:
                     json_part = raw_string.split('```json')[1].split('```')[0]
                     json_string_cleaned = json_part.strip()
                 else:
                     parts = raw_string.split('```')
                     if len(parts) >= 3:
                         json_string_cleaned = parts[1].strip()
         return json_string_cleaned
 if __name__ == "__main__":
     logger.info("Test du module cv_parsing_agents")
     logger.info(f"CREW_POOL_AVAILABLE: {CREW_POOL_AVAILABLE}")
+    logger.info(f"CONFIG_AVAILABLE: {CONFIG_AVAILABLE}")