Spaces:
Sleeping
Sleeping
| """ | |
| Module pour le parsing de CV avec CrewAI | |
| """ | |
| import os | |
| import json | |
| import logging | |
# Module-level logger shared by everything defined in this file.
logger = logging.getLogger(__name__)

# Optional dependency: the crew entry point that analyses the CV text.
# Only the import itself sits in the ``try`` body, so an ImportError can only
# come from the import; the success path lives in ``else``.
try:
    from src.crew.crew_pool import analyse_cv
except ImportError as e:
    logger.error(f"❌ Erreur import crew_pool: {e}")
    CREW_POOL_AVAILABLE = False
    analyse_cv = None  # placeholder so the name always exists at module level
else:
    CREW_POOL_AVAILABLE = True
    logger.info("✅ crew_pool importé avec succès")

# Optional dependency: the helper that extracts the text of a PDF file.
try:
    from src.config import load_pdf
except ImportError as e:
    logger.error(f"❌ Erreur import config: {e}")
    CONFIG_AVAILABLE = False
    load_pdf = None  # placeholder so the name always exists at module level
else:
    CONFIG_AVAILABLE = True
    logger.info("✅ config importé avec succès")
def clean_dict_keys(data):
    """Return a copy of ``data`` in which every dictionary key is a string.

    Dictionaries, lists and tuples are walked recursively; any other value is
    returned unchanged. Keys are converted with ``str``, so two keys that share
    the same string form (for example ``1`` and ``"1"``) collapse into one.

    Args:
        data: Any value, typically the result of ``json.loads``.

    Returns:
        A new dict/list/tuple mirroring ``data`` with string keys, or ``data``
        itself when it is not one of those containers.
    """
    if isinstance(data, dict):
        return {str(key): clean_dict_keys(value) for key, value in data.items()}
    if isinstance(data, list):
        return [clean_dict_keys(item) for item in data]
    if isinstance(data, tuple):
        # Tuples are containers too: a dict nested in one must not be skipped.
        return tuple(clean_dict_keys(item) for item in data)
    return data
class CvParserAgent:
    """Turn a CV stored as a PDF file into a profile dictionary.

    The PDF text is read with ``load_pdf``, analysed with ``analyse_cv`` and
    the raw answer is decoded as JSON. Every failure path of ``process`` logs
    the problem and returns the value of ``_create_fallback_data``.
    """

    def __init__(self, pdf_path: str):
        """Store the path of the CV to parse.

        Args:
            pdf_path: Path of the PDF file; must be a non-empty string.

        Raises:
            ValueError: If ``pdf_path`` is empty or is not a string.
        """
        if not pdf_path or not isinstance(pdf_path, str):
            raise ValueError("Le chemin du fichier PDF doit être une chaîne non vide")
        self.pdf_path = pdf_path

        # Missing dependencies are only reported here; ``process`` is the
        # place where they actually short-circuit the work.
        if not CREW_POOL_AVAILABLE:
            logger.warning("CrewAI crew_pool non disponible - mode dégradé")
        if not CONFIG_AVAILABLE:
            logger.warning("Module config non disponible - mode dégradé")

    def process(self) -> dict:
        """Parse the CV and return the extracted profile.

        Returns:
            The decoded profile with every key converted to ``str``, or the
            fallback data when the file does not exist, a dependency failed to
            import, the PDF yields no text, the crew returns nothing usable,
            the answer is not valid JSON, or the JSON is not an object.
        """
        logger.info(f"Début du traitement du CV : {self.pdf_path}")

        if not os.path.exists(self.pdf_path):
            logger.error(f"Fichier PDF non trouvé: {self.pdf_path}")
            return self._create_fallback_data()

        if not CREW_POOL_AVAILABLE or not CONFIG_AVAILABLE:
            logger.error("Dépendances manquantes pour le traitement complet")
            return self._create_fallback_data()

        try:
            cv_text_content = load_pdf(self.pdf_path)
            if not cv_text_content or not cv_text_content.strip():
                logger.error("Le PDF semble vide ou illisible")
                return self._create_fallback_data()
            logger.info(f"PDF chargé, {len(cv_text_content)} caractères extraits")

            crew_output = analyse_cv(cv_text_content)
            # A missing or non-string ``raw`` is treated as "no result" rather
            # than left to fail on ``.strip()``.
            raw_string = getattr(crew_output, "raw", None) if crew_output else None
            if not isinstance(raw_string, str) or not raw_string.strip():
                logger.error("L'analyse par le crew n'a pas retourné de résultat.")
                return self._create_fallback_data()
            logger.info(f"Résultat brut du crew: {raw_string[:200]}...")

            # Narrow ``try``: only the decoding step can raise JSONDecodeError,
            # and ``raw_string`` is guaranteed to be bound in the handler.
            try:
                profile_data = json.loads(self._clean_json_string(raw_string))
            except json.JSONDecodeError as e:
                logger.error(f"Erreur de décodage JSON : {e}")
                logger.error(f"Données brutes reçues : {raw_string}")
                return self._create_fallback_data()

            # The method is annotated ``-> dict``; a JSON array or scalar
            # would break callers that index the profile by key.
            if not isinstance(profile_data, dict):
                logger.error(
                    "Le JSON retourné par le crew n'est pas un objet "
                    f"(type reçu : {type(profile_data).__name__})"
                )
                return self._create_fallback_data()

            logger.info("Parsing JSON réussi")
            return clean_dict_keys(profile_data)
        except Exception as e:
            # Boundary handler: whatever the PDF loader or the crew raises,
            # the caller still receives the fallback data.
            logger.error(f"Erreur inattendue dans CvParserAgent : {e}", exc_info=True)
            return self._create_fallback_data()

    def _create_fallback_data(self) -> dict:
        """Return the profile used whenever the CV cannot be parsed.

        NOTE(review): ``process`` calls this method on every failure path, but
        no definition was visible in the class, so each failure ended in an
        ``AttributeError``. No fallback schema is visible either; an empty
        dict keeps the ``dict`` return type of ``process``. Replace it with
        the project's default profile if downstream code expects one.
        """
        return {}

    def _clean_json_string(self, raw_string: str) -> str:
        """Extract the JSON payload from a raw answer that may be fenced.

        Without any code fence the stripped text is returned as is. Otherwise
        the content of the first block tagged ``json`` (any letter case) is
        returned, or, when no block carries that tag, the content of the first
        fenced block with a leading language tag removed. An unclosed block
        runs to the end of the text.

        Args:
            raw_string: Raw text returned by the crew.

        Returns:
            The stripped candidate JSON text; it is not validated here.
        """
        fence = "```"
        text = raw_string.strip()
        opening = text.find(fence)
        if opening == -1:
            return text

        # Look for a fence immediately followed by "json"; only the four
        # characters after the fence are lower-cased, so indices stay valid.
        json_tagged = False
        cursor = opening
        while cursor != -1:
            if text[cursor + 3:cursor + 7].lower() == "json":
                opening, json_tagged = cursor, True
                break
            cursor = text.find(fence, cursor + 3)

        body = text[opening + (7 if json_tagged else 3):]
        if not json_tagged:
            # Drop a language tag other than "json" (e.g. "```python") so it
            # does not end up at the start of the payload.
            first_line, newline, remainder = body.partition("\n")
            if newline and first_line.strip().isalnum():
                body = remainder

        # Stop at the closing fence when there is one.
        body = body.partition(fence)[0].strip()
        if body:
            return body

        # Nothing usable after the opening fence (e.g. a stray trailing
        # fence): fall back to the text with the fence markers removed.
        logger.warning("Format de code block détecté mais mal formé")
        return text.replace(fence, "").strip()
if __name__ == "__main__":
    # Quick self-test: report which optional dependencies were importable.
    # Without a configured handler the INFO records below are dropped
    # (logging's last-resort handler only emits WARNING and above), so the
    # script would print nothing. basicConfig is a no-op if the root logger
    # already has handlers.
    logging.basicConfig(level=logging.INFO)
    logger.info("Test du module cv_parsing_agents")
    logger.info(f"CREW_POOL_AVAILABLE: {CREW_POOL_AVAILABLE}")
    logger.info(f"CONFIG_AVAILABLE: {CONFIG_AVAILABLE}")