New version
Browse files- api/main.py +176 -37
api/main.py
CHANGED
|
@@ -9,6 +9,7 @@ from contextlib import asynccontextmanager
|
|
| 9 |
import logging
|
| 10 |
import numpy as np
|
| 11 |
import re
|
|
|
|
| 12 |
from typing import List, Dict, Optional
|
| 13 |
from pathlib import Path
|
| 14 |
|
|
@@ -180,50 +181,101 @@ def generate_explanation(offer_text: str, profile_row: pd.Series,
|
|
| 180 |
strengths = []
|
| 181 |
weaknesses = []
|
| 182 |
|
| 183 |
-
#
|
| 184 |
required_skills = extract_skills_from_text(offer_text)
|
| 185 |
-
profile_skills = normalize_skills(profile_row["hard_skills"])
|
| 186 |
|
| 187 |
-
#
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
#
|
| 197 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
|
| 199 |
# Analyser l'expérience
|
| 200 |
exp_years = int(profile_row["exp_years"])
|
| 201 |
-
if exp_years >=
|
|
|
|
|
|
|
| 202 |
strengths.append(f"Expérience solide ({exp_years} ans)")
|
| 203 |
elif exp_years >= 3:
|
| 204 |
strengths.append(f"Bonne expérience ({exp_years} ans)")
|
| 205 |
else:
|
| 206 |
strengths.append(f"Profil junior ({exp_years} ans d'expérience)")
|
| 207 |
|
| 208 |
-
#
|
| 209 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
strengths.append(f"Localisation : {profile_row['localisation']}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
|
| 212 |
-
#
|
| 213 |
-
if
|
| 214 |
-
|
|
|
|
|
|
|
|
|
|
| 215 |
|
| 216 |
-
#
|
| 217 |
-
if
|
| 218 |
-
|
| 219 |
|
| 220 |
-
#
|
| 221 |
-
if
|
| 222 |
strengths.append("Profil correspondant aux critères généraux")
|
| 223 |
|
| 224 |
-
if len(weaknesses) == 0:
|
| 225 |
-
weaknesses.append("Profil très bien adapté à l'offre")
|
| 226 |
-
|
| 227 |
return MatchExplanation(
|
| 228 |
strengths=strengths[:5], # Limiter à 5 points forts
|
| 229 |
weaknesses=weaknesses[:3], # Limiter à 3 points faibles
|
|
@@ -277,7 +329,7 @@ app.add_middleware(
|
|
| 277 |
|
| 278 |
# --- Modèles Pydantic (pour la validation des requêtes) ---
|
| 279 |
class MatchRequest(BaseModel):
|
| 280 |
-
offer_text: str
|
| 281 |
top_k: int = 7
|
| 282 |
|
| 283 |
class ProfileResult(BaseModel):
|
|
@@ -298,6 +350,7 @@ def match_offer_sync(offer_text: str, top_k: int = 7, with_explanation: bool = T
|
|
| 298 |
"""
|
| 299 |
Fonction de matching synchrone avec pondération (50% skills + 50% expérience).
|
| 300 |
"""
|
|
|
|
| 301 |
if "model" not in ml_models or "faiss_index" not in ml_models or "profiles" not in ml_models:
|
| 302 |
raise HTTPException(status_code=503, detail="Les modèles ne sont pas encore prêts. Veuillez réessayer dans quelques instants.")
|
| 303 |
|
|
@@ -305,6 +358,12 @@ def match_offer_sync(offer_text: str, top_k: int = 7, with_explanation: bool = T
|
|
| 305 |
index = ml_models["faiss_index"]
|
| 306 |
df_profiles = ml_models["profiles"]
|
| 307 |
skills_embeddings = ml_models.get("skills_embeddings")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 308 |
|
| 309 |
# Extraire les compétences et l'expérience de l'offre
|
| 310 |
required_skills = extract_skills_from_text(offer_text)
|
|
@@ -404,6 +463,27 @@ def match_offer_sync(offer_text: str, top_k: int = 7, with_explanation: bool = T
|
|
| 404 |
search_k = min(top_k * 5, len(df_profiles)) # Chercher plus large pool (5x top_k)
|
| 405 |
distances, indices = index.search(offer_emb, search_k)
|
| 406 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 407 |
# Calculer des attributs de matching pour chaque profil
|
| 408 |
candidates = []
|
| 409 |
for i, idx in enumerate(indices[0]):
|
|
@@ -416,6 +496,14 @@ def match_offer_sync(offer_text: str, top_k: int = 7, with_explanation: bool = T
|
|
| 416 |
if s and s in txt:
|
| 417 |
skills_match_count += 1
|
| 418 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 419 |
# role/title match: vérifier titre profil (`poste_recherche`) + texte complet
|
| 420 |
role_match = False
|
| 421 |
if reqs['role']:
|
|
@@ -502,14 +590,38 @@ def match_offer_sync(offer_text: str, top_k: int = 7, with_explanation: bool = T
|
|
| 502 |
'profile_exp': profile_exp
|
| 503 |
})
|
| 504 |
|
| 505 |
-
#
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 511 |
|
| 512 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 513 |
candidates.sort(key=lambda c: -c.get('profile').score)
|
| 514 |
|
| 515 |
# Retourner les top_k profils
|
|
@@ -519,14 +631,41 @@ def match_offer_sync(offer_text: str, top_k: int = 7, with_explanation: bool = T
|
|
| 519 |
@app.get("/")
|
| 520 |
def read_root():
|
| 521 |
return {"message": "Bienvenue sur l'API de Matching IA"}
|
| 522 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 523 |
@app.post("/match", response_model=MatchResponse)
|
| 524 |
async def match_endpoint(request: MatchRequest):
|
| 525 |
"""
|
| 526 |
Endpoint pour trouver les meilleurs profils correspondant à une offre.
|
|
|
|
| 527 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 528 |
try:
|
| 529 |
-
results = match_offer_sync(
|
| 530 |
return MatchResponse(results=results)
|
| 531 |
except HTTPException as e:
|
| 532 |
# Propage l'exception HTTP si les modèles ne sont pas prêts
|
|
|
|
| 9 |
import logging
|
| 10 |
import numpy as np
|
| 11 |
import re
|
| 12 |
+
import ast # For safe evaluation of string-represented lists
|
| 13 |
from typing import List, Dict, Optional
|
| 14 |
from pathlib import Path
|
| 15 |
|
|
|
|
| 181 |
strengths = []
|
| 182 |
weaknesses = []
|
| 183 |
|
| 184 |
+
# Extract required skills from offer_text
|
| 185 |
required_skills = extract_skills_from_text(offer_text)
|
|
|
|
| 186 |
|
| 187 |
+
# Safely parse profile hard skills (which might be a string representation of a list)
|
| 188 |
+
# Parse the profile's hard skills into a list of raw skill strings.
# The stored value may be a real collection, a string holding a Python list
# literal ("['python', 'sql']"), or a plain comma-separated string.
profile_hard_skills_str = profile_row["hard_skills"]
if isinstance(profile_hard_skills_str, (list, tuple, set, np.ndarray)):
    # Already a collection: literal_eval would reject it and the old fallback
    # would then call .split() on a non-string.
    profile_skills_list = list(profile_hard_skills_str)
elif isinstance(profile_hard_skills_str, str):
    try:
        profile_skills_list = ast.literal_eval(profile_hard_skills_str)
    except (ValueError, SyntaxError, TypeError):
        # Not a valid Python literal (e.g. "Python, C++"): use the comma split below.
        profile_skills_list = None
    if not isinstance(profile_skills_list, list):
        profile_skills_list = [s.strip() for s in profile_hard_skills_str.split(',')]
else:
    # Missing value (None / NaN): treat as "no skills listed" instead of crashing.
    profile_skills_list = []
|
| 195 |
+
|
| 196 |
+
# Normalize profile skills for comparison
|
| 197 |
+
profile_skills_normalized = []
|
| 198 |
+
for skill_item in profile_skills_list:
|
| 199 |
+
profile_skills_normalized.extend(normalize_skills(skill_item))
|
| 200 |
+
profile_skills_normalized = list(set(profile_skills_normalized)) # Remove duplicates
|
| 201 |
+
|
| 202 |
+
# Analyze skills
|
| 203 |
+
matched_skills = []
|
| 204 |
+
missing_skills = []
|
| 205 |
+
|
| 206 |
+
for req_skill in required_skills:
|
| 207 |
+
found = False
|
| 208 |
+
for prof_skill in profile_skills_normalized:
|
| 209 |
+
# Check for exact match or substring match (e.g., 'python' in 'python_django')
|
| 210 |
+
if req_skill == prof_skill or req_skill in prof_skill or prof_skill in req_skill:
|
| 211 |
+
matched_skills.append(req_skill)
|
| 212 |
+
found = True
|
| 213 |
+
break
|
| 214 |
+
if not found:
|
| 215 |
+
missing_skills.append(req_skill)
|
| 216 |
+
|
| 217 |
+
if matched_skills:
|
| 218 |
+
strengths.append(f"Maîtrise de : {', '.join(list(set(matched_skills))[:5])}") # Use set to avoid duplicates
|
| 219 |
+
|
| 220 |
+
if missing_skills:
|
| 221 |
+
weaknesses.append(f"Compétences à développer : {', '.join(list(set(missing_skills))[:3])}")
|
| 222 |
|
| 223 |
# Analyser l'expérience
|
| 224 |
exp_years = int(profile_row["exp_years"])
|
| 225 |
+
if exp_years >= 10: # More specific thresholds for "solid" vs "good"
|
| 226 |
+
strengths.append(f"Expérience très solide ({exp_years} ans)")
|
| 227 |
+
elif exp_years >= 5:
|
| 228 |
strengths.append(f"Expérience solide ({exp_years} ans)")
|
| 229 |
elif exp_years >= 3:
|
| 230 |
strengths.append(f"Bonne expérience ({exp_years} ans)")
|
| 231 |
else:
|
| 232 |
strengths.append(f"Profil junior ({exp_years} ans d'expérience)")
|
| 233 |
|
| 234 |
+
# Analyze location, mobility, availability based on offer text
|
| 235 |
+
offer_text_lower = offer_text.lower()
|
| 236 |
+
profile_location_lower = profile_row['localisation'].lower()
|
| 237 |
+
|
| 238 |
+
# Location
|
| 239 |
+
loc_required_match = re.search(r"(?:à|au|basé à|depuis)\s+([A-Za-zÀ-ÖØ-öø-ÿ\s\-']{2,})", offer_text_lower, flags=re.IGNORECASE)
|
| 240 |
+
if loc_required_match:
|
| 241 |
+
loc_required_str = loc_required_match.group(1).strip()
|
| 242 |
+
if ',' in loc_required_str:
|
| 243 |
+
loc_required_str = loc_required_str.split(',')[0].strip() # Take first part if comma separated
|
| 244 |
+
if loc_required_str in profile_location_lower:
|
| 245 |
+
strengths.append(f"Localisation : {profile_row['localisation']}")
|
| 246 |
+
else:
|
| 247 |
+
weaknesses.append(f"Localisation différente de l'offre ({profile_row['localisation']})")
|
| 248 |
+
elif "localisation" in offer_text_lower or "localisé" in offer_text_lower or "basé" in offer_text_lower:
|
| 249 |
+
# If offer mentions location generally, and profile has one
|
| 250 |
strengths.append(f"Localisation : {profile_row['localisation']}")
|
| 251 |
+
|
| 252 |
+
# Mobility
|
| 253 |
+
if "mobile" in offer_text_lower or "déplacement" in offer_text_lower:
|
| 254 |
+
if profile_row.get("mobilite") == "Mobile":
|
| 255 |
+
strengths.append("Ouvert à la mobilité")
|
| 256 |
+
else:
|
| 257 |
+
weaknesses.append("Mobilité non compatible avec l'offre")
|
| 258 |
+
elif "télétravail" in offer_text_lower or "remote" in offer_text_lower:
|
| 259 |
+
if profile_row.get("mobilite") == "Ouvert au télétravail":
|
| 260 |
+
strengths.append("Ouvert au télétravail")
|
| 261 |
+
else:
|
| 262 |
+
weaknesses.append("Télétravail non compatible avec l'offre")
|
| 263 |
|
| 264 |
+
# Availability
|
| 265 |
+
if "immédiatement" in offer_text_lower or "disponible de suite" in offer_text_lower:
|
| 266 |
+
if profile_row.get("disponibilite") == "Immédiate":
|
| 267 |
+
strengths.append("Disponibilité immédiate")
|
| 268 |
+
else:
|
| 269 |
+
weaknesses.append(f"Disponibilité ({profile_row['disponibilite']}) non immédiate")
|
| 270 |
|
| 271 |
+
# If no specific weaknesses found, but overall score is not perfect, add a general one
|
| 272 |
+
if not weaknesses and (skills_score < 0.9 or exp_score < 0.9): # Threshold for "very good match"
|
| 273 |
+
weaknesses.append("Quelques légers écarts de compétences ou d'expérience")
|
| 274 |
|
| 275 |
+
# If no specific strengths were found, add a generic one
|
| 276 |
+
if not strengths:
|
| 277 |
strengths.append("Profil correspondant aux critères généraux")
|
| 278 |
|
|
|
|
|
|
|
|
|
|
| 279 |
return MatchExplanation(
|
| 280 |
strengths=strengths[:5], # Limiter à 5 points forts
|
| 281 |
weaknesses=weaknesses[:3], # Limiter à 3 points faibles
|
|
|
|
| 329 |
|
| 330 |
# --- Modèles Pydantic (pour la validation des requêtes) ---
|
| 331 |
# Request body for the matching endpoint: an optional free-text offer plus
# the number of profiles to return (defaults to 7).
class MatchRequest(BaseModel):
    offer_text: Optional[str] = None  # Now optional
    top_k: int = 7
|
| 334 |
|
| 335 |
class ProfileResult(BaseModel):
|
|
|
|
| 350 |
"""
|
| 351 |
Fonction de matching synchrone avec pondération (50% skills + 50% expérience).
|
| 352 |
"""
|
| 353 |
+
|
| 354 |
if "model" not in ml_models or "faiss_index" not in ml_models or "profiles" not in ml_models:
|
| 355 |
raise HTTPException(status_code=503, detail="Les modèles ne sont pas encore prêts. Veuillez réessayer dans quelques instants.")
|
| 356 |
|
|
|
|
| 358 |
index = ml_models["faiss_index"]
|
| 359 |
df_profiles = ml_models["profiles"]
|
| 360 |
skills_embeddings = ml_models.get("skills_embeddings")
|
| 361 |
+
|
| 362 |
+
# Get digital job titles for filtering (Suggestion 4)
|
| 363 |
+
# "metiers_digital" is looked up with .get(), so it is None when the key is
# absent; check for None before reading DataFrame attributes such as .empty.
df_metiers = ml_models.get("metiers_digital")
digital_job_titles = []
if df_metiers is not None and not df_metiers.empty:
    # Lower-cased, de-duplicated job titles used by the digital-profession filter.
    digital_job_titles = df_metiers["Poste"].astype(str).str.lower().unique().tolist()
|
| 367 |
|
| 368 |
# Extraire les compétences et l'expérience de l'offre
|
| 369 |
required_skills = extract_skills_from_text(offer_text)
|
|
|
|
| 463 |
search_k = min(top_k * 5, len(df_profiles)) # Chercher plus large pool (5x top_k)
|
| 464 |
distances, indices = index.search(offer_emb, search_k)
|
| 465 |
|
| 466 |
+
# Extract specific requirements from offer_text for post-filtering (Suggestion 3)
|
| 467 |
+
offer_text_lower = offer_text.lower()
|
| 468 |
+
|
| 469 |
+
loc_required = None
|
| 470 |
+
loc_match_patterns = [
|
| 471 |
+
r"(?:à|au|basé à|depuis)\s+([A-Za-zÀ-ÖØ-öø-ÿ\s\-']{2,})",
|
| 472 |
+
r"localisé\s+en\s+([A-Za-zÀ-ÖØ-öø-ÿ\s\-']{2,})",
|
| 473 |
+
r"localisé\s+à\s+([A-Za-zÀ-ÖØ-öø-ÿ\s\-']{2,})"
|
| 474 |
+
]
|
| 475 |
+
for pattern in loc_match_patterns:
|
| 476 |
+
m = re.search(pattern, offer_text_lower, flags=re.IGNORECASE)
|
| 477 |
+
if m:
|
| 478 |
+
loc_required = m.group(1).strip()
|
| 479 |
+
if ',' in loc_required:
|
| 480 |
+
loc_required = loc_required.split(',')[0].strip()
|
| 481 |
+
break
|
| 482 |
+
|
| 483 |
+
mobil_required_offer = "mobile" in offer_text_lower or "déplacement" in offer_text_lower
|
| 484 |
+
telework_allowed_offer = "télétravail" in offer_text_lower or "remote" in offer_text_lower
|
| 485 |
+
immediate_required_offer = "immédiatement" in offer_text_lower or "disponible de suite" in offer_text_lower
|
| 486 |
+
|
| 487 |
# Calculer des attributs de matching pour chaque profil
|
| 488 |
candidates = []
|
| 489 |
for i, idx in enumerate(indices[0]):
|
|
|
|
| 496 |
if s and s in txt:
|
| 497 |
skills_match_count += 1
|
| 498 |
|
| 499 |
+
# --- Digital profession filter (Suggestion 4) ---
|
| 500 |
+
profile_job_title = str(row.get('poste_recherche', '')).lower()
|
| 501 |
+
# If the profile's stated job title is not in the digital jobs list, skip this profile.
|
| 502 |
+
if digital_job_titles and profile_job_title and profile_job_title not in digital_job_titles:
|
| 503 |
+
continue # Skip this profile, it's not a digital profession
|
| 504 |
+
elif not digital_job_titles and profile_job_title: # If no digital jobs list, but profile has a job title, try to infer
|
| 505 |
+
skills_match_count += 1
|
| 506 |
+
|
| 507 |
# role/title match: vérifier titre profil (`poste_recherche`) + texte complet
|
| 508 |
role_match = False
|
| 509 |
if reqs['role']:
|
|
|
|
| 590 |
'profile_exp': profile_exp
|
| 591 |
})
|
| 592 |
|
| 593 |
+
# Apply post-matching filters (Suggestion 3)
|
| 594 |
+
filtered_candidates = []
|
| 595 |
+
for cand_data in candidates:
|
| 596 |
+
profile_row = df_profiles.iloc[cand_data['profile'].id - 1] # Assuming IDs are 1-indexed and match df index + 1
|
| 597 |
+
|
| 598 |
+
# Location filter
|
| 599 |
+
if loc_required:
|
| 600 |
+
profile_location_lower = profile_row['localisation'].lower()
|
| 601 |
+
if loc_required not in profile_location_lower:
|
| 602 |
+
continue # Skip if required location is not in profile's location
|
| 603 |
+
|
| 604 |
+
# Mobility filter
|
| 605 |
+
if mobil_required_offer and profile_row.get('mobilite') == "Pas mobile":
|
| 606 |
+
continue # Skip if mobility is required but profile is not mobile
|
| 607 |
+
|
| 608 |
+
if telework_allowed_offer and profile_row.get('mobilite') != "Ouvert au télétravail":
|
| 609 |
+
# If telework is explicitly mentioned in offer, and profile doesn't allow it, filter
|
| 610 |
+
continue
|
| 611 |
+
|
| 612 |
+
# Availability filter
|
| 613 |
+
if immediate_required_offer and profile_row.get('disponibilite') != "Immédiate":
|
| 614 |
+
continue # Skip if immediate availability is required but profile is not
|
| 615 |
|
| 616 |
+
filtered_candidates.append(cand_data)
|
| 617 |
+
|
| 618 |
+
# Sort the filtered candidates by profile score, in descending order
|
| 619 |
+
filtered_candidates.sort(key=lambda c: -c.get('profile').score)
|
| 620 |
+
|
| 621 |
+
# Return the top_k from the filtered list
|
| 622 |
+
return [c['profile'] for c in filtered_candidates[:top_k]]
|
| 623 |
+
|
| 624 |
+
# NOTE(review): old sorting logic; it appears unreachable because the filtered results are returned just above. Delete once confirmed.
|
| 625 |
candidates.sort(key=lambda c: -c.get('profile').score)
|
| 626 |
|
| 627 |
# Retourner les top_k profils
|
|
|
|
| 631 |
@app.get("/")
def read_root():
    # Root route: answers with a static welcome payload.
    welcome_payload = {"message": "Bienvenue sur l'API de Matching IA"}
    return welcome_payload
|
| 634 |
+
|
| 635 |
+
# Suggestion 1: Add Support for Structured Offers in JSON
|
| 636 |
+
# Request payload for POST /match. A caller may send a free-text offer
# (offer_text) or structured criteria; every field is optional at the schema level.
# NOTE(review): a MatchRequest model is also declared earlier in this file;
# this later declaration rebinds the name. Confirm which one is intended.
class MatchRequest(BaseModel):
    # Free-text offer description (original field, now optional).
    offer_text: Optional[str] = None
    # Structured criteria; the field names are the JSON keys callers send.
    Poste: Optional[str] = None
    Compétences_techniques: Optional[List[str]] = None
    Expérience_requise: Optional[str] = None
    Localisation: Optional[str] = None
    Type_de_contrat: Optional[str] = None
    Salaire: Optional[str] = None
    # Number of profiles requested.
    top_k: int = 7
|
| 645 |
+
|
| 646 |
@app.post("/match", response_model=MatchResponse)
|
| 647 |
async def match_endpoint(request: MatchRequest):
|
| 648 |
"""
|
| 649 |
Endpoint pour trouver les meilleurs profils correspondant à une offre.
|
| 650 |
+
Supporte les requêtes en texte libre (offer_text) ou structurées en JSON.
|
| 651 |
"""
|
| 652 |
+
query_text = request.offer_text
|
| 653 |
+
if not query_text: # If offer_text is not provided, construct it from structured fields
|
| 654 |
+
parts = []
|
| 655 |
+
if request.Poste: parts.append(f"Poste: {request.Poste}")
|
| 656 |
+
if request.Compétences_techniques: parts.append(f"Compétences techniques: {', '.join(request.Compétences_techniques)}")
|
| 657 |
+
if request.Expérience_requise: parts.append(f"Expérience requise: {request.Expérience_requise}")
|
| 658 |
+
if request.Localisation: parts.append(f"Localisation: {request.Localisation}")
|
| 659 |
+
if request.Type_de_contrat: parts.append(f"Type de contrat: {request.Type_de_contrat}")
|
| 660 |
+
if request.Salaire: parts.append(f"Salaire: {request.Salaire}")
|
| 661 |
+
|
| 662 |
+
if not parts:
|
| 663 |
+
raise HTTPException(status_code=400, detail="Veuillez fournir une description ou au moins un critère de recherche.")
|
| 664 |
+
|
| 665 |
+
query_text = ". ".join(parts)
|
| 666 |
+
|
| 667 |
try:
|
| 668 |
+
results = match_offer_sync(query_text, request.top_k)
|
| 669 |
return MatchResponse(results=results)
|
| 670 |
except HTTPException as e:
|
| 671 |
# Propage l'exception HTTP si les modèles ne sont pas prêts
|