from fastapi import FastAPI
from pydantic import BaseModel
from typing import List
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# -----------------------
# Load model ONCE (important for performance)
# -----------------------
# Instantiated at import time so every request reuses the same model object
# instead of reloading it per call.
model = SentenceTransformer("all-MiniLM-L6-v2")

app = FastAPI(
    title="CV Matching API",
    description="Rank CVs against a Job Description using BERT embeddings",
    version="1.0"
)


# -----------------------
# Request schema
# -----------------------
class MatchRequest(BaseModel):
    # Free-text job description that every CV is compared against.
    job_description: str
    # Raw CV texts to be ranked; may be empty.
    cvs: List[str]


# -----------------------
# Response schema
# -----------------------
class CVScore(BaseModel):
    # The CV text exactly as it was submitted (not the cleaned form).
    cv_text: str
    # Cosine similarity between the job description and this CV.
    relevance_score: float


class MatchResponse(BaseModel):
    # One entry per submitted CV, ordered by descending relevance_score.
    results: List[CVScore]


# -----------------------
# Utility: text cleaning (optional but recommended)
# -----------------------
def clean_text(text: str) -> str:
    """Return *text* with newlines replaced by spaces, trimmed and lowercased."""
    return text.replace("\n", " ").strip().lower()


# -----------------------
# API endpoint
# -----------------------
@app.post("/match", response_model=MatchResponse)
def match_cvs(request: MatchRequest) -> MatchResponse:
    """Rank the submitted CVs by similarity to the job description.

    Both the job description and the CVs are cleaned with ``clean_text``,
    embedded with the module-level model, and compared using cosine
    similarity.

    Args:
        request: The job description and the list of CV texts to rank.

    Returns:
        A ``MatchResponse`` whose ``results`` hold the original CV texts
        paired with their scores, sorted by score in descending order.
        An empty ``cvs`` list yields an empty ``results`` list.
    """
    # Nothing to rank: return early rather than handing an empty batch to
    # the encoder and cosine_similarity, which rejects empty input.
    if not request.cvs:
        return MatchResponse(results=[])

    # Clean input
    jd = clean_text(request.job_description)
    cleaned_cvs = [clean_text(cv) for cv in request.cvs]

    # Embed the job description (as a one-element batch) and the CVs
    jd_embedding = model.encode([jd])
    cv_embeddings = model.encode(cleaned_cvs)

    # Row 0 holds the job description's similarity to each CV, in input order
    scores = cosine_similarity(jd_embedding, cv_embeddings)[0]

    # Pair each ORIGINAL (uncleaned) CV text with its score and sort by
    # relevance, highest first; sorted() is stable, so ties keep input order.
    ranked = sorted(
        (
            CVScore(cv_text=cv_text, relevance_score=float(score))
            for cv_text, score in zip(request.cvs, scores)
        ),
        key=lambda item: item.relevance_score,
        reverse=True,
    )

    return MatchResponse(results=ranked)