| """FastAPI server for the Resume Analysis and Matching System.""" |
|
|
| import os |
| import re |
| import subprocess |
| import json |
| import shutil |
| import subprocess |
| import json |
| import tempfile |
| import uvicorn |
| import ollama |
| from fastapi import FastAPI, File, UploadFile, HTTPException |
| from fastapi.middleware.cors import CORSMiddleware |
| from sentence_transformers import SentenceTransformer |
| from typing import List |
|
|
| |
| from CHROMA_DB.collections import ChromaDBManager |
| from main import extract_job_description, index_directory |
|
|
|
|
| |
|
|
# Disable Hugging Face tokenizers parallelism before the model is used.
# The key must be spelled exactly "TOKENIZERS_PARALLELISM"; the previous
# misspelling set an unrelated variable, so the setting never took effect.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# The embedding model is loaded once at import time and shared by the
# endpoints in this module.
print("Loading embedding model...")
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
print("Model loaded.")

# Default ChromaDBManager used by the indexing and embedding-lookup endpoints.
main_chroma_manager = ChromaDBManager()
|
|
|
|
# Application object; the metadata below is what FastAPI shows in its
# generated API docs.
app = FastAPI(
    version="0.1.0",
    title="Resume Analysis and Matching System",
    description="An API for matching resumes to job descriptions using a RAG architecture.",
)

# CORS policy: every origin, method and header is accepted.
# NOTE(review): wildcard origins together with allow_credentials=True is a
# combination FastAPI's CORS guidance advises against - confirm whether
# credentialed cross-origin requests are actually required.
_CORS_OPTIONS = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)
|
|
|
|
| |
|
|
# Lower-case skill keywords searched for in resume text, in reporting order.
_KNOWN_SKILLS = (
    "python", "java", "c++", "c#", "javascript", "typescript", "react", "angular", "vue",
    "nodejs", "express", "django", "flask", "fastapi", "ruby", "rails", "php", "laravel",
    "sql", "mysql", "postgresql", "mongodb", "redis", "docker", "kubernetes", "aws",
    "azure", "gcp", "terraform", "ansible", "jenkins", "git", "jira", "scrum", "agile",
    "machine learning", "deep learning", "tensorflow", "pytorch", "scikit-learn",
    "pandas", "numpy", "data analysis", "data science", "natural language processing",
    "computer vision", "html", "css", "tailwind", "bootstrap",
)


def _compile_skill_pattern(skill: str) -> "re.Pattern":
    """Build a case-insensitive whole-keyword pattern for one skill."""
    # A trailing \b only works after a word character. For "c++" / "c#" it
    # would demand a word character right after the symbol, so "C++ " or a
    # line-ending "C#" could never match. Only guard the end of the keyword
    # when it actually ends in a word character.
    last_char = skill[-1]
    trailing_guard = r"(?!\w)" if (last_char.isalnum() or last_char == "_") else ""
    return re.compile(r"(?<!\w)" + re.escape(skill) + trailing_guard, re.IGNORECASE)


_SKILL_PATTERNS = tuple((skill, _compile_skill_pattern(skill)) for skill in _KNOWN_SKILLS)

# One to three capitalised words at the very start of a line.
_NAME_PATTERN = re.compile(r"^[A-Z][a-z]+(?: [A-Z][a-z]+(?: [A-Z][a-z]+)?)?")

# The optional "+" stays outside the capture group so it is not doubled when
# the result is formatted as "<n>+ years".
_EXPERIENCE_PATTERN = re.compile(r"(\d+)\+?\s*years? of experience", re.IGNORECASE)


def extract_structured_data(resume_text: str) -> dict:
    """
    Extracts name, skills, and years of experience from resume text using regex.

    Args:
        resume_text: Plain text of a resume; empty or None yields the defaults.

    Returns:
        A dict with:
          - "name": the leading capitalised words of the first non-blank line,
            or "Unknown Candidate" when that line does not look like a name.
          - "skills": known skills found in the text, each passed through
            str.capitalize(), in the fixed order of the keyword list.
          - "experience": "<n>+ years" taken from the first
            "<n> year(s) of experience" phrase, or "Not specified".
    """
    resume_text = resume_text or ""

    # Skip leading blank lines and surrounding whitespace so a resume that
    # starts with padding still yields its name.
    first_line = next(
        (line.strip() for line in resume_text.splitlines() if line.strip()),
        "",
    )
    name_match = _NAME_PATTERN.search(first_line)
    name = name_match.group(0) if name_match else "Unknown Candidate"

    # The keyword list has no duplicates, so no set() round-trip is needed and
    # the output order stays deterministic.
    extracted_skills = [
        skill.capitalize()
        for skill, pattern in _SKILL_PATTERNS
        if pattern.search(resume_text)
    ]

    experience_match = _EXPERIENCE_PATTERN.search(resume_text)
    if experience_match:
        experience = f"{experience_match.group(1)}+ years"
    else:
        experience = "Not specified"

    return {
        "name": name,
        "skills": extracted_skills,
        "experience": experience,
    }
|
|
def summarize_matches_with_llm_api(job_text: str, matches: dict) -> str:
    """
    Prepare an LLM prompt for the top matches and return a placeholder summary.

    The LLM call itself is currently switched off: the context and prompt are
    still assembled, but the function always returns a fixed
    "temporarily disabled" message.

    Args:
        job_text: Text of the job description.
        matches: Mapping of resume filename to a match dict providing
            'match_percentage', 'section_name' and 'text'.

    Returns:
        The fixed placeholder string.
    """
    print("\n\n🤖 Generating AI Summary for Top Matches...")

    # One text block per resume, numbered from 1 in the order given.
    resume_blocks = []
    for position, (resume_file, hit) in enumerate(matches.items(), 1):
        resume_blocks.append(
            f"--- Resume {position}: {resume_file} ---\n"
            f"Relevance: {hit['match_percentage']}%\n"
            f"Matching Section ({hit['section_name']}):\n{hit['text']}\n\n"
        )
    context = "".join(resume_blocks)

    # Built but not sent anywhere while summarization is disabled.
    prompt = f"""
    You are an expert HR assistant. Your task is to analyze the following resumes and provide a summary of why they are a good fit for the given job description.

    **Job Description:**
    {job_text}

    **Top Matching Resumes:**
    {context}

    **Your Task:**
    Based on the job description and the provided resume snippets, write a concise summary for each of the top 2-3 candidates. Highlight their key qualifications, relevant experience, and skills that align with the job requirements. Keep it brief and to the point.
    """

    return "LLM summarization temporarily disabled."
|
|
|
|
| |
|
|
@app.get("/api/status", tags=["Monitoring"])
async def get_status():
    """Health check: report that the API process is up and answering."""
    payload = {
        "status": "ok",
        "message": "API is running.",
    }
    return payload
|
|
|
|
def _safe_upload_name(upload: UploadFile) -> str:
    """Return the upload's client-supplied filename reduced to a bare file name.

    Directory components are dropped so the file cannot be written outside the
    per-request temp directory.

    Raises:
        HTTPException: 400 when no usable file name remains.
    """
    raw_name = (upload.filename or "").replace("\\", "/")
    name = os.path.basename(raw_name)
    if name in ("", ".", ".."):
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is missing a valid filename.",
        )
    return name


def _save_upload(upload: UploadFile, destination: str) -> None:
    """Copy the uploaded file's contents to *destination* on disk."""
    with open(destination, "wb") as buffer:
        shutil.copyfileobj(upload.file, buffer)


def _read_resume_text(resume_path: str, display_name: str) -> str:
    """Return the text UniversalParser extracts from a resume, or "" on failure."""
    # Imported here rather than at module level, as the original code did.
    from KNOWLEDGE_EXTRACTOR.universal_parser import UniversalParser

    parser = UniversalParser()
    try:
        parsed_data = parser.parse_file(resume_path)
        if parsed_data and parsed_data.get("text"):
            return parsed_data["text"]
        print(f"Warning: Could not extract text from {display_name} using UniversalParser.")
    except Exception as e:
        # Best effort: one unreadable resume must not fail the whole request.
        print(f"Error parsing {display_name} with UniversalParser: {e}")
    return ""


def _best_match_per_resume(matches: list, resume_full_texts: dict) -> dict:
    """Keep the highest-scoring match per filename and attach structured data."""
    best_matches = {}
    for match in matches:
        fname = match["filename"]
        current = best_matches.get(fname)
        if current is None or match["match_percentage"] > current["match_percentage"]:
            best_matches[fname] = match

    # Enrich only the winners, so each resume's text is scanned once.
    for fname, match in best_matches.items():
        structured_data = extract_structured_data(resume_full_texts.get(fname, ""))
        match["name"] = structured_data["name"]
        match["skills"] = structured_data["skills"]
        match["experience"] = structured_data["experience"]
    return best_matches


@app.post("/api/match-resumes", tags=["Matching"])
async def match_resumes(
    job_description: UploadFile = File(...),
    resumes: List[UploadFile] = File(...)
):
    """
    Upload a job description and resumes, perform on-the-fly indexing and matching, and return results.

    The uploads are written to a per-request temp directory, indexed into an
    in-memory ChromaDBManager with randomly named collections, and queried
    with the job description (top_k=20, min_similarity=0.1).

    Returns:
        A dict with "job_text", "matches" (best section per resume, sorted by
        relevance, highest first), "overall_scores" and "summary".

    Raises:
        HTTPException: 400 if an upload has no usable filename; 500 for any
            other failure during processing.
    """
    # Validate client-supplied names before touching the disk.
    jd_name = _safe_upload_name(job_description)
    resume_names = [_safe_upload_name(resume) for resume in resumes]

    temp_dir = tempfile.mkdtemp()
    try:
        jd_path = os.path.join(temp_dir, jd_name)
        _save_upload(job_description, jd_path)

        resumes_dir = os.path.join(temp_dir, "resumes")
        os.makedirs(resumes_dir)

        # Full text per resume, keyed by the file name written to disk.
        resume_full_texts = {}
        for resume, resume_name in zip(resumes, resume_names):
            resume_path = os.path.join(resumes_dir, resume_name)
            _save_upload(resume, resume_path)
            resume_full_texts[resume_name] = _read_resume_text(resume_path, resume_name)

        # Random collection names for this request's in-memory index.
        temp_collection_name = f"temp_collection_{os.urandom(8).hex()}"
        temp_sections_collection_name = f"temp_sections_collection_{os.urandom(8).hex()}"
        temp_chroma_manager = ChromaDBManager(
            in_memory=True,
            collection_name=temp_collection_name,
            sections_collection_name=temp_sections_collection_name
        )

        print(f"Starting on-the-fly indexing for {len(resumes)} resumes into collection '{temp_collection_name}'...")
        index_directory(resumes_dir, model, temp_chroma_manager)
        print("On-the-fly indexing complete.")

        job_text, job_embedding = extract_job_description(jd_path, model)

        results = temp_chroma_manager.query(
            query_text=job_text,
            query_embedding=job_embedding,
            top_k=20,
            min_similarity=0.1,
        )

        if not results or not results.get("matches"):
            return {
                "job_text": job_text,
                "matches": [],
                "summary": "No matching resumes found in the uploaded files.",
                "overall_scores": {}
            }

        best_matches = _best_match_per_resume(results["matches"], resume_full_texts)

        sorted_matches = sorted(
            best_matches.items(),
            key=lambda item: item[1]["match_percentage"],
            reverse=True
        )

        # Per-resume overall score, highest first; 0 when a resume has none.
        overall_scores = {}
        if results.get("resume_scores"):
            overall_scores = {
                match["filename"]: results["resume_scores"].get(match["resume_id"], 0)
                for match in best_matches.values()
            }
            overall_scores = dict(
                sorted(overall_scores.items(), key=lambda x: x[1], reverse=True)
            )

        summary = summarize_matches_with_llm_api(job_text, dict(sorted_matches))

        return {
            "job_text": job_text,
            "matches": [
                {
                    "filename": filename,
                    "name": match["name"],
                    "relevance": match["match_percentage"],
                    "best_section": match["section_name"],
                    "section_text": match["text"],
                    "skills": match["skills"],
                    "experience": match["experience"]
                }
                for filename, match in sorted_matches
            ],
            "overall_scores": overall_scores,
            "summary": summary
        }

    except HTTPException:
        # Pass through deliberate HTTP errors instead of re-wrapping them as 500.
        raise
    except Exception as e:
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # A cleanup failure must not replace the real error or response.
        shutil.rmtree(temp_dir, ignore_errors=True)
|
|
|
|
@app.post("/api/index-resumes", tags=["Indexing"])
async def index_resumes_endpoint(resumes_path: str = "DATA_resume"):
    """
    Index every resume under ``resumes_path`` into the main database.

    Responds with 404 when the path is not an existing directory and with 500
    when indexing raises; otherwise returns a success status message.
    """
    directory_exists = os.path.isdir(resumes_path)
    if not directory_exists:
        raise HTTPException(status_code=404, detail=f"Directory not found: {resumes_path}")

    try:
        print(f"Starting indexing for directory: {resumes_path} into main database.")
        index_directory(resumes_path, model, main_chroma_manager)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Indexing failed: {exc}")
    else:
        return {
            "status": "success",
            "message": f"Indexing complete for {resumes_path}.",
        }
|
|
@app.get("/api/resume-embedding/{resume_id}", tags=["Resumes"])
async def get_resume_embedding(resume_id: str):
    """Look up the stored embedding for ``resume_id``; 404 when none is found."""
    embedding = main_chroma_manager.get_resume_embedding(resume_id)
    # NOTE(review): this truthiness test assumes a list-like or None result;
    # an array-like return value could make it ambiguous - confirm against
    # ChromaDBManager.get_resume_embedding.
    if embedding:
        return {"embedding": embedding}
    raise HTTPException(
        status_code=404,
        detail=f"Resume with ID '{resume_id}' not found.",
    )
|
|
@app.post("/api/summarize-resume", tags=["Resumes"])
async def summarize_resume(resume_embedding: dict, job_description: str):
    """
    Summarize the resume information using the LLM.

    The embedding payload is written to a temporary JSON file and handed,
    together with the job description, to the external
    ``augemented_generation.py`` script. Whatever the script prints on stdout
    is returned as the summary.

    Returns:
        ``{"summary": <stripped stdout of the script>}``.

    Raises:
        HTTPException: 500 when the script exits non-zero, when it produces no
            stdout but does write to stderr, or on any other failure.
    """
    import sys  # local import: only needed to locate the running interpreter

    # Tracked outside the try so cleanup never hits an unbound name when
    # temp-file creation itself fails.
    temp_file_path = None
    try:
        with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".json") as f:
            temp_file_path = f.name
            json.dump(resume_embedding, f)

        # NOTE(review): the script path is machine-specific; consider making
        # it configurable.
        command = [
            # Same interpreter as the server, instead of whichever "python"
            # is first on PATH.
            sys.executable,
            "/Users/deepandee/Desktop/RAG/SLM_manager/augemented_generation.py",
            "--resume_file",
            temp_file_path,
            "--job_description",
            job_description,
        ]
        completed = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        summary = completed.stdout.decode(errors="replace").strip()
        stderr_text = completed.stderr.decode(errors="replace").strip()

        # Judge failure by exit status: stderr alone is unreliable because
        # libraries often write warnings there. Stderr with no stdout at all
        # is still treated as a failure.
        if completed.returncode != 0 or (not summary and stderr_text):
            print(f"Error summarizing resume: {stderr_text}")
            raise HTTPException(
                status_code=500,
                detail=f"Error summarizing resume: {stderr_text}",
            )
        if stderr_text:
            print(f"Summarizer wrote to stderr: {stderr_text}")

    except HTTPException:
        # Keep the specific error raised above instead of re-wrapping it.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        if temp_file_path is not None:
            try:
                os.remove(temp_file_path)
            except OSError as cleanup_error:
                # A cleanup failure must not hide the real outcome.
                print(f"Could not remove temporary file {temp_file_path}: {cleanup_error}")

    return {"summary": summary}
|
|
| |
if __name__ == "__main__":
    # Direct execution: serve "api:app" on all interfaces, port 8000, with
    # uvicorn's reload option switched on.
    uvicorn.run(
        "api:app",
        host="0.0.0.0",
        port=8000,
        reload=True,
    )
|
|