|
|
import glob
import logging
import os
import re
from collections import Counter

import docx
import gradio as gr
import numpy as np
import pandas as pd
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Directory handed to load_resume_dataset() at import time; its PDF, DOCX and
# TXT files form the reference set that uploads are compared against.
DATASET_FOLDER: str = "resumes"

# Upper bound on how many of the most similar reference resumes
# match_resume() reports for a single upload.
TOP_K: int = 3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_text_from_pdf(file):
    """Return the text of a PDF, best effort.

    Args:
        file: Whatever is handed to ``PdfReader``; callers in this module
            pass either a filesystem path or an uploaded file object.

    Returns:
        The text of each page followed by a single space, concatenated in
        page order. Pages whose ``extract_text()`` result is empty are
        skipped. If reading fails part-way, the text collected so far is
        returned (possibly ``""``); the failure is logged, never raised.
    """
    chunks = []
    try:
        reader = PdfReader(file)
        for page in reader.pages:
            page_text = page.extract_text()
            if page_text:
                chunks.append(page_text + " ")
    except Exception as exc:
        # Deliberate best-effort boundary. ``Exception`` rather than a bare
        # ``except`` so KeyboardInterrupt / SystemExit still propagate.
        logging.getLogger(__name__).warning(
            "Could not read PDF %r: %s", getattr(file, "name", file), exc
        )
    return "".join(chunks)
|
|
|
|
|
def extract_text_from_docx(file):
    """Return the paragraph text of a DOCX document, best effort.

    Args:
        file: Whatever is handed to ``docx.Document``; callers in this
            module pass either a filesystem path or an uploaded file object.

    Returns:
        The ``text`` of every entry in ``doc.paragraphs`` joined with single
        spaces, or ``""`` if the document cannot be opened or read. Failures
        are logged, never raised. Only ``doc.paragraphs`` is consulted.
    """
    try:
        document = docx.Document(file)
        return " ".join(paragraph.text for paragraph in document.paragraphs)
    except Exception as exc:
        # Deliberate best-effort boundary. ``Exception`` rather than a bare
        # ``except`` so KeyboardInterrupt / SystemExit still propagate.
        logging.getLogger(__name__).warning(
            "Could not read DOCX %r: %s", getattr(file, "name", file), exc
        )
        return ""
|
|
|
|
|
def _file_extension(path):
    """Return the lower-cased extension of *path* without the dot ('' if none)."""
    base = os.path.basename(path)
    return base.rsplit(".", 1)[-1].lower() if "." in base else ""


def extract_text(file):
    """Extract plain text from an uploaded resume (PDF, DOCX or TXT).

    Args:
        file: The upload. Accepted forms are a filesystem path (``str`` or
            ``os.PathLike``), a file-like object exposing ``name``, or an
            anonymous stream, which is treated as TXT. ``None`` (nothing
            uploaded) is also accepted.
            NOTE(review): which of these forms Gradio delivers depends on
            the Gradio version and the ``gr.File`` configuration -- confirm
            against the deployed version.

    Returns:
        The extracted text, or ``""`` when the type is unsupported or the
        content cannot be read. Never raises for unreadable input.
    """
    if file is None:
        return ""

    given_as_path = isinstance(file, (str, os.PathLike))
    path = os.fspath(file) if given_as_path else getattr(file, "name", None)

    # A stream without a usable name is assumed to be plain text.
    ext = _file_extension(path) if isinstance(path, str) else "txt"

    if ext == "pdf":
        return extract_text_from_pdf(file)
    if ext == "docx":
        return extract_text_from_docx(file)
    if ext != "txt":
        return ""

    try:
        if given_as_path:
            # Same decoding policy as the dataset loader: undecodable bytes
            # are dropped instead of discarding the whole file.
            with open(path, "r", encoding="utf-8", errors="ignore") as handle:
                return handle.read()
        file.seek(0)
        data = file.read()
    except (OSError, ValueError, AttributeError) as exc:
        logging.getLogger(__name__).warning(
            "Could not read text upload %r: %s", path, exc
        )
        return ""

    if isinstance(data, (bytes, bytearray)):
        return data.decode("utf-8", errors="ignore")
    # Text-mode streams already yield ``str``; anything else is unusable.
    return data if isinstance(data, str) else ""
|
|
|
|
|
def load_resume_dataset(folder_path):
    """Load every readable resume directly inside *folder_path*.

    Files ending in ``.pdf``, ``.docx`` or ``.txt`` (case-insensitive) are
    read; everything else, including sub-directories, is ignored. Files
    whose extracted text is empty or whitespace-only are dropped.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        A ``(names, resumes)`` tuple of two parallel lists: the base file
        names and the corresponding extracted texts, ordered by path so the
        result is reproducible across runs and platforms. Both lists are
        empty when the folder is missing or holds no readable resume.
    """
    names = []
    resumes = []

    # glob.escape keeps folder names containing "[", "*" or "?" literal;
    # sorting removes the filesystem-dependent ordering of glob results.
    pattern = os.path.join(glob.escape(folder_path), "*")
    for path in sorted(glob.glob(pattern)):
        if not os.path.isfile(path):
            continue

        ext = os.path.splitext(path)[1].lower()
        if ext == ".pdf":
            text = extract_text_from_pdf(path)
        elif ext == ".docx":
            text = extract_text_from_docx(path)
        elif ext == ".txt":
            try:
                with open(path, "r", encoding="utf-8", errors="ignore") as handle:
                    text = handle.read()
            except OSError as exc:
                logging.getLogger(__name__).warning(
                    "Skipping unreadable resume %r: %s", path, exc
                )
                continue
        else:
            continue

        if text.strip():
            names.append(os.path.basename(path))
            resumes.append(text)

    return names, resumes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# A capitalised phrase: an upper-case letter followed by at least two
# letters, spaces or the separators "&", "/" and "-".
_ROLE_PHRASE = re.compile(r"\b[A-Z][a-zA-Z &/-]{2,}\b")


def infer_job_from_text(text):
    """Guess a job/role label from the top of a resume.

    Heuristic: take the first five non-blank lines, collect every
    capitalised phrase in them and return the phrase that occurs most
    often. When several phrases are equally frequent the one seen first
    wins, so the result is frequently simply the first capitalised phrase
    of the document.

    Args:
        text: Full resume text.

    Returns:
        The selected phrase with trailing separators removed, or
        ``"Other"`` when no capitalised phrase is found.
    """
    # Skip blank lines so leading whitespace (common in extracted PDF text)
    # does not consume the five-line window.
    head = [line for line in text.splitlines() if line.strip()][:5]

    phrases = Counter()
    for line in head:
        # The pattern can end on a space or separator when a digit follows
        # (e.g. "Project Manager 2020"); trim it so equal phrases are
        # counted together.
        phrases.update(match.rstrip(" &/-") for match in _ROLE_PHRASE.findall(line))

    if not phrases:
        return "Other"
    return phrases.most_common(1)[0][0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- One-time initialisation, executed when the module is imported ---------

# Sentence-embedding model used for both the reference set and each upload.
st_model = SentenceTransformer("all-MiniLM-L6-v2")

# Parallel lists: resume_names[i] is the file name of resume_texts[i].
resume_names, resume_texts = load_resume_dataset(DATASET_FOLDER)
if not resume_texts:
    # Surface the problem at start-up instead of on the first upload.
    logging.getLogger(__name__).warning(
        "No readable resumes found in %r.", DATASET_FOLDER
    )

# Embeddings of resume_texts, in the same order; match_resume() relies on
# that ordering when it maps similarity indices back to resume_names.
resume_embeddings = st_model.encode(resume_texts, convert_to_numpy=True)

# File name -> role label guessed from that resume's text.
resume_roles = {
    name: infer_job_from_text(text)
    for name, text in zip(resume_names, resume_texts)
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def match_resume(file):
    """Find the reference resumes most similar to an uploaded resume.

    Args:
        file: The upload as delivered by the Gradio ``File`` input; it is
            passed straight to ``extract_text``.

    Returns:
        A ``pandas.DataFrame`` with at most ``TOP_K`` rows, best match
        first, and the columns ``"Matched Resume"``, ``"Recommended Job"``
        and ``"Confidence Score"`` (cosine similarity x 100, two decimals,
        with a ``%`` suffix). When no text can be extracted from the upload,
        or no reference resumes are loaded, a one-row frame with a single
        ``"Error"`` column is returned instead.
    """
    input_text = extract_text(file)
    if not input_text.strip():
        return pd.DataFrame([{"Error": "Could not extract text from this resume."}])

    # Without reference resumes there is nothing to compare against; bail out
    # before the similarity computation is attempted on an empty set.
    if not resume_names:
        return pd.DataFrame(
            [{"Error": "No reference resumes are available to compare against."}]
        )

    input_emb = st_model.encode([input_text], convert_to_numpy=True)
    sims = cosine_similarity(input_emb, resume_embeddings)[0]

    # Reverse first, then truncate: the previous ``[-TOP_K:]`` slice returned
    # every row when TOP_K was 0, because ``[-0:]`` is ``[0:]``.
    top_indices = np.argsort(sims)[::-1][:TOP_K]

    rows = []
    for idx in top_indices:
        matched_resume_name = resume_names[idx]
        rows.append(
            {
                "Matched Resume": matched_resume_name,
                "Recommended Job": resume_roles[matched_resume_name],
                "Confidence Score": f"{sims[idx]*100:.2f}%",
            }
        )
    return pd.DataFrame(rows)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Custom stylesheet handed to the Gradio interface below.
css = """
body {background-color: #f7f9fc;}
h1 {color: #333; text-align: center;}
.gr-button {background-color: #4CAF50; color: white;}
"""

# HTML variant of the application title.
# NOTE(review): not referenced by the Interface below, which receives the
# plain-text title; kept because other code may import it.
title = "<h1>AI Resume Analyzer & Job Matcher</h1>"

iface = gr.Interface(
    fn=match_resume,
    inputs=gr.File(label="Upload Your Resume (PDF, DOCX, TXT)"),
    outputs=gr.Dataframe(label="Top Job Matches"),
    title="AI Resume Analyzer & Job Matcher",
    # Derived from TOP_K so the text cannot drift from the number of rows
    # match_resume() actually returns.
    description=(
        f"Upload a resume to get top {TOP_K} job recommendations "
        "with confidence scores."
    ),
    css=css,
)

if __name__ == "__main__":
    iface.launch()
|
|
|