Upload app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import os
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
import numpy as np
|
| 5 |
+
import pandas as pd
|
| 6 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 7 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 8 |
+
import PyPDF2, docx
|
| 9 |
+
|
| 10 |
+
# ----------- CONFIG ------------
# Folder that is scanned (recursively) for resume files at start-up.
RESUME_DIR = Path("resumes")
# Upper bound on the TF-IDF vocabulary size handed to the vectorizer.
MAX_FEATURES = 20_000
# -------------------------------
|
| 14 |
+
|
| 15 |
+
def extract_text(path):
    """Return the plain text contained in a resume file.

    The reader is chosen from the file suffix (compared case-insensitively):

    * ``.pdf`` is read with ``PyPDF2.PdfReader``; a page for which
      ``extract_text()`` returns nothing contributes an empty string.
    * ``.docx`` / ``.doc`` are opened with ``docx.Document`` and the text
      of every paragraph is collected.
    * Any other suffix is read as UTF-8 text, dropping undecodable bytes.

    Args:
        path: Location of the file, as a ``pathlib.Path`` or a plain string.

    Returns:
        The extracted text; pages or paragraphs are separated by newlines.
    """
    path = Path(path)  # also accept plain string paths
    suffix = path.suffix.lower()

    if suffix == ".pdf":
        # Hold the file open only while PyPDF2 is reading from it, so the
        # handle is released deterministically instead of being leaked.
        with open(path, "rb") as handle:
            reader = PyPDF2.PdfReader(handle)
            return "\n".join(page.extract_text() or "" for page in reader.pages)

    if suffix in (".docx", ".doc"):
        # NOTE(review): python-docx targets the .docx format; legacy binary
        # .doc files will presumably raise here -- confirm and consider
        # dropping ".doc" or converting such files first.
        document = docx.Document(path)
        return "\n".join(paragraph.text for paragraph in document.paragraphs)

    return path.read_text(encoding="utf-8", errors="ignore")
|
| 24 |
+
|
| 25 |
+
def load_resumes():
    """Collect the normalised text of every supported file in ``RESUME_DIR``.

    ``RESUME_DIR`` is walked recursively. Entries whose suffix is ``.pdf``,
    ``.docx``, ``.doc`` or ``.txt`` (case-insensitive) are passed to
    ``extract_text``; the result is lower-cased and runs of whitespace are
    collapsed to single spaces.

    Loading is best-effort: a file that cannot be read is skipped, and the
    failure is logged as a warning rather than silently discarded.

    Returns:
        A ``(names, texts)`` tuple of parallel lists, where ``names[i]`` is
        the bare file name whose normalised text is ``texts[i]``.
    """
    import logging  # local import keeps this block self-contained

    logger = logging.getLogger(__name__)
    supported = {".pdf", ".docx", ".doc", ".txt"}
    names, texts = [], []

    # Sorted so the row order does not depend on filesystem enumeration order.
    for candidate in sorted(RESUME_DIR.glob("**/*")):
        if candidate.suffix.lower() not in supported:
            continue
        try:
            raw = extract_text(candidate)
        except Exception:
            # One unreadable resume must not prevent the app from starting,
            # but the operator should be able to see which file was skipped.
            logger.warning(
                "Skipping unreadable resume %s", candidate, exc_info=True
            )
            continue
        texts.append(" ".join(raw.lower().split()))
        names.append(candidate.name)

    return names, texts
|
| 36 |
+
|
| 37 |
+
# Build the TF-IDF index once, when the module is loaded, so that each
# request only has to vectorise the job description.
filenames, texts = load_resumes()
vectorizer = TfidfVectorizer(
    max_features=MAX_FEATURES,
    stop_words="english",
)
# One row of X per entry in ``texts`` (and therefore per ``filenames`` entry).
X = vectorizer.fit_transform(texts)
|
| 40 |
+
|
| 41 |
+
def match_resume(job_description):
    """Rank the indexed resumes against a job description.

    The description is lower-cased and whitespace-collapsed, vectorised with
    the module-level ``vectorizer`` and compared with every row of ``X`` by
    cosine similarity. A "confidence" is derived by passing each
    similarity's z-score (relative to all resumes) through a sigmoid.

    Args:
        job_description: Free-text job description. ``None`` or a string
            containing only whitespace is treated as "no query".

    Returns:
        A ``pandas.DataFrame`` with the columns ``"Resume"``,
        ``"Match (%)"`` and ``"Confidence (%)"`` holding at most the ten
        best matches, highest match first. The frame is empty when no
        usable description was supplied.
    """
    columns = ["Resume", "Match (%)", "Confidence (%)"]

    # Without any query text every similarity is 0, so every resume would be
    # reported with 0 % match and 50 % confidence; return no rows instead.
    # ``or ""`` also guards against a ``None`` value.
    jd = " ".join((job_description or "").lower().split())
    if not jd:
        return pd.DataFrame(columns=columns)

    jd_vec = vectorizer.transform([jd])
    sims = cosine_similarity(jd_vec, X).flatten()

    mean = sims.mean()
    # All-equal similarities give std == 0; fall back to a tiny value so the
    # z-score division below stays defined.
    std = sims.std() or 1e-6
    conf = 1 / (1 + np.exp(-((sims - mean) / std)))  # sigmoid confidence

    ranking = pd.DataFrame({
        "Resume": filenames,
        "Match (%)": (sims * 100).round(2),
        "Confidence (%)": (conf * 100).round(2),
    })
    return ranking.sort_values("Match (%)", ascending=False).head(10)
|
| 53 |
+
|
| 54 |
+
# Gradio front end: one text box in, one table of ranked resumes out.
job_description_box = gr.Textbox(lines=6, label="Paste Job Description")
results_table = gr.Dataframe(label="Top Matching Resumes")

iface = gr.Interface(
    fn=match_resume,
    inputs=job_description_box,
    outputs=results_table,
    title="AI Resume Analyzer + Job Matcher",
    description="Upload your resume dataset and match against a job description using NLP (TF-IDF + Confidence Scoring).",
)

# NOTE(review): launch() runs on import as well as on direct execution;
# consider an ``if __name__ == "__main__":`` guard if this module is ever
# imported from elsewhere.
iface.launch()
|