srinikesh1432 committed on
Commit
f0afd6e
·
verified ·
1 Parent(s): a9154c3

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -0
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from pathlib import Path
4
+ import numpy as np
5
+ import pandas as pd
6
+ from sklearn.feature_extraction.text import TfidfVectorizer
7
+ from sklearn.metrics.pairwise import cosine_similarity
8
+ import PyPDF2, docx
9
+
10
# ----------- CONFIG ------------
# Directory that load_resumes() walks recursively for resume files.
RESUME_DIR = Path("resumes")
# Passed as max_features to TfidfVectorizer to cap the vocabulary size.
MAX_FEATURES = 20000
# -------------------------------
14
+
15
def extract_text(path):
    """Return the plain text of the resume stored at *path*.

    The extractor is picked from the file suffix (case-insensitive):
    ``.pdf`` is read with PyPDF2, ``.docx``/``.doc`` with python-docx, and
    anything else is read as UTF-8 text with undecodable bytes dropped.

    Args:
        path: ``pathlib.Path`` of the file to read.

    Returns:
        The extracted text. PDF pages and document paragraphs are joined
        with newline characters.
    """
    suffix = path.suffix.lower()
    if suffix == ".pdf":
        # Read all pages while the handle is open, then close it
        # deterministically instead of leaking it until garbage collection.
        with open(path, "rb") as handle:
            reader = PyPDF2.PdfReader(handle)
            # `or ""` keeps the join safe when a page yields no text.
            return "\n".join(page.extract_text() or "" for page in reader.pages)
    if suffix in (".docx", ".doc"):
        # NOTE(review): python-docx is documented for .docx; legacy binary
        # .doc files will presumably raise here -- confirm with real data.
        document = docx.Document(path)
        return "\n".join(paragraph.text for paragraph in document.paragraphs)
    return path.read_text(encoding="utf-8", errors="ignore")
24
+
25
def load_resumes():
    """Collect and normalise the text of every supported resume file.

    Walks ``RESUME_DIR`` recursively and, for each ``.pdf``, ``.docx``,
    ``.doc`` or ``.txt`` file, extracts its text with ``extract_text``,
    lower-cases it and collapses every whitespace run to a single space.

    Returns:
        A ``(names, texts)`` pair of parallel lists: the bare file names and
        their normalised text. Files whose extraction raises are logged and
        skipped so that one bad resume does not abort the whole load.
    """
    import logging  # function-scope import keeps this block self-contained

    logger = logging.getLogger(__name__)
    supported = {".pdf", ".docx", ".doc", ".txt"}
    names, texts = [], []
    # sorted() gives a reproducible row order regardless of directory listing order.
    for candidate in sorted(RESUME_DIR.glob("**/*")):
        # Skip other file types, and directories that merely look like resumes.
        if candidate.suffix.lower() not in supported or not candidate.is_file():
            continue
        try:
            raw = extract_text(candidate)
        except Exception:
            # Best-effort load: report the failure instead of hiding it.
            logger.warning("Skipping unreadable resume %s", candidate, exc_info=True)
            continue
        texts.append(" ".join(raw.lower().split()))
        names.append(candidate.name)
    return names, texts
36
+
37
# Build the corpus once at import time; every query is scored against it.
filenames, texts = load_resumes()
if not texts:
    # Fail with an actionable message instead of letting the vectorizer
    # choke on an empty corpus further down.
    raise ValueError(
        f"No readable resumes (.pdf/.docx/.doc/.txt) found under {RESUME_DIR}"
    )
vectorizer = TfidfVectorizer(stop_words="english", max_features=MAX_FEATURES)
# X holds one TF-IDF row per entry in `filenames`, in the same order.
X = vectorizer.fit_transform(texts)
40
+
41
def match_resume(job_description):
    """Rank the loaded resumes against a job description.

    The description is lower-cased and whitespace-normalised, converted with
    the module-level ``vectorizer`` and compared to every row of ``X`` using
    cosine similarity.

    Args:
        job_description: Free-text job description. ``None`` or a
            blank/whitespace-only string yields an empty result table.

    Returns:
        A ``pandas.DataFrame`` with at most 10 rows, sorted by descending
        match, with the columns:

        * ``Resume`` -- file name taken from ``filenames``.
        * ``Match (%)`` -- cosine similarity times 100, rounded to 2 places.
        * ``Confidence (%)`` -- sigmoid of the similarity's z-score within
          this query's scores, times 100, rounded to 2 places.
    """
    columns = ["Resume", "Match (%)", "Confidence (%)"]
    query = " ".join((job_description or "").lower().split())
    if not query:
        # Nothing to compare: avoid reporting 0% matches at a misleading
        # 50% "confidence" for every resume.
        return pd.DataFrame(columns=columns)

    query_vec = vectorizer.transform([query])
    sims = cosine_similarity(query_vec, X).flatten()
    # Fall back to a tiny spread when all scores are equal (std == 0) so the
    # z-score division stays defined.
    spread = sims.std() or 1e-6
    z_scores = (sims - sims.mean()) / spread
    conf = 1 / (1 + np.exp(-z_scores))  # sigmoid confidence

    table = pd.DataFrame({
        "Resume": filenames,
        "Match (%)": (sims * 100).round(2),
        "Confidence (%)": (conf * 100).round(2),
    })
    # mergesort is stable, so tied scores keep their corpus order.
    ranked = table.sort_values("Match (%)", ascending=False, kind="mergesort")
    return ranked.head(10).reset_index(drop=True)
53
+
54
# Single-textbox UI: the pasted job description goes to match_resume() and
# the returned DataFrame is rendered as a table.
iface = gr.Interface(
    fn=match_resume,
    inputs=gr.Textbox(lines=6, label="Paste Job Description"),
    outputs=gr.Dataframe(label="Top Matching Resumes"),
    title="AI Resume Analyzer + Job Matcher",
    description="Upload your resume dataset and match against a job description using NLP (TF-IDF + Confidence Scoring).",
)

# Start the server only when run as a script, so importing this module
# (e.g. for tests) does not block on a running web server.
if __name__ == "__main__":
    iface.launch()