mahmodGendy committed on
Commit
903a1b0
·
verified ·
1 Parent(s): a1116ce

Upload 6 files

Browse files
Files changed (6) hide show
  1. app.py +71 -0
  2. logger.py +22 -0
  3. logs.csv +0 -0
  4. model_logic.py +214 -0
  5. requirements.txt +10 -0
  6. resume_parser.py +79 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import gradio as gr
4
+ from fastapi import FastAPI, UploadFile, File, Form
5
+
6
+ from resume_parser import parse_resume
7
+ from model_logic import score_resume_by_title
8
+ from logger import log_decision
9
+
10
+
11
+ app = FastAPI()
12
+
13
+
14
+ UPLOAD_DIR = "uploads"
15
+ os.makedirs(UPLOAD_DIR, exist_ok=True)
16
+
17
+
18
def process_resume(file, title, level):
    """Stage an uploaded resume on disk, score it, log the decision, clean up.

    Args:
        file: Readable binary file-like object with a ``name`` attribute
            (FastAPI's ``UploadFile.file`` or a Gradio file wrapper).
        title: Target job title.
        level: Seniority level ("entry", "junior", "mid" or "senior").

    Returns:
        The scoring result dict from ``score_resume_by_title``.
    """
    # basename() strips directory components from the client-supplied name:
    # prevents path traversal, and guards against an absolute file.name
    # (e.g. Gradio's temp path) overriding the join and clobbering the
    # very file we are still reading from.
    filename = os.path.basename(getattr(file, "name", "resume"))
    path = os.path.join(UPLOAD_DIR, filename)

    with open(path, "wb") as f:
        shutil.copyfileobj(file, f)

    try:
        text = parse_resume(path)
        result = score_resume_by_title(text, title, level)
        log_decision(title, result["decision"])
    finally:
        # Remove the staged copy even when parsing/scoring raises,
        # so failures don't accumulate files in UPLOAD_DIR.
        os.remove(path)

    return result
34
+
35
+
36
@app.post("/analyze_resume")
async def analyze_resume(
    file: UploadFile = File(...),
    title: str = Form(...),
    level: str = Form(...)
):
    """HTTP endpoint: score an uploaded resume against a job title/level."""
    # Hand the underlying binary stream to the shared pipeline.
    return process_resume(file.file, title, level)
46
+
47
+
48
def gradio_interface(file, title, level):
    """Gradio callback: delegate straight to the shared resume pipeline."""
    return process_resume(file, title, level)
53
+
54
+
55
# Gradio UI wired to the same pipeline the FastAPI endpoint uses.
demo = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.File(label="Upload Resume PDF"),
        gr.Textbox(label="Job Title"),
        gr.Dropdown(["entry","junior","mid","senior"], label="Job Level"),
    ],
    outputs="json",
    title="AI Resume Screening System",
)


if __name__ == "__main__":
    demo.launch()
logger.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+ import uuid
3
+ from datetime import datetime
4
+
5
# Path of the CSV audit log (one row appended per screening decision).
LOG_FILE = "logs.csv"


def log_decision(job_title, decision):
    """Append one screening decision to the CSV audit log.

    Row layout: short random id, ISO-8601 timestamp, job title, decision.

    Args:
        job_title: Job title the resume was screened against.
        decision: Final decision string (e.g. "ACCEPT" / "REJECT").
    """
    # Short random correlation id; 8 hex chars is plenty for a log file.
    uid = str(uuid.uuid4())[:8]

    row = [
        uid,
        datetime.now().isoformat(),
        job_title,
        decision,
    ]

    # newline="" is required by the csv module; explicit utf-8 keeps
    # non-ASCII job titles intact regardless of the platform default.
    with open(LOG_FILE, "a", newline="", encoding="utf-8") as f:
        csv.writer(f).writerow(row)
logs.csv ADDED
File without changes
model_logic.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer, util
2
+ import re
3
+
4
+ model = SentenceTransformer("all-MiniLM-L6-v2")
5
+
6
+
7
def extract_years_of_experience(text):
    """Return the largest number of years of experience mentioned in *text*.

    Recognizes phrasings such as "5 years", "5+ years", "3 yrs" and
    "1 year" (case-insensitive).

    Args:
        text: Raw resume text.

    Returns:
        The maximum year count found, or 0 when nothing matches.
    """
    # \+? accepts "10+ years"; years?/yrs? accepts singular and "yrs".
    pattern = r"(\d+)\+?\s*(?:years?|yrs?)\b"
    matches = re.findall(pattern, text.lower())

    if matches:
        # A resume may mention several stints; take the largest figure.
        return max(int(m) for m in matches)

    return 0
16
+
17
+
18
def skills_from_text(text):
    """Extract known skills mentioned in resume text.

    Each skill from a fixed vocabulary is matched with word-boundary-aware
    search, so e.g. "java" no longer falsely matches inside "javascript",
    nor "go" inside "django". Matching is case-insensitive.

    Args:
        text: Raw resume text.

    Returns:
        Sorted list of unique skill names found in the text.
    """
    text = text.lower()

    skills = [
        # programming
        "python","java","c++","c#","javascript","typescript","go","rust","scala",
        # data science
        "machine learning","deep learning","data science","data analysis",
        "data mining","statistical analysis","statistics","predictive modeling",
        # ml / ai frameworks
        "tensorflow","pytorch","keras","scikit-learn","xgboost","lightgbm",
        # data tools
        "pandas","numpy","matplotlib","seaborn","plotly",
        # databases
        "sql","postgresql","mysql","mongodb","redis","oracle",
        # cloud
        "aws","azure","gcp","docker","kubernetes",
        # backend
        "fastapi","flask","django","spring","node.js","express",
        # frontend
        "react","angular","vue","html","css","bootstrap","tailwind",
        # nlp
        "nlp","natural language processing","transformers","bert","llm",
        # devops
        "ci/cd","jenkins","git","github","gitlab","terraform",
        # data engineering
        "spark","hadoop","kafka","airflow","etl","data pipelines",
        # analytics tools
        "power bi","tableau","excel"
    ]

    found = set()

    for skill in skills:
        # (?<!\w)/(?!\w) act as word boundaries that also behave correctly
        # for skills ending in non-word characters such as "c++" or "c#",
        # where \b would not work.
        pattern = r"(?<!\w)" + re.escape(skill) + r"(?!\w)"
        if re.search(pattern, text):
            found.add(skill)

    # Sorted for a deterministic result (the original set order varied).
    return sorted(found)
68
+
69
+
70
def get_job_requirements(title):
    """Return the required skill list for the first role matching *title*.

    Matching is a case-insensitive substring test, checked in insertion
    order (longer, more specific role names are listed before their
    shorter variants). Unknown titles yield an empty list.
    """
    normalized = title.lower()

    mapping = {
        "data scientist": [
            "python", "machine learning", "statistics", "pandas",
            "numpy", "sql", "data visualization", "scikit-learn",
        ],
        "machine learning engineer": [
            "python", "machine learning", "deep learning",
            "pytorch", "tensorflow", "docker", "mlops",
        ],
        "ml engineer": [
            "python", "machine learning", "deep learning",
            "pytorch", "tensorflow", "docker",
        ],
        "data analyst": [
            "sql", "python", "excel", "tableau",
            "power bi", "data analysis", "statistics",
        ],
        "data engineer": [
            "python", "sql", "spark", "hadoop",
            "etl", "data pipelines", "airflow",
        ],
        "backend developer": [
            "python", "fastapi", "flask", "django",
            "sql", "api", "docker",
        ],
        "backend engineer": [
            "python", "fastapi", "django",
            "sql", "microservices", "docker",
        ],
        "software engineer": [
            "python", "java", "c++", "git",
            "algorithms", "data structures",
        ],
        "frontend developer": [
            "javascript", "react", "html",
            "css", "typescript", "frontend",
        ],
        "full stack developer": [
            "javascript", "react", "node.js",
            "sql", "html", "css", "api",
        ],
        "devops engineer": [
            "docker", "kubernetes", "aws",
            "ci/cd", "terraform", "linux",
        ],
        "ai engineer": [
            "python", "deep learning", "pytorch",
            "tensorflow", "transformers", "nlp",
        ],
        "nlp engineer": [
            "python", "nlp", "transformers",
            "bert", "machine learning",
        ],
        "cloud engineer": [
            "aws", "azure", "gcp",
            "docker", "kubernetes",
        ],
    }

    # First role name contained in the title wins; [] for no match.
    return next(
        (skills for role, skills in mapping.items() if role in normalized),
        [],
    )
152
+
153
+
154
def calculate_skills_score(resume_skills, job_skills):
    """Fraction of required job skills that appear in the resume.

    Returns 0 when the job has no required skills (avoids division by
    zero for unknown titles).
    """
    if not job_skills:
        return 0

    overlap = set(resume_skills) & set(job_skills)
    return len(overlap) / len(job_skills)
162
+
163
+
164
def score_resume_by_title(text, title, level):
    """Score a resume against a job title and seniority level.

    Combines three signals — required-skill coverage, years of experience
    found in the text, and embedding similarity between the title and the
    resume — into a single ACCEPT/REJECT decision with reasons.

    Args:
        text: Parsed resume text.
        title: Target job title.
        level: Seniority level string (entry/junior/mid/senior).

    Returns:
        Dict with decision, component scores, extracted skills and reasons.
    """
    job_skills = get_job_requirements(title)
    resume_skills = skills_from_text(text)
    skill_score = calculate_skills_score(resume_skills, job_skills)
    exp = extract_years_of_experience(text)

    # Semantic similarity between the title and the head of the resume
    # (truncated to keep the encoder input short).
    title_embedding = model.encode(title)
    resume_embedding = model.encode(text[:2000])
    similarity = util.cos_sim(title_embedding, resume_embedding).item()

    # Minimum years of experience per level; anything other than
    # entry/junior/mid is treated as senior (5 years), as before.
    required_exp = {"entry": 0, "junior": 1, "mid": 3}.get(level.lower(), 5)

    reasons = []
    if skill_score < 0.4:
        reasons.append("Low skill match")
    if exp < required_exp:
        reasons.append("Insufficient experience")
    if similarity < 0.3:
        reasons.append("Low semantic match with job title")

    # Any failed check rejects the candidate.
    decision = "REJECT" if reasons else "ACCEPT"

    return {
        "decision": decision,
        "skill_score": round(skill_score, 3),
        "similarity": round(similarity, 3),
        "experience_years": exp,
        "resume_skills": resume_skills,
        "job_skills": job_skills,
        "reasons": reasons,
    }
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ gradio
4
+ pandas
5
+ sentence-transformers
6
+ pytesseract
7
+ pypdf2
8
+ pdf2image
9
+ python-docx
10
+ Pillow
resume_parser.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import tempfile
4
+ import pytesseract
5
+ import PyPDF2
6
+ import docx
7
+
8
+ from PIL import Image
9
+ from pdf2image import convert_from_path
10
+
11
+
12
def clean_text(text):
    """Normalize extracted text: coerce to str, collapse whitespace, trim.

    Falsy inputs (None, "", 0) yield the empty string.
    """
    if not text:
        return ""
    return re.sub(r"\s+", " ", str(text)).strip()
18
+
19
+
20
def extract_text_from_image(file_path):
    """OCR a single image file and return its cleaned text.

    Best-effort by design: any OCR/IO failure yields "" rather than
    aborting resume processing.
    """
    try:
        img = Image.open(file_path)
        text = pytesseract.image_to_string(img)
        return clean_text(text)
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit
        # still propagate.
        return ""
27
+
28
+
29
def extract_text_from_docx(file_path):
    """Extract and clean the paragraph text of a .docx file.

    Returns "" on any failure (missing/corrupt file, legacy .doc
    format that python-docx cannot read).
    """
    try:
        doc = docx.Document(file_path)
        text = " ".join([p.text for p in doc.paragraphs])
        return clean_text(text)
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit
        # still propagate.
        return ""
36
+
37
+
38
def extract_text_from_pdf(file_path):
    """Extract text from a PDF, falling back to OCR for scanned documents.

    First tries PyPDF2 text extraction; if that yields fewer than 100
    characters (likely an image-only/scanned PDF), renders each page to a
    temporary PNG and OCRs it. Returns "" when everything fails.
    """
    text = ""

    try:
        with open(file_path, "rb") as f:
            reader = PyPDF2.PdfReader(f)

            for page in reader.pages:
                page_text = page.extract_text()
                if page_text:
                    text += page_text + " "
    except Exception:
        # Corrupt/encrypted/missing PDF: fall through to the OCR path.
        pass

    # Heuristic: almost no embedded text -> probably a scanned PDF.
    if len(text.strip()) < 100:
        try:
            for image in convert_from_path(file_path):
                with tempfile.NamedTemporaryFile(suffix=".png") as tmp:
                    image.save(tmp.name)
                    # Trailing space keeps words at page edges separated.
                    text += extract_text_from_image(tmp.name) + " "
        except Exception:
            # OCR stack unavailable or rendering failed; keep whatever
            # text (possibly none) we already have.
            pass

    return clean_text(text)
64
+
65
+
66
def parse_resume(file_path):
    """Dispatch resume text extraction based on the file extension.

    Supports PDF, Word (.docx/.doc) and common image formats; any other
    extension yields the empty string.
    """
    extension = os.path.splitext(file_path)[1].lower()

    if extension == ".pdf":
        return extract_text_from_pdf(file_path)
    elif extension in (".docx", ".doc"):
        return extract_text_from_docx(file_path)
    elif extension in (".png", ".jpg", ".jpeg"):
        return extract_text_from_image(file_path)

    return ""