DreamStream-1 committed on
Commit
e616c89
·
verified ·
1 Parent(s): 253f865

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -39
app.py CHANGED
@@ -4,6 +4,8 @@ import docx
4
  import os
5
  from PyPDF2 import PdfReader
6
  import re
 
 
7
 
8
  # Load pre-trained model for sentence embedding
9
  model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
@@ -11,43 +13,11 @@ model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
11
  # Define maximum number of resumes
12
  MAX_RESUMES = 10
13
 
14
- # Function to load job description from file path
15
- def load_job_description(job_desc_file):
16
- if not os.path.exists(job_desc_file):
17
- return "Job description file not found."
18
- with open(job_desc_file, 'r') as file:
19
- job_description = file.read()
20
- if not job_description.strip():
21
- return "Job description is empty."
22
- return job_description
23
-
24
- # Function to check similarity between resumes and job description
25
- def check_similarity(job_description, resume_files):
26
- results = []
27
- job_emb = model.encode(job_description, convert_to_tensor=True)
28
-
29
- for resume_file in resume_files:
30
- resume_text = extract_text_from_resume(resume_file)
31
- if not resume_text:
32
- results.append((resume_file.name, 0, "Not Eligible", None, "No leadership experience"))
33
- continue
34
- resume_emb = model.encode(resume_text, convert_to_tensor=True)
35
- similarity_score = util.pytorch_cos_sim(job_emb, resume_emb)[0][0].item()
36
-
37
- # Convert similarity score to percentage
38
- similarity_percentage = similarity_score * 100
39
-
40
- # Identify leadership experience from resume
41
- leadership_experience = extract_leadership_experience(resume_text)
42
-
43
- # Set a higher similarity threshold for eligibility
44
- if similarity_score >= 0.50:
45
- candidate_name = extract_candidate_name(resume_text)
46
- results.append((resume_file.name, similarity_percentage, "Eligible", candidate_name, leadership_experience))
47
- else:
48
- results.append((resume_file.name, similarity_percentage, "Not Eligible", None, leadership_experience))
49
-
50
- return results
51
 
52
  # Extract text from resume (handles .txt, .pdf, .docx)
53
  def extract_text_from_resume(resume_file):
@@ -90,6 +60,56 @@ def extract_candidate_name(resume_text):
90
  return matches[0] # Returns the first match
91
  return "Unknown Candidate"
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  # Extract leadership experience (looking for keywords like manager, team lead, leadership)
94
  def extract_leadership_experience(resume_text):
95
  leadership_keywords = ['manager', 'management', 'team lead', 'supervised', 'leadership', 'head', 'coordinator']
@@ -98,12 +118,71 @@ def extract_leadership_experience(resume_text):
98
  return "Has leadership experience"
99
  return "No leadership experience"
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  # Gradio Interface Components
102
  job_desc_input = gr.File(label="Upload Job Description (TXT)", type="filepath")
103
  resumes_input = gr.Files(label="Upload Resumes (TXT, DOCX, PDF)", type="filepath")
104
 
105
  # Gradio Outputs
106
- results_output = gr.Dataframe(headers=["Resume File", "Similarity Score (%)", "Eligibility", "Candidate Name", "Leadership Experience"], label="Analysis Results")
 
 
 
 
 
 
 
 
 
107
 
108
  # Gradio Interface
109
  interface = gr.Interface(
@@ -111,7 +190,7 @@ interface = gr.Interface(
111
  inputs=[job_desc_input, resumes_input],
112
  outputs=[results_output],
113
  title="HR Assistant - Resume Screening & Leadership Experience",
114
- description="Upload job description and resumes to screen candidates for managerial and team leadership roles."
115
  )
116
 
117
  interface.launch()
 
4
  import os
5
  from PyPDF2 import PdfReader
6
  import re
7
+ from google.cloud import language_v1
8
+ from google.oauth2 import service_account
9
 
10
  # Load pre-trained model for sentence embedding
11
  model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
 
13
  # Define maximum number of resumes
14
  MAX_RESUMES = 10
15
 
16
+ # Google Cloud NLP Client Initialization
17
+ def init_nlp_client():
18
+ credentials = service_account.Credentials.from_service_account_info(gr.Secret('GOOGLE_API_KEY_SECRET'))
19
+ client = language_v1.LanguageServiceClient(credentials=credentials)
20
+ return client
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  # Extract text from resume (handles .txt, .pdf, .docx)
23
  def extract_text_from_resume(resume_file):
 
60
  return matches[0] # Returns the first match
61
  return "Unknown Candidate"
62
 
63
# Function to extract email and contact from resume using regex
def extract_contact_info(resume_text):
    """Extract the first email address and phone number found in resume text.

    Args:
        resume_text: Plain text of a resume. ``None`` or an empty string
            yields an empty result.

    Returns:
        dict: Contains ``'email'`` when an address is found and ``'contact'``
        when a phone number is found; keys are omitted when nothing matches.
    """
    contact_info = {}
    if not resume_text:
        return contact_info

    # Require a dotted domain so that sentence punctuation right after an
    # address ("mail me at a@b.com.") is not captured as part of the email.
    email_regex = r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+'
    email_match = re.search(email_regex, resume_text)
    if email_match:
        contact_info['email'] = email_match.group(0)  # first email found

    # Basic phone formats: optional "+", country/area groups, then 3-4 and 4
    # digits, each optionally separated by whitespace or a hyphen.
    phone_regex = r'\+?\d{1,4}[\s\-]?\(?\d{1,3}\)?[\s\-]?\d{3,4}[\s\-]?\d{4}'
    phone_match = re.search(phone_regex, resume_text)
    if phone_match:
        contact_info['contact'] = phone_match.group(0)  # first number found

    return contact_info
80
+
81
# Function to extract entities using the Google NLP API
def extract_entities(resume_text):
    """Extract the candidate's name and contact entities via Google Cloud NLP.

    Sends the resume text to the entity-analysis endpoint and keeps the first
    entity of each type of interest, matching the first-match behaviour of
    the regex-based extractors in this file.

    Args:
        resume_text: Plain text of a resume. Empty or ``None`` text returns
            an empty dict without calling the API.

    Returns:
        dict: May contain ``'name'`` (PERSON), ``'contact'`` (PHONE_NUMBER)
        and ``'email'`` (EMAIL) entries; keys are omitted when no entity of
        that type is reported.
    """
    if not resume_text:
        return {}

    client = init_nlp_client()

    # Entity analysis works on the document text alone; it takes no prompt,
    # so only the resume text itself is sent.
    document = language_v1.Document(
        content=resume_text,
        type_=language_v1.Document.Type.PLAIN_TEXT,
    )
    response = client.analyze_entities(request={'document': document})

    # NOTE(review): EMAIL does not appear to be a documented Entity.Type
    # member, so that mapping may never fire -- confirm against the API
    # reference. It is kept for backward compatibility.
    key_by_type = {
        'PERSON': 'name',
        'PHONE_NUMBER': 'contact',
        'EMAIL': 'email',
    }

    entities = {}
    for entity in response.entities:
        entity_type = language_v1.Entity.Type(entity.type_).name
        key = key_by_type.get(entity_type)
        if key is not None:
            # Keep the first entity of each type rather than letting later
            # mentions (e.g. referees' names) overwrite it.
            entities.setdefault(key, entity.name)

    return entities
112
+
113
  # Extract leadership experience (looking for keywords like manager, team lead, leadership)
114
  def extract_leadership_experience(resume_text):
115
  leadership_keywords = ['manager', 'management', 'team lead', 'supervised', 'leadership', 'head', 'coordinator']
 
118
  return "Has leadership experience"
119
  return "No leadership experience"
120
 
121
# Function to check similarity between resumes and job description
def check_similarity(job_description, resume_files):
    """Score each resume against the job description and collect result rows.

    Args:
        job_description: Text of the job description to compare against.
        resume_files: Iterable of uploaded resumes, either path strings or
            objects exposing a ``name`` attribute. ``None`` is treated as
            no resumes.

    Returns:
        list[tuple]: One 7-field row per resume, in the order
        (resume file, similarity %, eligibility, candidate name,
        leadership experience, email, contact).
    """
    results = []
    job_emb = model.encode(job_description, convert_to_tensor=True)

    for resume_file in resume_files or []:
        # Uploads may be plain path strings (no .name) or file-like objects.
        file_name = getattr(resume_file, 'name', resume_file)

        resume_text = extract_text_from_resume(resume_file)
        if not resume_text:
            # Emit all seven columns so the row matches the results table.
            results.append((
                file_name,
                0,
                "Not Eligible",
                None,
                "No leadership experience",
                'No Email',
                'No Contact',
            ))
            continue

        # Check for similarity between resume and job description
        resume_emb = model.encode(resume_text, convert_to_tensor=True)
        similarity_score = util.pytorch_cos_sim(job_emb, resume_emb)[0][0].item()

        # Convert similarity score to percentage
        similarity_percentage = similarity_score * 100

        leadership_experience = extract_leadership_experience(resume_text)
        contact_info = extract_contact_info(resume_text)

        # Set a higher similarity threshold for eligibility
        if similarity_score >= 0.50:
            eligibility = "Eligible"
            # The NLP lookup is a remote call whose result is only used for
            # the name of eligible candidates, so it is made only here.
            nlp_name = extract_entities(resume_text).get('name')
            candidate_name = nlp_name or extract_candidate_name(resume_text)
        else:
            eligibility = "Not Eligible"
            candidate_name = None

        results.append((
            file_name,
            similarity_percentage,
            eligibility,
            candidate_name,
            leadership_experience,
            contact_info.get('email', 'No Email'),
            contact_info.get('contact', 'No Contact'),
        ))

    return results
170
+
171
# Gradio input components: one job-description upload and a multi-file resume upload
job_desc_input = gr.File(
    label="Upload Job Description (TXT)",
    type="filepath",
)
resumes_input = gr.Files(
    label="Upload Resumes (TXT, DOCX, PDF)",
    type="filepath",
)

# Gradio output component: results table with seven named columns
results_output = gr.Dataframe(
    headers=[
        "Resume File",
        "Similarity Score (%)",
        "Eligibility",
        "Candidate Name",
        "Leadership Experience",
        "Email",
        "Contact",
    ],
    label="Analysis Results",
)
186
 
187
  # Gradio Interface
188
  interface = gr.Interface(
 
190
  inputs=[job_desc_input, resumes_input],
191
  outputs=[results_output],
192
  title="HR Assistant - Resume Screening & Leadership Experience",
193
+ description="Upload job description and resumes to screen candidates for managerial and team leadership roles and extract candidate details."
194
  )
195
 
196
  interface.launch()